summaryrefslogtreecommitdiffstats
path: root/sqlglot/dialects/hive.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2023-09-07 11:39:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2023-09-07 11:39:48 +0000
commitf73e9af131151f1e058446361c35b05c4c90bf10 (patch)
treeed425b89f12d3f5e4709290bdc03d876f365bc97 /sqlglot/dialects/hive.py
parentReleasing debian version 17.12.0-1. (diff)
downloadsqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.tar.xz
sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.zip
Merging upstream version 18.2.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/hive.py')
-rw-r--r--sqlglot/dialects/hive.py55
1 file changed, 42 insertions(+), 13 deletions(-)
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py
index 584acc6..8b17c06 100644
--- a/sqlglot/dialects/hive.py
+++ b/sqlglot/dialects/hive.py
@@ -50,7 +50,7 @@ TIME_DIFF_FACTOR = {
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")
-def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
+def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
unit = expression.text("unit").upper()
func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))
@@ -69,7 +69,7 @@ def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateS
return self.func(func, expression.this, modified_increment)
-def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
+def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
unit = expression.text("unit").upper()
factor = TIME_DIFF_FACTOR.get(unit)
@@ -87,7 +87,7 @@ def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
return f"{diff_sql}{multiplier_sql}"
-def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
+def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
this = expression.this
if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
# Since FROM_JSON requires a nested type, we always wrap the json string with
@@ -103,21 +103,21 @@ def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> s
return self.func("TO_JSON", this, expression.args.get("options"))
-def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
+def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
if expression.expression:
self.unsupported("Hive SORT_ARRAY does not support a comparator")
return f"SORT_ARRAY({self.sql(expression, 'this')})"
-def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
+def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
return f"'{expression.name}'={self.sql(expression, 'value')}"
-def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
+def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))
-def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
+def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
this = self.sql(expression, "this")
time_format = self.format_time(expression)
if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
@@ -125,7 +125,7 @@ def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> st
return f"CAST({this} AS DATE)"
-def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
+def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
this = self.sql(expression, "this")
time_format = self.format_time(expression)
if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
@@ -133,13 +133,13 @@ def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> st
return f"CAST({this} AS TIMESTAMP)"
-def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
+def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
this = self.sql(expression, "this")
time_format = self.format_time(expression)
return f"DATE_FORMAT({this}, {time_format})"
-def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
+def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
this = self.sql(expression, "this")
time_format = self.format_time(expression)
if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
@@ -206,6 +206,8 @@ class Hive(Dialect):
"MSCK REPAIR": TokenType.COMMAND,
"REFRESH": TokenType.COMMAND,
"WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
+ "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
+ "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
}
NUMERIC_LITERALS = {
@@ -220,6 +222,7 @@ class Hive(Dialect):
class Parser(parser.Parser):
LOG_DEFAULTS_TO_LN = True
STRICT_CAST = False
+ SUPPORTS_USER_DEFINED_TYPES = False
FUNCTIONS = {
**parser.Parser.FUNCTIONS,
@@ -257,6 +260,11 @@ class Hive(Dialect):
),
"SIZE": exp.ArraySize.from_arg_list,
"SPLIT": exp.RegexpSplit.from_arg_list,
+ "STR_TO_MAP": lambda args: exp.StrToMap(
+ this=seq_get(args, 0),
+ pair_delim=seq_get(args, 1) or exp.Literal.string(","),
+ key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
+ ),
"TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
"TO_JSON": exp.JSONFormat.from_arg_list,
"UNBASE64": exp.FromBase64.from_arg_list,
@@ -313,7 +321,7 @@ class Hive(Dialect):
)
def _parse_types(
- self, check_func: bool = False, schema: bool = False
+ self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
"""
Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
@@ -333,7 +341,9 @@ class Hive(Dialect):
Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
"""
- this = super()._parse_types(check_func=check_func, schema=schema)
+ this = super()._parse_types(
+ check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
+ )
if this and not schema:
return this.transform(
@@ -345,6 +355,16 @@ class Hive(Dialect):
return this
+ def _parse_partition_and_order(
+ self,
+ ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
+ return (
+ self._parse_csv(self._parse_conjunction)
+ if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
+ else [],
+ super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
+ )
+
class Generator(generator.Generator):
LIMIT_FETCH = "LIMIT"
TABLESAMPLE_WITH_METHOD = False
@@ -354,6 +374,7 @@ class Hive(Dialect):
QUERY_HINTS = False
INDEX_ON = "ON TABLE"
EXTRACT_ALLOWS_QUOTES = False
+ NVL2_SUPPORTED = False
TYPE_MAPPING = {
**generator.Generator.TYPE_MAPPING,
@@ -376,6 +397,7 @@ class Hive(Dialect):
]
),
exp.Property: _property_sql,
+ exp.AnyValue: rename_func("FIRST"),
exp.ApproxDistinct: approx_count_distinct_sql,
exp.ArrayConcat: rename_func("CONCAT"),
exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
@@ -402,6 +424,9 @@ class Hive(Dialect):
exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
exp.Min: min_or_least,
exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
+ exp.NotNullColumnConstraint: lambda self, e: ""
+ if e.args.get("allow_null")
+ else "NOT NULL",
exp.VarMap: var_map_sql,
exp.Create: create_with_partitions_sql,
exp.Quantile: rename_func("PERCENTILE"),
@@ -472,7 +497,7 @@ class Hive(Dialect):
elif expression.this in exp.DataType.TEMPORAL_TYPES:
expression = exp.DataType.build(expression.this)
elif expression.is_type("float"):
- size_expression = expression.find(exp.DataTypeSize)
+ size_expression = expression.find(exp.DataTypeParam)
if size_expression:
size = int(size_expression.name)
expression = (
@@ -480,3 +505,7 @@ class Hive(Dialect):
)
return super().datatype_sql(expression)
+
+ def version_sql(self, expression: exp.Version) -> str:
+ sql = super().version_sql(expression)
+ return sql.replace("FOR ", "", 1)