diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-09-07 11:39:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-09-07 11:39:48 +0000 |
commit | f73e9af131151f1e058446361c35b05c4c90bf10 (patch) | |
tree | ed425b89f12d3f5e4709290bdc03d876f365bc97 /sqlglot/dialects/hive.py | |
parent | Releasing debian version 17.12.0-1. (diff) | |
download | sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.tar.xz sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.zip |
Merging upstream version 18.2.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/hive.py')
-rw-r--r-- | sqlglot/dialects/hive.py | 55 |
1 file changed, 42 insertions, 13 deletions
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 584acc6..8b17c06 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -50,7 +50,7 @@ TIME_DIFF_FACTOR = { DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH") -def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str: +def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str: unit = expression.text("unit").upper() func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1)) @@ -69,7 +69,7 @@ def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateS return self.func(func, expression.this, modified_increment) -def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str: +def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str: unit = expression.text("unit").upper() factor = TIME_DIFF_FACTOR.get(unit) @@ -87,7 +87,7 @@ def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str: return f"{diff_sql}{multiplier_sql}" -def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str: +def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str: this = expression.this if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string: # Since FROM_JSON requires a nested type, we always wrap the json string with @@ -103,21 +103,21 @@ def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> s return self.func("TO_JSON", this, expression.args.get("options")) -def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str: +def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str: if expression.expression: self.unsupported("Hive SORT_ARRAY does not support a comparator") return f"SORT_ARRAY({self.sql(expression, 'this')})" -def _property_sql(self: generator.Generator, expression: exp.Property) -> str: +def _property_sql(self: Hive.Generator, 
expression: exp.Property) -> str: return f"'{expression.name}'={self.sql(expression, 'value')}" -def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str: +def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str: return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression)) -def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str: +def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): @@ -125,7 +125,7 @@ def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> st return f"CAST({this} AS DATE)" -def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str: +def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): @@ -133,13 +133,13 @@ def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> st return f"CAST({this} AS TIMESTAMP)" -def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str: +def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) return f"DATE_FORMAT({this}, {time_format})" -def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str: +def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): @@ -206,6 +206,8 @@ class Hive(Dialect): "MSCK REPAIR": TokenType.COMMAND, "REFRESH": TokenType.COMMAND, "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, + "TIMESTAMP AS OF": 
TokenType.TIMESTAMP_SNAPSHOT, + "VERSION AS OF": TokenType.VERSION_SNAPSHOT, } NUMERIC_LITERALS = { @@ -220,6 +222,7 @@ class Hive(Dialect): class Parser(parser.Parser): LOG_DEFAULTS_TO_LN = True STRICT_CAST = False + SUPPORTS_USER_DEFINED_TYPES = False FUNCTIONS = { **parser.Parser.FUNCTIONS, @@ -257,6 +260,11 @@ class Hive(Dialect): ), "SIZE": exp.ArraySize.from_arg_list, "SPLIT": exp.RegexpSplit.from_arg_list, + "STR_TO_MAP": lambda args: exp.StrToMap( + this=seq_get(args, 0), + pair_delim=seq_get(args, 1) or exp.Literal.string(","), + key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), + ), "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), "TO_JSON": exp.JSONFormat.from_arg_list, "UNBASE64": exp.FromBase64.from_arg_list, @@ -313,7 +321,7 @@ class Hive(Dialect): ) def _parse_types( - self, check_func: bool = False, schema: bool = False + self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True ) -> t.Optional[exp.Expression]: """ Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to @@ -333,7 +341,9 @@ class Hive(Dialect): Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html """ - this = super()._parse_types(check_func=check_func, schema=schema) + this = super()._parse_types( + check_func=check_func, schema=schema, allow_identifiers=allow_identifiers + ) if this and not schema: return this.transform( @@ -345,6 +355,16 @@ class Hive(Dialect): return this + def _parse_partition_and_order( + self, + ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: + return ( + self._parse_csv(self._parse_conjunction) + if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) + else [], + super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), + ) + class Generator(generator.Generator): LIMIT_FETCH = "LIMIT" TABLESAMPLE_WITH_METHOD = False @@ -354,6 +374,7 @@ class Hive(Dialect): QUERY_HINTS = False INDEX_ON = "ON TABLE" EXTRACT_ALLOWS_QUOTES 
= False + NVL2_SUPPORTED = False TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, @@ -376,6 +397,7 @@ class Hive(Dialect): ] ), exp.Property: _property_sql, + exp.AnyValue: rename_func("FIRST"), exp.ApproxDistinct: approx_count_distinct_sql, exp.ArrayConcat: rename_func("CONCAT"), exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), @@ -402,6 +424,9 @@ class Hive(Dialect): exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), exp.Min: min_or_least, exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), + exp.NotNullColumnConstraint: lambda self, e: "" + if e.args.get("allow_null") + else "NOT NULL", exp.VarMap: var_map_sql, exp.Create: create_with_partitions_sql, exp.Quantile: rename_func("PERCENTILE"), @@ -472,7 +497,7 @@ class Hive(Dialect): elif expression.this in exp.DataType.TEMPORAL_TYPES: expression = exp.DataType.build(expression.this) elif expression.is_type("float"): - size_expression = expression.find(exp.DataTypeSize) + size_expression = expression.find(exp.DataTypeParam) if size_expression: size = int(size_expression.name) expression = ( @@ -480,3 +505,7 @@ class Hive(Dialect): ) return super().datatype_sql(expression) + + def version_sql(self, expression: exp.Version) -> str: + sql = super().version_sql(expression) + return sql.replace("FOR ", "", 1) |