diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-09-07 11:39:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-09-07 11:39:48 +0000 |
commit | f73e9af131151f1e058446361c35b05c4c90bf10 (patch) | |
tree | ed425b89f12d3f5e4709290bdc03d876f365bc97 /sqlglot/dialects/hive.py | |
parent | Releasing debian version 17.12.0-1. (diff) | |
download | sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.tar.xz sqlglot-f73e9af131151f1e058446361c35b05c4c90bf10.zip |
Merging upstream version 18.2.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/hive.py')
-rw-r--r-- | sqlglot/dialects/hive.py | 55 |
1 file changed, 42 insertions, 13 deletions
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 584acc6..8b17c06 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -50,7 +50,7 @@ TIME_DIFF_FACTOR = { DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH") -def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str: +def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str: unit = expression.text("unit").upper() func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1)) @@ -69,7 +69,7 @@ def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateS return self.func(func, expression.this, modified_increment) -def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str: +def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str: unit = expression.text("unit").upper() factor = TIME_DIFF_FACTOR.get(unit) @@ -87,7 +87,7 @@ def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str: return f"{diff_sql}{multiplier_sql}" -def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str: +def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str: this = expression.this if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string: # Since FROM_JSON requires a nested type, we always wrap the json string with @@ -103,21 +103,21 @@ def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> s return self.func("TO_JSON", this, expression.args.get("options")) -def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str: +def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str: if expression.expression: self.unsupported("Hive SORT_ARRAY does not support a comparator") return f"SORT_ARRAY({self.sql(expression, 'this')})" -def _property_sql(self: generator.Generator, expression: exp.Property) -> str: +def _property_sql(self: Hive.Generator, 
expression: exp.Property) -> str: return f"'{expression.name}'={self.sql(expression, 'value')}" -def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str: +def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str: return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression)) -def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str: +def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): @@ -125,7 +125,7 @@ def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> st return f"CAST({this} AS DATE)" -def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str: +def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): @@ -133,13 +133,13 @@ def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> st return f"CAST({this} AS TIMESTAMP)" -def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str: +def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) return f"DATE_FORMAT({this}, {time_format})" -def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str: +def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str: this = self.sql(expression, "this") time_format = self.format_time(expression) if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): @@ -206,6 +206,8 @@ class Hive(Dialect): "MSCK REPAIR": TokenType.COMMAND, "REFRESH": TokenType.COMMAND, "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, + "TIMESTAMP AS OF": 
TokenType.TIMESTAMP_SNAPSHOT, + "VERSION AS OF": TokenType.VERSION_SNAPSHOT, } NUMERIC_LITERALS = { @@ -220,6 +222,7 @@ class Hive(Dialect): class Parser(parser.Parser): LOG_DEFAULTS_TO_LN = True STRICT_CAST = False + SUPPORTS_USER_DEFINED_TYPES = False FUNCTIONS = { **parser.Parser.FUNCTIONS, @@ -257,6 +260,11 @@ class Hive(Dialect): ), "SIZE": exp.ArraySize.from_arg_list, "SPLIT": exp.RegexpSplit.from_arg_list, + "STR_TO_MAP": lambda args: exp.StrToMap( + this=seq_get(args, 0), + pair_delim=seq_get(args, 1) or exp.Literal.string(","), + key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), + ), "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), "TO_JSON": exp.JSONFormat.from_arg_list, "UNBASE64": exp.FromBase64.from_arg_list, @@ -313,7 +321,7 @@ class Hive(Dialect): ) def _parse_types( - self, check_func: bool = False, schema: bool = False + self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True ) -> t.Optional[exp.Expression]: """ Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to @@ -333,7 +341,9 @@ class Hive(Dialect): Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html """ - this = super()._parse_types(check_func=check_func, schema=schema) + this = super()._parse_types( + check_func=check_func, schema=schema, allow_identifiers=allow_identifiers + ) if this and not schema: return this.transform( @@ -345,6 +355,16 @@ class Hive(Dialect): return this + def _parse_partition_and_order( + self, + ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: + return ( + self._parse_csv(self._parse_conjunction) + if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) + else [], + super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), + ) + class Generator(generator.Generator): LIMIT_FETCH = "LIMIT" TABLESAMPLE_WITH_METHOD = False @@ -354,6 +374,7 @@ class Hive(Dialect): QUERY_HINTS = False INDEX_ON = "ON TABLE" EXTRACT_ALLOWS_QUOTES 
= False + NVL2_SUPPORTED = False TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, @@ -376,6 +397,7 @@ class Hive(Dialect): ] ), exp.Property: _property_sql, + exp.AnyValue: rename_func("FIRST"), exp.ApproxDistinct: approx_count_distinct_sql, exp.ArrayConcat: rename_func("CONCAT"), exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), @@ -402,6 +424,9 @@ class Hive(Dialect): exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), exp.Min: min_or_least, exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), + exp.NotNullColumnConstraint: lambda self, e: "" + if e.args.get("allow_null") + else "NOT NULL", exp.VarMap: var_map_sql, exp.Create: create_with_partitions_sql, exp.Quantile: rename_func("PERCENTILE"), @@ -472,7 +497,7 @@ class Hive(Dialect): elif expression.this in exp.DataType.TEMPORAL_TYPES: expression = exp.DataType.build(expression.this) elif expression.is_type("float"): - size_expression = expression.find(exp.DataTypeSize) + size_expression = expression.find(exp.DataTypeParam) if size_expression: size = int(size_expression.name) expression = ( @@ -480,3 +505,7 @@ class Hive(Dialect): ) return super().datatype_sql(expression) + + def version_sql(self, expression: exp.Version) -> str: + sql = super().version_sql(expression) + return sql.replace("FOR ", "", 1) |