diff options
author:    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-02-08 05:38:42 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>  2024-02-08 05:38:42 +0000
commit:   c66e4a33e1a07c439f03fe47f146a6c6482bf6df (patch)
tree:     cfdf01111c063b3e50841695e6c2768833aea4dc /sqlglot/dialects/duckdb.py
parent:   Releasing debian version 20.11.0-1. (diff)
download: sqlglot-c66e4a33e1a07c439f03fe47f146a6c6482bf6df.tar.xz sqlglot-c66e4a33e1a07c439f03fe47f146a6c6482bf6df.zip
Merging upstream version 21.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/duckdb.py')
-rw-r--r--  sqlglot/dialects/duckdb.py | 65
1 files changed, 46 insertions, 19 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index f55ad70..d7ba729 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -8,7 +8,6 @@ from sqlglot.dialects.dialect import ( NormalizationStrategy, approx_count_distinct_sql, arg_max_or_min_no_count, - arrow_json_extract_scalar_sql, arrow_json_extract_sql, binary_from_function, bool_xor_sql, @@ -18,11 +17,9 @@ from sqlglot.dialects.dialect import ( format_time_lambda, inline_array_sql, no_comment_column_constraint_sql, - no_properties_sql, no_safe_divide_sql, no_timestamp_sql, pivot_column_names, - prepend_dollar_to_path, regexp_extract_sql, rename_func, str_position_sql, @@ -172,6 +169,18 @@ class DuckDB(Dialect): # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE + def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + if isinstance(path, exp.Literal): + # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. + # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. + # This check ensures we'll avoid trying to parse these as JSON paths, which can + # either result in a noisy warning or in an invalid representation of the path. 
+ path_text = path.name + if path_text.startswith("/") or "[#" in path_text: + return path + + return super().to_json_path(path) + class Tokenizer(tokens.Tokenizer): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, @@ -229,6 +238,8 @@ class DuckDB(Dialect): this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS ), "JSON": exp.ParseJSON.from_arg_list, + "JSON_EXTRACT_PATH": parser.parse_extract_json_with_path(exp.JSONExtract), + "JSON_EXTRACT_STRING": parser.parse_extract_json_with_path(exp.JSONExtractScalar), "LIST_HAS": exp.ArrayContains.from_arg_list, "LIST_REVERSE_SORT": _sort_array_reverse, "LIST_SORT": exp.SortArray.from_arg_list, @@ -319,6 +330,9 @@ class DuckDB(Dialect): TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" LAST_DAY_SUPPORTS_DATE_PART = False JSON_KEY_VALUE_PAIR_SEP = "," + IGNORE_NULLS_IN_FUNC = True + JSON_PATH_BRACKETED_KEY_SUPPORTED = False + SUPPORTS_CREATE_TABLE_LIKE = False TRANSFORMS = { **generator.Generator.TRANSFORMS, @@ -350,18 +364,18 @@ class DuckDB(Dialect): "DATE_DIFF", f"'{e.args.get('unit') or 'DAY'}'", e.expression, e.this ), exp.DateStrToDate: datestrtodate_sql, - exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", + exp.DateToDi: lambda self, + e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), - exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", + exp.DiToDate: lambda self, + e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), exp.Explode: rename_func("UNNEST"), exp.IntDiv: lambda self, e: self.binary(e, "//"), exp.IsInf: rename_func("ISINF"), exp.IsNan: rename_func("ISNAN"), - exp.JSONBExtract: arrow_json_extract_sql, - exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, exp.JSONExtract: 
arrow_json_extract_sql, - exp.JSONExtractScalar: arrow_json_extract_scalar_sql, + exp.JSONExtractScalar: arrow_json_extract_sql, exp.JSONFormat: _json_format_sql, exp.LogicalOr: rename_func("BOOL_OR"), exp.LogicalAnd: rename_func("BOOL_AND"), @@ -377,7 +391,6 @@ class DuckDB(Dialect): # DuckDB doesn't allow qualified columns inside of PIVOT expressions. # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), - exp.Properties: no_properties_sql, exp.RegexpExtract: regexp_extract_sql, exp.RegexpReplace: lambda self, e: self.func( "REGEXP_REPLACE", @@ -395,7 +408,8 @@ class DuckDB(Dialect): exp.StrPosition: str_position_sql, exp.StrToDate: lambda self, e: f"CAST({str_to_time_sql(self, e)} AS DATE)", exp.StrToTime: str_to_time_sql, - exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", + exp.StrToUnix: lambda self, + e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", exp.Struct: _struct_sql, exp.Timestamp: no_timestamp_sql, exp.TimestampDiff: lambda self, e: self.func( @@ -405,9 +419,11 @@ class DuckDB(Dialect): exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: lambda self, e: f"EPOCH(CAST({self.sql(e, 'this')} AS TIMESTAMP))", - exp.TimeToStr: lambda self, e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeToStr: lambda self, + e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("EPOCH"), - exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", + exp.TsOrDiToDi: lambda self, + e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", exp.TsOrDsAdd: _ts_or_ds_add_sql, exp.TsOrDsDiff: lambda self, e: self.func( "DATE_DIFF", @@ -415,7 
+431,8 @@ class DuckDB(Dialect): exp.cast(e.expression, "TIMESTAMP"), exp.cast(e.this, "TIMESTAMP"), ), - exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", + exp.UnixToStr: lambda self, + e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", exp.UnixToTime: _unix_to_time_sql, exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", exp.VariancePop: rename_func("VAR_POP"), @@ -423,6 +440,13 @@ class DuckDB(Dialect): exp.Xor: bool_xor_sql, } + SUPPORTED_JSON_PATH_PARTS = { + exp.JSONPathKey, + exp.JSONPathRoot, + exp.JSONPathSubscript, + exp.JSONPathWildcard, + } + TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DataType.Type.BINARY: "BLOB", @@ -442,11 +466,18 @@ class DuckDB(Dialect): UNWRAPPED_INTERVAL_VALUES = (exp.Column, exp.Literal, exp.Paren) + # DuckDB doesn't generally support CREATE TABLE .. properties + # https://duckdb.org/docs/sql/statements/create_table.html PROPERTIES_LOCATION = { - **generator.Generator.PROPERTIES_LOCATION, - exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, + prop: exp.Properties.Location.UNSUPPORTED + for prop in generator.Generator.PROPERTIES_LOCATION } + # There are a few exceptions (e.g. 
temporary tables) which are supported or + # can be transpiled to DuckDB, so we explicitly override them accordingly + PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA + PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE + def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: nano = expression.args.get("nano") if nano is not None: @@ -486,10 +517,6 @@ class DuckDB(Dialect): expression, sep=sep, tablesample_keyword=tablesample_keyword ) - def getpath_sql(self, expression: exp.GetPath) -> str: - expression = prepend_dollar_to_path(expression) - return f"{self.sql(expression, 'this')} -> {self.sql(expression, 'expression')}" - def interval_sql(self, expression: exp.Interval) -> str: multiplier: t.Optional[int] = None unit = expression.text("unit").lower() |