diff options
author:    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-02-08 05:38:42 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org>  2024-02-08 05:38:42 +0000
commit:   c66e4a33e1a07c439f03fe47f146a6c6482bf6df (patch)
tree:     cfdf01111c063b3e50841695e6c2768833aea4dc /sqlglot/dialects/duckdb.py
parent:   Releasing debian version 20.11.0-1. (diff)
download: sqlglot-c66e4a33e1a07c439f03fe47f146a6c6482bf6df.tar.xz sqlglot-c66e4a33e1a07c439f03fe47f146a6c6482bf6df.zip
Merging upstream version 21.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/duckdb.py')
-rw-r--r--  sqlglot/dialects/duckdb.py | 65
1 files changed, 46 insertions, 19 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index f55ad70..d7ba729 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -8,7 +8,6 @@ from sqlglot.dialects.dialect import ( NormalizationStrategy, approx_count_distinct_sql, arg_max_or_min_no_count, - arrow_json_extract_scalar_sql, arrow_json_extract_sql, binary_from_function, bool_xor_sql, @@ -18,11 +17,9 @@ from sqlglot.dialects.dialect import ( format_time_lambda, inline_array_sql, no_comment_column_constraint_sql, - no_properties_sql, no_safe_divide_sql, no_timestamp_sql, pivot_column_names, - prepend_dollar_to_path, regexp_extract_sql, rename_func, str_position_sql, @@ -172,6 +169,18 @@ class DuckDB(Dialect): # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE + def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + if isinstance(path, exp.Literal): + # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. + # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. + # This check ensures we'll avoid trying to parse these as JSON paths, which can + # either result in a noisy warning or in an invalid representation of the path. 
+ path_text = path.name + if path_text.startswith("/") or "[#" in path_text: + return path + + return super().to_json_path(path) + class Tokenizer(tokens.Tokenizer): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, @@ -229,6 +238,8 @@ class DuckDB(Dialect): this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS ), "JSON": exp.ParseJSON.from_arg_list, + "JSON_EXTRACT_PATH": parser.parse_extract_json_with_path(exp.JSONExtract), + "JSON_EXTRACT_STRING": parser.parse_extract_json_with_path(exp.JSONExtractScalar), "LIST_HAS": exp.ArrayContains.from_arg_list, "LIST_REVERSE_SORT": _sort_array_reverse, "LIST_SORT": exp.SortArray.from_arg_list, @@ -319,6 +330,9 @@ class DuckDB(Dialect): TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" LAST_DAY_SUPPORTS_DATE_PART = False JSON_KEY_VALUE_PAIR_SEP = "," + IGNORE_NULLS_IN_FUNC = True + JSON_PATH_BRACKETED_KEY_SUPPORTED = False + SUPPORTS_CREATE_TABLE_LIKE = False TRANSFORMS = { **generator.Generator.TRANSFORMS, @@ -350,18 +364,18 @@ class DuckDB(Dialect): "DATE_DIFF", f"'{e.args.get('unit') or 'DAY'}'", e.expression, e.this ), exp.DateStrToDate: datestrtodate_sql, - exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", + exp.DateToDi: lambda self, + e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), - exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", + exp.DiToDate: lambda self, + e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), exp.Explode: rename_func("UNNEST"), exp.IntDiv: lambda self, e: self.binary(e, "//"), exp.IsInf: rename_func("ISINF"), exp.IsNan: rename_func("ISNAN"), - exp.JSONBExtract: arrow_json_extract_sql, - exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, exp.JSONExtract: 
arrow_json_extract_sql, - exp.JSONExtractScalar: arrow_json_extract_scalar_sql, + exp.JSONExtractScalar: arrow_json_extract_sql, exp.JSONFormat: _json_format_sql, exp.LogicalOr: rename_func("BOOL_OR"), exp.LogicalAnd: rename_func("BOOL_AND"), @@ -377,7 +391,6 @@ class DuckDB(Dialect): # DuckDB doesn't allow qualified columns inside of PIVOT expressions. # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), - exp.Properties: no_properties_sql, exp.RegexpExtract: regexp_extract_sql, exp.RegexpReplace: lambda self, e: self.func( "REGEXP_REPLACE", @@ -395,7 +408,8 @@ class DuckDB(Dialect): exp.StrPosition: str_position_sql, exp.StrToDate: lambda self, e: f"CAST({str_to_time_sql(self, e)} AS DATE)", exp.StrToTime: str_to_time_sql, - exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", + exp.StrToUnix: lambda self, + e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", exp.Struct: _struct_sql, exp.Timestamp: no_timestamp_sql, exp.TimestampDiff: lambda self, e: self.func( @@ -405,9 +419,11 @@ class DuckDB(Dialect): exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: lambda self, e: f"EPOCH(CAST({self.sql(e, 'this')} AS TIMESTAMP))", - exp.TimeToStr: lambda self, e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeToStr: lambda self, + e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("EPOCH"), - exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", + exp.TsOrDiToDi: lambda self, + e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", exp.TsOrDsAdd: _ts_or_ds_add_sql, exp.TsOrDsDiff: lambda self, e: self.func( "DATE_DIFF", @@ -415,7 
+431,8 @@ class DuckDB(Dialect): exp.cast(e.expression, "TIMESTAMP"), exp.cast(e.this, "TIMESTAMP"), ), - exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", + exp.UnixToStr: lambda self, + e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", exp.UnixToTime: _unix_to_time_sql, exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", exp.VariancePop: rename_func("VAR_POP"), @@ -423,6 +440,13 @@ class DuckDB(Dialect): exp.Xor: bool_xor_sql, } + SUPPORTED_JSON_PATH_PARTS = { + exp.JSONPathKey, + exp.JSONPathRoot, + exp.JSONPathSubscript, + exp.JSONPathWildcard, + } + TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DataType.Type.BINARY: "BLOB", @@ -442,11 +466,18 @@ class DuckDB(Dialect): UNWRAPPED_INTERVAL_VALUES = (exp.Column, exp.Literal, exp.Paren) + # DuckDB doesn't generally support CREATE TABLE .. properties + # https://duckdb.org/docs/sql/statements/create_table.html PROPERTIES_LOCATION = { - **generator.Generator.PROPERTIES_LOCATION, - exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, + prop: exp.Properties.Location.UNSUPPORTED + for prop in generator.Generator.PROPERTIES_LOCATION } + # There are a few exceptions (e.g. 
temporary tables) which are supported or + # can be transpiled to DuckDB, so we explicitly override them accordingly + PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA + PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE + def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: nano = expression.args.get("nano") if nano is not None: @@ -486,10 +517,6 @@ class DuckDB(Dialect): expression, sep=sep, tablesample_keyword=tablesample_keyword ) - def getpath_sql(self, expression: exp.GetPath) -> str: - expression = prepend_dollar_to_path(expression) - return f"{self.sql(expression, 'this')} -> {self.sql(expression, 'expression')}" - def interval_sql(self, expression: exp.Interval) -> str: multiplier: t.Optional[int] = None unit = expression.text("unit").lower() |