diff options
author | Daniel Baumann <mail@daniel-baumann.ch> | 2023-12-10 10:46:01 +0000 |
---|---|---|
committer | Daniel Baumann <mail@daniel-baumann.ch> | 2023-12-10 10:46:01 +0000 |
commit | 8fe30fd23dc37ec3516e530a86d1c4b604e71241 (patch) | |
tree | 6e2ebbf565b0351fd0f003f488a8339e771ad90c /sqlglot/dialects/duckdb.py | |
parent | Releasing debian version 19.0.1-1. (diff) | |
download | sqlglot-8fe30fd23dc37ec3516e530a86d1c4b604e71241.tar.xz sqlglot-8fe30fd23dc37ec3516e530a86d1c4b604e71241.zip |
Merging upstream version 20.1.0.
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
Diffstat (limited to 'sqlglot/dialects/duckdb.py')
-rw-r--r-- | sqlglot/dialects/duckdb.py | 71 |
1 files changed, 51 insertions, 20 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index d8d9f90..b94e3a6 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -2,9 +2,10 @@ from __future__ import annotations import typing as t -from sqlglot import exp, generator, parser, tokens +from sqlglot import exp, generator, parser, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, + NormalizationStrategy, approx_count_distinct_sql, arg_max_or_min_no_count, arrow_json_extract_scalar_sql, @@ -36,7 +37,8 @@ from sqlglot.tokens import TokenType def _ts_or_ds_add_sql(self: DuckDB.Generator, expression: exp.TsOrDsAdd) -> str: this = self.sql(expression, "this") unit = self.sql(expression, "unit").strip("'") or "DAY" - return f"CAST({this} AS DATE) + {self.sql(exp.Interval(this=expression.expression, unit=unit))}" + interval = self.sql(exp.Interval(this=expression.expression, unit=unit)) + return f"CAST({this} AS {self.sql(expression.return_type)}) + {interval}" def _date_delta_sql(self: DuckDB.Generator, expression: exp.DateAdd | exp.DateSub) -> str: @@ -84,7 +86,8 @@ def _parse_date_diff(args: t.List) -> exp.Expression: def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str: args = [ - f"'{e.name or e.this.name}': {self.sql(e, 'expression')}" for e in expression.expressions + f"'{e.name or e.this.name}': {self.sql(e.expressions[0]) if isinstance(e, exp.Bracket) else self.sql(e, 'expression')}" + for e in expression.expressions ] return f"{{{', '.join(args)}}}" @@ -105,17 +108,35 @@ def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str: return f"CAST({sql} AS TEXT)" +def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str: + scale = expression.args.get("scale") + timestamp = self.sql(expression, "this") + if scale in (None, exp.UnixToTime.SECONDS): + return f"TO_TIMESTAMP({timestamp})" + if scale == exp.UnixToTime.MILLIS: + return f"EPOCH_MS({timestamp})" + if scale == exp.UnixToTime.MICROS: + return f"MAKE_TIMESTAMP({timestamp})" + if scale == exp.UnixToTime.NANOS: + return f"TO_TIMESTAMP({timestamp} / 1000000000)" + + self.unsupported(f"Unsupported scale for timestamp: {scale}.") + return "" + + class DuckDB(Dialect): NULL_ORDERING = "nulls_are_last" SUPPORTS_USER_DEFINED_TYPES = False + SAFE_DIVISION = True + INDEX_OFFSET = 1 + CONCAT_COALESCE = True # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table - RESOLVES_IDENTIFIERS_AS_UPPERCASE = None + NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE class Tokenizer(tokens.Tokenizer): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, - ":=": TokenType.EQ, "//": TokenType.DIV, "ATTACH": TokenType.COMMAND, "BINARY": TokenType.VARBINARY, @@ -124,8 +145,6 @@ class DuckDB(Dialect): "CHAR": TokenType.TEXT, "CHARACTER VARYING": TokenType.TEXT, "EXCLUDE": TokenType.EXCEPT, - "HUGEINT": TokenType.INT128, - "INT1": TokenType.TINYINT, "LOGICAL": TokenType.BOOLEAN, "PIVOT_WIDER": TokenType.PIVOT, "SIGNED": TokenType.INT, @@ -141,8 +160,6 @@ class DuckDB(Dialect): } class Parser(parser.Parser): - CONCAT_NULL_OUTPUTS_STRING = True - BITWISE = { **parser.Parser.BITWISE, TokenType.TILDA: exp.RegexpLike, @@ -150,6 +167,7 @@ class DuckDB(Dialect): FUNCTIONS = { **parser.Parser.FUNCTIONS, + "ARRAY_HAS": exp.ArrayContains.from_arg_list, "ARRAY_LENGTH": exp.ArraySize.from_arg_list, "ARRAY_SORT": exp.SortArray.from_arg_list, "ARRAY_REVERSE_SORT": _sort_array_reverse, @@ -157,13 +175,23 @@ class DuckDB(Dialect): "DATE_DIFF": _parse_date_diff, "DATE_TRUNC": date_trunc_to_time, "DATETRUNC": date_trunc_to_time, + "DECODE": lambda args: exp.Decode( + this=seq_get(args, 0), charset=exp.Literal.string("utf-8") + ), + "ENCODE": lambda args: exp.Encode( + this=seq_get(args, 0), charset=exp.Literal.string("utf-8") + ), "EPOCH": exp.TimeToUnix.from_arg_list, "EPOCH_MS": lambda args: exp.UnixToTime( - this=exp.Div(this=seq_get(args, 0), expression=exp.Literal.number(1000)) + this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS ), + "LIST_HAS": exp.ArrayContains.from_arg_list, "LIST_REVERSE_SORT": _sort_array_reverse, "LIST_SORT": exp.SortArray.from_arg_list, "LIST_VALUE": exp.Array.from_arg_list, + "MAKE_TIMESTAMP": lambda args: exp.UnixToTime( + this=seq_get(args, 0), scale=exp.UnixToTime.MICROS + ), "MEDIAN": lambda args: exp.PercentileCont( this=seq_get(args, 0), expression=exp.Literal.number(0.5) ), @@ -192,15 +220,8 @@ class DuckDB(Dialect): "XOR": binary_from_function(exp.BitwiseXor), } - FUNCTION_PARSERS = { - **parser.Parser.FUNCTION_PARSERS, - "DECODE": lambda self: self.expression( - exp.Decode, this=self._parse_conjunction(), charset=exp.Literal.string("utf-8") - ), - "ENCODE": lambda self: self.expression( - exp.Encode, this=self._parse_conjunction(), charset=exp.Literal.string("utf-8") - ), - } + FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() + FUNCTION_PARSERS.pop("DECODE", None) TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { TokenType.SEMI, @@ -277,6 +298,7 @@ class DuckDB(Dialect): exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), exp.Explode: rename_func("UNNEST"), exp.IntDiv: lambda self, e: self.binary(e, "//"), + exp.IsInf: rename_func("ISINF"), exp.IsNan: rename_func("ISNAN"), exp.JSONExtract: arrow_json_extract_sql, exp.JSONExtractScalar: arrow_json_extract_scalar_sql, @@ -294,6 +316,9 @@ class DuckDB(Dialect): exp.ParseJSON: rename_func("JSON"), exp.PercentileCont: rename_func("QUANTILE_CONT"), exp.PercentileDisc: rename_func("QUANTILE_DISC"), + # DuckDB doesn't allow qualified columns inside of PIVOT expressions. + # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 + exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), exp.Properties: no_properties_sql, exp.RegexpExtract: regexp_extract_sql, exp.RegexpReplace: lambda self, e: self.func( @@ -322,9 +347,15 @@ class DuckDB(Dialect): exp.TimeToUnix: rename_func("EPOCH"), exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", exp.TsOrDsAdd: _ts_or_ds_add_sql, + exp.TsOrDsDiff: lambda self, e: self.func( + "DATE_DIFF", + f"'{e.args.get('unit') or 'day'}'", + exp.cast(e.expression, "TIMESTAMP"), + exp.cast(e.this, "TIMESTAMP"), + ), exp.TsOrDsToDate: ts_or_ds_to_date_sql("duckdb"), exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", - exp.UnixToTime: rename_func("TO_TIMESTAMP"), + exp.UnixToTime: _unix_to_time_sql, exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", exp.VariancePop: rename_func("VAR_POP"), exp.WeekOfYear: rename_func("WEEKOFYEAR"), |