diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-01-23 05:06:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-01-23 05:06:14 +0000 |
commit | 38e6461a8afbd7cb83709ddb998f03d40ba87755 (patch) | |
tree | 64b68a893a3b946111b9cab69503f83ca233c335 /sqlglot/dialects/duckdb.py | |
parent | Releasing debian version 20.4.0-1. (diff) | |
download | sqlglot-38e6461a8afbd7cb83709ddb998f03d40ba87755.tar.xz sqlglot-38e6461a8afbd7cb83709ddb998f03d40ba87755.zip |
Merging upstream version 20.9.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/duckdb.py')
-rw-r--r-- | sqlglot/dialects/duckdb.py | 121 |
1 files changed, 100 insertions, 21 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index cd9d529..2343b35 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -22,15 +22,15 @@ from sqlglot.dialects.dialect import ( no_safe_divide_sql, no_timestamp_sql, pivot_column_names, + prepend_dollar_to_path, regexp_extract_sql, rename_func, str_position_sql, str_to_time_sql, timestamptrunc_sql, timestrtotime_sql, - ts_or_ds_to_date_sql, ) -from sqlglot.helper import seq_get +from sqlglot.helper import flatten, seq_get from sqlglot.tokens import TokenType @@ -141,11 +141,25 @@ def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str return f"EPOCH_MS({timestamp})" if scale == exp.UnixToTime.MICROS: return f"MAKE_TIMESTAMP({timestamp})" - if scale == exp.UnixToTime.NANOS: - return f"TO_TIMESTAMP({timestamp} / 1000000000)" - self.unsupported(f"Unsupported scale for timestamp: {scale}.") - return "" + return f"TO_TIMESTAMP({timestamp} / POW(10, {scale}))" + + +def _rename_unless_within_group( + a: str, b: str +) -> t.Callable[[DuckDB.Generator, exp.Expression], str]: + return ( + lambda self, expression: self.func(a, *flatten(expression.args.values())) + if isinstance(expression.find_ancestor(exp.Select, exp.WithinGroup), exp.WithinGroup) + else self.func(b, *flatten(expression.args.values())) + ) + + +def _parse_struct_pack(args: t.List) -> exp.Struct: + args_with_columns_as_identifiers = [ + exp.PropertyEQ(this=arg.this.this, expression=arg.expression) for arg in args + ] + return exp.Struct.from_arg_list(args_with_columns_as_identifiers) class DuckDB(Dialect): @@ -183,6 +197,11 @@ class DuckDB(Dialect): "TIMESTAMP_US": TokenType.TIMESTAMP, } + SINGLE_TOKENS = { + **tokens.Tokenizer.SINGLE_TOKENS, + "$": TokenType.PARAMETER, + } + class Parser(parser.Parser): BITWISE = { **parser.Parser.BITWISE, @@ -209,10 +228,12 @@ class DuckDB(Dialect): "EPOCH_MS": lambda args: exp.UnixToTime( this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS ), + "JSON": exp.ParseJSON.from_arg_list, "LIST_HAS": exp.ArrayContains.from_arg_list, "LIST_REVERSE_SORT": _sort_array_reverse, "LIST_SORT": exp.SortArray.from_arg_list, "LIST_VALUE": exp.Array.from_arg_list, + "MAKE_TIME": exp.TimeFromParts.from_arg_list, "MAKE_TIMESTAMP": _parse_make_timestamp, "MEDIAN": lambda args: exp.PercentileCont( this=seq_get(args, 0), expression=exp.Literal.number(0.5) @@ -234,7 +255,7 @@ class DuckDB(Dialect): "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, "STRING_TO_ARRAY": exp.Split.from_arg_list, "STRPTIME": format_time_lambda(exp.StrToTime, "duckdb"), - "STRUCT_PACK": exp.Struct.from_arg_list, + "STRUCT_PACK": _parse_struct_pack, "STR_SPLIT": exp.Split.from_arg_list, "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, @@ -250,6 +271,13 @@ class DuckDB(Dialect): TokenType.ANTI, } + PLACEHOLDER_PARSERS = { + **parser.Parser.PLACEHOLDER_PARSERS, + TokenType.PARAMETER: lambda self: self.expression(exp.Placeholder, this=self._prev.text) + if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) + else None, + } + def _parse_types( self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True ) -> t.Optional[exp.Expression]: @@ -268,7 +296,7 @@ class DuckDB(Dialect): return this - def _parse_struct_types(self) -> t.Optional[exp.Expression]: + def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: return self._parse_field_def() def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: @@ -285,6 +313,10 @@ class DuckDB(Dialect): RENAME_TABLE_WITH_DB = False NVL2_SUPPORTED = False SEMI_ANTI_JOIN_WITH_SIDE = False + TABLESAMPLE_KEYWORDS = "USING SAMPLE" + TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" + LAST_DAY_SUPPORTS_DATE_PART = False + JSON_KEY_VALUE_PAIR_SEP = "," TRANSFORMS = { **generator.Generator.TRANSFORMS, @@ -311,7 +343,7 @@ class DuckDB(Dialect): exp.DateFromParts: rename_func("MAKE_DATE"), exp.DateSub: _date_delta_sql, exp.DateDiff: lambda self, e: self.func( - "DATE_DIFF", f"'{e.args.get('unit') or 'day'}'", e.expression, e.this + "DATE_DIFF", f"'{e.args.get('unit') or 'DAY'}'", e.expression, e.this ), exp.DateStrToDate: datestrtodate_sql, exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", @@ -322,11 +354,11 @@ class DuckDB(Dialect): exp.IntDiv: lambda self, e: self.binary(e, "//"), exp.IsInf: rename_func("ISINF"), exp.IsNan: rename_func("ISNAN"), + exp.JSONBExtract: arrow_json_extract_sql, + exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, exp.JSONExtract: arrow_json_extract_sql, exp.JSONExtractScalar: arrow_json_extract_scalar_sql, exp.JSONFormat: _json_format_sql, - exp.JSONBExtract: arrow_json_extract_sql, - exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, exp.LogicalOr: rename_func("BOOL_OR"), exp.LogicalAnd: rename_func("BOOL_AND"), exp.MonthsBetween: lambda self, e: self.func( @@ -336,8 +368,8 @@ class DuckDB(Dialect): exp.cast(e.this, "timestamp", copy=True), ), exp.ParseJSON: rename_func("JSON"), - exp.PercentileCont: rename_func("QUANTILE_CONT"), - exp.PercentileDisc: rename_func("QUANTILE_DISC"), + exp.PercentileCont: _rename_unless_within_group("PERCENTILE_CONT", "QUANTILE_CONT"), + exp.PercentileDisc: _rename_unless_within_group("PERCENTILE_DISC", "QUANTILE_DISC"), # DuckDB doesn't allow qualified columns inside of PIVOT expressions. # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), @@ -362,7 +394,9 @@ class DuckDB(Dialect): exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", exp.Struct: _struct_sql, exp.Timestamp: no_timestamp_sql, - exp.TimestampFromParts: rename_func("MAKE_TIMESTAMP"), + exp.TimestampDiff: lambda self, e: self.func( + "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this + ), exp.TimestampTrunc: timestamptrunc_sql, exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToTime: timestrtotime_sql, @@ -373,11 +407,10 @@ class DuckDB(Dialect): exp.TsOrDsAdd: _ts_or_ds_add_sql, exp.TsOrDsDiff: lambda self, e: self.func( "DATE_DIFF", - f"'{e.args.get('unit') or 'day'}'", + f"'{e.args.get('unit') or 'DAY'}'", exp.cast(e.expression, "TIMESTAMP"), exp.cast(e.this, "TIMESTAMP"), ), - exp.TsOrDsToDate: ts_or_ds_to_date_sql("duckdb"), exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", exp.UnixToTime: _unix_to_time_sql, exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", @@ -410,6 +443,49 @@ class DuckDB(Dialect): exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, } + def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: + nano = expression.args.get("nano") + if nano is not None: + expression.set( + "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) + ) + + return rename_func("MAKE_TIME")(self, expression) + + def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: + sec = expression.args["sec"] + + milli = expression.args.get("milli") + if milli is not None: + sec += milli.pop() / exp.Literal.number(1000.0) + + nano = expression.args.get("nano") + if nano is not None: + sec += nano.pop() / exp.Literal.number(1000000000.0) + + if milli or nano: + expression.set("sec", sec) + + return rename_func("MAKE_TIMESTAMP")(self, expression) + + def tablesample_sql( + self, + expression: exp.TableSample, + sep: str = " AS ", + tablesample_keyword: t.Optional[str] = None, + ) -> str: + if not isinstance(expression.parent, exp.Select): + # This sample clause only applies to a single source, not the entire resulting relation + tablesample_keyword = "TABLESAMPLE" + + return super().tablesample_sql( + expression, sep=sep, tablesample_keyword=tablesample_keyword + ) + + def getpath_sql(self, expression: exp.GetPath) -> str: + expression = prepend_dollar_to_path(expression) + return f"{self.sql(expression, 'this')} -> {self.sql(expression, 'expression')}" + def interval_sql(self, expression: exp.Interval) -> str: multiplier: t.Optional[int] = None unit = expression.text("unit").lower() @@ -420,11 +496,14 @@ class DuckDB(Dialect): multiplier = 90 if multiplier: - return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('day')))})" + return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" return super().interval_sql(expression) - def tablesample_sql( - self, expression: exp.TableSample, seed_prefix: str = "SEED", sep: str = " AS " - ) -> str: - return super().tablesample_sql(expression, seed_prefix="REPEATABLE", sep=sep) + def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: + if isinstance(expression.parent, exp.UserDefinedFunction): + return self.sql(expression, "this") + return super().columndef_sql(expression, sep) + + def placeholder_sql(self, expression: exp.Placeholder) -> str: + return f"${expression.name}" if expression.name else "?" |