diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 05:35:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 05:35:55 +0000 |
commit | fe979e8421c04c038353a0a2d07d81779516186a (patch) | |
tree | efb70a52261e5cf4862a7eb69e1d7cd16356fcba /sqlglot/dialects/duckdb.py | |
parent | Releasing debian version 23.13.7-1. (diff) | |
download | sqlglot-fe979e8421c04c038353a0a2d07d81779516186a.tar.xz sqlglot-fe979e8421c04c038353a0a2d07d81779516186a.zip |
Merging upstream version 23.16.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/duckdb.py')
-rw-r--r-- | sqlglot/dialects/duckdb.py | 62 |
1 file changed, 39 insertions, 23 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index c11315f..19f2f4a 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -5,12 +5,14 @@ import typing as t from sqlglot import exp, generator, parser, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, + JSON_EXTRACT_TYPE, NormalizationStrategy, approx_count_distinct_sql, arg_max_or_min_no_count, arrow_json_extract_sql, binary_from_function, bool_xor_sql, + build_default_decimal_type, date_trunc_to_time, datestrtodate_sql, encode_decode_sql, @@ -155,6 +157,13 @@ def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) +def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: + arrow_sql = arrow_json_extract_sql(self, expression) + if not expression.same_parent and isinstance(expression.parent, exp.Binary): + arrow_sql = self.wrap(arrow_sql) + return arrow_sql + + class DuckDB(Dialect): NULL_ORDERING = "nulls_are_last" SUPPORTS_USER_DEFINED_TYPES = False @@ -304,6 +313,22 @@ class DuckDB(Dialect): ), } + TYPE_CONVERTER = { + # https://duckdb.org/docs/sql/data_types/numeric + exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), + } + + def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: + # https://duckdb.org/docs/sql/samples.html + sample = super()._parse_table_sample(as_modifier=as_modifier) + if sample and not sample.args.get("method"): + if sample.args.get("size"): + sample.set("method", exp.var("RESERVOIR")) + else: + sample.set("method", exp.var("SYSTEM")) + + return sample + def _parse_bracket( self, this: t.Optional[exp.Expression] = None ) -> t.Optional[exp.Expression]: @@ -320,24 +345,6 @@ class DuckDB(Dialect): args = self._parse_wrapped_csv(self._parse_conjunction) return self.expression(exp.Map, keys=seq_get(args, 0), 
values=seq_get(args, 1)) - def _parse_types( - self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True - ) -> t.Optional[exp.Expression]: - this = super()._parse_types( - check_func=check_func, schema=schema, allow_identifiers=allow_identifiers - ) - - # DuckDB treats NUMERIC and DECIMAL without precision as DECIMAL(18, 3) - # See: https://duckdb.org/docs/sql/data_types/numeric - if ( - isinstance(this, exp.DataType) - and this.is_type("numeric", "decimal") - and not this.expressions - ): - return exp.DataType.build("DECIMAL(18, 3)") - - return this - def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: return self._parse_field_def() @@ -368,6 +375,7 @@ class DuckDB(Dialect): CAN_IMPLEMENT_ARRAY_ANY = True SUPPORTS_TO_NUMBER = False COPY_HAS_INTO_KEYWORD = False + STAR_EXCEPT = "EXCLUDE" TRANSFORMS = { **generator.Generator.TRANSFORMS, @@ -406,11 +414,12 @@ class DuckDB(Dialect): exp.IntDiv: lambda self, e: self.binary(e, "//"), exp.IsInf: rename_func("ISINF"), exp.IsNan: rename_func("ISNAN"), - exp.JSONExtract: arrow_json_extract_sql, - exp.JSONExtractScalar: arrow_json_extract_sql, + exp.JSONExtract: _arrow_json_extract_sql, + exp.JSONExtractScalar: _arrow_json_extract_sql, exp.JSONFormat: _json_format_sql, exp.LogicalOr: rename_func("BOOL_OR"), exp.LogicalAnd: rename_func("BOOL_AND"), + exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), exp.MonthsBetween: lambda self, e: self.func( "DATEDIFF", "'month'", @@ -449,7 +458,7 @@ class DuckDB(Dialect): exp.TimestampDiff: lambda self, e: self.func( "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this ), - exp.TimestampTrunc: timestamptrunc_sql, + exp.TimestampTrunc: timestamptrunc_sql(), exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: lambda self, e: self.func( @@ -499,8 +508,6 @@ class DuckDB(Dialect): 
exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", } - STAR_MAPPING = {**generator.Generator.STAR_MAPPING, "except": "EXCLUDE"} - UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) # DuckDB doesn't generally support CREATE TABLE .. properties @@ -550,6 +557,15 @@ class DuckDB(Dialect): # This sample clause only applies to a single source, not the entire resulting relation tablesample_keyword = "TABLESAMPLE" + if expression.args.get("size"): + method = expression.args.get("method") + if method and method.name.upper() != "RESERVOIR": + self.unsupported( + f"Sampling method {method} is not supported with a discrete sample count, " + "defaulting to reservoir sampling" + ) + expression.set("method", exp.var("RESERVOIR")) + return super().tablesample_sql( expression, sep=sep, tablesample_keyword=tablesample_keyword ) |