summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/dialects/duckdb.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 05:35:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 05:35:55 +0000
commit: fe979e8421c04c038353a0a2d07d81779516186a (patch)
tree: efb70a52261e5cf4862a7eb69e1d7cd16356fcba /sqlglot/dialects/duckdb.py
parent: Releasing debian version 23.13.7-1. (diff)
download: sqlglot-fe979e8421c04c038353a0a2d07d81779516186a.tar.xz
download: sqlglot-fe979e8421c04c038353a0a2d07d81779516186a.zip
Merging upstream version 23.16.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/duckdb.py')
-rw-r--r-- sqlglot/dialects/duckdb.py 62
1 files changed, 39 insertions, 23 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
index c11315f..19f2f4a 100644
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -5,12 +5,14 @@ import typing as t
from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
Dialect,
+ JSON_EXTRACT_TYPE,
NormalizationStrategy,
approx_count_distinct_sql,
arg_max_or_min_no_count,
arrow_json_extract_sql,
binary_from_function,
bool_xor_sql,
+ build_default_decimal_type,
date_trunc_to_time,
datestrtodate_sql,
encode_decode_sql,
@@ -155,6 +157,13 @@ def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str
return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))
+def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
+ arrow_sql = arrow_json_extract_sql(self, expression)
+ if not expression.same_parent and isinstance(expression.parent, exp.Binary):
+ arrow_sql = self.wrap(arrow_sql)
+ return arrow_sql
+
+
class DuckDB(Dialect):
NULL_ORDERING = "nulls_are_last"
SUPPORTS_USER_DEFINED_TYPES = False
@@ -304,6 +313,22 @@ class DuckDB(Dialect):
),
}
+ TYPE_CONVERTER = {
+ # https://duckdb.org/docs/sql/data_types/numeric
+ exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
+ }
+
+ def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
+ # https://duckdb.org/docs/sql/samples.html
+ sample = super()._parse_table_sample(as_modifier=as_modifier)
+ if sample and not sample.args.get("method"):
+ if sample.args.get("size"):
+ sample.set("method", exp.var("RESERVOIR"))
+ else:
+ sample.set("method", exp.var("SYSTEM"))
+
+ return sample
+
def _parse_bracket(
self, this: t.Optional[exp.Expression] = None
) -> t.Optional[exp.Expression]:
@@ -320,24 +345,6 @@ class DuckDB(Dialect):
args = self._parse_wrapped_csv(self._parse_conjunction)
return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))
- def _parse_types(
- self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
- ) -> t.Optional[exp.Expression]:
- this = super()._parse_types(
- check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
- )
-
- # DuckDB treats NUMERIC and DECIMAL without precision as DECIMAL(18, 3)
- # See: https://duckdb.org/docs/sql/data_types/numeric
- if (
- isinstance(this, exp.DataType)
- and this.is_type("numeric", "decimal")
- and not this.expressions
- ):
- return exp.DataType.build("DECIMAL(18, 3)")
-
- return this
-
def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
return self._parse_field_def()
@@ -368,6 +375,7 @@ class DuckDB(Dialect):
CAN_IMPLEMENT_ARRAY_ANY = True
SUPPORTS_TO_NUMBER = False
COPY_HAS_INTO_KEYWORD = False
+ STAR_EXCEPT = "EXCLUDE"
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
@@ -406,11 +414,12 @@ class DuckDB(Dialect):
exp.IntDiv: lambda self, e: self.binary(e, "//"),
exp.IsInf: rename_func("ISINF"),
exp.IsNan: rename_func("ISNAN"),
- exp.JSONExtract: arrow_json_extract_sql,
- exp.JSONExtractScalar: arrow_json_extract_sql,
+ exp.JSONExtract: _arrow_json_extract_sql,
+ exp.JSONExtractScalar: _arrow_json_extract_sql,
exp.JSONFormat: _json_format_sql,
exp.LogicalOr: rename_func("BOOL_OR"),
exp.LogicalAnd: rename_func("BOOL_AND"),
+ exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
exp.MonthsBetween: lambda self, e: self.func(
"DATEDIFF",
"'month'",
@@ -449,7 +458,7 @@ class DuckDB(Dialect):
exp.TimestampDiff: lambda self, e: self.func(
"DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
),
- exp.TimestampTrunc: timestamptrunc_sql,
+ exp.TimestampTrunc: timestamptrunc_sql(),
exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
exp.TimeStrToTime: timestrtotime_sql,
exp.TimeStrToUnix: lambda self, e: self.func(
@@ -499,8 +508,6 @@ class DuckDB(Dialect):
exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
}
- STAR_MAPPING = {**generator.Generator.STAR_MAPPING, "except": "EXCLUDE"}
-
UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
# DuckDB doesn't generally support CREATE TABLE .. properties
@@ -550,6 +557,15 @@ class DuckDB(Dialect):
# This sample clause only applies to a single source, not the entire resulting relation
tablesample_keyword = "TABLESAMPLE"
+ if expression.args.get("size"):
+ method = expression.args.get("method")
+ if method and method.name.upper() != "RESERVOIR":
+ self.unsupported(
+ f"Sampling method {method} is not supported with a discrete sample count, "
+ "defaulting to reservoir sampling"
+ )
+ expression.set("method", exp.var("RESERVOIR"))
+
return super().tablesample_sql(
expression, sep=sep, tablesample_keyword=tablesample_keyword
)