summaryrefslogtreecommitdiffstats
path: root/sqlglot/dialects/duckdb.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-02-08 05:38:42 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-02-08 05:38:42 +0000
commitc66e4a33e1a07c439f03fe47f146a6c6482bf6df (patch)
treecfdf01111c063b3e50841695e6c2768833aea4dc /sqlglot/dialects/duckdb.py
parentReleasing debian version 20.11.0-1. (diff)
downloadsqlglot-c66e4a33e1a07c439f03fe47f146a6c6482bf6df.tar.xz
sqlglot-c66e4a33e1a07c439f03fe47f146a6c6482bf6df.zip
Merging upstream version 21.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/duckdb.py')
-rw-r--r--sqlglot/dialects/duckdb.py65
1 files changed, 46 insertions, 19 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
index f55ad70..d7ba729 100644
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -8,7 +8,6 @@ from sqlglot.dialects.dialect import (
NormalizationStrategy,
approx_count_distinct_sql,
arg_max_or_min_no_count,
- arrow_json_extract_scalar_sql,
arrow_json_extract_sql,
binary_from_function,
bool_xor_sql,
@@ -18,11 +17,9 @@ from sqlglot.dialects.dialect import (
format_time_lambda,
inline_array_sql,
no_comment_column_constraint_sql,
- no_properties_sql,
no_safe_divide_sql,
no_timestamp_sql,
pivot_column_names,
- prepend_dollar_to_path,
regexp_extract_sql,
rename_func,
str_position_sql,
@@ -172,6 +169,18 @@ class DuckDB(Dialect):
# https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
+ def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+ if isinstance(path, exp.Literal):
+ # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
+ # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
+ # This check ensures we'll avoid trying to parse these as JSON paths, which can
+ # either result in a noisy warning or in an invalid representation of the path.
+ path_text = path.name
+ if path_text.startswith("/") or "[#" in path_text:
+ return path
+
+ return super().to_json_path(path)
+
class Tokenizer(tokens.Tokenizer):
KEYWORDS = {
**tokens.Tokenizer.KEYWORDS,
@@ -229,6 +238,8 @@ class DuckDB(Dialect):
this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
),
"JSON": exp.ParseJSON.from_arg_list,
+ "JSON_EXTRACT_PATH": parser.parse_extract_json_with_path(exp.JSONExtract),
+ "JSON_EXTRACT_STRING": parser.parse_extract_json_with_path(exp.JSONExtractScalar),
"LIST_HAS": exp.ArrayContains.from_arg_list,
"LIST_REVERSE_SORT": _sort_array_reverse,
"LIST_SORT": exp.SortArray.from_arg_list,
@@ -319,6 +330,9 @@ class DuckDB(Dialect):
TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
LAST_DAY_SUPPORTS_DATE_PART = False
JSON_KEY_VALUE_PAIR_SEP = ","
+ IGNORE_NULLS_IN_FUNC = True
+ JSON_PATH_BRACKETED_KEY_SUPPORTED = False
+ SUPPORTS_CREATE_TABLE_LIKE = False
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
@@ -350,18 +364,18 @@ class DuckDB(Dialect):
"DATE_DIFF", f"'{e.args.get('unit') or 'DAY'}'", e.expression, e.this
),
exp.DateStrToDate: datestrtodate_sql,
- exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
+ exp.DateToDi: lambda self,
+ e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
- exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
+ exp.DiToDate: lambda self,
+ e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
exp.Explode: rename_func("UNNEST"),
exp.IntDiv: lambda self, e: self.binary(e, "//"),
exp.IsInf: rename_func("ISINF"),
exp.IsNan: rename_func("ISNAN"),
- exp.JSONBExtract: arrow_json_extract_sql,
- exp.JSONBExtractScalar: arrow_json_extract_scalar_sql,
exp.JSONExtract: arrow_json_extract_sql,
- exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
+ exp.JSONExtractScalar: arrow_json_extract_sql,
exp.JSONFormat: _json_format_sql,
exp.LogicalOr: rename_func("BOOL_OR"),
exp.LogicalAnd: rename_func("BOOL_AND"),
@@ -377,7 +391,6 @@ class DuckDB(Dialect):
# DuckDB doesn't allow qualified columns inside of PIVOT expressions.
# See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
- exp.Properties: no_properties_sql,
exp.RegexpExtract: regexp_extract_sql,
exp.RegexpReplace: lambda self, e: self.func(
"REGEXP_REPLACE",
@@ -395,7 +408,8 @@ class DuckDB(Dialect):
exp.StrPosition: str_position_sql,
exp.StrToDate: lambda self, e: f"CAST({str_to_time_sql(self, e)} AS DATE)",
exp.StrToTime: str_to_time_sql,
- exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))",
+ exp.StrToUnix: lambda self,
+ e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))",
exp.Struct: _struct_sql,
exp.Timestamp: no_timestamp_sql,
exp.TimestampDiff: lambda self, e: self.func(
@@ -405,9 +419,11 @@ class DuckDB(Dialect):
exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)",
exp.TimeStrToTime: timestrtotime_sql,
exp.TimeStrToUnix: lambda self, e: f"EPOCH(CAST({self.sql(e, 'this')} AS TIMESTAMP))",
- exp.TimeToStr: lambda self, e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})",
+ exp.TimeToStr: lambda self,
+ e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})",
exp.TimeToUnix: rename_func("EPOCH"),
- exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
+ exp.TsOrDiToDi: lambda self,
+ e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
exp.TsOrDsAdd: _ts_or_ds_add_sql,
exp.TsOrDsDiff: lambda self, e: self.func(
"DATE_DIFF",
@@ -415,7 +431,8 @@ class DuckDB(Dialect):
exp.cast(e.expression, "TIMESTAMP"),
exp.cast(e.this, "TIMESTAMP"),
),
- exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})",
+ exp.UnixToStr: lambda self,
+ e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})",
exp.UnixToTime: _unix_to_time_sql,
exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
exp.VariancePop: rename_func("VAR_POP"),
@@ -423,6 +440,13 @@ class DuckDB(Dialect):
exp.Xor: bool_xor_sql,
}
+ SUPPORTED_JSON_PATH_PARTS = {
+ exp.JSONPathKey,
+ exp.JSONPathRoot,
+ exp.JSONPathSubscript,
+ exp.JSONPathWildcard,
+ }
+
TYPE_MAPPING = {
**generator.Generator.TYPE_MAPPING,
exp.DataType.Type.BINARY: "BLOB",
@@ -442,11 +466,18 @@ class DuckDB(Dialect):
UNWRAPPED_INTERVAL_VALUES = (exp.Column, exp.Literal, exp.Paren)
+ # DuckDB doesn't generally support CREATE TABLE .. properties
+ # https://duckdb.org/docs/sql/statements/create_table.html
PROPERTIES_LOCATION = {
- **generator.Generator.PROPERTIES_LOCATION,
- exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
+ prop: exp.Properties.Location.UNSUPPORTED
+ for prop in generator.Generator.PROPERTIES_LOCATION
}
+ # There are a few exceptions (e.g. temporary tables) which are supported or
+ # can be transpiled to DuckDB, so we explicitly override them accordingly
+ PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
+ PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
+
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
nano = expression.args.get("nano")
if nano is not None:
@@ -486,10 +517,6 @@ class DuckDB(Dialect):
expression, sep=sep, tablesample_keyword=tablesample_keyword
)
- def getpath_sql(self, expression: exp.GetPath) -> str:
- expression = prepend_dollar_to_path(expression)
- return f"{self.sql(expression, 'this')} -> {self.sql(expression, 'expression')}"
-
def interval_sql(self, expression: exp.Interval) -> str:
multiplier: t.Optional[int] = None
unit = expression.text("unit").lower()