diff options
Diffstat (limited to '')
-rw-r--r-- | sqlglot/dialects/duckdb.py | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py new file mode 100644 index 0000000..d83a620 --- /dev/null +++ b/sqlglot/dialects/duckdb.py @@ -0,0 +1,156 @@ +from sqlglot import exp +from sqlglot.dialects.dialect import ( + Dialect, + approx_count_distinct_sql, + arrow_json_extract_scalar_sql, + arrow_json_extract_sql, + format_time_lambda, + no_safe_divide_sql, + no_tablesample_sql, + rename_func, + str_position_sql, +) +from sqlglot.generator import Generator +from sqlglot.helper import list_get +from sqlglot.parser import Parser +from sqlglot.tokens import Tokenizer, TokenType + + +def _unix_to_time(self, expression): + return f"TO_TIMESTAMP(CAST({self.sql(expression, 'this')} AS BIGINT))" + + +def _str_to_time_sql(self, expression): + return f"STRPTIME({self.sql(expression, 'this')}, {self.format_time(expression)})" + + +def _ts_or_ds_add(self, expression): + this = self.sql(expression, "this") + e = self.sql(expression, "expression") + unit = self.sql(expression, "unit").strip("'") or "DAY" + return f"CAST({this} AS DATE) + INTERVAL {e} {unit}" + + +def _ts_or_ds_to_date_sql(self, expression): + time_format = self.format_time(expression) + if time_format and time_format not in (DuckDB.time_format, DuckDB.date_format): + return f"CAST({_str_to_time_sql(self, expression)} AS DATE)" + return f"CAST({self.sql(expression, 'this')} AS DATE)" + + +def _date_add(self, expression): + this = self.sql(expression, "this") + e = self.sql(expression, "expression") + unit = self.sql(expression, "unit").strip("'") or "DAY" + return f"{this} + INTERVAL {e} {unit}" + + +def _array_sort_sql(self, expression): + if expression.expression: + self.unsupported("DUCKDB ARRAY_SORT does not support a comparator") + return f"ARRAY_SORT({self.sql(expression, 'this')})" + + +def _sort_array_sql(self, expression): + this = self.sql(expression, "this") + if expression.args.get("asc") == exp.FALSE: + return f"ARRAY_REVERSE_SORT({this})" + return f"ARRAY_SORT({this})" + + +def _sort_array_reverse(args): + return exp.SortArray(this=list_get(args, 0), asc=exp.FALSE) + + +def _struct_pack_sql(self, expression): + args = [ + self.binary(e, ":=") if isinstance(e, exp.EQ) else self.sql(e) + for e in expression.expressions + ] + return f"STRUCT_PACK({', '.join(args)})" + + +class DuckDB(Dialect): + class Tokenizer(Tokenizer): + KEYWORDS = { + **Tokenizer.KEYWORDS, + ":=": TokenType.EQ, + } + + class Parser(Parser): + FUNCTIONS = { + **Parser.FUNCTIONS, + "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list, + "ARRAY_LENGTH": exp.ArraySize.from_arg_list, + "ARRAY_SORT": exp.SortArray.from_arg_list, + "ARRAY_REVERSE_SORT": _sort_array_reverse, + "EPOCH": exp.TimeToUnix.from_arg_list, + "EPOCH_MS": lambda args: exp.UnixToTime( + this=exp.Div( + this=list_get(args, 0), + expression=exp.Literal.number(1000), + ) + ), + "LIST_SORT": exp.SortArray.from_arg_list, + "LIST_REVERSE_SORT": _sort_array_reverse, + "LIST_VALUE": exp.Array.from_arg_list, + "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, + "STRFTIME": format_time_lambda(exp.TimeToStr, "duckdb"), + "STRPTIME": format_time_lambda(exp.StrToTime, "duckdb"), + "STR_SPLIT": exp.Split.from_arg_list, + "STRING_SPLIT": exp.Split.from_arg_list, + "STRING_TO_ARRAY": exp.Split.from_arg_list, + "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, + "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, + "STRUCT_PACK": exp.Struct.from_arg_list, + "TO_TIMESTAMP": exp.TimeStrToTime.from_arg_list, + "UNNEST": exp.Explode.from_arg_list, + } + + class Generator(Generator): + TRANSFORMS = { + **Generator.TRANSFORMS, + exp.ApproxDistinct: approx_count_distinct_sql, + exp.Array: lambda self, e: f"LIST_VALUE({self.expressions(e, flat=True)})", + exp.ArraySize: rename_func("ARRAY_LENGTH"), + exp.ArraySort: _array_sort_sql, + exp.ArraySum: rename_func("LIST_SUM"), + exp.DateAdd: _date_add, + exp.DateDiff: lambda self, e: f"""DATE_DIFF({self.sql(e, 'unit') or "'day'"}, {self.sql(e, 'expression')}, {self.sql(e, 'this')})""", + exp.DateStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", + exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.dateint_format}) AS INT)", + exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.dateint_format}) AS DATE)", + exp.Explode: rename_func("UNNEST"), + exp.JSONExtract: arrow_json_extract_sql, + exp.JSONExtractScalar: arrow_json_extract_scalar_sql, + exp.JSONBExtract: arrow_json_extract_sql, + exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, + exp.RegexpLike: rename_func("REGEXP_MATCHES"), + exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), + exp.SafeDivide: no_safe_divide_sql, + exp.Split: rename_func("STR_SPLIT"), + exp.SortArray: _sort_array_sql, + exp.StrPosition: str_position_sql, + exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", + exp.StrToTime: _str_to_time_sql, + exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", + exp.Struct: _struct_pack_sql, + exp.TableSample: no_tablesample_sql, + exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", + exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)", + exp.TimeStrToUnix: lambda self, e: f"EPOCH(CAST({self.sql(e, 'this')} AS TIMESTAMP))", + exp.TimeToStr: lambda self, e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeToUnix: rename_func("EPOCH"), + exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", + exp.TsOrDsAdd: _ts_or_ds_add, + exp.TsOrDsToDate: _ts_or_ds_to_date_sql, + exp.UnixToStr: lambda self, e: f"STRFTIME({_unix_to_time(self, e)}, {self.format_time(e)})", + exp.UnixToTime: _unix_to_time, + exp.UnixToTimeStr: lambda self, e: f"CAST({_unix_to_time(self, e)} AS TEXT)", + } + + TYPE_MAPPING = { + **Generator.TYPE_MAPPING, + exp.DataType.Type.VARCHAR: "TEXT", + exp.DataType.Type.NVARCHAR: "TEXT", + } |