From c66e4a33e1a07c439f03fe47f146a6c6482bf6df Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 8 Feb 2024 06:38:42 +0100 Subject: Merging upstream version 21.0.1. Signed-off-by: Daniel Baumann --- sqlglot/dialects/__init__.py | 3 +- sqlglot/dialects/bigquery.py | 35 +++++++---- sqlglot/dialects/clickhouse.py | 35 ++++++++++- sqlglot/dialects/databricks.py | 4 ++ sqlglot/dialects/dialect.py | 132 +++++++++++++++++++++++++++-------------- sqlglot/dialects/doris.py | 9 ++- sqlglot/dialects/drill.py | 22 ++++--- sqlglot/dialects/duckdb.py | 65 ++++++++++++++------ sqlglot/dialects/hive.py | 52 +++++++++++++--- sqlglot/dialects/mysql.py | 17 ++++-- sqlglot/dialects/oracle.py | 6 +- sqlglot/dialects/postgres.py | 26 ++++++-- sqlglot/dialects/presto.py | 53 +++++++++++------ sqlglot/dialects/redshift.py | 35 ++++++----- sqlglot/dialects/snowflake.py | 53 +++++++++++------ sqlglot/dialects/spark.py | 5 +- sqlglot/dialects/spark2.py | 12 ++-- sqlglot/dialects/sqlite.py | 32 +++++++--- sqlglot/dialects/starrocks.py | 6 +- sqlglot/dialects/teradata.py | 3 +- sqlglot/dialects/trino.py | 9 ++- sqlglot/dialects/tsql.py | 74 +++++++++++++++++++---- 22 files changed, 496 insertions(+), 192 deletions(-) (limited to 'sqlglot/dialects') diff --git a/sqlglot/dialects/__init__.py b/sqlglot/dialects/__init__.py index 04990ac..82552c9 100644 --- a/sqlglot/dialects/__init__.py +++ b/sqlglot/dialects/__init__.py @@ -1,9 +1,10 @@ +# ruff: noqa: F401 """ ## Dialects While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible -SQL transpilation framework. +SQL transpilation framework. The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible. diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index 771ae1a..9068235 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -19,7 +19,6 @@ from sqlglot.dialects.dialect import ( min_or_least, no_ilike_sql, parse_date_delta_with_interval, - path_to_jsonpath, regexp_replace_sql, rename_func, timestrtotime_sql, @@ -458,8 +457,10 @@ class BigQuery(Dialect): return this - def _parse_table_parts(self, schema: bool = False) -> exp.Table: - table = super()._parse_table_parts(schema=schema) + def _parse_table_parts( + self, schema: bool = False, is_db_reference: bool = False + ) -> exp.Table: + table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) if isinstance(table.this, exp.Identifier) and "." in table.name: catalog, db, this, *rest = ( t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) @@ -474,10 +475,12 @@ class BigQuery(Dialect): return table @t.overload - def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... + def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: + ... @t.overload - def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... + def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: + ... 
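
The Lit[...] overloads just above (the runtime implementation follows) use a pattern worth noting: typing the agg flag with Literal values lets a type checker infer the precise node type each call returns. A minimal, self-contained sketch of the same pattern; the names below are illustrative stand-ins, not sqlglot API:

    import typing as t

    class JSONObject: ...
    class JSONObjectAgg: ...

    @t.overload
    def parse_json_object(agg: t.Literal[False] = ...) -> JSONObject: ...
    @t.overload
    def parse_json_object(agg: t.Literal[True]) -> JSONObjectAgg: ...
    def parse_json_object(agg: bool = False) -> t.Union[JSONObject, JSONObjectAgg]:
        # Only this implementation runs; the overload stubs exist purely
        # so the type checker can narrow the return type on the flag.
        return JSONObjectAgg() if agg else JSONObject()

    node = parse_json_object(agg=True)  # checker infers JSONObjectAgg
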
def _parse_json_object(self, agg=False): json_object = super()._parse_json_object() @@ -536,6 +539,8 @@ class BigQuery(Dialect): UNPIVOT_ALIASES_ARE_IDENTIFIERS = False JSON_KEY_VALUE_PAIR_SEP = "," NULL_ORDERING_SUPPORTED = False + IGNORE_NULLS_IN_FUNC = True + JSON_PATH_SINGLE_QUOTE_ESCAPE = True TRANSFORMS = { **generator.Generator.TRANSFORMS, @@ -554,7 +559,8 @@ class BigQuery(Dialect): exp.Create: _create_sql, exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), exp.DateAdd: date_add_interval_sql("DATE", "ADD"), - exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", + exp.DateDiff: lambda self, + e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", exp.DateFromParts: rename_func("DATE"), exp.DateStrToDate: datestrtodate_sql, exp.DateSub: date_add_interval_sql("DATE", "SUB"), @@ -565,7 +571,6 @@ class BigQuery(Dialect): "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" ), exp.GenerateSeries: rename_func("GENERATE_ARRAY"), - exp.GetPath: path_to_jsonpath(), exp.GroupConcat: rename_func("STRING_AGG"), exp.Hex: rename_func("TO_HEX"), exp.If: if_sql(false_value="NULL"), @@ -597,12 +602,13 @@ class BigQuery(Dialect): ] ), exp.SHA2: lambda self, e: self.func( - f"SHA256" if e.text("length") == "256" else "SHA512", e.this + "SHA256" if e.text("length") == "256" else "SHA512", e.this ), exp.StabilityProperty: lambda self, e: ( - f"DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" + "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" ), - exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", + exp.StrToDate: lambda self, + e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", exp.StrToTime: lambda self, e: self.func( "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") ), @@ -610,9 +616,10 @@ class BigQuery(Dialect): exp.TimeFromParts: rename_func("TIME"), exp.TimeSub: date_add_interval_sql("TIME", "SUB"), exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), + exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), exp.TimeStrToTime: timestrtotime_sql, - exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression), + exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), exp.TsOrDsAdd: _ts_or_ds_add_sql, exp.TsOrDsDiff: _ts_or_ds_diff_sql, exp.TsOrDsToTime: rename_func("TIME"), @@ -623,6 +630,12 @@ class BigQuery(Dialect): exp.VariancePop: rename_func("VAR_POP"), } + SUPPORTED_JSON_PATH_PARTS = { + exp.JSONPathKey, + exp.JSONPathRoot, + exp.JSONPathSubscript, + } + TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py index 1248edc..1ec15c5 100644 --- a/sqlglot/dialects/clickhouse.py +++ b/sqlglot/dialects/clickhouse.py @@ -8,12 +8,15 @@ from sqlglot.dialects.dialect import ( arg_max_or_min_no_count, date_delta_sql, inline_array_sql, + json_extract_segments, + json_path_key_only_name, no_pivot_sql, + parse_json_extract_path, rename_func, var_map_sql, ) from sqlglot.errors import ParseError -from sqlglot.helper import seq_get +from sqlglot.helper import is_int, seq_get from sqlglot.parser import parse_var_map from sqlglot.tokens import Token, TokenType @@ -120,6 +123,9 @@ class ClickHouse(Dialect): "DATEDIFF": lambda args: exp.DateDiff( 
this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) ), + "JSONEXTRACTSTRING": parse_json_extract_path( + exp.JSONExtractScalar, zero_based_indexing=False + ), "MAP": parse_var_map, "MATCH": exp.RegexpLike.from_arg_list, "RANDCANONICAL": exp.Rand.from_arg_list, @@ -354,9 +360,14 @@ class ClickHouse(Dialect): joins: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None, parse_bracket: bool = False, + is_db_reference: bool = False, ) -> t.Optional[exp.Expression]: this = super()._parse_table( - schema=schema, joins=joins, alias_tokens=alias_tokens, parse_bracket=parse_bracket + schema=schema, + joins=joins, + alias_tokens=alias_tokens, + parse_bracket=parse_bracket, + is_db_reference=is_db_reference, ) if self._match(TokenType.FINAL): @@ -518,6 +529,12 @@ class ClickHouse(Dialect): exp.DataType.Type.VARCHAR: "String", } + SUPPORTED_JSON_PATH_PARTS = { + exp.JSONPathKey, + exp.JSONPathRoot, + exp.JSONPathSubscript, + } + TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, **STRING_TYPE_MAPPING, @@ -570,6 +587,10 @@ class ClickHouse(Dialect): exp.Explode: rename_func("arrayJoin"), exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", exp.IsNan: rename_func("isNaN"), + exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), + exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), + exp.JSONPathKey: json_path_key_only_name, + exp.JSONPathRoot: lambda *_: "", exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), exp.Nullif: rename_func("nullIf"), exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", @@ -579,7 +600,8 @@ class ClickHouse(Dialect): exp.Rand: rename_func("randCanonical"), exp.Select: transforms.preprocess([transforms.eliminate_qualify]), exp.StartsWith: rename_func("startsWith"), - exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})", + exp.StrPosition: lambda self, + e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})", exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), } @@ -608,6 +630,13 @@ class ClickHouse(Dialect): "NAMED COLLECTION", } + def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: + this = self.json_path_part(expression.this) + return str(int(this) + 1) if is_int(this) else this + + def likeproperty_sql(self, expression: exp.LikeProperty) -> str: + return f"AS {self.sql(expression, 'this')}" + def _any_to_has( self, expression: exp.EQ | exp.NEQ, diff --git a/sqlglot/dialects/databricks.py b/sqlglot/dialects/databricks.py index 8e55b6a..20907db 100644 --- a/sqlglot/dialects/databricks.py +++ b/sqlglot/dialects/databricks.py @@ -22,6 +22,7 @@ class Databricks(Spark): "DATEADD": parse_date_delta(exp.DateAdd), "DATE_ADD": parse_date_delta(exp.DateAdd), "DATEDIFF": parse_date_delta(exp.DateDiff), + "TIMESTAMPDIFF": parse_date_delta(exp.TimestampDiff), } FACTOR = { @@ -48,6 +49,9 @@ class Databricks(Spark): exp.DatetimeDiff: lambda self, e: self.func( "TIMESTAMPDIFF", e.text("unit"), e.expression, e.this ), + exp.TimestampDiff: lambda self, e: self.func( + "TIMESTAMPDIFF", e.text("unit"), e.expression, e.this + ), exp.DatetimeTrunc: timestamptrunc_sql, exp.JSONExtract: lambda self, e: self.binary(e, ":"), exp.Select: transforms.preprocess( diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index 
6be991b..6e2d190 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import typing as t from enum import Enum, auto from functools import reduce @@ -7,7 +8,8 @@ from functools import reduce from sqlglot import exp from sqlglot.errors import ParseError from sqlglot.generator import Generator -from sqlglot.helper import AutoName, flatten, seq_get +from sqlglot.helper import AutoName, flatten, is_int, seq_get +from sqlglot.jsonpath import parse as parse_json_path from sqlglot.parser import Parser from sqlglot.time import TIMEZONES, format_time from sqlglot.tokens import Token, Tokenizer, TokenType @@ -17,7 +19,11 @@ DATE_ADD_OR_DIFF = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateDiff, exp.TsOrDsD DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub] if t.TYPE_CHECKING: - from sqlglot._typing import B, E + from sqlglot._typing import B, E, F + + JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar] + +logger = logging.getLogger("sqlglot") class Dialects(str, Enum): @@ -256,7 +262,7 @@ class Dialect(metaclass=_Dialect): INVERSE_ESCAPE_SEQUENCES: t.Dict[str, str] = {} - # Delimiters for quotes, identifiers and the corresponding escape characters + # Delimiters for string literals and identifiers QUOTE_START = "'" QUOTE_END = "'" IDENTIFIER_START = '"' @@ -373,7 +379,7 @@ class Dialect(metaclass=_Dialect): """ if ( isinstance(expression, exp.Identifier) - and not self.normalization_strategy is NormalizationStrategy.CASE_SENSITIVE + and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE and ( not expression.quoted or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE @@ -440,6 +446,19 @@ class Dialect(metaclass=_Dialect): return expression + def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + if isinstance(path, exp.Literal): + path_text = path.name + if path.is_number: + path_text = f"[{path_text}]" + + try: + return parse_json_path(path_text) + except ParseError as e: + logger.warning(f"Invalid JSON path syntax. 
{str(e)}") + + return path + def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: return self.parser(**opts).parse(self.tokenize(sql), sql) @@ -500,14 +519,12 @@ def if_sql( return _if_sql -def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str: - return self.binary(expression, "->") - +def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: + this = expression.this + if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string: + this.replace(exp.cast(this, "json")) -def arrow_json_extract_scalar_sql( - self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar -) -> str: - return self.binary(expression, "->>") + return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>") def inline_array_sql(self: Generator, expression: exp.Array) -> str: @@ -552,11 +569,6 @@ def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str: return self.cast_sql(expression) -def no_properties_sql(self: Generator, expression: exp.Properties) -> str: - self.unsupported("Properties unsupported") - return "" - - def no_comment_column_constraint_sql( self: Generator, expression: exp.CommentColumnConstraint ) -> str: @@ -965,32 +977,6 @@ def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE return _delta_sql -def prepend_dollar_to_path(expression: exp.GetPath) -> exp.GetPath: - from sqlglot.optimizer.simplify import simplify - - # Makes sure the path will be evaluated correctly at runtime to include the path root. - # For example, `[0].foo` will become `$[0].foo`, and `foo` will become `$.foo`. - path = expression.expression - path = exp.func( - "if", - exp.func("startswith", path, "'['"), - exp.func("concat", "'$'", path), - exp.func("concat", "'$.'", path), - ) - - expression.expression.replace(simplify(path)) - return expression - - -def path_to_jsonpath( - name: str = "JSON_EXTRACT", -) -> t.Callable[[Generator, exp.GetPath], str]: - def _transform(self: Generator, expression: exp.GetPath) -> str: - return rename_func(name)(self, prepend_dollar_to_path(expression)) - - return _transform - - def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str: trunc_curr_date = exp.func("date_trunc", "month", expression.this) plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month") @@ -1003,9 +989,8 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str: """Remove table refs from columns in when statements.""" alias = expression.this.args.get("alias") - normalize = lambda identifier: ( - self.dialect.normalize_identifier(identifier).name if identifier else None - ) + def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]: + return self.dialect.normalize_identifier(identifier).name if identifier else None targets = {normalize(expression.this.this)} @@ -1023,3 +1008,60 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str: ) return self.merge_sql(expression) + + +def parse_json_extract_path( + expr_type: t.Type[F], zero_based_indexing: bool = True +) -> t.Callable[[t.List], F]: + def _parse_json_extract_path(args: t.List) -> F: + segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()] + for arg in args[1:]: + if not isinstance(arg, exp.Literal): + # We use the fallback parser because we can't really transpile non-literals safely + return expr_type.from_arg_list(args) + + text = arg.name + if is_int(text): + index = int(text) + 
segments.append( + exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1) + ) + else: + segments.append(exp.JSONPathKey(this=text)) + + # This is done to avoid failing in the expression validator due to the arg count + del args[2:] + return expr_type(this=seq_get(args, 0), expression=exp.JSONPath(expressions=segments)) + + return _parse_json_extract_path + + +def json_extract_segments( + name: str, quoted_index: bool = True +) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]: + def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: + path = expression.expression + if not isinstance(path, exp.JSONPath): + return rename_func(name)(self, expression) + + segments = [] + for segment in path.expressions: + path = self.sql(segment) + if path: + if isinstance(segment, exp.JSONPathPart) and ( + quoted_index or not isinstance(segment, exp.JSONPathSubscript) + ): + path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}" + + segments.append(path) + + return self.func(name, expression.this, *segments) + + return _json_extract_segments + + +def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str: + if isinstance(expression.this, exp.JSONPathWildcard): + self.unsupported("Unsupported wildcard in JSONPathKey expression") + + return expression.name diff --git a/sqlglot/dialects/doris.py b/sqlglot/dialects/doris.py index 6e229b3..7a18e8e 100644 --- a/sqlglot/dialects/doris.py +++ b/sqlglot/dialects/doris.py @@ -55,11 +55,14 @@ class Doris(MySQL): exp.Map: rename_func("ARRAY_MAP"), exp.RegexpLike: rename_func("REGEXP"), exp.RegexpSplit: rename_func("SPLIT_BY_STRING"), - exp.StrToUnix: lambda self, e: f"UNIX_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.StrToUnix: lambda self, + e: f"UNIX_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.Split: rename_func("SPLIT_BY_STRING"), exp.TimeStrToDate: rename_func("TO_DATE"), - exp.ToChar: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", - exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", # Only for day level + exp.ToChar: lambda self, + e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TsOrDsAdd: lambda self, + e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", # Only for day level exp.TsOrDsToDate: lambda self, e: self.func("TO_DATE", e.this), exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimestampTrunc: lambda self, e: self.func( diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py index 6bca9e7..be23355 100644 --- a/sqlglot/dialects/drill.py +++ b/sqlglot/dialects/drill.py @@ -99,6 +99,7 @@ class Drill(Dialect): QUERY_HINTS = False NVL2_SUPPORTED = False LAST_DAY_SUPPORTS_DATE_PART = False + SUPPORTS_CREATE_TABLE_LIKE = False TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, @@ -128,10 +129,14 @@ class Drill(Dialect): exp.DateAdd: _date_add_sql("ADD"), exp.DateStrToDate: datestrtodate_sql, exp.DateSub: _date_add_sql("SUB"), - exp.DateToDi: lambda self, e: f"CAST(TO_DATE({self.sql(e, 'this')}, {Drill.DATEINT_FORMAT}) AS INT)", - exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS VARCHAR), {Drill.DATEINT_FORMAT})", - exp.If: lambda self, e: f"`IF`({self.format_args(e.this, e.args.get('true'), e.args.get('false'))})", - exp.ILike: lambda self, e: f" {self.sql(e, 'this')} `ILIKE` {self.sql(e, 'expression')}", + exp.DateToDi: lambda self, + e: f"CAST(TO_DATE({self.sql(e, 'this')}, {Drill.DATEINT_FORMAT}) 
AS INT)", + exp.DiToDate: lambda self, + e: f"TO_DATE(CAST({self.sql(e, 'this')} AS VARCHAR), {Drill.DATEINT_FORMAT})", + exp.If: lambda self, + e: f"`IF`({self.format_args(e.this, e.args.get('true'), e.args.get('false'))})", + exp.ILike: lambda self, + e: f" {self.sql(e, 'this')} `ILIKE` {self.sql(e, 'expression')}", exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", exp.RegexpLike: rename_func("REGEXP_MATCHES"), @@ -141,7 +146,8 @@ class Drill(Dialect): exp.Select: transforms.preprocess( [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins] ), - exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.StrToTime: lambda self, + e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), @@ -149,8 +155,10 @@ class Drill(Dialect): exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), exp.ToChar: lambda self, e: self.function_fallback_sql(e), exp.TryCast: no_trycast_sql, - exp.TsOrDsAdd: lambda self, e: f"DATE_ADD(CAST({self.sql(e, 'this')} AS DATE), {self.sql(exp.Interval(this=e.expression, unit=exp.var('DAY')))})", - exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", + exp.TsOrDsAdd: lambda self, + e: f"DATE_ADD(CAST({self.sql(e, 'this')} AS DATE), {self.sql(exp.Interval(this=e.expression, unit=exp.var('DAY')))})", + exp.TsOrDiToDi: lambda self, + e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", } def normalize_func(self, name: str) -> str: diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index f55ad70..d7ba729 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -8,7 +8,6 @@ from sqlglot.dialects.dialect import ( NormalizationStrategy, approx_count_distinct_sql, arg_max_or_min_no_count, - arrow_json_extract_scalar_sql, arrow_json_extract_sql, binary_from_function, bool_xor_sql, @@ -18,11 +17,9 @@ from sqlglot.dialects.dialect import ( format_time_lambda, inline_array_sql, no_comment_column_constraint_sql, - no_properties_sql, no_safe_divide_sql, no_timestamp_sql, pivot_column_names, - prepend_dollar_to_path, regexp_extract_sql, rename_func, str_position_sql, @@ -172,6 +169,18 @@ class DuckDB(Dialect): # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE + def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + if isinstance(path, exp.Literal): + # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. + # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. + # This check ensures we'll avoid trying to parse these as JSON paths, which can + # either result in a noisy warning or in an invalid representation of the path. 
+ path_text = path.name + if path_text.startswith("/") or "[#" in path_text: + return path + + return super().to_json_path(path) + class Tokenizer(tokens.Tokenizer): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, @@ -229,6 +238,8 @@ class DuckDB(Dialect): this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS ), "JSON": exp.ParseJSON.from_arg_list, + "JSON_EXTRACT_PATH": parser.parse_extract_json_with_path(exp.JSONExtract), + "JSON_EXTRACT_STRING": parser.parse_extract_json_with_path(exp.JSONExtractScalar), "LIST_HAS": exp.ArrayContains.from_arg_list, "LIST_REVERSE_SORT": _sort_array_reverse, "LIST_SORT": exp.SortArray.from_arg_list, @@ -319,6 +330,9 @@ class DuckDB(Dialect): TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" LAST_DAY_SUPPORTS_DATE_PART = False JSON_KEY_VALUE_PAIR_SEP = "," + IGNORE_NULLS_IN_FUNC = True + JSON_PATH_BRACKETED_KEY_SUPPORTED = False + SUPPORTS_CREATE_TABLE_LIKE = False TRANSFORMS = { **generator.Generator.TRANSFORMS, @@ -350,18 +364,18 @@ class DuckDB(Dialect): "DATE_DIFF", f"'{e.args.get('unit') or 'DAY'}'", e.expression, e.this ), exp.DateStrToDate: datestrtodate_sql, - exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", + exp.DateToDi: lambda self, + e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), - exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", + exp.DiToDate: lambda self, + e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), exp.Explode: rename_func("UNNEST"), exp.IntDiv: lambda self, e: self.binary(e, "//"), exp.IsInf: rename_func("ISINF"), exp.IsNan: rename_func("ISNAN"), - exp.JSONBExtract: arrow_json_extract_sql, - exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, exp.JSONExtract: arrow_json_extract_sql, - exp.JSONExtractScalar: arrow_json_extract_scalar_sql, + exp.JSONExtractScalar: arrow_json_extract_sql, exp.JSONFormat: _json_format_sql, exp.LogicalOr: rename_func("BOOL_OR"), exp.LogicalAnd: rename_func("BOOL_AND"), @@ -377,7 +391,6 @@ class DuckDB(Dialect): # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
# See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), - exp.Properties: no_properties_sql, exp.RegexpExtract: regexp_extract_sql, exp.RegexpReplace: lambda self, e: self.func( "REGEXP_REPLACE", @@ -395,7 +408,8 @@ class DuckDB(Dialect): exp.StrPosition: str_position_sql, exp.StrToDate: lambda self, e: f"CAST({str_to_time_sql(self, e)} AS DATE)", exp.StrToTime: str_to_time_sql, - exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", + exp.StrToUnix: lambda self, + e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", exp.Struct: _struct_sql, exp.Timestamp: no_timestamp_sql, exp.TimestampDiff: lambda self, e: self.func( @@ -405,9 +419,11 @@ class DuckDB(Dialect): exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: lambda self, e: f"EPOCH(CAST({self.sql(e, 'this')} AS TIMESTAMP))", - exp.TimeToStr: lambda self, e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeToStr: lambda self, + e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("EPOCH"), - exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", + exp.TsOrDiToDi: lambda self, + e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", exp.TsOrDsAdd: _ts_or_ds_add_sql, exp.TsOrDsDiff: lambda self, e: self.func( "DATE_DIFF", @@ -415,7 +431,8 @@ class DuckDB(Dialect): exp.cast(e.expression, "TIMESTAMP"), exp.cast(e.this, "TIMESTAMP"), ), - exp.UnixToStr: lambda self, e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", + exp.UnixToStr: lambda self, + e: f"STRFTIME(TO_TIMESTAMP({self.sql(e, 'this')}), {self.format_time(e)})", exp.UnixToTime: _unix_to_time_sql, exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", exp.VariancePop: rename_func("VAR_POP"), @@ -423,6 +440,13 @@ class DuckDB(Dialect): exp.Xor: bool_xor_sql, } + SUPPORTED_JSON_PATH_PARTS = { + exp.JSONPathKey, + exp.JSONPathRoot, + exp.JSONPathSubscript, + exp.JSONPathWildcard, + } + TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DataType.Type.BINARY: "BLOB", @@ -442,11 +466,18 @@ class DuckDB(Dialect): UNWRAPPED_INTERVAL_VALUES = (exp.Column, exp.Literal, exp.Paren) + # DuckDB doesn't generally support CREATE TABLE .. properties + # https://duckdb.org/docs/sql/statements/create_table.html PROPERTIES_LOCATION = { - **generator.Generator.PROPERTIES_LOCATION, - exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, + prop: exp.Properties.Location.UNSUPPORTED + for prop in generator.Generator.PROPERTIES_LOCATION } + # There are a few exceptions (e.g. 
temporary tables) which are supported or + # can be transpiled to DuckDB, so we explicitly override them accordingly + PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA + PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE + def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: nano = expression.args.get("nano") if nano is not None: @@ -486,10 +517,6 @@ class DuckDB(Dialect): expression, sep=sep, tablesample_keyword=tablesample_keyword ) - def getpath_sql(self, expression: exp.GetPath) -> str: - expression = prepend_dollar_to_path(expression) - return f"{self.sql(expression, 'this')} -> {self.sql(expression, 'expression')}" - def interval_sql(self, expression: exp.Interval) -> str: multiplier: t.Optional[int] = None unit = expression.text("unit").lower() diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 060f9bd..6337ffd 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -192,6 +192,18 @@ def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str: return f"TO_DATE({this})" +def _parse_ignore_nulls( + exp_class: t.Type[exp.Expression], +) -> t.Callable[[t.List[exp.Expression]], exp.Expression]: + def _parse(args: t.List[exp.Expression]) -> exp.Expression: + this = exp_class(this=seq_get(args, 0)) + if seq_get(args, 1) == exp.true(): + return exp.IgnoreNulls(this=this) + return this + + return _parse + + class Hive(Dialect): ALIAS_POST_TABLESAMPLE = True IDENTIFIERS_CAN_START_WITH_DIGIT = True @@ -298,8 +310,12 @@ class Hive(Dialect): expression=exp.TsOrDsToDate(this=seq_get(args, 1)), ), "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), + "FIRST": _parse_ignore_nulls(exp.First), + "FIRST_VALUE": _parse_ignore_nulls(exp.FirstValue), "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, + "LAST": _parse_ignore_nulls(exp.Last), + "LAST_VALUE": _parse_ignore_nulls(exp.LastValue), "LOCATE": locate_to_strposition, "MAP": parse_var_map, "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), @@ -429,6 +445,7 @@ class Hive(Dialect): EXTRACT_ALLOWS_QUOTES = False NVL2_SUPPORTED = False LAST_DAY_SUPPORTS_DATE_PART = False + JSON_PATH_SINGLE_QUOTE_ESCAPE = True EXPRESSIONS_WITHOUT_NESTED_CTES = { exp.Insert, @@ -437,6 +454,13 @@ class Hive(Dialect): exp.Union, } + SUPPORTED_JSON_PATH_PARTS = { + exp.JSONPathKey, + exp.JSONPathRoot, + exp.JSONPathSubscript, + exp.JSONPathWildcard, + } + TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DataType.Type.BIT: "BOOLEAN", @@ -471,9 +495,12 @@ class Hive(Dialect): exp.DateDiff: _date_diff_sql, exp.DateStrToDate: datestrtodate_sql, exp.DateSub: _add_date_sql, - exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", - exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", - exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", + exp.DateToDi: lambda self, + e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", + exp.DiToDate: lambda self, + e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", + exp.FileFormatProperty: lambda self, + e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", exp.FromBase64: rename_func("UNBASE64"), exp.If: 
if_sql(), exp.ILike: no_ilike_sql, @@ -502,7 +529,8 @@ class Hive(Dialect): exp.SafeDivide: no_safe_divide_sql, exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), exp.ArrayUniqueAgg: rename_func("COLLECT_SET"), - exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", + exp.Split: lambda self, + e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", exp.StrPosition: strposition_to_locate_sql, exp.StrToDate: _str_to_date_sql, exp.StrToTime: _str_to_time_sql, @@ -514,7 +542,8 @@ class Hive(Dialect): exp.TimeToStr: _time_to_str, exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), exp.ToBase64: rename_func("BASE64"), - exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", + exp.TsOrDiToDi: lambda self, + e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", exp.TsOrDsAdd: _add_date_sql, exp.TsOrDsDiff: _date_diff_sql, exp.TsOrDsToDate: _to_date_sql, @@ -528,8 +557,10 @@ class Hive(Dialect): exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), exp.NumberToStr: rename_func("FORMAT_NUMBER"), exp.National: lambda self, e: self.national_sql(e, prefix=""), - exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", - exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", + exp.ClusteredColumnConstraint: lambda self, + e: f"({self.expressions(e, 'this', indent=False)})", + exp.NonClusteredColumnConstraint: lambda self, + e: f"({self.expressions(e, 'this', indent=False)})", exp.NotForReplicationColumnConstraint: lambda self, e: "", exp.OnProperty: lambda self, e: "", exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY", @@ -543,6 +574,13 @@ class Hive(Dialect): exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED, } + def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str: + if isinstance(expression.this, exp.JSONPathWildcard): + self.unsupported("Unsupported wildcard in JSONPathKey expression") + return "" + + return super()._jsonpathkey_sql(expression) + def temporary_storage_provider(self, expression: exp.Create) -> exp.Create: # Hive has no temporary storage provider (there are hive settings though) return expression diff --git a/sqlglot/dialects/mysql.py b/sqlglot/dialects/mysql.py index 21a9657..661ef7d 100644 --- a/sqlglot/dialects/mysql.py +++ b/sqlglot/dialects/mysql.py @@ -6,7 +6,7 @@ from sqlglot import exp, generator, parser, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, NormalizationStrategy, - arrow_json_extract_scalar_sql, + arrow_json_extract_sql, date_add_interval_sql, datestrtodate_sql, format_time_lambda, @@ -19,8 +19,8 @@ from sqlglot.dialects.dialect import ( no_pivot_sql, no_tablesample_sql, no_trycast_sql, + parse_date_delta, parse_date_delta_with_interval, - path_to_jsonpath, rename_func, strposition_to_locate_sql, ) @@ -306,6 +306,7 @@ class MySQL(Dialect): format=exp.Literal.string("%B"), ), "STR_TO_DATE": _str_to_date, + "TIMESTAMPDIFF": parse_date_delta(exp.TimestampDiff), "TO_DAYS": lambda args: exp.paren( exp.DateDiff( this=exp.TsOrDsToDate(this=seq_get(args, 0)), @@ -357,6 +358,7 @@ class MySQL(Dialect): "CREATE TRIGGER": _show_parser("CREATE TRIGGER", target=True), "CREATE VIEW": _show_parser("CREATE VIEW", target=True), "DATABASES": _show_parser("DATABASES"), + "SCHEMAS": _show_parser("DATABASES"), "ENGINE": 
_show_parser("ENGINE", target=True), "STORAGE ENGINES": _show_parser("ENGINES"), "ENGINES": _show_parser("ENGINES"), @@ -630,6 +632,8 @@ class MySQL(Dialect): VALUES_AS_TABLE = False NVL2_SUPPORTED = False LAST_DAY_SUPPORTS_DATE_PART = False + JSON_TYPE_REQUIRED_FOR_EXTRACTION = True + JSON_PATH_BRACKETED_KEY_SUPPORTED = False JSON_KEY_VALUE_PAIR_SEP = "," TRANSFORMS = { @@ -646,10 +650,10 @@ class MySQL(Dialect): exp.DayOfMonth: _remove_ts_or_ds_to_date(rename_func("DAYOFMONTH")), exp.DayOfWeek: _remove_ts_or_ds_to_date(rename_func("DAYOFWEEK")), exp.DayOfYear: _remove_ts_or_ds_to_date(rename_func("DAYOFYEAR")), - exp.GetPath: path_to_jsonpath(), - exp.GroupConcat: lambda self, e: f"""GROUP_CONCAT({self.sql(e, "this")} SEPARATOR {self.sql(e, "separator") or "','"})""", + exp.GroupConcat: lambda self, + e: f"""GROUP_CONCAT({self.sql(e, "this")} SEPARATOR {self.sql(e, "separator") or "','"})""", exp.ILike: no_ilike_sql, - exp.JSONExtractScalar: arrow_json_extract_scalar_sql, + exp.JSONExtractScalar: arrow_json_extract_sql, exp.Max: max_or_greatest, exp.Min: min_or_least, exp.Month: _remove_ts_or_ds_to_date(), @@ -672,6 +676,9 @@ class MySQL(Dialect): exp.TableSample: no_tablesample_sql, exp.TimeFromParts: rename_func("MAKETIME"), exp.TimestampAdd: date_add_interval_sql("DATE", "ADD"), + exp.TimestampDiff: lambda self, e: self.func( + "TIMESTAMPDIFF", e.text("unit"), e.expression, e.this + ), exp.TimestampSub: date_add_interval_sql("DATE", "SUB"), exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimeStrToTime: lambda self, e: self.sql(exp.cast(e.this, "datetime", copy=True)), diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py index 4591d59..0c0d750 100644 --- a/sqlglot/dialects/oracle.py +++ b/sqlglot/dialects/oracle.py @@ -199,7 +199,8 @@ class Oracle(Dialect): transforms.eliminate_qualify, ] ), - exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.StrToTime: lambda self, + e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.StrToDate: lambda self, e: f"TO_DATE({self.sql(e, 'this')}, {self.format_time(e)})", exp.Subquery: lambda self, e: self.subquery_sql(e, sep=" "), exp.Substring: rename_func("SUBSTR"), @@ -208,7 +209,8 @@ class Oracle(Dialect): exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", exp.ToChar: lambda self, e: self.function_fallback_sql(e), exp.Trim: trim_sql, - exp.UnixToTime: lambda self, e: f"TO_DATE('1970-01-01','YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)", + exp.UnixToTime: lambda self, + e: f"TO_DATE('1970-01-01','YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)", } PROPERTIES_LOCATION = { diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py index 87f6b02..0404c78 100644 --- a/sqlglot/dialects/postgres.py +++ b/sqlglot/dialects/postgres.py @@ -7,11 +7,11 @@ from sqlglot.dialects.dialect import ( DATE_ADD_OR_SUB, Dialect, any_value_to_max_sql, - arrow_json_extract_scalar_sql, - arrow_json_extract_sql, bool_xor_sql, datestrtodate_sql, format_time_lambda, + json_extract_segments, + json_path_key_only_name, max_or_greatest, merge_without_target_sql, min_or_least, @@ -20,6 +20,7 @@ from sqlglot.dialects.dialect import ( no_paren_current_date_sql, no_pivot_sql, no_trycast_sql, + parse_json_extract_path, parse_timestamp_trunc, rename_func, str_position_sql, @@ -292,6 +293,8 @@ class Postgres(Dialect): **parser.Parser.FUNCTIONS, "DATE_TRUNC": parse_timestamp_trunc, "GENERATE_SERIES": _generate_series, + "JSON_EXTRACT_PATH": 
parse_json_extract_path(exp.JSONExtract), + "JSON_EXTRACT_PATH_TEXT": parse_json_extract_path(exp.JSONExtractScalar), "MAKE_TIME": exp.TimeFromParts.from_arg_list, "MAKE_TIMESTAMP": exp.TimestampFromParts.from_arg_list, "NOW": exp.CurrentTimestamp.from_arg_list, @@ -375,8 +378,15 @@ class Postgres(Dialect): TABLESAMPLE_SIZE_IS_ROWS = False TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" SUPPORTS_SELECT_INTO = True - # https://www.postgresql.org/docs/current/sql-createtable.html + JSON_TYPE_REQUIRED_FOR_EXTRACTION = True SUPPORTS_UNLOGGED_TABLES = True + LIKE_PROPERTY_INSIDE_SCHEMA = True + + SUPPORTED_JSON_PATH_PARTS = { + exp.JSONPathKey, + exp.JSONPathRoot, + exp.JSONPathSubscript, + } TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, @@ -412,11 +422,14 @@ class Postgres(Dialect): exp.DateSub: _date_add_sql("-"), exp.Explode: rename_func("UNNEST"), exp.GroupConcat: _string_agg_sql, - exp.JSONExtract: arrow_json_extract_sql, - exp.JSONExtractScalar: arrow_json_extract_scalar_sql, + exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH"), + exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), exp.JSONBExtract: lambda self, e: self.binary(e, "#>"), exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"), exp.JSONBContains: lambda self, e: self.binary(e, "?"), + exp.JSONPathKey: json_path_key_only_name, + exp.JSONPathRoot: lambda *_: "", + exp.JSONPathSubscript: lambda self, e: self.json_path_part(e.this), exp.LastDay: no_last_day_sql, exp.LogicalOr: rename_func("BOOL_OR"), exp.LogicalAnd: rename_func("BOOL_AND"), @@ -443,7 +456,8 @@ class Postgres(Dialect): ] ), exp.StrPosition: str_position_sql, - exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.StrToTime: lambda self, + e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.StructExtract: struct_extract_sql, exp.Substring: _substring_sql, exp.TimeFromParts: rename_func("MAKE_TIME"), diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py index 6cc6030..8691192 100644 --- a/sqlglot/dialects/presto.py +++ b/sqlglot/dialects/presto.py @@ -18,7 +18,6 @@ from sqlglot.dialects.dialect import ( no_pivot_sql, no_safe_divide_sql, no_timestamp_sql, - path_to_jsonpath, regexp_extract_sql, rename_func, right_to_substring_sql, @@ -150,7 +149,7 @@ def _unnest_sequence(expression: exp.Expression) -> exp.Expression: return expression -def _first_last_sql(self: Presto.Generator, expression: exp.First | exp.Last) -> str: +def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str: """ Trino doesn't support FIRST / LAST as functions, but they're valid in the context of MATCH_RECOGNIZE, so we need to preserve them in that case. 
In all other cases @@ -292,6 +291,7 @@ class Presto(Dialect): STRUCT_DELIMITER = ("(", ")") LIMIT_ONLY_LITERALS = True SUPPORTS_SINGLE_ARG_CONCAT = False + LIKE_PROPERTY_INSIDE_SCHEMA = True PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, @@ -324,12 +324,18 @@ class Presto(Dialect): exp.ArrayContains: rename_func("CONTAINS"), exp.ArraySize: rename_func("CARDINALITY"), exp.ArrayUniqueAgg: rename_func("SET_AGG"), - exp.BitwiseAnd: lambda self, e: f"BITWISE_AND({self.sql(e, 'this')}, {self.sql(e, 'expression')})", - exp.BitwiseLeftShift: lambda self, e: f"BITWISE_ARITHMETIC_SHIFT_LEFT({self.sql(e, 'this')}, {self.sql(e, 'expression')})", + exp.AtTimeZone: rename_func("AT_TIMEZONE"), + exp.BitwiseAnd: lambda self, + e: f"BITWISE_AND({self.sql(e, 'this')}, {self.sql(e, 'expression')})", + exp.BitwiseLeftShift: lambda self, + e: f"BITWISE_ARITHMETIC_SHIFT_LEFT({self.sql(e, 'this')}, {self.sql(e, 'expression')})", exp.BitwiseNot: lambda self, e: f"BITWISE_NOT({self.sql(e, 'this')})", - exp.BitwiseOr: lambda self, e: f"BITWISE_OR({self.sql(e, 'this')}, {self.sql(e, 'expression')})", - exp.BitwiseRightShift: lambda self, e: f"BITWISE_ARITHMETIC_SHIFT_RIGHT({self.sql(e, 'this')}, {self.sql(e, 'expression')})", - exp.BitwiseXor: lambda self, e: f"BITWISE_XOR({self.sql(e, 'this')}, {self.sql(e, 'expression')})", + exp.BitwiseOr: lambda self, + e: f"BITWISE_OR({self.sql(e, 'this')}, {self.sql(e, 'expression')})", + exp.BitwiseRightShift: lambda self, + e: f"BITWISE_ARITHMETIC_SHIFT_RIGHT({self.sql(e, 'this')}, {self.sql(e, 'expression')})", + exp.BitwiseXor: lambda self, + e: f"BITWISE_XOR({self.sql(e, 'this')}, {self.sql(e, 'expression')})", exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.DateAdd: lambda self, e: self.func( @@ -344,7 +350,8 @@ class Presto(Dialect): "DATE_DIFF", exp.Literal.string(e.text("unit") or "DAY"), e.expression, e.this ), exp.DateStrToDate: datestrtodate_sql, - exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", + exp.DateToDi: lambda self, + e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", exp.DateSub: lambda self, e: self.func( "DATE_ADD", exp.Literal.string(e.text("unit") or "DAY"), @@ -352,12 +359,14 @@ class Presto(Dialect): e.this, ), exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), - exp.DiToDate: lambda self, e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", + exp.DiToDate: lambda self, + e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", exp.First: _first_last_sql, - exp.FromTimeZone: lambda self, e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", - exp.GetPath: path_to_jsonpath(), + exp.FirstValue: _first_last_sql, + exp.FromTimeZone: lambda self, + e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", exp.Group: transforms.preprocess([transforms.unalias_group]), exp.GroupConcat: lambda self, e: self.func( "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") @@ -368,6 +377,7 @@ class Presto(Dialect): exp.Initcap: _initcap_sql, exp.ParseJSON: rename_func("JSON_PARSE"), exp.Last: _first_last_sql, + exp.LastValue: _first_last_sql, exp.LastDay: lambda self, e: 
self.func("LAST_DAY_OF_MONTH", e.this), exp.Lateral: _explode_to_unnest_sql, exp.Left: left_to_substring_sql, @@ -394,26 +404,33 @@ class Presto(Dialect): exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", exp.StrToMap: rename_func("SPLIT_TO_MAP"), exp.StrToTime: _str_to_time_sql, - exp.StrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {self.format_time(e)}))", + exp.StrToUnix: lambda self, + e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {self.format_time(e)}))", exp.StructExtract: struct_extract_sql, exp.Table: transforms.preprocess([_unnest_sequence]), exp.Timestamp: no_timestamp_sql, exp.TimestampTrunc: timestamptrunc_sql, exp.TimeStrToDate: timestrtotime_sql, exp.TimeStrToTime: timestrtotime_sql, - exp.TimeStrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {Presto.TIME_FORMAT}))", - exp.TimeToStr: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeStrToUnix: lambda self, + e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {Presto.TIME_FORMAT}))", + exp.TimeToStr: lambda self, + e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("TO_UNIXTIME"), - exp.ToChar: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", + exp.ToChar: lambda self, + e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), - exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", + exp.TsOrDiToDi: lambda self, + e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", exp.TsOrDsAdd: _ts_or_ds_add_sql, exp.TsOrDsDiff: _ts_or_ds_diff_sql, exp.TsOrDsToDate: _ts_or_ds_to_date_sql, exp.Unhex: rename_func("FROM_HEX"), - exp.UnixToStr: lambda self, e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", + exp.UnixToStr: lambda self, + e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", exp.UnixToTime: _unix_to_time_sql, - exp.UnixToTimeStr: lambda self, e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", + exp.UnixToTimeStr: lambda self, + e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", exp.VariancePop: rename_func("VAR_POP"), exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), exp.WithinGroup: transforms.preprocess( diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py index 7194d81..a64c1d4 100644 --- a/sqlglot/dialects/redshift.py +++ b/sqlglot/dialects/redshift.py @@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import ( concat_ws_to_dpipe_sql, date_delta_sql, generatedasidentitycolumnconstraint_sql, + json_extract_segments, no_tablesample_sql, rename_func, ) @@ -20,10 +21,6 @@ if t.TYPE_CHECKING: from sqlglot._typing import E -def _json_sql(self: Redshift.Generator, expression: exp.JSONExtract | exp.JSONExtractScalar) -> str: - return f'{self.sql(expression, "this")}."{expression.expression.name}"' - - def _parse_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: def _parse_delta(args: t.List) -> E: expr = expr_type(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) @@ -62,6 +59,7 @@ class Redshift(Postgres): "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd), "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff), "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff), + "GETDATE": exp.CurrentTimestamp.from_arg_list, "LISTAGG": 
exp.GroupConcat.from_arg_list, "STRTOL": exp.FromBase.from_arg_list, } @@ -69,6 +67,7 @@ class Redshift(Postgres): NO_PAREN_FUNCTION_PARSERS = { **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, "APPROXIMATE": lambda self: self._parse_approximate_count(), + "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True), } def _parse_table( @@ -77,6 +76,7 @@ class Redshift(Postgres): joins: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None, parse_bracket: bool = False, + is_db_reference: bool = False, ) -> t.Optional[exp.Expression]: # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr` unpivot = self._match(TokenType.UNPIVOT) @@ -85,6 +85,7 @@ class Redshift(Postgres): joins=joins, alias_tokens=alias_tokens, parse_bracket=parse_bracket, + is_db_reference=is_db_reference, ) return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table @@ -153,7 +154,6 @@ class Redshift(Postgres): **Postgres.Tokenizer.KEYWORDS, "HLLSKETCH": TokenType.HLLSKETCH, "SUPER": TokenType.SUPER, - "SYSDATE": TokenType.CURRENT_TIMESTAMP, "TOP": TokenType.TOP, "UNLOAD": TokenType.COMMAND, "VARBYTE": TokenType.VARBINARY, @@ -180,31 +180,29 @@ class Redshift(Postgres): exp.DataType.Type.VARBINARY: "VARBYTE", } - PROPERTIES_LOCATION = { - **Postgres.Generator.PROPERTIES_LOCATION, - exp.LikeProperty: exp.Properties.Location.POST_WITH, - } - TRANSFORMS = { **Postgres.Generator.TRANSFORMS, exp.Concat: concat_to_dpipe_sql, exp.ConcatWs: concat_ws_to_dpipe_sql, - exp.ApproxDistinct: lambda self, e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", - exp.CurrentTimestamp: lambda self, e: "SYSDATE", + exp.ApproxDistinct: lambda self, + e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", + exp.CurrentTimestamp: lambda self, e: ( + "SYSDATE" if e.args.get("transaction") else "GETDATE()" + ), exp.DateAdd: date_delta_sql("DATEADD"), exp.DateDiff: date_delta_sql("DATEDIFF"), exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})", exp.DistStyleProperty: lambda self, e: self.naked_property(e), exp.FromBase: rename_func("STRTOL"), exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, - exp.JSONExtract: _json_sql, - exp.JSONExtractScalar: _json_sql, + exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), exp.GroupConcat: rename_func("LISTAGG"), exp.ParseJSON: rename_func("JSON_PARSE"), exp.Select: transforms.preprocess( [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins] ), - exp.SortKeyProperty: lambda self, e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", + exp.SortKeyProperty: lambda self, + e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", exp.TableSample: no_tablesample_sql, exp.TsOrDsAdd: date_delta_sql("DATEADD"), exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), @@ -228,6 +226,13 @@ class Redshift(Postgres): """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" return self.properties(properties, prefix=" ", suffix="") + def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: + if expression.is_type(exp.DataType.Type.JSON): + # Redshift doesn't support a JSON type, so casting to it is treated as a noop + return self.sql(expression, "this") + + return super().cast_sql(expression, safe_prefix=safe_prefix) + def datatype_sql(self, expression: exp.DataType) -> str: """ Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people 
more generally mean
diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py
index 281167d..37f9761 100644
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@@ -21,19 +21,13 @@ from sqlglot.dialects.dialect import (
     var_map_sql,
 )
 from sqlglot.expressions import Literal
-from sqlglot.helper import seq_get
+from sqlglot.helper import is_int, seq_get
 from sqlglot.tokens import TokenType
 
 if t.TYPE_CHECKING:
     from sqlglot._typing import E
 
 
-def _check_int(s: str) -> bool:
-    if s[0] in ("-", "+"):
-        return s[1:].isdigit()
-    return s.isdigit()
-
-
 # from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
 def _parse_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
     if len(args) == 2:
@@ -53,7 +47,7 @@ def _parse_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime,
         return exp.TimeStrToTime.from_arg_list(args)
 
     if first_arg.is_string:
-        if _check_int(first_arg.this):
+        if is_int(first_arg.this):
             # case: <integer>
             return exp.UnixToTime.from_arg_list(args)
 
@@ -241,7 +235,6 @@ DATE_PART_MAPPING = {
     "NSECOND": "NANOSECOND",
     "NSECONDS": "NANOSECOND",
     "NANOSECS": "NANOSECOND",
-    "NSECONDS": "NANOSECOND",
     "EPOCH": "EPOCH_SECOND",
     "EPOCH_SECONDS": "EPOCH_SECOND",
     "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
@@ -291,7 +284,9 @@ def _parse_colon_get_path(
         path = exp.Literal.string(path.sql(dialect="snowflake"))
 
         # The extraction operator : is left-associative
-        this = self.expression(exp.GetPath, this=this, expression=path)
+        this = self.expression(
+            exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path)
+        )
 
         if target_type:
             this = exp.cast(this, target_type)
@@ -411,6 +406,9 @@ class Snowflake(Dialect):
             "DATEDIFF": _parse_datediff,
             "DIV0": _div0_to_if,
             "FLATTEN": exp.Explode.from_arg_list,
+            "GET_PATH": lambda args, dialect: exp.JSONExtract(
+                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
+            ),
             "IFF": exp.If.from_arg_list,
             "LAST_DAY": lambda args: exp.LastDay(
                 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
@@ -474,6 +472,8 @@ class Snowflake(Dialect):
             "TERSE SCHEMAS": _show_parser("SCHEMAS"),
             "OBJECTS": _show_parser("OBJECTS"),
             "TERSE OBJECTS": _show_parser("OBJECTS"),
+            "TABLES": _show_parser("TABLES"),
+            "TERSE TABLES": _show_parser("TABLES"),
             "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
             "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
             "COLUMNS": _show_parser("COLUMNS"),
@@ -534,7 +534,9 @@ class Snowflake(Dialect):
 
             return table
 
-        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
+        def _parse_table_parts(
+            self, schema: bool = False, is_db_reference: bool = False
+        ) -> exp.Table:
             # https://docs.snowflake.com/en/user-guide/querying-stage
             if self._match(TokenType.STRING, advance=False):
                 table = self._parse_string()
@@ -550,7 +552,9 @@ class Snowflake(Dialect):
                 self._match(TokenType.L_PAREN)
                 while self._curr and not self._match(TokenType.R_PAREN):
                     if self._match_text_seq("FILE_FORMAT", "=>"):
-                        file_format = self._parse_string() or super()._parse_table_parts()
+                        file_format = self._parse_string() or super()._parse_table_parts(
+                            is_db_reference=is_db_reference
+                        )
                     elif self._match_text_seq("PATTERN", "=>"):
                         pattern = self._parse_string()
                     else:
@@ -560,7 +564,7 @@ class Snowflake(Dialect):
 
                 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
             else:
-                table = super()._parse_table_parts(schema=schema)
+                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
 
             return self._parse_at_before(table)
 
@@ -587,6 +591,8 @@
             # which is syntactically valid but has no effect on the output
             terse = self._tokens[self._index - 2].text.upper() == "TERSE"
 
+            history = self._match_text_seq("HISTORY")
+
             like = self._parse_string() if self._match(TokenType.LIKE) else None
 
             if self._match(TokenType.IN):
@@ -597,7 +603,7 @@
                 if self._curr:
                     scope = self._parse_table_parts()
             elif self._curr:
-                scope_kind = "SCHEMA" if this == "OBJECTS" else "TABLE"
+                scope_kind = "SCHEMA" if this in ("OBJECTS", "TABLES") else "TABLE"
                 scope = self._parse_table_parts()
 
             return self.expression(
@@ -605,6 +611,7 @@
                 **{
                     "terse": terse,
                     "this": this,
+                    "history": history,
                     "like": like,
                     "scope": scope,
                     "scope_kind": scope_kind,
@@ -715,8 +722,10 @@
             ),
             exp.GroupConcat: rename_func("LISTAGG"),
             exp.If: if_sql(name="IFF", false_value="NULL"),
-            exp.JSONExtract: lambda self, e: f"{self.sql(e, 'this')}[{self.sql(e, 'expression')}]",
+            exp.JSONExtract: rename_func("GET_PATH"),
+            exp.JSONExtractScalar: rename_func("JSON_EXTRACT_PATH_TEXT"),
             exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
+            exp.JSONPathRoot: lambda *_: "",
             exp.LogicalAnd: rename_func("BOOLAND_AGG"),
             exp.LogicalOr: rename_func("BOOLOR_AGG"),
             exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
@@ -745,7 +754,8 @@
             exp.StrPosition: lambda self, e: self.func(
                 "POSITION", e.args.get("substr"), e.this, e.args.get("position")
             ),
-            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
+            exp.StrToTime: lambda self,
+            e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
             exp.Struct: lambda self, e: self.func(
                 "OBJECT_CONSTRUCT",
                 *(arg for expression in e.expressions for arg in expression.flatten()),
@@ -771,6 +781,12 @@
             exp.Xor: rename_func("BOOLXOR"),
         }
 
+        SUPPORTED_JSON_PATH_PARTS = {
+            exp.JSONPathKey,
+            exp.JSONPathRoot,
+            exp.JSONPathSubscript,
+        }
+
         TYPE_MAPPING = {
             **generator.Generator.TYPE_MAPPING,
             exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
@@ -841,6 +857,7 @@
 
         def show_sql(self, expression: exp.Show) -> str:
             terse = "TERSE " if expression.args.get("terse") else ""
+            history = " HISTORY" if expression.args.get("history") else ""
             like = self.sql(expression, "like")
             like = f" LIKE {like}" if like else ""
 
@@ -861,9 +878,7 @@
             if from_:
                 from_ = f" FROM {from_}"
 
-            return (
-                f"SHOW {terse}{expression.name}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
-            )
+            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
 
         def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
             # Other dialects don't support all of the following parameters, so we need to
diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py
index 624f76e..4c5c131 100644
--- a/sqlglot/dialects/spark.py
+++ b/sqlglot/dialects/spark.py
@@ -4,6 +4,7 @@ import typing as t
 
 from sqlglot import exp
 from sqlglot.dialects.dialect import rename_func
+from sqlglot.dialects.hive import _parse_ignore_nulls
 from sqlglot.dialects.spark2 import Spark2
 from sqlglot.helper import seq_get
 
@@ -45,9 +46,7 @@ class Spark(Spark2):
     class Parser(Spark2.Parser):
         FUNCTIONS = {
             **Spark2.Parser.FUNCTIONS,
-            "ANY_VALUE": lambda args: exp.AnyValue(
-                this=seq_get(args, 0), ignore_nulls=seq_get(args, 1)
-            ),
+            "ANY_VALUE": _parse_ignore_nulls(exp.AnyValue),
             "DATEDIFF": _parse_datediff,
         }
 
diff --git a/sqlglot/dialects/spark2.py b/sqlglot/dialects/spark2.py
index e4bb30e..9378d99 100644
--- a/sqlglot/dialects/spark2.py
+++ b/sqlglot/dialects/spark2.py
@@ -187,8 +187,10 @@ class Spark2(Hive):
         TRANSFORMS = {
             **Hive.Generator.TRANSFORMS,
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
-            exp.ArraySum: lambda self, e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
-            exp.AtTimeZone: lambda self, e: f"FROM_UTC_TIMESTAMP({self.sql(e, 'this')}, {self.sql(e, 'zone')})",
+            exp.ArraySum: lambda self,
+            e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
+            exp.AtTimeZone: lambda self,
+            e: f"FROM_UTC_TIMESTAMP({self.sql(e, 'this')}, {self.sql(e, 'zone')})",
             exp.BitwiseLeftShift: rename_func("SHIFTLEFT"),
             exp.BitwiseRightShift: rename_func("SHIFTRIGHT"),
             exp.DateFromParts: rename_func("MAKE_DATE"),
@@ -198,7 +200,8 @@ class Spark2(Hive):
             exp.DayOfYear: rename_func("DAYOFYEAR"),
             exp.FileFormatProperty: lambda self, e: f"USING {e.name.upper()}",
             exp.From: transforms.preprocess([_unalias_pivot]),
-            exp.FromTimeZone: lambda self, e: f"TO_UTC_TIMESTAMP({self.sql(e, 'this')}, {self.sql(e, 'zone')})",
+            exp.FromTimeZone: lambda self,
+            e: f"TO_UTC_TIMESTAMP({self.sql(e, 'this')}, {self.sql(e, 'zone')})",
             exp.LogicalAnd: rename_func("BOOL_AND"),
             exp.LogicalOr: rename_func("BOOL_OR"),
             exp.Map: _map_sql,
@@ -212,7 +215,8 @@ class Spark2(Hive):
                 e.args.get("position"),
             ),
             exp.StrToDate: _str_to_date,
-            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
+            exp.StrToTime: lambda self,
+            e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
             exp.TimestampTrunc: lambda self, e: self.func(
                 "DATE_TRUNC", exp.Literal.string(e.text("unit")), e.this
             ),
diff --git a/sqlglot/dialects/sqlite.py b/sqlglot/dialects/sqlite.py
index 244a96e..b292c81 100644
--- a/sqlglot/dialects/sqlite.py
+++ b/sqlglot/dialects/sqlite.py
@@ -7,7 +7,6 @@ from sqlglot.dialects.dialect import (
     Dialect,
     NormalizationStrategy,
     any_value_to_max_sql,
-    arrow_json_extract_scalar_sql,
     arrow_json_extract_sql,
     concat_to_dpipe_sql,
     count_if_to_sum,
@@ -28,6 +27,12 @@ def _date_add_sql(self: SQLite.Generator, expression: exp.DateAdd) -> str:
     return self.func("DATE", expression.this, modifier)
 
 
+def _json_extract_sql(self: SQLite.Generator, expression: exp.JSONExtract) -> str:
+    if expression.expressions:
+        return self.function_fallback_sql(expression)
+    return arrow_json_extract_sql(self, expression)
+
+
 def _transform_create(expression: exp.Expression) -> exp.Expression:
     """Move primary key to a column and enforce auto_increment on primary keys."""
     schema = expression.this
@@ -85,6 +90,14 @@ class SQLite(Dialect):
         TABLE_HINTS = False
         QUERY_HINTS = False
         NVL2_SUPPORTED = False
+        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
+        SUPPORTS_CREATE_TABLE_LIKE = False
+
+        SUPPORTED_JSON_PATH_PARTS = {
+            exp.JSONPathKey,
+            exp.JSONPathRoot,
+            exp.JSONPathSubscript,
+        }
 
         TYPE_MAPPING = {
             **generator.Generator.TYPE_MAPPING,
@@ -120,10 +133,8 @@ class SQLite(Dialect):
             exp.DateAdd: _date_add_sql,
             exp.DateStrToDate: lambda self, e: self.sql(e, "this"),
             exp.ILike: no_ilike_sql,
-            exp.JSONExtract: arrow_json_extract_sql,
-            exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
-            exp.JSONBExtract: arrow_json_extract_sql,
-            exp.JSONBExtractScalar: arrow_json_extract_scalar_sql,
+            exp.JSONExtract: _json_extract_sql,
+            exp.JSONExtractScalar: arrow_json_extract_sql,
             exp.Levenshtein: rename_func("EDITDIST3"),
             exp.LogicalOr: rename_func("MAX"),
             exp.LogicalAnd: rename_func("MIN"),
@@ -141,11 +152,18 @@ class SQLite(Dialect):
             exp.TryCast: no_trycast_sql,
         }
 
+        # SQLite doesn't generally support CREATE TABLE .. properties
+        # https://www.sqlite.org/lang_createtable.html
         PROPERTIES_LOCATION = {
-            k: exp.Properties.Location.UNSUPPORTED
-            for k, v in generator.Generator.PROPERTIES_LOCATION.items()
+            prop: exp.Properties.Location.UNSUPPORTED
+            for prop in generator.Generator.PROPERTIES_LOCATION
         }
 
+        # There are a few exceptions (e.g. temporary tables) which are supported or
+        # can be transpiled to SQLite, so we explicitly override them accordingly
+        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
+        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
+
         LIMIT_FETCH = "LIMIT"
 
         def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
diff --git a/sqlglot/dialects/starrocks.py b/sqlglot/dialects/starrocks.py
index 2dba1c1..8838f34 100644
--- a/sqlglot/dialects/starrocks.py
+++ b/sqlglot/dialects/starrocks.py
@@ -44,12 +44,14 @@ class StarRocks(MySQL):
             exp.JSONExtractScalar: arrow_json_extract_sql,
             exp.JSONExtract: arrow_json_extract_sql,
             exp.RegexpLike: rename_func("REGEXP"),
-            exp.StrToUnix: lambda self, e: f"UNIX_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
+            exp.StrToUnix: lambda self,
+            e: f"UNIX_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
             exp.TimestampTrunc: lambda self, e: self.func(
                 "DATE_TRUNC", exp.Literal.string(e.text("unit")), e.this
             ),
             exp.TimeStrToDate: rename_func("TO_DATE"),
-            exp.UnixToStr: lambda self, e: f"FROM_UNIXTIME({self.sql(e, 'this')}, {self.format_time(e)})",
+            exp.UnixToStr: lambda self,
+            e: f"FROM_UNIXTIME({self.sql(e, 'this')}, {self.format_time(e)})",
             exp.UnixToTime: rename_func("FROM_UNIXTIME"),
         }
 
diff --git a/sqlglot/dialects/teradata.py b/sqlglot/dialects/teradata.py
index 6dbad15..7f9a11a 100644
--- a/sqlglot/dialects/teradata.py
+++ b/sqlglot/dialects/teradata.py
@@ -200,7 +200,8 @@ class Teradata(Dialect):
             exp.Select: transforms.preprocess(
                 [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
             ),
-            exp.StrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE FORMAT {self.format_time(e)})",
+            exp.StrToDate: lambda self,
+            e: f"CAST({self.sql(e, 'this')} AS DATE FORMAT {self.format_time(e)})",
             exp.ToChar: lambda self, e: self.function_fallback_sql(e),
             exp.Use: lambda self, e: f"DATABASE {self.sql(e, 'this')}",
         }
diff --git a/sqlglot/dialects/trino.py b/sqlglot/dialects/trino.py
index eddb70a..1bbed67 100644
--- a/sqlglot/dialects/trino.py
+++ b/sqlglot/dialects/trino.py
@@ -11,9 +11,16 @@ class Trino(Presto):
     class Generator(Presto.Generator):
         TRANSFORMS = {
             **Presto.Generator.TRANSFORMS,
-            exp.ArraySum: lambda self, e: f"REDUCE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
+            exp.ArraySum: lambda self,
+            e: f"REDUCE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
             exp.Merge: merge_without_target_sql,
         }
 
+        SUPPORTED_JSON_PATH_PARTS = {
+            exp.JSONPathKey,
+            exp.JSONPathRoot,
+            exp.JSONPathSubscript,
+        }
+
     class Tokenizer(Presto.Tokenizer):
         HEX_STRINGS = [("X'", "'")]
diff --git a/sqlglot/dialects/tsql.py b/sqlglot/dialects/tsql.py
index a5e04da..70ea97e 100644
--- a/sqlglot/dialects/tsql.py
+++ b/sqlglot/dialects/tsql.py
@@ -14,7 +14,6 @@ from sqlglot.dialects.dialect import (
     max_or_greatest,
     min_or_least,
     parse_date_delta,
-    path_to_jsonpath,
     rename_func,
     timestrtotime_sql,
     trim_sql,
@@ -266,13 +265,32 @@ def _parse_timefromparts(args: t.List) -> exp.TimeFromParts:
     )
 
 
-def _parse_len(args: t.List) -> exp.Length:
-    this = seq_get(args, 0)
+def _parse_as_text(
+    klass: t.Type[exp.Expression],
+) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
+    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
+        this = seq_get(args, 0)
+
+        if this and not this.is_string:
+            this = exp.cast(this, exp.DataType.Type.TEXT)
+
+        expression = seq_get(args, 1)
+        kwargs = {"this": this}
+
+        if expression:
+            kwargs["expression"] = expression
 
-    if this and not this.is_string:
-        this = exp.cast(this, exp.DataType.Type.TEXT)
+        return klass(**kwargs)
 
-    return exp.Length(this=this)
+    return _parse
+
+
+def _json_extract_sql(
+    self: TSQL.Generator, expression: exp.JSONExtract | exp.JSONExtractScalar
+) -> str:
+    json_query = rename_func("JSON_QUERY")(self, expression)
+    json_value = rename_func("JSON_VALUE")(self, expression)
+    return self.func("ISNULL", json_query, json_value)
 
 
 class TSQL(Dialect):
@@ -441,8 +459,11 @@ class TSQL(Dialect):
             "HASHBYTES": _parse_hashbytes,
             "IIF": exp.If.from_arg_list,
             "ISNULL": exp.Coalesce.from_arg_list,
-            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
-            "LEN": _parse_len,
+            "JSON_QUERY": parser.parse_extract_json_with_path(exp.JSONExtract),
+            "JSON_VALUE": parser.parse_extract_json_with_path(exp.JSONExtractScalar),
+            "LEN": _parse_as_text(exp.Length),
+            "LEFT": _parse_as_text(exp.Left),
+            "RIGHT": _parse_as_text(exp.Right),
             "REPLICATE": exp.Repeat.from_arg_list,
             "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
             "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
@@ -677,6 +698,7 @@ class TSQL(Dialect):
         SUPPORTS_SINGLE_ARG_CONCAT = False
         TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
         SUPPORTS_SELECT_INTO = True
+        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
 
         EXPRESSIONS_WITHOUT_NESTED_CTES = {
             exp.Delete,
@@ -688,6 +710,12 @@ class TSQL(Dialect):
             exp.Update,
         }
 
+        SUPPORTED_JSON_PATH_PARTS = {
+            exp.JSONPathKey,
+            exp.JSONPathRoot,
+            exp.JSONPathSubscript,
+        }
+
         TYPE_MAPPING = {
             **generator.Generator.TYPE_MAPPING,
             exp.DataType.Type.BOOLEAN: "BIT",
@@ -712,9 +740,10 @@ class TSQL(Dialect):
             exp.CurrentTimestamp: rename_func("GETDATE"),
             exp.Extract: rename_func("DATEPART"),
             exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
-            exp.GetPath: path_to_jsonpath("JSON_VALUE"),
             exp.GroupConcat: _string_agg_sql,
             exp.If: rename_func("IIF"),
+            exp.JSONExtract: _json_extract_sql,
+            exp.JSONExtractScalar: _json_extract_sql,
             exp.LastDay: lambda self, e: self.func("EOMONTH", e.this),
             exp.Max: max_or_greatest,
             exp.MD5: lambda self, e: self.func("HASHBYTES", exp.Literal.string("MD5"), e.this),
@@ -831,15 +860,21 @@ class TSQL(Dialect):
             exists = expression.args.pop("exists", None)
             sql = super().create_sql(expression)
 
+            like_property = expression.find(exp.LikeProperty)
+            if like_property:
+                ctas_expression = like_property.this
+            else:
+                ctas_expression = expression.expression
+
             table = expression.find(exp.Table)
 
             # Convert CTAS statement to SELECT .. INTO ..
-            if kind == "TABLE" and expression.expression:
-                ctas_with = expression.expression.args.get("with")
+            if kind == "TABLE" and ctas_expression:
+                ctas_with = ctas_expression.args.get("with")
                 if ctas_with:
                     ctas_with = ctas_with.pop()
 
-                subquery = expression.expression
+                subquery = ctas_expression
                 if isinstance(subquery, exp.Subqueryable):
                     subquery = subquery.subquery()
 
@@ -847,6 +882,9 @@ class TSQL(Dialect):
                 select_into.set("into", exp.Into(this=table))
                 select_into.set("with", ctas_with)
 
+                if like_property:
+                    select_into.limit(0, copy=False)
+
                 sql = self.sql(select_into)
 
             if exists:
@@ -937,9 +975,19 @@ class TSQL(Dialect):
             return f"CONSTRAINT {this} {expressions}"
 
         def length_sql(self, expression: exp.Length) -> str:
+            return self._uncast_text(expression, "LEN")
+
+        def right_sql(self, expression: exp.Right) -> str:
+            return self._uncast_text(expression, "RIGHT")
+
+        def left_sql(self, expression: exp.Left) -> str:
+            return self._uncast_text(expression, "LEFT")
+
+        def _uncast_text(self, expression: exp.Expression, name: str) -> str:
             this = expression.this
             if isinstance(this, exp.Cast) and this.is_type(exp.DataType.Type.TEXT):
                 this_sql = self.sql(this, "this")
             else:
                 this_sql = self.sql(this)
-            return self.func("LEN", this_sql)
+            expression_sql = self.sql(expression, "expression")
+            return self.func(name, this_sql, expression_sql if expression_sql else None)

-- 
cgit v1.2.3