diff options
Diffstat (limited to 'sqlglot/dialects')
-rw-r--r-- | sqlglot/dialects/bigquery.py | 20 | ||||
-rw-r--r-- | sqlglot/dialects/clickhouse.py | 10 | ||||
-rw-r--r-- | sqlglot/dialects/dialect.py | 14 | ||||
-rw-r--r-- | sqlglot/dialects/drill.py | 14 | ||||
-rw-r--r-- | sqlglot/dialects/duckdb.py | 18 | ||||
-rw-r--r-- | sqlglot/dialects/hive.py | 18 | ||||
-rw-r--r-- | sqlglot/dialects/mysql.py | 14 | ||||
-rw-r--r-- | sqlglot/dialects/oracle.py | 6 | ||||
-rw-r--r-- | sqlglot/dialects/postgres.py | 72 | ||||
-rw-r--r-- | sqlglot/dialects/presto.py | 15 | ||||
-rw-r--r-- | sqlglot/dialects/redshift.py | 1 | ||||
-rw-r--r-- | sqlglot/dialects/snowflake.py | 56 | ||||
-rw-r--r-- | sqlglot/dialects/spark.py | 12 | ||||
-rw-r--r-- | sqlglot/dialects/sqlite.py | 6 | ||||
-rw-r--r-- | sqlglot/dialects/starrocks.py | 2 | ||||
-rw-r--r-- | sqlglot/dialects/tableau.py | 2 | ||||
-rw-r--r-- | sqlglot/dialects/tsql.py | 10 |
17 files changed, 199 insertions, 91 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index 6be68ac..d10cc54 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -1,11 +1,15 @@ +"""Supports BigQuery Standard SQL.""" + from __future__ import annotations from sqlglot import exp, generator, parser, tokens from sqlglot.dialects.dialect import ( Dialect, + datestrtodate_sql, inline_array_sql, no_ilike_sql, rename_func, + timestrtotime_sql, ) from sqlglot.helper import seq_get from sqlglot.tokens import TokenType @@ -120,13 +124,12 @@ class BigQuery(Dialect): "NOT DETERMINISTIC": TokenType.VOLATILE, "QUALIFY": TokenType.QUALIFY, "UNKNOWN": TokenType.NULL, - "WINDOW": TokenType.WINDOW, } KEYWORDS.pop("DIV") class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "DATE_TRUNC": _date_trunc, "DATE_ADD": _date_add(exp.DateAdd), "DATETIME_ADD": _date_add(exp.DatetimeAdd), @@ -144,31 +147,33 @@ class BigQuery(Dialect): } FUNCTION_PARSERS = { - **parser.Parser.FUNCTION_PARSERS, + **parser.Parser.FUNCTION_PARSERS, # type: ignore "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), } FUNCTION_PARSERS.pop("TRIM") NO_PAREN_FUNCTIONS = { - **parser.Parser.NO_PAREN_FUNCTIONS, + **parser.Parser.NO_PAREN_FUNCTIONS, # type: ignore TokenType.CURRENT_DATETIME: exp.CurrentDatetime, TokenType.CURRENT_TIME: exp.CurrentTime, } NESTED_TYPE_TOKENS = { - *parser.Parser.NESTED_TYPE_TOKENS, + *parser.Parser.NESTED_TYPE_TOKENS, # type: ignore TokenType.TABLE, } class Generator(generator.Generator): TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.ArraySize: rename_func("ARRAY_LENGTH"), exp.DateAdd: _date_add_sql("DATE", "ADD"), exp.DateSub: _date_add_sql("DATE", "SUB"), exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"), exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"), exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", + exp.DateStrToDate: datestrtodate_sql, + exp.GroupConcat: rename_func("STRING_AGG"), exp.ILike: no_ilike_sql, exp.IntDiv: rename_func("DIV"), exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})", @@ -176,6 +181,7 @@ class BigQuery(Dialect): exp.TimeSub: _date_add_sql("TIME", "SUB"), exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"), exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"), + exp.TimeStrToTime: timestrtotime_sql, exp.VariancePop: rename_func("VAR_POP"), exp.Values: _derived_table_values_to_unnest, exp.ReturnsProperty: _returnsproperty_sql, @@ -188,7 +194,7 @@ class BigQuery(Dialect): } TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.TINYINT: "INT64", exp.DataType.Type.SMALLINT: "INT64", exp.DataType.Type.INT: "INT64", diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py index cbed72e..7136340 100644 --- a/sqlglot/dialects/clickhouse.py +++ b/sqlglot/dialects/clickhouse.py @@ -35,13 +35,13 @@ class ClickHouse(Dialect): class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "MAP": parse_var_map, } - JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF} + JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF} # type: ignore - TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY} + TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY} # type: ignore def _parse_table(self, schema=False): this = super()._parse_table(schema) @@ -55,7 +55,7 @@ class ClickHouse(Dialect): STRUCT_DELIMITER = ("(", ")") TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.NULLABLE: "Nullable", exp.DataType.Type.DATETIME: "DateTime64", exp.DataType.Type.MAP: "Map", @@ -70,7 +70,7 @@ class ClickHouse(Dialect): } TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.Array: inline_array_sql, exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})", exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index c87f8d8..e788852 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -198,7 +198,7 @@ class Dialect(metaclass=_Dialect): def rename_func(name): def _rename(self, expression): args = flatten(expression.args.values()) - return f"{name}({self.format_args(*args)})" + return f"{self.normalize_func(name)}({self.format_args(*args)})" return _rename @@ -217,11 +217,11 @@ def if_sql(self, expression): def arrow_json_extract_sql(self, expression): - return f"{self.sql(expression, 'this')}->{self.sql(expression, 'path')}" + return self.binary(expression, "->") def arrow_json_extract_scalar_sql(self, expression): - return f"{self.sql(expression, 'this')}->>{self.sql(expression, 'path')}" + return self.binary(expression, "->>") def inline_array_sql(self, expression): @@ -373,3 +373,11 @@ def strposition_to_local_sql(self, expression): expression.args.get("substr"), expression.this, expression.args.get("position") ) return f"LOCATE({args})" + + +def timestrtotime_sql(self, expression: exp.TimeStrToTime) -> str: + return f"CAST({self.sql(expression, 'this')} AS TIMESTAMP)" + + +def datestrtodate_sql(self, expression: exp.DateStrToDate) -> str: + return f"CAST({self.sql(expression, 'this')} AS DATE)" diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py index 358eced..4e3c0e1 100644 --- a/sqlglot/dialects/drill.py +++ b/sqlglot/dialects/drill.py @@ -6,13 +6,14 @@ from sqlglot import exp, generator, parser, tokens from sqlglot.dialects.dialect import ( Dialect, create_with_partitions_sql, + datestrtodate_sql, format_time_lambda, no_pivot_sql, no_trycast_sql, rename_func, str_position_sql, + timestrtotime_sql, ) -from sqlglot.dialects.postgres import _lateral_sql def _to_timestamp(args): @@ -117,14 +118,14 @@ class Drill(Dialect): STRICT_CAST = False FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "TO_TIMESTAMP": exp.TimeStrToTime.from_arg_list, "TO_CHAR": format_time_lambda(exp.TimeToStr, "drill"), } class Generator(generator.Generator): TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.INT: "INTEGER", exp.DataType.Type.SMALLINT: "INTEGER", exp.DataType.Type.TINYINT: "INTEGER", @@ -139,14 +140,13 @@ class Drill(Dialect): ROOT_PROPERTIES = {exp.PartitionedByProperty} TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", - exp.Lateral: _lateral_sql, exp.ArrayContains: rename_func("REPEATED_CONTAINS"), exp.ArraySize: rename_func("REPEATED_COUNT"), exp.Create: create_with_partitions_sql, exp.DateAdd: _date_add_sql("ADD"), - exp.DateStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", + exp.DateStrToDate: datestrtodate_sql, exp.DateSub: _date_add_sql("SUB"), exp.DateToDi: lambda self, e: f"CAST(TO_DATE({self.sql(e, 'this')}, {Drill.dateint_format}) AS INT)", exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS VARCHAR), {Drill.dateint_format})", @@ -160,7 +160,7 @@ class Drill(Dialect): exp.StrToDate: _str_to_date, exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", - exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)", + exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index f1da72b..81941f7 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -6,6 +6,7 @@ from sqlglot.dialects.dialect import ( approx_count_distinct_sql, arrow_json_extract_scalar_sql, arrow_json_extract_sql, + datestrtodate_sql, format_time_lambda, no_pivot_sql, no_properties_sql, @@ -13,6 +14,7 @@ from sqlglot.dialects.dialect import ( no_tablesample_sql, rename_func, str_position_sql, + timestrtotime_sql, ) from sqlglot.helper import seq_get from sqlglot.tokens import TokenType @@ -83,11 +85,12 @@ class DuckDB(Dialect): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, ":=": TokenType.EQ, + "CHARACTER VARYING": TokenType.VARCHAR, } class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list, "ARRAY_LENGTH": exp.ArraySize.from_arg_list, "ARRAY_SORT": exp.SortArray.from_arg_list, @@ -119,16 +122,18 @@ class DuckDB(Dialect): STRUCT_DELIMITER = ("(", ")") TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.ApproxDistinct: approx_count_distinct_sql, - exp.Array: rename_func("LIST_VALUE"), + exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})" + if isinstance(seq_get(e.expressions, 0), exp.Select) + else rename_func("LIST_VALUE")(self, e), exp.ArraySize: rename_func("ARRAY_LENGTH"), exp.ArraySort: _array_sort_sql, exp.ArraySum: rename_func("LIST_SUM"), exp.DataType: _datatype_sql, exp.DateAdd: _date_add, exp.DateDiff: lambda self, e: f"""DATE_DIFF({self.format_args(e.args.get("unit") or "'day'", e.expression, e.this)})""", - exp.DateStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", + exp.DateStrToDate: datestrtodate_sql, exp.DateToDi: lambda self, e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.dateint_format}) AS INT)", exp.DiToDate: lambda self, e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.dateint_format}) AS DATE)", exp.Explode: rename_func("UNNEST"), @@ -136,6 +141,7 @@ class DuckDB(Dialect): exp.JSONExtractScalar: arrow_json_extract_scalar_sql, exp.JSONBExtract: arrow_json_extract_sql, exp.JSONBExtractScalar: arrow_json_extract_scalar_sql, + exp.LogicalOr: rename_func("BOOL_OR"), exp.Pivot: no_pivot_sql, exp.Properties: no_properties_sql, exp.RegexpLike: rename_func("REGEXP_MATCHES"), @@ -150,7 +156,7 @@ class DuckDB(Dialect): exp.Struct: _struct_pack_sql, exp.TableSample: no_tablesample_sql, exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", - exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)", + exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: lambda self, e: f"EPOCH(CAST({self.sql(e, 'this')} AS TIMESTAMP))", exp.TimeToStr: lambda self, e: f"STRFTIME({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("EPOCH"), @@ -163,7 +169,7 @@ class DuckDB(Dialect): } TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.VARCHAR: "TEXT", exp.DataType.Type.NVARCHAR: "TEXT", } diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 8d6e1ae..088555c 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -15,6 +15,7 @@ from sqlglot.dialects.dialect import ( rename_func, strposition_to_local_sql, struct_extract_sql, + timestrtotime_sql, var_map_sql, ) from sqlglot.helper import seq_get @@ -197,7 +198,7 @@ class Hive(Dialect): STRICT_CAST = False FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "APPROX_COUNT_DISTINCT": exp.ApproxDistinct.from_arg_list, "COLLECT_LIST": exp.ArrayAgg.from_arg_list, "DATE_ADD": lambda args: exp.TsOrDsAdd( @@ -217,7 +218,12 @@ class Hive(Dialect): ), unit=exp.Literal.string("DAY"), ), - "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "hive"), + "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")( + [ + exp.TimeStrToTime(this=seq_get(args, 0)), + seq_get(args, 1), + ] + ), "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, @@ -240,7 +246,7 @@ class Hive(Dialect): } PROPERTY_PARSERS = { - **parser.Parser.PROPERTY_PARSERS, + **parser.Parser.PROPERTY_PARSERS, # type: ignore TokenType.SERDE_PROPERTIES: lambda self: exp.SerdeProperties( expressions=self._parse_wrapped_csv(self._parse_property) ), @@ -248,14 +254,14 @@ class Hive(Dialect): class Generator(generator.Generator): TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.TEXT: "STRING", exp.DataType.Type.DATETIME: "TIMESTAMP", exp.DataType.Type.VARBINARY: "BINARY", } TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore **transforms.UNALIAS_GROUP, # type: ignore exp.Property: _property_sql, exp.ApproxDistinct: approx_count_distinct_sql, @@ -294,7 +300,7 @@ class Hive(Dialect): exp.StructExtract: struct_extract_sql, exp.TableFormatProperty: lambda self, e: f"USING {self.sql(e, 'this')}", exp.TimeStrToDate: rename_func("TO_DATE"), - exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)", + exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimeToStr: _time_to_str, exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), diff --git a/sqlglot/dialects/mysql.py b/sqlglot/dialects/mysql.py index 7627b6e..0fd7992 100644 --- a/sqlglot/dialects/mysql.py +++ b/sqlglot/dialects/mysql.py @@ -161,8 +161,6 @@ class MySQL(Dialect): "_UCS2": TokenType.INTRODUCER, "_UJIS": TokenType.INTRODUCER, # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html - "N": TokenType.INTRODUCER, - "n": TokenType.INTRODUCER, "_UTF8": TokenType.INTRODUCER, "_UTF16": TokenType.INTRODUCER, "_UTF16LE": TokenType.INTRODUCER, @@ -175,10 +173,10 @@ class MySQL(Dialect): COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SET, TokenType.SHOW} class Parser(parser.Parser): - FUNC_TOKENS = {*parser.Parser.FUNC_TOKENS, TokenType.SCHEMA} + FUNC_TOKENS = {*parser.Parser.FUNC_TOKENS, TokenType.SCHEMA} # type: ignore FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "DATE_ADD": _date_add(exp.DateAdd), "DATE_SUB": _date_add(exp.DateSub), "STR_TO_DATE": _str_to_date, @@ -190,7 +188,7 @@ class MySQL(Dialect): } FUNCTION_PARSERS = { - **parser.Parser.FUNCTION_PARSERS, + **parser.Parser.FUNCTION_PARSERS, # type: ignore "GROUP_CONCAT": lambda self: self.expression( exp.GroupConcat, this=self._parse_lambda(), @@ -199,12 +197,12 @@ class MySQL(Dialect): } PROPERTY_PARSERS = { - **parser.Parser.PROPERTY_PARSERS, + **parser.Parser.PROPERTY_PARSERS, # type: ignore TokenType.ENGINE: lambda self: self._parse_property_assignment(exp.EngineProperty), } STATEMENT_PARSERS = { - **parser.Parser.STATEMENT_PARSERS, + **parser.Parser.STATEMENT_PARSERS, # type: ignore TokenType.SHOW: lambda self: self._parse_show(), TokenType.SET: lambda self: self._parse_set(), } @@ -429,7 +427,7 @@ class MySQL(Dialect): NULL_ORDERING_SUPPORTED = False TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.CurrentDate: no_paren_current_date_sql, exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.ILike: no_ilike_sql, diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py index f507513..af3d353 100644 --- a/sqlglot/dialects/oracle.py +++ b/sqlglot/dialects/oracle.py @@ -39,13 +39,13 @@ class Oracle(Dialect): class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "DECODE": exp.Matches.from_arg_list, } class Generator(generator.Generator): TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.TINYINT: "NUMBER", exp.DataType.Type.SMALLINT: "NUMBER", exp.DataType.Type.INT: "NUMBER", @@ -60,7 +60,7 @@ class Oracle(Dialect): } TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore **transforms.UNALIAS_GROUP, # type: ignore exp.ILike: no_ilike_sql, exp.Limit: _limit_sql, diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py index f276af1..a092cad 100644 --- a/sqlglot/dialects/postgres.py +++ b/sqlglot/dialects/postgres.py @@ -11,9 +11,19 @@ from sqlglot.dialects.dialect import ( no_trycast_sql, str_position_sql, ) +from sqlglot.helper import seq_get from sqlglot.tokens import TokenType from sqlglot.transforms import delegate, preprocess +DATE_DIFF_FACTOR = { + "MICROSECOND": " * 1000000", + "MILLISECOND": " * 1000", + "SECOND": "", + "MINUTE": " / 60", + "HOUR": " / 3600", + "DAY": " / 86400", +} + def _date_add_sql(kind): def func(self, expression): @@ -34,16 +44,30 @@ def _date_add_sql(kind): return func -def _lateral_sql(self, expression): - this = self.sql(expression, "this") - if isinstance(expression.this, exp.Subquery): - return f"LATERAL{self.sep()}{this}" - alias = expression.args["alias"] - table = alias.name - table = f" {table}" if table else table - columns = self.expressions(alias, key="columns", flat=True) - columns = f" AS {columns}" if columns else "" - return f"LATERAL{self.sep()}{this}{table}{columns}" +def _date_diff_sql(self, expression): + unit = expression.text("unit").upper() + factor = DATE_DIFF_FACTOR.get(unit) + + end = f"CAST({expression.this} AS TIMESTAMP)" + start = f"CAST({expression.expression} AS TIMESTAMP)" + + if factor is not None: + return f"CAST(EXTRACT(epoch FROM {end} - {start}){factor} AS BIGINT)" + + age = f"AGE({end}, {start})" + + if unit == "WEEK": + extract = f"EXTRACT(year FROM {age}) * 48 + EXTRACT(month FROM {age}) * 4 + EXTRACT(day FROM {age}) / 7" + elif unit == "MONTH": + extract = f"EXTRACT(year FROM {age}) * 12 + EXTRACT(month FROM {age})" + elif unit == "QUARTER": + extract = f"EXTRACT(year FROM {age}) * 4 + EXTRACT(month FROM {age}) / 3" + elif unit == "YEAR": + extract = f"EXTRACT(year FROM {age})" + else: + self.unsupported(f"Unsupported DATEDIFF unit {unit}") + + return f"CAST({extract} AS BIGINT)" def _substring_sql(self, expression): @@ -141,7 +165,7 @@ def _serial_to_generated(expression): def _to_timestamp(args): # TO_TIMESTAMP accepts either a single double argument or (text, text) - if len(args) == 1 and args[0].is_number: + if len(args) == 1: # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE return exp.UnixToTime.from_arg_list(args) # https://www.postgresql.org/docs/current/functions-formatting.html @@ -211,11 +235,16 @@ class Postgres(Dialect): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, + "~~": TokenType.LIKE, + "~~*": TokenType.ILIKE, + "~*": TokenType.IRLIKE, + "~": TokenType.RLIKE, "ALWAYS": TokenType.ALWAYS, "BEGIN": TokenType.COMMAND, "BEGIN TRANSACTION": TokenType.BEGIN, "BIGSERIAL": TokenType.BIGSERIAL, "BY DEFAULT": TokenType.BY_DEFAULT, + "CHARACTER VARYING": TokenType.VARCHAR, "COMMENT ON": TokenType.COMMAND, "DECLARE": TokenType.COMMAND, "DO": TokenType.COMMAND, @@ -233,6 +262,7 @@ class Postgres(Dialect): "SMALLSERIAL": TokenType.SMALLSERIAL, "TEMP": TokenType.TEMPORARY, "UUID": TokenType.UUID, + "CSTRING": TokenType.PSEUDO_TYPE, **{f"CREATE {kind}": TokenType.COMMAND for kind in CREATABLES}, **{f"DROP {kind}": TokenType.COMMAND for kind in CREATABLES}, } @@ -244,17 +274,16 @@ class Postgres(Dialect): class Parser(parser.Parser): STRICT_CAST = False - LATERAL_FUNCTION_AS_VIEW = True FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "TO_TIMESTAMP": _to_timestamp, "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"), } class Generator(generator.Generator): TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.TINYINT: "SMALLINT", exp.DataType.Type.FLOAT: "REAL", exp.DataType.Type.DOUBLE: "DOUBLE PRECISION", @@ -264,7 +293,7 @@ class Postgres(Dialect): } TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.ColumnDef: preprocess( [ _auto_increment_to_serial, @@ -274,13 +303,16 @@ class Postgres(Dialect): ), exp.JSONExtract: arrow_json_extract_sql, exp.JSONExtractScalar: arrow_json_extract_scalar_sql, - exp.JSONBExtract: lambda self, e: f"{self.sql(e, 'this')}#>{self.sql(e, 'path')}", - exp.JSONBExtractScalar: lambda self, e: f"{self.sql(e, 'this')}#>>{self.sql(e, 'path')}", + exp.JSONBExtract: lambda self, e: self.binary(e, "#>"), + exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"), + exp.JSONBContains: lambda self, e: self.binary(e, "?"), exp.CurrentDate: no_paren_current_date_sql, exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.DateAdd: _date_add_sql("+"), exp.DateSub: _date_add_sql("-"), - exp.Lateral: _lateral_sql, + exp.DateDiff: _date_diff_sql, + exp.RegexpLike: lambda self, e: self.binary(e, "~"), + exp.RegexpILike: lambda self, e: self.binary(e, "~*"), exp.StrPosition: str_position_sql, exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.Substring: _substring_sql, @@ -291,5 +323,7 @@ class Postgres(Dialect): exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})", exp.DataType: _datatype_sql, exp.GroupConcat: _string_agg_sql, - exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", + exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})" + if isinstance(seq_get(e.expressions, 0), exp.Select) + else f"{self.normalize_func('ARRAY')}[{self.expressions(e, flat=True)}]", } diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py index 1a09037..e16ea1d 100644 --- a/sqlglot/dialects/presto.py +++ b/sqlglot/dialects/presto.py @@ -10,6 +10,7 @@ from sqlglot.dialects.dialect import ( rename_func, str_position_sql, struct_extract_sql, + timestrtotime_sql, ) from sqlglot.dialects.mysql import MySQL from sqlglot.errors import UnsupportedError @@ -38,10 +39,6 @@ def _datatype_sql(self, expression): return sql -def _date_parse_sql(self, expression): - return f"DATE_PARSE({self.sql(expression, 'this')}, '%Y-%m-%d %H:%i:%s')" - - def _explode_to_unnest_sql(self, expression): if isinstance(expression.this, (exp.Explode, exp.Posexplode)): return self.sql( @@ -137,7 +134,7 @@ class Presto(Dialect): class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, "CARDINALITY": exp.ArraySize.from_arg_list, "CONTAINS": exp.ArrayContains.from_arg_list, @@ -174,7 +171,7 @@ class Presto(Dialect): ROOT_PROPERTIES = {exp.SchemaCommentProperty} TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.INT: "INTEGER", exp.DataType.Type.FLOAT: "REAL", exp.DataType.Type.BINARY: "VARBINARY", @@ -184,7 +181,7 @@ class Presto(Dialect): } TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore **transforms.UNALIAS_GROUP, # type: ignore exp.ApproxDistinct: _approx_distinct_sql, exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", @@ -224,8 +221,8 @@ class Presto(Dialect): exp.StructExtract: struct_extract_sql, exp.TableFormatProperty: lambda self, e: f"TABLE_FORMAT='{e.name.upper()}'", exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", - exp.TimeStrToDate: _date_parse_sql, - exp.TimeStrToTime: _date_parse_sql, + exp.TimeStrToDate: timestrtotime_sql, + exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: lambda self, e: f"TO_UNIXTIME(DATE_PARSE({self.sql(e, 'this')}, {Presto.time_format}))", exp.TimeToStr: lambda self, e: f"DATE_FORMAT({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("TO_UNIXTIME"), diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py index 55ed0a6..27dfb93 100644 --- a/sqlglot/dialects/redshift.py +++ b/sqlglot/dialects/redshift.py @@ -36,7 +36,6 @@ class Redshift(Postgres): "TIMETZ": TokenType.TIMESTAMPTZ, "UNLOAD": TokenType.COMMAND, "VARBYTE": TokenType.VARBINARY, - "SIMILAR TO": TokenType.SIMILAR_TO, } class Generator(Postgres.Generator): diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py index 75dc9dc..77b09e9 100644 --- a/sqlglot/dialects/snowflake.py +++ b/sqlglot/dialects/snowflake.py @@ -3,13 +3,15 @@ from __future__ import annotations from sqlglot import exp, generator, parser, tokens from sqlglot.dialects.dialect import ( Dialect, + datestrtodate_sql, format_time_lambda, inline_array_sql, rename_func, + timestrtotime_sql, var_map_sql, ) from sqlglot.expressions import Literal -from sqlglot.helper import seq_get +from sqlglot.helper import flatten, seq_get from sqlglot.tokens import TokenType @@ -183,7 +185,7 @@ class Snowflake(Dialect): class Tokenizer(tokens.Tokenizer): QUOTES = ["'", "$$"] - ESCAPES = ["\\"] + ESCAPES = ["\\", "'"] SINGLE_TOKENS = { **tokens.Tokenizer.SINGLE_TOKENS, @@ -206,9 +208,10 @@ class Snowflake(Dialect): CREATE_TRANSIENT = True TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.Array: inline_array_sql, exp.ArrayConcat: rename_func("ARRAY_CAT"), + exp.DateStrToDate: datestrtodate_sql, exp.DataType: _datatype_sql, exp.If: rename_func("IFF"), exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), @@ -218,13 +221,14 @@ class Snowflake(Dialect): exp.Matches: rename_func("DECODE"), exp.StrPosition: rename_func("POSITION"), exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeStrToTime: timestrtotime_sql, exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", exp.Trim: lambda self, e: f"TRIM({self.format_args(e.this, e.expression)})", exp.UnixToTime: _unix_to_time_sql, } TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", } @@ -246,3 +250,47 @@ class Snowflake(Dialect): if not expression.args.get("distinct", False): self.unsupported("INTERSECT with All is not supported in Snowflake") return super().intersect_op(expression) + + def values_sql(self, expression: exp.Values) -> str: + """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted. + + We also want to make sure that after we find matches where we need to unquote a column that we prevent users + from adding quotes to the column by using the `identify` argument when generating the SQL. + """ + alias = expression.args.get("alias") + if alias and alias.args.get("columns"): + expression = expression.transform( + lambda node: exp.Identifier(**{**node.args, "quoted": False}) + if isinstance(node, exp.Identifier) + and isinstance(node.parent, exp.TableAlias) + and node.arg_key == "columns" + else node, + ) + return self.no_identify(lambda: super(self.__class__, self).values_sql(expression)) + return super().values_sql(expression) + + def select_sql(self, expression: exp.Select) -> str: + """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also + that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need + to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when + generating the SQL. + + Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the + expression. This might not be true in a case where the same column name can be sourced from another table that can + properly quote but should be true in most cases. + """ + values_expressions = expression.find_all(exp.Values) + values_identifiers = set( + flatten( + v.args.get("alias", exp.Alias()).args.get("columns", []) + for v in values_expressions + ) + ) + if values_identifiers: + expression = expression.transform( + lambda node: exp.Identifier(**{**node.args, "quoted": False}) + if isinstance(node, exp.Identifier) and node in values_identifiers + else node, + ) + return self.no_identify(lambda: super(self.__class__, self).select_sql(expression)) + return super().select_sql(expression) diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py index 16083d1..7f05dea 100644 --- a/sqlglot/dialects/spark.py +++ b/sqlglot/dialects/spark.py @@ -76,7 +76,7 @@ class Spark(Hive): } FUNCTION_PARSERS = { - **parser.Parser.FUNCTION_PARSERS, + **parser.Parser.FUNCTION_PARSERS, # type: ignore "BROADCAST": lambda self: self._parse_join_hint("BROADCAST"), "BROADCASTJOIN": lambda self: self._parse_join_hint("BROADCASTJOIN"), "MAPJOIN": lambda self: self._parse_join_hint("MAPJOIN"), @@ -87,6 +87,16 @@ class Spark(Hive): "SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"), } + def _parse_add_column(self): + return self._match_text_seq("ADD", "COLUMNS") and self._parse_schema() + + def _parse_drop_column(self): + return self._match_text_seq("DROP", "COLUMNS") and self.expression( + exp.Drop, + this=self._parse_schema(), + kind="COLUMNS", + ) + class Generator(Hive.Generator): TYPE_MAPPING = { **Hive.Generator.TYPE_MAPPING, # type: ignore diff --git a/sqlglot/dialects/sqlite.py b/sqlglot/dialects/sqlite.py index bbb752b..a0c4942 100644 --- a/sqlglot/dialects/sqlite.py +++ b/sqlglot/dialects/sqlite.py @@ -42,13 +42,13 @@ class SQLite(Dialect): class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "EDITDIST3": exp.Levenshtein.from_arg_list, } class Generator(generator.Generator): TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.BOOLEAN: "INTEGER", exp.DataType.Type.TINYINT: "INTEGER", exp.DataType.Type.SMALLINT: "INTEGER", @@ -70,7 +70,7 @@ class SQLite(Dialect): } TRANSFORMS = { - **generator.Generator.TRANSFORMS, + **generator.Generator.TRANSFORMS, # type: ignore exp.ILike: no_ilike_sql, exp.JSONExtract: arrow_json_extract_sql, exp.JSONExtractScalar: arrow_json_extract_scalar_sql, diff --git a/sqlglot/dialects/starrocks.py b/sqlglot/dialects/starrocks.py index 3519c09..01e6357 100644 --- a/sqlglot/dialects/starrocks.py +++ b/sqlglot/dialects/starrocks.py @@ -8,7 +8,7 @@ from sqlglot.dialects.mysql import MySQL class StarRocks(MySQL): class Generator(MySQL.Generator): # type: ignore TYPE_MAPPING = { - **MySQL.Generator.TYPE_MAPPING, + **MySQL.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.TEXT: "STRING", exp.DataType.Type.TIMESTAMP: "DATETIME", exp.DataType.Type.TIMESTAMPTZ: "DATETIME", diff --git a/sqlglot/dialects/tableau.py b/sqlglot/dialects/tableau.py index 63e7275..36c085f 100644 --- a/sqlglot/dialects/tableau.py +++ b/sqlglot/dialects/tableau.py @@ -30,7 +30,7 @@ class Tableau(Dialect): class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "IFNULL": exp.Coalesce.from_arg_list, "COUNTD": lambda args: exp.Count(this=exp.Distinct(expressions=args)), } diff --git a/sqlglot/dialects/tsql.py b/sqlglot/dialects/tsql.py index a552e7b..7f0f2d7 100644 --- a/sqlglot/dialects/tsql.py +++ b/sqlglot/dialects/tsql.py @@ -224,11 +224,7 @@ class TSQL(Dialect): class Tokenizer(tokens.Tokenizer): IDENTIFIERS = ['"', ("[", "]")] - QUOTES = [ - (prefix + quote, quote) if prefix else quote - for quote in ["'", '"'] - for prefix in ["", "n", "N"] - ] + QUOTES = ["'", '"'] KEYWORDS = { **tokens.Tokenizer.KEYWORDS, @@ -253,7 +249,7 @@ class TSQL(Dialect): class Parser(parser.Parser): FUNCTIONS = { - **parser.Parser.FUNCTIONS, + **parser.Parser.FUNCTIONS, # type: ignore "CHARINDEX": exp.StrPosition.from_arg_list, "ISNULL": exp.Coalesce.from_arg_list, "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL), @@ -314,7 +310,7 @@ class TSQL(Dialect): class Generator(generator.Generator): TYPE_MAPPING = { - **generator.Generator.TYPE_MAPPING, + **generator.Generator.TYPE_MAPPING, # type: ignore exp.DataType.Type.BOOLEAN: "BIT", exp.DataType.Type.INT: "INTEGER", exp.DataType.Type.DECIMAL: "NUMERIC", |