diff options
Diffstat (limited to 'sqlglot/dialects')
-rw-r--r-- | sqlglot/dialects/bigquery.py | 6 | ||||
-rw-r--r-- | sqlglot/dialects/clickhouse.py | 33 | ||||
-rw-r--r-- | sqlglot/dialects/dialect.py | 17 | ||||
-rw-r--r-- | sqlglot/dialects/hive.py | 40 | ||||
-rw-r--r-- | sqlglot/dialects/oracle.py | 29 | ||||
-rw-r--r-- | sqlglot/dialects/postgres.py | 14 | ||||
-rw-r--r-- | sqlglot/dialects/snowflake.py | 5 | ||||
-rw-r--r-- | sqlglot/dialects/spark.py | 21 |
8 files changed, 114 insertions, 51 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index 40298e7..86e46cf 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -135,6 +135,7 @@ class BigQuery(Dialect): exp.DateSub: _date_add_sql("DATE", "SUB"), exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"), exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"), + exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", exp.ILike: no_ilike_sql, exp.TimeAdd: _date_add_sql("TIME", "ADD"), exp.TimeSub: _date_add_sql("TIME", "SUB"), @@ -172,12 +173,11 @@ class BigQuery(Dialect): exp.AnonymousProperty, } + EXPLICIT_UNION = True + def in_unnest_op(self, unnest): return self.sql(unnest) - def union_op(self, expression): - return f"UNION{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" - def except_op(self, expression): if not expression.args.get("distinct", False): self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py index 55dad7a..da5c856 100644 --- a/sqlglot/dialects/clickhouse.py +++ b/sqlglot/dialects/clickhouse.py @@ -1,10 +1,16 @@ from sqlglot import exp -from sqlglot.dialects.dialect import Dialect, inline_array_sql +from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql from sqlglot.generator import Generator -from sqlglot.parser import Parser +from sqlglot.helper import csv +from sqlglot.parser import Parser, parse_var_map from sqlglot.tokens import Tokenizer, TokenType +def _lower_func(sql): + index = sql.index("(") + return sql[:index].lower() + sql[index:] + + class ClickHouse(Dialect): normalize_functions = None null_ordering = "nulls_are_last" @@ -14,17 +20,23 @@ class ClickHouse(Dialect): KEYWORDS = { **Tokenizer.KEYWORDS, - "NULLABLE": TokenType.NULLABLE, "FINAL": TokenType.FINAL, + "DATETIME64": TokenType.DATETIME, "INT8": TokenType.TINYINT, "INT16": TokenType.SMALLINT, "INT32": TokenType.INT, "INT64": TokenType.BIGINT, "FLOAT32": TokenType.FLOAT, "FLOAT64": TokenType.DOUBLE, + "TUPLE": TokenType.STRUCT, } class Parser(Parser): + FUNCTIONS = { + **Parser.FUNCTIONS, + "MAP": parse_var_map, + } + def _parse_table(self, schema=False): this = super()._parse_table(schema) @@ -39,10 +51,25 @@ class ClickHouse(Dialect): TYPE_MAPPING = { **Generator.TYPE_MAPPING, exp.DataType.Type.NULLABLE: "Nullable", + exp.DataType.Type.DATETIME: "DateTime64", + exp.DataType.Type.MAP: "Map", + exp.DataType.Type.ARRAY: "Array", + exp.DataType.Type.STRUCT: "Tuple", + exp.DataType.Type.TINYINT: "Int8", + exp.DataType.Type.SMALLINT: "Int16", + exp.DataType.Type.INT: "Int32", + exp.DataType.Type.BIGINT: "Int64", + exp.DataType.Type.FLOAT: "Float32", + exp.DataType.Type.DOUBLE: "Float64", } TRANSFORMS = { **Generator.TRANSFORMS, exp.Array: inline_array_sql, + exp.StrPosition: lambda self, e: f"position({csv(self.sql(e, 'this'), self.sql(e, 'substr'), self.sql(e, 'position'))})", exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", + exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), + exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), } + + EXPLICIT_UNION = True diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index 98dc330..f7c6cb5 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -77,7 +77,6 @@ class Dialect(metaclass=_Dialect): alias_post_tablesample = False normalize_functions = "upper" null_ordering = "nulls_are_small" - wrap_derived_values = True date_format = "'%Y-%m-%d'" dateint_format = "'%Y%m%d'" @@ -170,7 +169,6 @@ class Dialect(metaclass=_Dialect): "alias_post_tablesample": self.alias_post_tablesample, "normalize_functions": self.normalize_functions, "null_ordering": self.null_ordering, - "wrap_derived_values": self.wrap_derived_values, **opts, } ) @@ -271,6 +269,21 @@ def struct_extract_sql(self, expression): return f"{this}.{struct_key}" +def var_map_sql(self, expression): + keys = expression.args["keys"] + values = expression.args["values"] + + if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): + self.unsupported("Cannot convert array columns into map.") + return f"MAP({self.sql(keys)}, {self.sql(values)})" + + args = [] + for key, value in zip(keys.expressions, values.expressions): + args.append(self.sql(key)) + args.append(self.sql(value)) + return f"MAP({csv(*args)})" + + def format_time_lambda(exp_class, dialect, default=None): """Helper used for time expressions. diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index 7a27bb3..55d7bcc 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -11,40 +11,14 @@ from sqlglot.dialects.dialect import ( no_trycast_sql, rename_func, struct_extract_sql, + var_map_sql, ) from sqlglot.generator import Generator from sqlglot.helper import csv, list_get -from sqlglot.parser import Parser +from sqlglot.parser import Parser, parse_var_map from sqlglot.tokens import Tokenizer -def _parse_map(args): - keys = [] - values = [] - for i in range(0, len(args), 2): - keys.append(args[i]) - values.append(args[i + 1]) - return HiveMap( - keys=exp.Array(expressions=keys), - values=exp.Array(expressions=values), - ) - - -def _map_sql(self, expression): - keys = expression.args["keys"] - values = expression.args["values"] - - if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): - self.unsupported("Cannot convert array columns into map use SparkSQL instead.") - return f"MAP({self.sql(keys)}, {self.sql(values)})" - - args = [] - for key, value in zip(keys.expressions, values.expressions): - args.append(self.sql(key)) - args.append(self.sql(value)) - return f"MAP({csv(*args)})" - - def _array_sort(self, expression): if expression.expression: self.unsupported("Hive SORT_ARRAY does not support a comparator") @@ -122,10 +96,6 @@ def _index_sql(self, expression): return f"{this} ON TABLE {table} {columns}" -class HiveMap(exp.Map): - is_var_len_args = True - - class Hive(Dialect): alias_post_tablesample = True @@ -206,7 +176,7 @@ class Hive(Dialect): position=list_get(args, 2), ), "LOG": (lambda args: exp.Log.from_arg_list(args) if len(args) > 1 else exp.Ln.from_arg_list(args)), - "MAP": _parse_map, + "MAP": parse_var_map, "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), "PERCENTILE": exp.Quantile.from_arg_list, "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, @@ -245,8 +215,8 @@ class Hive(Dialect): exp.Join: _unnest_to_explode_sql, exp.JSONExtract: rename_func("GET_JSON_OBJECT"), exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), - exp.Map: _map_sql, - HiveMap: _map_sql, + exp.Map: var_map_sql, + exp.VarMap: var_map_sql, exp.Create: create_with_partitions_sql, exp.Quantile: rename_func("PERCENTILE"), exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py index 91e30b2..8041ff0 100644 --- a/sqlglot/dialects/oracle.py +++ b/sqlglot/dialects/oracle.py @@ -10,6 +10,32 @@ def _limit_sql(self, expression): class Oracle(Dialect): + # https://docs.oracle.com/database/121/SQLRF/sql_elements004.htm#SQLRF00212 + # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes + time_mapping = { + "AM": "%p", # Meridian indicator with or without periods + "A.M.": "%p", # Meridian indicator with or without periods + "PM": "%p", # Meridian indicator with or without periods + "P.M.": "%p", # Meridian indicator with or without periods + "D": "%u", # Day of week (1-7) + "DAY": "%A", # name of day + "DD": "%d", # day of month (1-31) + "DDD": "%j", # day of year (1-366) + "DY": "%a", # abbreviated name of day + "HH": "%I", # Hour of day (1-12) + "HH12": "%I", # alias for HH + "HH24": "%H", # Hour of day (0-23) + "IW": "%V", # Calendar week of year (1-52 or 1-53), as defined by the ISO 8601 standard + "MI": "%M", # Minute (0-59) + "MM": "%m", # Month (01-12; January = 01) + "MON": "%b", # Abbreviated name of month + "MONTH": "%B", # Name of month + "SS": "%S", # Second (0-59) + "WW": "%W", # Week of year (1-53) + "YY": "%y", # 15 + "YYYY": "%Y", # 2015 + } + class Generator(Generator): TYPE_MAPPING = { **Generator.TYPE_MAPPING, @@ -30,6 +56,9 @@ class Oracle(Dialect): **transforms.UNALIAS_GROUP, exp.ILike: no_ilike_sql, exp.Limit: _limit_sql, + exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", + exp.UnixToTime: lambda self, e: f"TO_DATE('1970-01-01','YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)", } def query_modifiers(self, expression, *sqls): diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py index aaa07a1..731e28e 100644 --- a/sqlglot/dialects/postgres.py +++ b/sqlglot/dialects/postgres.py @@ -118,13 +118,22 @@ def _serial_to_generated(expression): return expression +def _to_timestamp(args): + # TO_TIMESTAMP accepts either a single double argument or (text, text) + if len(args) == 1 and args[0].is_number: + # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE + return exp.UnixToTime.from_arg_list(args) + # https://www.postgresql.org/docs/current/functions-formatting.html + return format_time_lambda(exp.StrToTime, "postgres")(args) + + class Postgres(Dialect): null_ordering = "nulls_are_large" time_format = "'YYYY-MM-DD HH24:MI:SS'" time_mapping = { "AM": "%p", "PM": "%p", - "D": "%w", # 1-based day of week + "D": "%u", # 1-based day of week "DD": "%d", # day of month "DDD": "%j", # zero padded day of year "FMDD": "%-d", # - is no leading zero for Python; same for FM in postgres @@ -172,7 +181,7 @@ class Postgres(Dialect): FUNCTIONS = { **Parser.FUNCTIONS, - "TO_TIMESTAMP": format_time_lambda(exp.StrToTime, "postgres"), + "TO_TIMESTAMP": _to_timestamp, "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"), } @@ -211,4 +220,5 @@ class Postgres(Dialect): exp.TableSample: no_tablesample_sql, exp.Trim: _trim_sql, exp.TryCast: no_trycast_sql, + exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})", } diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py index fb2d900..19a427c 100644 --- a/sqlglot/dialects/snowflake.py +++ b/sqlglot/dialects/snowflake.py @@ -121,6 +121,7 @@ class Snowflake(Dialect): FUNC_TOKENS = { *Parser.FUNC_TOKENS, TokenType.RLIKE, + TokenType.TABLE, } COLUMN_OPERATORS = { @@ -143,7 +144,7 @@ class Snowflake(Dialect): SINGLE_TOKENS = { **Tokenizer.SINGLE_TOKENS, - "$": TokenType.DOLLAR, # needed to break for quotes + "$": TokenType.PARAMETER, } KEYWORDS = { @@ -164,6 +165,8 @@ class Snowflake(Dialect): exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.UnixToTime: _unix_to_time, exp.Array: inline_array_sql, + exp.StrPosition: rename_func("POSITION"), + exp.Parameter: lambda self, e: f"${self.sql(e, 'this')}", exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'value')}", } diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py index e8da07a..95a7ab4 100644 --- a/sqlglot/dialects/spark.py +++ b/sqlglot/dialects/spark.py @@ -4,8 +4,9 @@ from sqlglot.dialects.dialect import ( no_ilike_sql, rename_func, ) -from sqlglot.dialects.hive import Hive, HiveMap +from sqlglot.dialects.hive import Hive from sqlglot.helper import list_get +from sqlglot.parser import Parser def _create_sql(self, e): @@ -47,8 +48,6 @@ def _unix_to_time(self, expression): class Spark(Hive): - wrap_derived_values = False - class Parser(Hive.Parser): FUNCTIONS = { **Hive.Parser.FUNCTIONS, @@ -78,8 +77,19 @@ class Spark(Hive): "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, } - class Generator(Hive.Generator): + FUNCTION_PARSERS = { + **Parser.FUNCTION_PARSERS, + "BROADCAST": lambda self: self._parse_join_hint("BROADCAST"), + "BROADCASTJOIN": lambda self: self._parse_join_hint("BROADCASTJOIN"), + "MAPJOIN": lambda self: self._parse_join_hint("MAPJOIN"), + "MERGE": lambda self: self._parse_join_hint("MERGE"), + "SHUFFLEMERGE": lambda self: self._parse_join_hint("SHUFFLEMERGE"), + "MERGEJOIN": lambda self: self._parse_join_hint("MERGEJOIN"), + "SHUFFLE_HASH": lambda self: self._parse_join_hint("SHUFFLE_HASH"), + "SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"), + } + class Generator(Hive.Generator): TYPE_MAPPING = { **Hive.Generator.TYPE_MAPPING, exp.DataType.Type.TINYINT: "BYTE", @@ -102,8 +112,9 @@ class Spark(Hive): exp.Map: _map_sql, exp.Reduce: rename_func("AGGREGATE"), exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}", - HiveMap: _map_sql, } + WRAP_DERIVED_VALUES = False + class Tokenizer(Hive.Tokenizer): HEX_STRINGS = [("X'", "'")] |