diff options
author | Daniel Baumann <mail@daniel-baumann.ch> | 2023-12-10 10:46:01 +0000 |
---|---|---|
committer | Daniel Baumann <mail@daniel-baumann.ch> | 2023-12-10 10:46:01 +0000 |
commit | 8fe30fd23dc37ec3516e530a86d1c4b604e71241 (patch) | |
tree | 6e2ebbf565b0351fd0f003f488a8339e771ad90c /sqlglot/dialects/bigquery.py | |
parent | Releasing debian version 19.0.1-1. (diff) | |
download | sqlglot-8fe30fd23dc37ec3516e530a86d1c4b604e71241.tar.xz sqlglot-8fe30fd23dc37ec3516e530a86d1c4b604e71241.zip |
Merging upstream version 20.1.0.
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r-- | sqlglot/dialects/bigquery.py | 117 |
1 files changed, 112 insertions, 5 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index fc9a3ae..2a9dde9 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -8,6 +8,7 @@ from sqlglot import exp, generator, parser, tokens, transforms from sqlglot._typing import E from sqlglot.dialects.dialect import ( Dialect, + NormalizationStrategy, arg_max_or_min_no_count, binary_from_function, date_add_interval_sql, @@ -23,6 +24,7 @@ from sqlglot.dialects.dialect import ( regexp_replace_sql, rename_func, timestrtotime_sql, + ts_or_ds_add_cast, ts_or_ds_to_date_sql, ) from sqlglot.helper import seq_get, split_num_words @@ -174,6 +176,44 @@ def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5: return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg) +def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: + return self.sql( + exp.Exists( + this=exp.select("1") + .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) + .where(exp.column("_col").eq(expression.right)) + ) + ) + + +def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: + return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) + + +def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: + expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True)) + expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True)) + unit = expression.args.get("unit") or "DAY" + return self.func("DATE_DIFF", expression.this, expression.expression, unit) + + +def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: + scale = expression.args.get("scale") + timestamp = self.sql(expression, "this") + if scale in (None, exp.UnixToTime.SECONDS): + return f"TIMESTAMP_SECONDS({timestamp})" + if scale == exp.UnixToTime.MILLIS: + return f"TIMESTAMP_MILLIS({timestamp})" + if scale == exp.UnixToTime.MICROS: + return f"TIMESTAMP_MICROS({timestamp})" + if scale == exp.UnixToTime.NANOS: + # We need to cast to INT64 because that's what BQ expects + return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))" + + self.unsupported(f"Unsupported scale for timestamp: {scale}.") + return "" + + class BigQuery(Dialect): UNNEST_COLUMN_ONLY = True SUPPORTS_USER_DEFINED_TYPES = False @@ -181,7 +221,7 @@ class BigQuery(Dialect): LOG_BASE_FIRST = False # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity - RESOLVES_IDENTIFIERS_AS_UPPERCASE = None + NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE # bigquery udfs are case sensitive NORMALIZE_FUNCTIONS = False @@ -220,8 +260,7 @@ class BigQuery(Dialect): # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} - @classmethod - def normalize_identifier(cls, expression: E) -> E: + def normalize_identifier(self, expression: E) -> E: if isinstance(expression, exp.Identifier): parent = expression.parent while isinstance(parent, exp.Dot): @@ -265,7 +304,6 @@ class BigQuery(Dialect): "DECLARE": TokenType.COMMAND, "FLOAT64": TokenType.DOUBLE, "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, - "INT64": TokenType.BIGINT, "MODEL": TokenType.MODEL, "NOT DETERMINISTIC": TokenType.VOLATILE, "RECORD": TokenType.STRUCT, @@ -316,6 +354,15 @@ class BigQuery(Dialect): "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), + "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( + this=seq_get(args, 0), scale=exp.UnixToTime.MICROS + ), + "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( + this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS + ), + "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime( + this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS + ), "TO_JSON_STRING": exp.JSONFormat.from_arg_list, } @@ -358,6 +405,24 @@ class BigQuery(Dialect): NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} + STATEMENT_PARSERS = { + **parser.Parser.STATEMENT_PARSERS, + TokenType.END: lambda self: self._parse_as_command(self._prev), + TokenType.FOR: lambda self: self._parse_for_in(), + } + + BRACKET_OFFSETS = { + "OFFSET": (0, False), + "ORDINAL": (1, False), + "SAFE_OFFSET": (0, True), + "SAFE_ORDINAL": (1, True), + } + + def _parse_for_in(self) -> exp.ForIn: + this = self._parse_range() + self._match_text_seq("DO") + return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) + def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: this = super()._parse_table_part(schema=schema) or self._parse_number() @@ -419,6 +484,26 @@ class BigQuery(Dialect): return json_object + def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + bracket = super()._parse_bracket(this) + + if this is bracket: + return bracket + + if isinstance(bracket, exp.Bracket): + for expression in bracket.expressions: + name = expression.name.upper() + + if name not in self.BRACKET_OFFSETS: + break + + offset, safe = self.BRACKET_OFFSETS[name] + bracket.set("offset", offset) + bracket.set("safe", safe) + expression.replace(expression.expressions[0]) + + return bracket + class Generator(generator.Generator): EXPLICIT_UNION = True INTERVAL_ALLOWS_PLURAL_FORM = False @@ -430,12 +515,14 @@ class BigQuery(Dialect): NVL2_SUPPORTED = False UNNEST_WITH_ORDINALITY = False COLLATE_IS_FUNC = True + LIMIT_ONLY_LITERALS = True TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), + exp.ArrayContains: _array_contains_sql, exp.ArraySize: rename_func("ARRAY_LENGTH"), exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}" @@ -498,10 +585,13 @@ class BigQuery(Dialect): exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), exp.TimeStrToTime: timestrtotime_sql, + exp.TimeToStr: lambda self, e: f"FORMAT_DATE({self.format_time(e)}, {self.sql(e, 'this')})", exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression), - exp.TsOrDsAdd: date_add_interval_sql("DATE", "ADD"), + exp.TsOrDsAdd: _ts_or_ds_add_sql, + exp.TsOrDsDiff: _ts_or_ds_diff_sql, exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"), exp.Unhex: rename_func("FROM_HEX"), + exp.UnixToTime: _unix_to_time_sql, exp.Values: _derived_table_values_to_unnest, exp.VariancePop: rename_func("VAR_POP"), } @@ -671,6 +761,23 @@ class BigQuery(Dialect): return inline_array_sql(self, expression) + def bracket_sql(self, expression: exp.Bracket) -> str: + expressions = expression.expressions + expressions_sql = ", ".join(self.sql(e) for e in expressions) + offset = expression.args.get("offset") + + if offset == 0: + expressions_sql = f"OFFSET({expressions_sql})" + elif offset == 1: + expressions_sql = f"ORDINAL({expressions_sql})" + else: + self.unsupported(f"Unsupported array offset: {offset}") + + if expression.args.get("safe"): + expressions_sql = f"SAFE_{expressions_sql}" + + return f"{self.sql(expression, 'this')}[{expressions_sql}]" + def transaction_sql(self, *_) -> str: return "BEGIN TRANSACTION" |