Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r-- | sqlglot/dialects/bigquery.py | 97
1 file changed, 61 insertions, 36 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 9705b35..1a58337 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -1,5 +1,3 @@
-"""Supports BigQuery Standard SQL."""
-
 from __future__ import annotations

 import re
@@ -18,11 +16,9 @@ from sqlglot.dialects.dialect import (
     timestrtotime_sql,
     ts_or_ds_to_date_sql,
 )
-from sqlglot.helper import seq_get
+from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType

-E = t.TypeVar("E", bound=exp.Expression)
-

 def _date_add_sql(
     data_type: str, kind: str
@@ -96,19 +92,12 @@ def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
     These are added by the optimizer's qualify_column step.
     """
     if isinstance(expression, exp.Select):
-        unnests = {
-            unnest.alias
-            for unnest in expression.args.get("from", exp.From(expressions=[])).expressions
-            if isinstance(unnest, exp.Unnest) and unnest.alias
-        }
-
-        if unnests:
-            expression = expression.copy()
-
-            for select in expression.expressions:
-                for column in select.find_all(exp.Column):
-                    if column.table in unnests:
-                        column.set("table", None)
+        for unnest in expression.find_all(exp.Unnest):
+            if isinstance(unnest.parent, (exp.From, exp.Join)) and unnest.alias:
+                for select in expression.selects:
+                    for column in select.find_all(exp.Column):
+                        if column.table == unnest.alias:
+                            column.set("table", None)

     return expression

@@ -127,16 +116,20 @@ class BigQuery(Dialect):
     }

     class Tokenizer(tokens.Tokenizer):
-        QUOTES = [
-            (prefix + quote, quote) if prefix else quote
-            for quote in ["'", '"', '"""', "'''"]
-            for prefix in ["", "r", "R"]
-        ]
+        QUOTES = ["'", '"', '"""', "'''"]
         COMMENTS = ["--", "#", ("/*", "*/")]
         IDENTIFIERS = ["`"]
         STRING_ESCAPES = ["\\"]
+
         HEX_STRINGS = [("0x", ""), ("0X", "")]
-        BYTE_STRINGS = [("b'", "'"), ("B'", "'")]
+
+        BYTE_STRINGS = [
+            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
+        ]
+
+        RAW_STRINGS = [
+            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
+        ]

         KEYWORDS = {
             **tokens.Tokenizer.KEYWORDS,
@@ -144,11 +137,11 @@ class BigQuery(Dialect):
             "BEGIN": TokenType.COMMAND,
             "BEGIN TRANSACTION": TokenType.BEGIN,
             "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
+            "BYTES": TokenType.BINARY,
             "DECLARE": TokenType.COMMAND,
-            "GEOGRAPHY": TokenType.GEOGRAPHY,
             "FLOAT64": TokenType.DOUBLE,
             "INT64": TokenType.BIGINT,
-            "BYTES": TokenType.BINARY,
+            "RECORD": TokenType.STRUCT,
             "NOT DETERMINISTIC": TokenType.VOLATILE,
             "UNKNOWN": TokenType.NULL,
         }
@@ -161,7 +154,7 @@ class BigQuery(Dialect):
         LOG_DEFAULTS_TO_LN = True

         FUNCTIONS = {
-            **parser.Parser.FUNCTIONS,  # type: ignore
+            **parser.Parser.FUNCTIONS,
             "DATE_TRUNC": lambda args: exp.DateTrunc(
                 unit=exp.Literal.string(str(seq_get(args, 1))),
                 this=seq_get(args, 0),
@@ -191,28 +184,28 @@ class BigQuery(Dialect):
         }

         FUNCTION_PARSERS = {
-            **parser.Parser.FUNCTION_PARSERS,  # type: ignore
+            **parser.Parser.FUNCTION_PARSERS,
             "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
         }
         FUNCTION_PARSERS.pop("TRIM")

         NO_PAREN_FUNCTIONS = {
-            **parser.Parser.NO_PAREN_FUNCTIONS,  # type: ignore
+            **parser.Parser.NO_PAREN_FUNCTIONS,
             TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
         }

         NESTED_TYPE_TOKENS = {
-            *parser.Parser.NESTED_TYPE_TOKENS,  # type: ignore
+            *parser.Parser.NESTED_TYPE_TOKENS,
             TokenType.TABLE,
         }

         ID_VAR_TOKENS = {
-            *parser.Parser.ID_VAR_TOKENS,  # type: ignore
+            *parser.Parser.ID_VAR_TOKENS,
             TokenType.VALUES,
         }

         PROPERTY_PARSERS = {
-            **parser.Parser.PROPERTY_PARSERS,  # type: ignore
+            **parser.Parser.PROPERTY_PARSERS,
             "NOT DETERMINISTIC": lambda self: self.expression(
                 exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
             ),
@@ -220,19 +213,50 @@ class BigQuery(Dialect):
         }

         CONSTRAINT_PARSERS = {
-            **parser.Parser.CONSTRAINT_PARSERS,  # type: ignore
+            **parser.Parser.CONSTRAINT_PARSERS,
             "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
         }

+        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
+            this = super()._parse_table_part(schema=schema)
+
+            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
+            if isinstance(this, exp.Identifier):
+                table_name = this.name
+                while self._match(TokenType.DASH, advance=False) and self._next:
+                    self._advance(2)
+                    table_name += f"-{self._prev.text}"
+
+                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
+
+            return this
+
+        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
+            table = super()._parse_table_parts(schema=schema)
+            if isinstance(table.this, exp.Identifier) and "." in table.name:
+                catalog, db, this, *rest = (
+                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
+                    for x in split_num_words(table.name, ".", 3)
+                )
+
+                if rest and this:
+                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))
+
+                table = exp.Table(this=this, db=db, catalog=catalog)
+
+            return table
+
     class Generator(generator.Generator):
         EXPLICIT_UNION = True
         INTERVAL_ALLOWS_PLURAL_FORM = False
         JOIN_HINTS = False
         TABLE_HINTS = False
         LIMIT_FETCH = "LIMIT"
+        RENAME_TABLE_WITH_DB = False

         TRANSFORMS = {
-            **generator.Generator.TRANSFORMS,  # type: ignore
+            **generator.Generator.TRANSFORMS,
+            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
             exp.ArraySize: rename_func("ARRAY_LENGTH"),
             exp.AtTimeZone: lambda self, e: self.func(
                 "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
@@ -259,6 +283,7 @@ class BigQuery(Dialect):
             exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
             exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
             exp.TimeStrToTime: timestrtotime_sql,
+            exp.TryCast: lambda self, e: f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})",
             exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
             exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"),
             exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
@@ -274,7 +299,7 @@ class BigQuery(Dialect):
         }

         TYPE_MAPPING = {
-            **generator.Generator.TYPE_MAPPING,  # type: ignore
+            **generator.Generator.TYPE_MAPPING,
             exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
             exp.DataType.Type.BIGINT: "INT64",
             exp.DataType.Type.BINARY: "BYTES",
@@ -297,7 +322,7 @@ class BigQuery(Dialect):
         }

         PROPERTIES_LOCATION = {
-            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
+            **generator.Generator.PROPERTIES_LOCATION,
             exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
             exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
         }
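For reviewers, the user-visible effect of these changes can be sketched with a few calls against sqlglot's public API. This is a minimal illustration, assuming a sqlglot build that contains this diff; the expected values in the comments are paraphrased rather than copied from the project's test suite.

import sqlglot
from sqlglot import exp

# Dash-separated project names are now kept together as a single table part
# by the new _parse_table_part override (per the BigQuery lexical rules linked above).
print(sqlglot.transpile("SELECT * FROM my-project.mydataset.mytable", read="bigquery")[0])

# A single back-quoted identifier that contains dots is split into
# catalog/db/table via split_num_words in _parse_table_parts.
table = sqlglot.parse_one(
    "SELECT * FROM `my-project.mydataset.mytable`", read="bigquery"
).find(exp.Table)
print(table.catalog, table.db, table.name)  # expected: my-project mydataset mytable

# TryCast is now generated as BigQuery's SAFE_CAST.
print(sqlglot.transpile("SELECT TRY_CAST(x AS STRING)", write="bigquery")[0])
# expected: SELECT SAFE_CAST(x AS STRING)

The raw-string change works the same way: with RAW_STRINGS, r'...' and R"..." literals are tokenized against every quote style the dialect accepts, instead of being special-cased inside QUOTES.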