diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-06-02 23:59:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-06-02 23:59:46 +0000 |
commit | 20739a12c39121a9e7ad3c9a2469ec5a6876199d (patch) | |
tree | c000de91c59fd29b2d9beecf9f93b84e69727f37 /sqlglot/dialects/bigquery.py | |
parent | Releasing debian version 12.2.0-1. (diff) | |
download | sqlglot-20739a12c39121a9e7ad3c9a2469ec5a6876199d.tar.xz sqlglot-20739a12c39121a9e7ad3c9a2469ec5a6876199d.zip |
Merging upstream version 15.0.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r-- | sqlglot/dialects/bigquery.py | 97 |
1 file changed, 61 insertions, 36 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index 9705b35..1a58337 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -1,5 +1,3 @@ -"""Supports BigQuery Standard SQL.""" - from __future__ import annotations import re @@ -18,11 +16,9 @@ from sqlglot.dialects.dialect import ( timestrtotime_sql, ts_or_ds_to_date_sql, ) -from sqlglot.helper import seq_get +from sqlglot.helper import seq_get, split_num_words from sqlglot.tokens import TokenType -E = t.TypeVar("E", bound=exp.Expression) - def _date_add_sql( data_type: str, kind: str @@ -96,19 +92,12 @@ def _unqualify_unnest(expression: exp.Expression) -> exp.Expression: These are added by the optimizer's qualify_column step. """ if isinstance(expression, exp.Select): - unnests = { - unnest.alias - for unnest in expression.args.get("from", exp.From(expressions=[])).expressions - if isinstance(unnest, exp.Unnest) and unnest.alias - } - - if unnests: - expression = expression.copy() - - for select in expression.expressions: - for column in select.find_all(exp.Column): - if column.table in unnests: - column.set("table", None) + for unnest in expression.find_all(exp.Unnest): + if isinstance(unnest.parent, (exp.From, exp.Join)) and unnest.alias: + for select in expression.selects: + for column in select.find_all(exp.Column): + if column.table == unnest.alias: + column.set("table", None) return expression @@ -127,16 +116,20 @@ class BigQuery(Dialect): } class Tokenizer(tokens.Tokenizer): - QUOTES = [ - (prefix + quote, quote) if prefix else quote - for quote in ["'", '"', '"""', "'''"] - for prefix in ["", "r", "R"] - ] + QUOTES = ["'", '"', '"""', "'''"] COMMENTS = ["--", "#", ("/*", "*/")] IDENTIFIERS = ["`"] STRING_ESCAPES = ["\\"] + HEX_STRINGS = [("0x", ""), ("0X", "")] - BYTE_STRINGS = [("b'", "'"), ("B'", "'")] + + BYTE_STRINGS = [ + (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") + ] + + RAW_STRINGS = [ + (prefix + q, q) for q in 
t.cast(t.List[str], QUOTES) for prefix in ("r", "R") + ] KEYWORDS = { **tokens.Tokenizer.KEYWORDS, @@ -144,11 +137,11 @@ class BigQuery(Dialect): "BEGIN": TokenType.COMMAND, "BEGIN TRANSACTION": TokenType.BEGIN, "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, + "BYTES": TokenType.BINARY, "DECLARE": TokenType.COMMAND, - "GEOGRAPHY": TokenType.GEOGRAPHY, "FLOAT64": TokenType.DOUBLE, "INT64": TokenType.BIGINT, - "BYTES": TokenType.BINARY, + "RECORD": TokenType.STRUCT, "NOT DETERMINISTIC": TokenType.VOLATILE, "UNKNOWN": TokenType.NULL, } @@ -161,7 +154,7 @@ class BigQuery(Dialect): LOG_DEFAULTS_TO_LN = True FUNCTIONS = { - **parser.Parser.FUNCTIONS, # type: ignore + **parser.Parser.FUNCTIONS, "DATE_TRUNC": lambda args: exp.DateTrunc( unit=exp.Literal.string(str(seq_get(args, 1))), this=seq_get(args, 0), @@ -191,28 +184,28 @@ class BigQuery(Dialect): } FUNCTION_PARSERS = { - **parser.Parser.FUNCTION_PARSERS, # type: ignore + **parser.Parser.FUNCTION_PARSERS, "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), } FUNCTION_PARSERS.pop("TRIM") NO_PAREN_FUNCTIONS = { - **parser.Parser.NO_PAREN_FUNCTIONS, # type: ignore + **parser.Parser.NO_PAREN_FUNCTIONS, TokenType.CURRENT_DATETIME: exp.CurrentDatetime, } NESTED_TYPE_TOKENS = { - *parser.Parser.NESTED_TYPE_TOKENS, # type: ignore + *parser.Parser.NESTED_TYPE_TOKENS, TokenType.TABLE, } ID_VAR_TOKENS = { - *parser.Parser.ID_VAR_TOKENS, # type: ignore + *parser.Parser.ID_VAR_TOKENS, TokenType.VALUES, } PROPERTY_PARSERS = { - **parser.Parser.PROPERTY_PARSERS, # type: ignore + **parser.Parser.PROPERTY_PARSERS, "NOT DETERMINISTIC": lambda self: self.expression( exp.StabilityProperty, this=exp.Literal.string("VOLATILE") ), @@ -220,19 +213,50 @@ class BigQuery(Dialect): } CONSTRAINT_PARSERS = { - **parser.Parser.CONSTRAINT_PARSERS, # type: ignore + **parser.Parser.CONSTRAINT_PARSERS, "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), } + def _parse_table_part(self, 
schema: bool = False) -> t.Optional[exp.Expression]: + this = super()._parse_table_part(schema=schema) + + # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names + if isinstance(this, exp.Identifier): + table_name = this.name + while self._match(TokenType.DASH, advance=False) and self._next: + self._advance(2) + table_name += f"-{self._prev.text}" + + this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) + + return this + + def _parse_table_parts(self, schema: bool = False) -> exp.Table: + table = super()._parse_table_parts(schema=schema) + if isinstance(table.this, exp.Identifier) and "." in table.name: + catalog, db, this, *rest = ( + t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) + for x in split_num_words(table.name, ".", 3) + ) + + if rest and this: + this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) + + table = exp.Table(this=this, db=db, catalog=catalog) + + return table + class Generator(generator.Generator): EXPLICIT_UNION = True INTERVAL_ALLOWS_PLURAL_FORM = False JOIN_HINTS = False TABLE_HINTS = False LIMIT_FETCH = "LIMIT" + RENAME_TABLE_WITH_DB = False TRANSFORMS = { - **generator.Generator.TRANSFORMS, # type: ignore + **generator.Generator.TRANSFORMS, + exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), exp.ArraySize: rename_func("ARRAY_LENGTH"), exp.AtTimeZone: lambda self, e: self.func( "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone")) @@ -259,6 +283,7 @@ class BigQuery(Dialect): exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"), exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"), exp.TimeStrToTime: timestrtotime_sql, + exp.TryCast: lambda self, e: f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})", exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"), exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"), exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", @@ -274,7 +299,7 @@ class BigQuery(Dialect): } TYPE_MAPPING = { - 
**generator.Generator.TYPE_MAPPING, # type: ignore + **generator.Generator.TYPE_MAPPING, exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", exp.DataType.Type.BIGINT: "INT64", exp.DataType.Type.BINARY: "BYTES", @@ -297,7 +322,7 @@ class BigQuery(Dialect): } PROPERTIES_LOCATION = { - **generator.Generator.PROPERTIES_LOCATION, # type: ignore + **generator.Generator.PROPERTIES_LOCATION, exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, } |