path: root/sqlglot/dialects/bigquery.py
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2023-06-02 23:59:40 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2023-06-02 23:59:46 +0000
commit     20739a12c39121a9e7ad3c9a2469ec5a6876199d (patch)
tree       c000de91c59fd29b2d9beecf9f93b84e69727f37 /sqlglot/dialects/bigquery.py
parent     Releasing debian version 12.2.0-1. (diff)
download   sqlglot-20739a12c39121a9e7ad3c9a2469ec5a6876199d.tar.xz
           sqlglot-20739a12c39121a9e7ad3c9a2469ec5a6876199d.zip
Merging upstream version 15.0.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r--   sqlglot/dialects/bigquery.py   97
1 file changed, 61 insertions(+), 36 deletions(-)
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 9705b35..1a58337 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -1,5 +1,3 @@
-"""Supports BigQuery Standard SQL."""
-
from __future__ import annotations
import re
@@ -18,11 +16,9 @@ from sqlglot.dialects.dialect import (
timestrtotime_sql,
ts_or_ds_to_date_sql,
)
-from sqlglot.helper import seq_get
+from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
-E = t.TypeVar("E", bound=exp.Expression)
-
def _date_add_sql(
data_type: str, kind: str
@@ -96,19 +92,12 @@ def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
These are added by the optimizer's qualify_column step.
"""
if isinstance(expression, exp.Select):
- unnests = {
- unnest.alias
- for unnest in expression.args.get("from", exp.From(expressions=[])).expressions
- if isinstance(unnest, exp.Unnest) and unnest.alias
- }
-
- if unnests:
- expression = expression.copy()
-
- for select in expression.expressions:
- for column in select.find_all(exp.Column):
- if column.table in unnests:
- column.set("table", None)
+ for unnest in expression.find_all(exp.Unnest):
+ if isinstance(unnest.parent, (exp.From, exp.Join)) and unnest.alias:
+ for select in expression.selects:
+ for column in select.find_all(exp.Column):
+ if column.table == unnest.alias:
+ column.set("table", None)
return expression
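For illustration, here is a minimal sketch of the traversal the rewritten helper performs, using sqlglot's public parse_one API (the toy query and printed output are illustrative, not part of the patch):

from sqlglot import exp, parse_one

# A toy query where the optimizer's qualify_column step would have
# prefixed the projected column with the UNNEST alias.
expression = parse_one("SELECT x.a FROM UNNEST(y) AS x")

for unnest in expression.find_all(exp.Unnest):
    # Only unqualify aliases of UNNESTs used as relations in FROM/JOIN.
    if isinstance(unnest.parent, (exp.From, exp.Join)) and unnest.alias:
        for select in expression.selects:
            for column in select.find_all(exp.Column):
                if column.table == unnest.alias:
                    column.set("table", None)

print(expression.sql())  # roughly: SELECT a FROM UNNEST(y) AS x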
@@ -127,16 +116,20 @@ class BigQuery(Dialect):
}
class Tokenizer(tokens.Tokenizer):
- QUOTES = [
- (prefix + quote, quote) if prefix else quote
- for quote in ["'", '"', '"""', "'''"]
- for prefix in ["", "r", "R"]
- ]
+ QUOTES = ["'", '"', '"""', "'''"]
COMMENTS = ["--", "#", ("/*", "*/")]
IDENTIFIERS = ["`"]
STRING_ESCAPES = ["\\"]
+
HEX_STRINGS = [("0x", ""), ("0X", "")]
- BYTE_STRINGS = [("b'", "'"), ("B'", "'")]
+
+ BYTE_STRINGS = [
+ (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
+ ]
+
+ RAW_STRINGS = [
+ (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
+ ]
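Evaluated on their own, the two comprehensions simply prefix every quote style (plain Python, shown here for illustration):

QUOTES = ["'", '"', '"""', "'''"]
BYTE_STRINGS = [(prefix + q, q) for q in QUOTES for prefix in ("b", "B")]
# -> [("b'", "'"), ("B'", "'"), ('b"', '"'), ('B"', '"'),
#     ('b"""', '"""'), ('B"""', '"""'), ("b'''", "'''"), ("B'''", "'''")]

so that b'''...''' tokenizes as a byte string and r"..." as a raw string, instead of the quote definitions themselves carrying the r/R prefixes as before.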
KEYWORDS = {
**tokens.Tokenizer.KEYWORDS,
@@ -144,11 +137,11 @@ class BigQuery(Dialect):
"BEGIN": TokenType.COMMAND,
"BEGIN TRANSACTION": TokenType.BEGIN,
"CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
+ "BYTES": TokenType.BINARY,
"DECLARE": TokenType.COMMAND,
- "GEOGRAPHY": TokenType.GEOGRAPHY,
"FLOAT64": TokenType.DOUBLE,
"INT64": TokenType.BIGINT,
- "BYTES": TokenType.BINARY,
+ "RECORD": TokenType.STRUCT,
"NOT DETERMINISTIC": TokenType.VOLATILE,
"UNKNOWN": TokenType.NULL,
}
@@ -161,7 +154,7 @@ class BigQuery(Dialect):
LOG_DEFAULTS_TO_LN = True
FUNCTIONS = {
- **parser.Parser.FUNCTIONS, # type: ignore
+ **parser.Parser.FUNCTIONS,
"DATE_TRUNC": lambda args: exp.DateTrunc(
unit=exp.Literal.string(str(seq_get(args, 1))),
this=seq_get(args, 0),
@@ -191,28 +184,28 @@ class BigQuery(Dialect):
}
FUNCTION_PARSERS = {
- **parser.Parser.FUNCTION_PARSERS, # type: ignore
+ **parser.Parser.FUNCTION_PARSERS,
"ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
}
FUNCTION_PARSERS.pop("TRIM")
NO_PAREN_FUNCTIONS = {
- **parser.Parser.NO_PAREN_FUNCTIONS, # type: ignore
+ **parser.Parser.NO_PAREN_FUNCTIONS,
TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
}
NESTED_TYPE_TOKENS = {
- *parser.Parser.NESTED_TYPE_TOKENS, # type: ignore
+ *parser.Parser.NESTED_TYPE_TOKENS,
TokenType.TABLE,
}
ID_VAR_TOKENS = {
- *parser.Parser.ID_VAR_TOKENS, # type: ignore
+ *parser.Parser.ID_VAR_TOKENS,
TokenType.VALUES,
}
PROPERTY_PARSERS = {
- **parser.Parser.PROPERTY_PARSERS, # type: ignore
+ **parser.Parser.PROPERTY_PARSERS,
"NOT DETERMINISTIC": lambda self: self.expression(
exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
),
@@ -220,19 +213,50 @@ class BigQuery(Dialect):
}
CONSTRAINT_PARSERS = {
- **parser.Parser.CONSTRAINT_PARSERS, # type: ignore
+ **parser.Parser.CONSTRAINT_PARSERS,
"OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
}
+ def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
+ this = super()._parse_table_part(schema=schema)
+
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
+ if isinstance(this, exp.Identifier):
+ table_name = this.name
+ while self._match(TokenType.DASH, advance=False) and self._next:
+ self._advance(2)
+ table_name += f"-{self._prev.text}"
+
+ this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
+
+ return this
+
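The DASH loop above lets unquoted, dash-separated BigQuery project names parse as a single identifier; a hedged usage sketch (exact quoting of the output may vary):

import sqlglot

sql = "SELECT * FROM bigquery-public-data.samples.shakespeare"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
# roughly: SELECT * FROM bigquery-public-data.samples.shakespeare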
+ def _parse_table_parts(self, schema: bool = False) -> exp.Table:
+ table = super()._parse_table_parts(schema=schema)
+ if isinstance(table.this, exp.Identifier) and "." in table.name:
+ catalog, db, this, *rest = (
+ t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
+ for x in split_num_words(table.name, ".", 3)
+ )
+
+ if rest and this:
+ this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))
+
+ table = exp.Table(this=this, db=db, catalog=catalog)
+
+ return table
+
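This second override covers table paths written as one quoted identifier, e.g. `project.dataset.table`; split_num_words pads missing leading parts with None so shorter paths still land in the right catalog/db/name slots (illustrative REPL output):

from sqlglot.helper import split_num_words

split_num_words("project.dataset.table", ".", 3)  # ['project', 'dataset', 'table']
split_num_words("dataset.table", ".", 3)          # [None, 'dataset', 'table']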
class Generator(generator.Generator):
EXPLICIT_UNION = True
INTERVAL_ALLOWS_PLURAL_FORM = False
JOIN_HINTS = False
TABLE_HINTS = False
LIMIT_FETCH = "LIMIT"
+ RENAME_TABLE_WITH_DB = False
TRANSFORMS = {
- **generator.Generator.TRANSFORMS, # type: ignore
+ **generator.Generator.TRANSFORMS,
+ exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
exp.ArraySize: rename_func("ARRAY_LENGTH"),
exp.AtTimeZone: lambda self, e: self.func(
"TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
@@ -259,6 +283,7 @@ class BigQuery(Dialect):
exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
exp.TimeStrToTime: timestrtotime_sql,
+ exp.TryCast: lambda self, e: f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})",
exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"),
exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
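The new exp.TryCast transform maps TRY_CAST onto BigQuery's SAFE_CAST; a quick sketch (assuming the default read dialect parses TRY_CAST):

import sqlglot

print(sqlglot.transpile("SELECT TRY_CAST(x AS BIGINT)", write="bigquery")[0])
# roughly: SELECT SAFE_CAST(x AS INT64)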
@@ -274,7 +299,7 @@ class BigQuery(Dialect):
}
TYPE_MAPPING = {
- **generator.Generator.TYPE_MAPPING, # type: ignore
+ **generator.Generator.TYPE_MAPPING,
exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
exp.DataType.Type.BIGINT: "INT64",
exp.DataType.Type.BINARY: "BYTES",
@@ -297,7 +322,7 @@ class BigQuery(Dialect):
}
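These entries rewrite generic type names into BigQuery spellings on generation, e.g. (assuming the default read dialect recognizes both type names):

import sqlglot

print(sqlglot.transpile("SELECT CAST(a AS BINARY), CAST(b AS BIGDECIMAL)", write="bigquery")[0])
# roughly: SELECT CAST(a AS BYTES), CAST(b AS BIGNUMERIC)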
PROPERTIES_LOCATION = {
- **generator.Generator.PROPERTIES_LOCATION, # type: ignore
+ **generator.Generator.PROPERTIES_LOCATION,
exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
}
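POST_SCHEMA placement keeps PARTITION BY after the column list in DDL, while VolatileProperty is dropped as unsupported; a rough round-trip sketch (output not verified against this exact version):

import sqlglot

print(sqlglot.transpile("CREATE TABLE t (d DATE) PARTITION BY d", read="bigquery", write="bigquery")[0])
# roughly: CREATE TABLE t (d DATE) PARTITION BY d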