summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/dialects/bigquery.py
diff options
context:
space:
mode:
author: Daniel Baumann <mail@daniel-baumann.ch> 2023-12-10 10:46:01 +0000
committer: Daniel Baumann <mail@daniel-baumann.ch> 2023-12-10 10:46:01 +0000
commit: 8fe30fd23dc37ec3516e530a86d1c4b604e71241 (patch)
tree: 6e2ebbf565b0351fd0f003f488a8339e771ad90c /sqlglot/dialects/bigquery.py
parent: Releasing debian version 19.0.1-1. (diff)
download: sqlglot-8fe30fd23dc37ec3516e530a86d1c4b604e71241.tar.xz
          sqlglot-8fe30fd23dc37ec3516e530a86d1c4b604e71241.zip
Merging upstream version 20.1.0.
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r--  sqlglot/dialects/bigquery.py  117
1 file changed, 112 insertions, 5 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index fc9a3ae..2a9dde9 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -8,6 +8,7 @@ from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
Dialect,
+ NormalizationStrategy,
arg_max_or_min_no_count,
binary_from_function,
date_add_interval_sql,
@@ -23,6 +24,7 @@ from sqlglot.dialects.dialect import (
regexp_replace_sql,
rename_func,
timestrtotime_sql,
+ ts_or_ds_add_cast,
ts_or_ds_to_date_sql,
)
from sqlglot.helper import seq_get, split_num_words
@@ -174,6 +176,44 @@ def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)
+def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
+ return self.sql(
+ exp.Exists(
+ this=exp.select("1")
+ .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
+ .where(exp.column("_col").eq(expression.right))
+ )
+ )
+
+
+def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
+ return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))
+
+
+def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
+ expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
+ expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
+ unit = expression.args.get("unit") or "DAY"
+ return self.func("DATE_DIFF", expression.this, expression.expression, unit)
+
+
+def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
+ scale = expression.args.get("scale")
+ timestamp = self.sql(expression, "this")
+ if scale in (None, exp.UnixToTime.SECONDS):
+ return f"TIMESTAMP_SECONDS({timestamp})"
+ if scale == exp.UnixToTime.MILLIS:
+ return f"TIMESTAMP_MILLIS({timestamp})"
+ if scale == exp.UnixToTime.MICROS:
+ return f"TIMESTAMP_MICROS({timestamp})"
+ if scale == exp.UnixToTime.NANOS:
+ # We need to cast to INT64 because that's what BQ expects
+ return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))"
+
+ self.unsupported(f"Unsupported scale for timestamp: {scale}.")
+ return ""
+
+
class BigQuery(Dialect):
UNNEST_COLUMN_ONLY = True
SUPPORTS_USER_DEFINED_TYPES = False
@@ -181,7 +221,7 @@ class BigQuery(Dialect):
LOG_BASE_FIRST = False
# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
- RESOLVES_IDENTIFIERS_AS_UPPERCASE = None
+ NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
# bigquery udfs are case sensitive
NORMALIZE_FUNCTIONS = False
@@ -220,8 +260,7 @@ class BigQuery(Dialect):
# https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
- @classmethod
- def normalize_identifier(cls, expression: E) -> E:
+ def normalize_identifier(self, expression: E) -> E:
if isinstance(expression, exp.Identifier):
parent = expression.parent
while isinstance(parent, exp.Dot):
@@ -265,7 +304,6 @@ class BigQuery(Dialect):
"DECLARE": TokenType.COMMAND,
"FLOAT64": TokenType.DOUBLE,
"FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
- "INT64": TokenType.BIGINT,
"MODEL": TokenType.MODEL,
"NOT DETERMINISTIC": TokenType.VOLATILE,
"RECORD": TokenType.STRUCT,
@@ -316,6 +354,15 @@ class BigQuery(Dialect):
"TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
"TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
"TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
+ "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
+ this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
+ ),
+ "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
+ this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
+ ),
+ "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
+ this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS
+ ),
"TO_JSON_STRING": exp.JSONFormat.from_arg_list,
}
@@ -358,6 +405,24 @@ class BigQuery(Dialect):
NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}
+ STATEMENT_PARSERS = {
+ **parser.Parser.STATEMENT_PARSERS,
+ TokenType.END: lambda self: self._parse_as_command(self._prev),
+ TokenType.FOR: lambda self: self._parse_for_in(),
+ }
+
+ BRACKET_OFFSETS = {
+ "OFFSET": (0, False),
+ "ORDINAL": (1, False),
+ "SAFE_OFFSET": (0, True),
+ "SAFE_ORDINAL": (1, True),
+ }
+
+ def _parse_for_in(self) -> exp.ForIn:
+ this = self._parse_range()
+ self._match_text_seq("DO")
+ return self.expression(exp.ForIn, this=this, expression=self._parse_statement())
+
def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
this = super()._parse_table_part(schema=schema) or self._parse_number()
@@ -419,6 +484,26 @@ class BigQuery(Dialect):
return json_object
+ def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+ bracket = super()._parse_bracket(this)
+
+ if this is bracket:
+ return bracket
+
+ if isinstance(bracket, exp.Bracket):
+ for expression in bracket.expressions:
+ name = expression.name.upper()
+
+ if name not in self.BRACKET_OFFSETS:
+ break
+
+ offset, safe = self.BRACKET_OFFSETS[name]
+ bracket.set("offset", offset)
+ bracket.set("safe", safe)
+ expression.replace(expression.expressions[0])
+
+ return bracket
+
class Generator(generator.Generator):
EXPLICIT_UNION = True
INTERVAL_ALLOWS_PLURAL_FORM = False
@@ -430,12 +515,14 @@ class BigQuery(Dialect):
NVL2_SUPPORTED = False
UNNEST_WITH_ORDINALITY = False
COLLATE_IS_FUNC = True
+ LIMIT_ONLY_LITERALS = True
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
+ exp.ArrayContains: _array_contains_sql,
exp.ArraySize: rename_func("ARRAY_LENGTH"),
exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
@@ -498,10 +585,13 @@ class BigQuery(Dialect):
exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
exp.TimeStrToTime: timestrtotime_sql,
+ exp.TimeToStr: lambda self, e: f"FORMAT_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression),
- exp.TsOrDsAdd: date_add_interval_sql("DATE", "ADD"),
+ exp.TsOrDsAdd: _ts_or_ds_add_sql,
+ exp.TsOrDsDiff: _ts_or_ds_diff_sql,
exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
exp.Unhex: rename_func("FROM_HEX"),
+ exp.UnixToTime: _unix_to_time_sql,
exp.Values: _derived_table_values_to_unnest,
exp.VariancePop: rename_func("VAR_POP"),
}
@@ -671,6 +761,23 @@ class BigQuery(Dialect):
return inline_array_sql(self, expression)
+ def bracket_sql(self, expression: exp.Bracket) -> str:
+ expressions = expression.expressions
+ expressions_sql = ", ".join(self.sql(e) for e in expressions)
+ offset = expression.args.get("offset")
+
+ if offset == 0:
+ expressions_sql = f"OFFSET({expressions_sql})"
+ elif offset == 1:
+ expressions_sql = f"ORDINAL({expressions_sql})"
+ else:
+ self.unsupported(f"Unsupported array offset: {offset}")
+
+ if expression.args.get("safe"):
+ expressions_sql = f"SAFE_{expressions_sql}"
+
+ return f"{self.sql(expression, 'this')}[{expressions_sql}]"
+
def transaction_sql(self, *_) -> str:
return "BEGIN TRANSACTION"