summaryrefslogtreecommitdiffstats
path: root/sqlglot/dialects/bigquery.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-01-23 05:06:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-01-23 05:06:14 +0000
commit38e6461a8afbd7cb83709ddb998f03d40ba87755 (patch)
tree64b68a893a3b946111b9cab69503f83ca233c335 /sqlglot/dialects/bigquery.py
parentReleasing debian version 20.4.0-1. (diff)
downloadsqlglot-38e6461a8afbd7cb83709ddb998f03d40ba87755.tar.xz
sqlglot-38e6461a8afbd7cb83709ddb998f03d40ba87755.zip
Merging upstream version 20.9.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r--sqlglot/dialects/bigquery.py91
1 file changed, 75 insertions, 16 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 7a573e7..0151e6c 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -16,20 +16,22 @@ from sqlglot.dialects.dialect import (
format_time_lambda,
if_sql,
inline_array_sql,
- json_keyvalue_comma_sql,
max_or_greatest,
min_or_least,
no_ilike_sql,
parse_date_delta_with_interval,
+ path_to_jsonpath,
regexp_replace_sql,
rename_func,
timestrtotime_sql,
ts_or_ds_add_cast,
- ts_or_ds_to_date_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
+if t.TYPE_CHECKING:
+ from typing_extensions import Literal
+
logger = logging.getLogger("sqlglot")
@@ -206,12 +208,17 @@ def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> s
return f"TIMESTAMP_MILLIS({timestamp})"
if scale == exp.UnixToTime.MICROS:
return f"TIMESTAMP_MICROS({timestamp})"
- if scale == exp.UnixToTime.NANOS:
- # We need to cast to INT64 because that's what BQ expects
- return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))"
- self.unsupported(f"Unsupported scale for timestamp: {scale}.")
- return ""
+ return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))"
+
+
def _parse_time(args: t.List) -> exp.Func:
    """Map a BigQuery TIME(...) call onto the matching AST node by arity.

    TIME(h, m, s) builds a time from parts; TIME(expr) extracts the time
    from a timestamp/datetime-like value; any other arity is preserved as
    an opaque (anonymous) function call.
    """
    arg_count = len(args)

    if arg_count == 3:
        # TIME(hour, minute, second)
        return exp.TimeFromParts.from_arg_list(args)
    if arg_count == 1:
        # TIME(timestamp_or_datetime_expr)
        return exp.TsOrDsToTime(this=args[0])

    # Unrecognized arity: keep the call as-is so it round-trips unchanged.
    return exp.Anonymous(this="TIME", expressions=args)
class BigQuery(Dialect):
@@ -329,7 +336,13 @@ class BigQuery(Dialect):
"DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
"DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
"DIV": binary_from_function(exp.IntDiv),
+ "FORMAT_DATE": lambda args: exp.TimeToStr(
+ this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
+ ),
"GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
+ "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
+ this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
+ ),
"MD5": exp.MD5Digest.from_arg_list,
"TO_HEX": _parse_to_hex,
"PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
@@ -351,6 +364,7 @@ class BigQuery(Dialect):
this=seq_get(args, 0),
expression=seq_get(args, 1) or exp.Literal.string(","),
),
+ "TIME": _parse_time,
"TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
"TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
"TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
@@ -361,9 +375,7 @@ class BigQuery(Dialect):
"TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
),
- "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
- this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS
- ),
+ "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
"TO_JSON_STRING": exp.JSONFormat.from_arg_list,
}
@@ -460,7 +472,15 @@ class BigQuery(Dialect):
return table
- def _parse_json_object(self) -> exp.JSONObject:
+ @t.overload
+ def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject:
+ ...
+
+ @t.overload
+ def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg:
+ ...
+
+ def _parse_json_object(self, agg=False):
json_object = super()._parse_json_object()
array_kv_pair = seq_get(json_object.expressions, 0)
@@ -513,6 +533,10 @@ class BigQuery(Dialect):
UNNEST_WITH_ORDINALITY = False
COLLATE_IS_FUNC = True
LIMIT_ONLY_LITERALS = True
+ SUPPORTS_TABLE_ALIAS_COLUMNS = False
+ UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
+ JSON_KEY_VALUE_PAIR_SEP = ","
+ NULL_ORDERING_SUPPORTED = False
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
@@ -525,6 +549,7 @@ class BigQuery(Dialect):
exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
if e.args.get("default")
else f"COLLATE {self.sql(e, 'this')}",
+ exp.CountIf: rename_func("COUNTIF"),
exp.Create: _create_sql,
exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
@@ -536,13 +561,13 @@ class BigQuery(Dialect):
exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
+ exp.GetPath: path_to_jsonpath(),
exp.GroupConcat: rename_func("STRING_AGG"),
exp.Hex: rename_func("TO_HEX"),
exp.If: if_sql(false_value="NULL"),
exp.ILike: no_ilike_sql,
exp.IntDiv: rename_func("DIV"),
exp.JSONFormat: rename_func("TO_JSON_STRING"),
- exp.JSONKeyValue: json_keyvalue_comma_sql,
exp.Max: max_or_greatest,
exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
exp.MD5Digest: rename_func("MD5"),
@@ -578,16 +603,17 @@ class BigQuery(Dialect):
"PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
),
exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
+ exp.TimeFromParts: rename_func("TIME"),
exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
exp.TimeStrToTime: timestrtotime_sql,
- exp.TimeToStr: lambda self, e: f"FORMAT_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression),
exp.TsOrDsAdd: _ts_or_ds_add_sql,
exp.TsOrDsDiff: _ts_or_ds_diff_sql,
- exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
+ exp.TsOrDsToTime: rename_func("TIME"),
exp.Unhex: rename_func("FROM_HEX"),
+ exp.UnixDate: rename_func("UNIX_DATE"),
exp.UnixToTime: _unix_to_time_sql,
exp.Values: _derived_table_values_to_unnest,
exp.VariancePop: rename_func("VAR_POP"),
@@ -724,6 +750,26 @@ class BigQuery(Dialect):
"within",
}
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    """Render a TimeToStr node as BigQuery's FORMAT_DATE(format, value)."""
    # If the operand is already wrapped in TsOrDsToDate, take the SQL of the
    # wrapper's inner value; otherwise take the SQL of the node's own operand.
    source = (
        expression.this
        if isinstance(expression.this, exp.TsOrDsToDate)
        else expression
    )
    return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(source, 'this')})"
+
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a Struct node as STRUCT(...).

    Key/value members are emitted as ``<value> AS <key>``; every other
    member is rendered verbatim.
    """
    rendered = [
        f"{self.sql(member, 'expression')} AS {member.this.name}"
        if isinstance(member, self.KEY_VALUE_DEFINITIONS)
        else self.sql(member)
        for member in expression.expressions
    ]

    return self.func("STRUCT", *rendered)
+
def eq_sql(self, expression: exp.EQ) -> str:
# Operands of = cannot be NULL in BigQuery
if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
@@ -760,7 +806,20 @@ class BigQuery(Dialect):
return inline_array_sql(self, expression)
def bracket_sql(self, expression: exp.Bracket) -> str:
+ this = self.sql(expression, "this")
expressions = expression.expressions
+
+ if len(expressions) == 1:
+ arg = expressions[0]
+ if arg.type is None:
+ from sqlglot.optimizer.annotate_types import annotate_types
+
+ arg = annotate_types(arg)
+
+ if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
+ # BQ doesn't support bracket syntax with string values
+ return f"{this}.{arg.name}"
+
expressions_sql = ", ".join(self.sql(e) for e in expressions)
offset = expression.args.get("offset")
@@ -768,13 +827,13 @@ class BigQuery(Dialect):
expressions_sql = f"OFFSET({expressions_sql})"
elif offset == 1:
expressions_sql = f"ORDINAL({expressions_sql})"
- else:
+ elif offset is not None:
self.unsupported(f"Unsupported array offset: {offset}")
if expression.args.get("safe"):
expressions_sql = f"SAFE_{expressions_sql}"
- return f"{self.sql(expression, 'this')}[{expressions_sql}]"
+ return f"{this}[{expressions_sql}]"
def transaction_sql(self, *_) -> str:
    """Return BigQuery's transaction opener; all arguments are ignored."""
    return "BEGIN TRANSACTION"