Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r-- | sqlglot/dialects/bigquery.py | 91
1 file changed, 75 insertions, 16 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 7a573e7..0151e6c 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -16,20 +16,22 @@ from sqlglot.dialects.dialect import (
     format_time_lambda,
     if_sql,
     inline_array_sql,
-    json_keyvalue_comma_sql,
     max_or_greatest,
     min_or_least,
     no_ilike_sql,
     parse_date_delta_with_interval,
+    path_to_jsonpath,
     regexp_replace_sql,
     rename_func,
     timestrtotime_sql,
     ts_or_ds_add_cast,
-    ts_or_ds_to_date_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
 
+if t.TYPE_CHECKING:
+    from typing_extensions import Literal
+
 logger = logging.getLogger("sqlglot")
 
 
@@ -206,12 +208,17 @@ def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> s
         return f"TIMESTAMP_MILLIS({timestamp})"
     if scale == exp.UnixToTime.MICROS:
         return f"TIMESTAMP_MICROS({timestamp})"
-    if scale == exp.UnixToTime.NANOS:
-        # We need to cast to INT64 because that's what BQ expects
-        return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))"
 
-    self.unsupported(f"Unsupported scale for timestamp: {scale}.")
-    return ""
+    return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))"
+
+
+def _parse_time(args: t.List) -> exp.Func:
+    if len(args) == 1:
+        return exp.TsOrDsToTime(this=args[0])
+    if len(args) == 3:
+        return exp.TimeFromParts.from_arg_list(args)
+
+    return exp.Anonymous(this="TIME", expressions=args)
 
 
 class BigQuery(Dialect):
@@ -329,7 +336,13 @@ class BigQuery(Dialect):
             "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
             "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
             "DIV": binary_from_function(exp.IntDiv),
+            "FORMAT_DATE": lambda args: exp.TimeToStr(
+                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
+            ),
             "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
+            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
+                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
+            ),
             "MD5": exp.MD5Digest.from_arg_list,
             "TO_HEX": _parse_to_hex,
             "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
@@ -351,6 +364,7 @@ class BigQuery(Dialect):
                 this=seq_get(args, 0),
                 expression=seq_get(args, 1) or exp.Literal.string(","),
             ),
+            "TIME": _parse_time,
             "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
             "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
             "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
@@ -361,9 +375,7 @@ class BigQuery(Dialect):
             "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
             ),
-            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
-                this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS
-            ),
+            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
             "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
         }
 
@@ -460,7 +472,15 @@ class BigQuery(Dialect):
 
             return table
 
-        def _parse_json_object(self) -> exp.JSONObject:
+        @t.overload
+        def _parse_json_object(self, agg: Literal[False]) -> exp.JSONObject:
+            ...
+
+        @t.overload
+        def _parse_json_object(self, agg: Literal[True]) -> exp.JSONObjectAgg:
+            ...
+
+        def _parse_json_object(self, agg=False):
             json_object = super()._parse_json_object()
             array_kv_pair = seq_get(json_object.expressions, 0)
 
@@ -513,6 +533,10 @@ class BigQuery(Dialect):
         UNNEST_WITH_ORDINALITY = False
         COLLATE_IS_FUNC = True
         LIMIT_ONLY_LITERALS = True
+        SUPPORTS_TABLE_ALIAS_COLUMNS = False
+        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
+        JSON_KEY_VALUE_PAIR_SEP = ","
+        NULL_ORDERING_SUPPORTED = False
 
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
@@ -525,6 +549,7 @@ class BigQuery(Dialect):
             exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
             if e.args.get("default")
             else f"COLLATE {self.sql(e, 'this')}",
+            exp.CountIf: rename_func("COUNTIF"),
             exp.Create: _create_sql,
             exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
             exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
@@ -536,13 +561,13 @@ class BigQuery(Dialect):
             exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
             exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
             exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
+            exp.GetPath: path_to_jsonpath(),
             exp.GroupConcat: rename_func("STRING_AGG"),
             exp.Hex: rename_func("TO_HEX"),
             exp.If: if_sql(false_value="NULL"),
             exp.ILike: no_ilike_sql,
             exp.IntDiv: rename_func("DIV"),
             exp.JSONFormat: rename_func("TO_JSON_STRING"),
-            exp.JSONKeyValue: json_keyvalue_comma_sql,
             exp.Max: max_or_greatest,
             exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
             exp.MD5Digest: rename_func("MD5"),
@@ -578,16 +603,17 @@ class BigQuery(Dialect):
                 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
             ),
             exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
+            exp.TimeFromParts: rename_func("TIME"),
             exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
             exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
             exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
             exp.TimeStrToTime: timestrtotime_sql,
-            exp.TimeToStr: lambda self, e: f"FORMAT_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
             exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression),
             exp.TsOrDsAdd: _ts_or_ds_add_sql,
             exp.TsOrDsDiff: _ts_or_ds_diff_sql,
-            exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
+            exp.TsOrDsToTime: rename_func("TIME"),
             exp.Unhex: rename_func("FROM_HEX"),
+            exp.UnixDate: rename_func("UNIX_DATE"),
             exp.UnixToTime: _unix_to_time_sql,
             exp.Values: _derived_table_values_to_unnest,
             exp.VariancePop: rename_func("VAR_POP"),
@@ -724,6 +750,26 @@ class BigQuery(Dialect):
             "within",
         }
 
+        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
+            if isinstance(expression.this, exp.TsOrDsToDate):
+                this: exp.Expression = expression.this
+            else:
+                this = expression
+
+            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"
+
+        def struct_sql(self, expression: exp.Struct) -> str:
+            args = []
+            for expr in expression.expressions:
+                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
+                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
+                else:
+                    arg = self.sql(expr)
+
+                args.append(arg)
+
+            return self.func("STRUCT", *args)
+
         def eq_sql(self, expression: exp.EQ) -> str:
             # Operands of = cannot be NULL in BigQuery
             if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
@@ -760,7 +806,20 @@ class BigQuery(Dialect):
             return inline_array_sql(self, expression)
 
         def bracket_sql(self, expression: exp.Bracket) -> str:
+            this = self.sql(expression, "this")
             expressions = expression.expressions
+
+            if len(expressions) == 1:
+                arg = expressions[0]
+                if arg.type is None:
+                    from sqlglot.optimizer.annotate_types import annotate_types
+
+                    arg = annotate_types(arg)
+
+                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
+                    # BQ doesn't support bracket syntax with string values
+                    return f"{this}.{arg.name}"
+
             expressions_sql = ", ".join(self.sql(e) for e in expressions)
             offset = expression.args.get("offset")
 
@@ -768,13 +827,13 @@ class BigQuery(Dialect):
                 expressions_sql = f"OFFSET({expressions_sql})"
             elif offset == 1:
                 expressions_sql = f"ORDINAL({expressions_sql})"
-            else:
+            elif offset is not None:
                 self.unsupported(f"Unsupported array offset: {offset}")
 
             if expression.args.get("safe"):
                 expressions_sql = f"SAFE_{expressions_sql}"
 
-            return f"{self.sql(expression, 'this')}[{expressions_sql}]"
+            return f"{this}[{expressions_sql}]"
 
         def transaction_sql(self, *_) -> str:
             return "BEGIN TRANSACTION"
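
For reviewers who want to exercise the new parser paths, here is a minimal round-trip sketch. It assumes a sqlglot build that includes this commit and uses only the library's public transpile() entry point; the SQL strings are illustrative inputs chosen for this note, not taken from the diff.

# Round-trip sketch: BigQuery in, BigQuery out.
import sqlglot

# TIME() now dispatches on arity via _parse_time: one argument becomes
# exp.TsOrDsToTime, three arguments become exp.TimeFromParts; both
# render back as TIME(...) through rename_func("TIME").
print(sqlglot.transpile("SELECT TIME(12, 30, 15)", read="bigquery", write="bigquery")[0])

# TIMESTAMP_SECONDS now maps to exp.UnixToTime with the default (seconds)
# scale, so _unix_to_time_sql emits TIMESTAMP_SECONDS on the way back out.
print(sqlglot.transpile("SELECT TIMESTAMP_SECONDS(1700000000)", read="bigquery", write="bigquery")[0])

# FORMAT_DATE(fmt, value) parses into exp.TimeToStr over exp.TsOrDsToDate,
# and the new timetostr_sql unwraps the TsOrDsToDate when regenerating.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', dt)", read="bigquery", write="bigquery")[0])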
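
Similarly, a generator-side sketch of the new bracket_sql and struct_sql behavior (same assumptions; the DuckDB inputs and the exact rendered outputs are illustrative, not asserted by the diff):

import sqlglot

# bracket_sql: BigQuery has no bracket syntax for string subscripts, so a
# single text-typed key should be rewritten to dot access, e.g.
# col['key'] -> col.key, with annotate_types inferring the key's type.
print(sqlglot.transpile("SELECT col['key'] FROM t", read="duckdb", write="bigquery")[0])

# struct_sql: key/value struct definitions render as STRUCT(value AS key).
print(sqlglot.transpile("SELECT {'a': 1} AS s", read="duckdb", write="bigquery")[0])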