diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-03 14:11:07 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-03 14:11:07 +0000 |
commit | 42a1548cecf48d18233f56e3385cf9c89abcb9c2 (patch) | |
tree | 5e0fff4ecbd1fd7dd1022a7580139038df2a824c /sqlglot/dialects/bigquery.py | |
parent | Releasing debian version 21.1.2-1. (diff) | |
download | sqlglot-42a1548cecf48d18233f56e3385cf9c89abcb9c2.tar.xz sqlglot-42a1548cecf48d18233f56e3385cf9c89abcb9c2.zip |
Merging upstream version 22.2.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r-- | sqlglot/dialects/bigquery.py | 102 |
1 files changed, 70 insertions, 32 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index f867617..5bfc3ea 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -42,7 +42,10 @@ def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Va alias = expression.args.get("alias") for tup in expression.find_all(exp.Tuple): field_aliases = alias.columns if alias else (f"_c{i}" for i in range(len(tup.expressions))) - expressions = [exp.alias_(fld, name) for fld, name in zip(tup.expressions, field_aliases)] + expressions = [ + exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) + for name, fld in zip(field_aliases, tup.expressions) + ] structs.append(exp.Struct(expressions=expressions)) return self.unnest_sql(exp.Unnest(expressions=[exp.array(*structs, copy=False)])) @@ -111,6 +114,8 @@ def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: } for grouped in group.expressions: + if grouped.is_int: + continue alias = aliases.get(grouped) if alias: grouped.replace(exp.column(alias)) @@ -226,8 +231,11 @@ class BigQuery(Dialect): # bigquery udfs are case sensitive NORMALIZE_FUNCTIONS = False + # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time TIME_MAPPING = { "%D": "%m/%d/%y", + "%E*S": "%S.%f", + "%E6S": "%S.%f", } ESCAPE_SEQUENCES = { @@ -266,14 +274,20 @@ class BigQuery(Dialect): while isinstance(parent, exp.Dot): parent = parent.parent - # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). - # The following check is essentially a heuristic to detect tables based on whether or - # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. - if ( - not isinstance(parent, exp.UserDefinedFunction) - and not (isinstance(parent, exp.Table) and parent.db) - and not expression.meta.get("is_table") - ): + # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive + # by default. The following check uses a heuristic to detect tables based on whether + # they are qualified. This should generally be correct, because tables in BigQuery + # must be qualified with at least a dataset, unless @@dataset_id is set. + case_sensitive = ( + isinstance(parent, exp.UserDefinedFunction) + or ( + isinstance(parent, exp.Table) + and parent.db + and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) + ) + or expression.meta.get("is_table") + ) + if not case_sensitive: expression.set("this", expression.this.lower()) return expression @@ -302,6 +316,7 @@ class BigQuery(Dialect): "BYTES": TokenType.BINARY, "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, "DECLARE": TokenType.COMMAND, + "ELSEIF": TokenType.COMMAND, "EXCEPTION": TokenType.COMMAND, "FLOAT64": TokenType.DOUBLE, "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, @@ -315,8 +330,8 @@ class BigQuery(Dialect): class Parser(parser.Parser): PREFIXED_PIVOT_COLUMNS = True - LOG_DEFAULTS_TO_LN = True + SUPPORTS_IMPLICIT_UNNEST = True FUNCTIONS = { **parser.Parser.FUNCTIONS, @@ -410,6 +425,7 @@ class BigQuery(Dialect): STATEMENT_PARSERS = { **parser.Parser.STATEMENT_PARSERS, + TokenType.ELSE: lambda self: self._parse_as_command(self._prev), TokenType.END: lambda self: self._parse_as_command(self._prev), TokenType.FOR: lambda self: self._parse_for_in(), } @@ -433,8 +449,11 @@ class BigQuery(Dialect): if isinstance(this, exp.Identifier): table_name = this.name while self._match(TokenType.DASH, advance=False) and self._next: - self._advance(2) - table_name += f"-{self._prev.text}" + text = "" + while self._curr and self._curr.token_type != TokenType.DOT: + self._advance() + text += self._prev.text + table_name += text this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) elif isinstance(this, exp.Literal): @@ -448,12 +467,28 @@ class BigQuery(Dialect): return this def _parse_table_parts( - self, schema: bool = False, is_db_reference: bool = False + self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False ) -> exp.Table: - table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) + table = super()._parse_table_parts( + schema=schema, is_db_reference=is_db_reference, wildcard=True + ) + + # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here + if not table.catalog: + if table.db: + parts = table.db.split(".") + if len(parts) == 2 and not table.args["db"].quoted: + table.set("catalog", exp.Identifier(this=parts[0])) + table.set("db", exp.Identifier(this=parts[1])) + else: + parts = table.name.split(".") + if len(parts) == 2 and not table.this.quoted: + table.set("db", exp.Identifier(this=parts[0])) + table.set("this", exp.Identifier(this=parts[1])) + if isinstance(table.this, exp.Identifier) and "." in table.name: catalog, db, this, *rest = ( - t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) + t.cast(t.Optional[exp.Expression], exp.to_identifier(x, quoted=True)) for x in split_num_words(table.name, ".", 3) ) @@ -461,16 +496,15 @@ class BigQuery(Dialect): this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) table = exp.Table(this=this, db=db, catalog=catalog) + table.meta["quoted_table"] = True return table @t.overload - def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: - ... + def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... @t.overload - def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: - ... + def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... def _parse_json_object(self, agg=False): json_object = super()._parse_json_object() @@ -532,6 +566,7 @@ class BigQuery(Dialect): IGNORE_NULLS_IN_FUNC = True JSON_PATH_SINGLE_QUOTE_ESCAPE = True CAN_IMPLEMENT_ARRAY_ANY = True + NAMED_PLACEHOLDER_TOKEN = "@" TRANSFORMS = { **generator.Generator.TRANSFORMS, @@ -762,22 +797,25 @@ class BigQuery(Dialect): "within", } + def table_parts(self, expression: exp.Table) -> str: + # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so + # we need to make sure the correct quoting is used in each case. + # + # For example, if there is a CTE x that clashes with a schema name, then the former will + # return the table y in that schema, whereas the latter will return the CTE's y column: + # + # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join + # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest + if expression.meta.get("quoted_table"): + table_parts = ".".join(p.name for p in expression.parts) + return self.sql(exp.Identifier(this=table_parts, quoted=True)) + + return super().table_parts(expression) + def timetostr_sql(self, expression: exp.TimeToStr) -> str: this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression return self.func("FORMAT_DATE", self.format_time(expression), this.this) - def struct_sql(self, expression: exp.Struct) -> str: - args = [] - for expr in expression.expressions: - if isinstance(expr, self.KEY_VALUE_DEFINITIONS): - arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}" - else: - arg = self.sql(expr) - - args.append(arg) - - return self.func("STRUCT", *args) - def eq_sql(self, expression: exp.EQ) -> str: # Operands of = cannot be NULL in BigQuery if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): @@ -803,7 +841,7 @@ class BigQuery(Dialect): def array_sql(self, expression: exp.Array) -> str: first_arg = seq_get(expression.expressions, 0) - if isinstance(first_arg, exp.Subqueryable): + if isinstance(first_arg, exp.Query): return f"ARRAY{self.wrap(self.sql(first_arg))}" return inline_array_sql(self, expression) |