summaryrefslogtreecommitdiffstats
path: root/sqlglot/dialects/bigquery.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-03 14:11:07 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-03 14:11:07 +0000
commit42a1548cecf48d18233f56e3385cf9c89abcb9c2 (patch)
tree5e0fff4ecbd1fd7dd1022a7580139038df2a824c /sqlglot/dialects/bigquery.py
parentReleasing debian version 21.1.2-1. (diff)
downloadsqlglot-42a1548cecf48d18233f56e3385cf9c89abcb9c2.tar.xz
sqlglot-42a1548cecf48d18233f56e3385cf9c89abcb9c2.zip
Merging upstream version 22.2.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/bigquery.py')
-rw-r--r--sqlglot/dialects/bigquery.py102
1 files changed, 70 insertions, 32 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index f867617..5bfc3ea 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -42,7 +42,10 @@ def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Va
alias = expression.args.get("alias")
for tup in expression.find_all(exp.Tuple):
field_aliases = alias.columns if alias else (f"_c{i}" for i in range(len(tup.expressions)))
- expressions = [exp.alias_(fld, name) for fld, name in zip(tup.expressions, field_aliases)]
+ expressions = [
+ exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
+ for name, fld in zip(field_aliases, tup.expressions)
+ ]
structs.append(exp.Struct(expressions=expressions))
return self.unnest_sql(exp.Unnest(expressions=[exp.array(*structs, copy=False)]))
@@ -111,6 +114,8 @@ def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
}
for grouped in group.expressions:
+ if grouped.is_int:
+ continue
alias = aliases.get(grouped)
if alias:
grouped.replace(exp.column(alias))
@@ -226,8 +231,11 @@ class BigQuery(Dialect):
# bigquery udfs are case sensitive
NORMALIZE_FUNCTIONS = False
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
TIME_MAPPING = {
"%D": "%m/%d/%y",
+ "%E*S": "%S.%f",
+ "%E6S": "%S.%f",
}
ESCAPE_SEQUENCES = {
@@ -266,14 +274,20 @@ class BigQuery(Dialect):
while isinstance(parent, exp.Dot):
parent = parent.parent
- # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
- # The following check is essentially a heuristic to detect tables based on whether or
- # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
- if (
- not isinstance(parent, exp.UserDefinedFunction)
- and not (isinstance(parent, exp.Table) and parent.db)
- and not expression.meta.get("is_table")
- ):
+ # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
+ # by default. The following check uses a heuristic to detect tables based on whether
+ # they are qualified. This should generally be correct, because tables in BigQuery
+ # must be qualified with at least a dataset, unless @@dataset_id is set.
+ case_sensitive = (
+ isinstance(parent, exp.UserDefinedFunction)
+ or (
+ isinstance(parent, exp.Table)
+ and parent.db
+ and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
+ )
+ or expression.meta.get("is_table")
+ )
+ if not case_sensitive:
expression.set("this", expression.this.lower())
return expression
@@ -302,6 +316,7 @@ class BigQuery(Dialect):
"BYTES": TokenType.BINARY,
"CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
"DECLARE": TokenType.COMMAND,
+ "ELSEIF": TokenType.COMMAND,
"EXCEPTION": TokenType.COMMAND,
"FLOAT64": TokenType.DOUBLE,
"FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
@@ -315,8 +330,8 @@ class BigQuery(Dialect):
class Parser(parser.Parser):
PREFIXED_PIVOT_COLUMNS = True
-
LOG_DEFAULTS_TO_LN = True
+ SUPPORTS_IMPLICIT_UNNEST = True
FUNCTIONS = {
**parser.Parser.FUNCTIONS,
@@ -410,6 +425,7 @@ class BigQuery(Dialect):
STATEMENT_PARSERS = {
**parser.Parser.STATEMENT_PARSERS,
+ TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
TokenType.END: lambda self: self._parse_as_command(self._prev),
TokenType.FOR: lambda self: self._parse_for_in(),
}
@@ -433,8 +449,11 @@ class BigQuery(Dialect):
if isinstance(this, exp.Identifier):
table_name = this.name
while self._match(TokenType.DASH, advance=False) and self._next:
- self._advance(2)
- table_name += f"-{self._prev.text}"
+ text = ""
+ while self._curr and self._curr.token_type != TokenType.DOT:
+ self._advance()
+ text += self._prev.text
+ table_name += text
this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
elif isinstance(this, exp.Literal):
@@ -448,12 +467,28 @@ class BigQuery(Dialect):
return this
def _parse_table_parts(
- self, schema: bool = False, is_db_reference: bool = False
+ self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
- table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
+ table = super()._parse_table_parts(
+ schema=schema, is_db_reference=is_db_reference, wildcard=True
+ )
+
+ # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
+ if not table.catalog:
+ if table.db:
+ parts = table.db.split(".")
+ if len(parts) == 2 and not table.args["db"].quoted:
+ table.set("catalog", exp.Identifier(this=parts[0]))
+ table.set("db", exp.Identifier(this=parts[1]))
+ else:
+ parts = table.name.split(".")
+ if len(parts) == 2 and not table.this.quoted:
+ table.set("db", exp.Identifier(this=parts[0]))
+ table.set("this", exp.Identifier(this=parts[1]))
+
if isinstance(table.this, exp.Identifier) and "." in table.name:
catalog, db, this, *rest = (
- t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
+ t.cast(t.Optional[exp.Expression], exp.to_identifier(x, quoted=True))
for x in split_num_words(table.name, ".", 3)
)
@@ -461,16 +496,15 @@ class BigQuery(Dialect):
this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))
table = exp.Table(this=this, db=db, catalog=catalog)
+ table.meta["quoted_table"] = True
return table
@t.overload
- def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
- ...
+ def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...
@t.overload
- def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
- ...
+ def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
def _parse_json_object(self, agg=False):
json_object = super()._parse_json_object()
@@ -532,6 +566,7 @@ class BigQuery(Dialect):
IGNORE_NULLS_IN_FUNC = True
JSON_PATH_SINGLE_QUOTE_ESCAPE = True
CAN_IMPLEMENT_ARRAY_ANY = True
+ NAMED_PLACEHOLDER_TOKEN = "@"
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
@@ -762,22 +797,25 @@ class BigQuery(Dialect):
"within",
}
+ def table_parts(self, expression: exp.Table) -> str:
+ # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
+ # we need to make sure the correct quoting is used in each case.
+ #
+ # For example, if there is a CTE x that clashes with a schema name, then the former will
+ # return the table y in that schema, whereas the latter will return the CTE's y column:
+ #
+ # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
+ # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
+ if expression.meta.get("quoted_table"):
+ table_parts = ".".join(p.name for p in expression.parts)
+ return self.sql(exp.Identifier(this=table_parts, quoted=True))
+
+ return super().table_parts(expression)
+
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
return self.func("FORMAT_DATE", self.format_time(expression), this.this)
- def struct_sql(self, expression: exp.Struct) -> str:
- args = []
- for expr in expression.expressions:
- if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
- arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
- else:
- arg = self.sql(expr)
-
- args.append(arg)
-
- return self.func("STRUCT", *args)
-
def eq_sql(self, expression: exp.EQ) -> str:
# Operands of = cannot be NULL in BigQuery
if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
@@ -803,7 +841,7 @@ class BigQuery(Dialect):
def array_sql(self, expression: exp.Array) -> str:
first_arg = seq_get(expression.expressions, 0)
- if isinstance(first_arg, exp.Subqueryable):
+ if isinstance(first_arg, exp.Query):
return f"ARRAY{self.wrap(self.sql(first_arg))}"
return inline_array_sql(self, expression)