sqlglot.dialects.bigquery

from __future__ import annotations

import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    datestrtodate_sql,
    format_time_lambda,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_to_date_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType


def _date_add_sql(
    data_type: str, kind: str
) -> t.Callable[[generator.Generator, exp.Expression], str]:
    def func(self, expression):
        this = self.sql(expression, "this")
        unit = expression.args.get("unit")
        unit = exp.var(unit.name.upper() if unit else "DAY")
        interval = exp.Interval(this=expression.expression, unit=unit)
        return f"{data_type}_{kind}({this}, {self.sql(interval)})"

    return func


def _derived_table_values_to_unnest(self: generator.Generator, expression: exp.Values) -> str:
    if not isinstance(expression.unnest().parent, exp.From):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    tup.expressions,
                    alias.columns
                    if alias and alias.columns
                    else (f"_c{i}" for i in range(len(tup.expressions))),
                )
            ]
        )
        for tup in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: generator.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: generator.Generator, expression: exp.Create) -> str:
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)
    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression = expression.copy()
        expression.set("kind", "TABLE FUNCTION")
        if isinstance(
            expression.expression,
            (
                exp.Subquery,
                exp.Literal,
            ),
        ):
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases, since BigQuery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    if isinstance(expression, exp.Select):
        for unnest in expression.find_all(exp.Unnest):
            if isinstance(unnest.parent, (exp.From, exp.Join)) and unnest.alias:
                for select in expression.selects:
                    for column in select.find_all(exp.Column):
                        if column.table == unnest.alias:
                            column.set("table", None)

    return expression


class BigQuery(Dialect):
    UNNEST_COLUMN_ONLY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    @classmethod
    def normalize_identifier(cls, expression: E) -> E:
        # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
        # The following check is essentially a heuristic to detect tables based on whether or
        # not they're qualified.
        if (
            isinstance(expression, exp.Identifier)
            and not (isinstance(expression.parent, exp.Table) and expression.parent.db)
            and not expression.meta.get("is_table")
        ):
            expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "BYTES": TokenType.BINARY,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "INT64": TokenType.BIGINT,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "UNKNOWN": TokenType.NULL,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_BASE_FIRST = False
        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)),
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": lambda args: format_time_lambda(exp.StrToTime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1)
                if re.compile(str(seq_get(args, 1))).groups == 1
                else None,
            ),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema)

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.AtTimeZone: lambda self, e: self.func(
                "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
            ),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.DateAdd: _date_add_sql("DATE", "ADD"),
            exp.DateSub: _date_add_sql("DATE", "SUB"),
            exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
            exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.Select: transforms.preprocess(
                [_unqualify_unnest, transforms.eliminate_distinct_on]
            ),
            exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.TimeAdd: _date_add_sql("TIME", "ADD"),
            exp.TimeSub: _date_add_sql("TIME", "SUB"),
            exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TryCast: lambda self, e: f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})",
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
            exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.Values: _derived_table_values_to_unnest,
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Create: _create_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        RESERVED_KEYWORDS = {*generator.Generator.RESERVED_KEYWORDS, "hash"}

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))
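
A quick end-to-end sketch of how a few of these rules surface through the public API. This example is not part of the module source; the expected outputs follow from the TRANSFORMS and TYPE_MAPPING tables above, though exact formatting may vary across sqlglot versions:

import sqlglot

# TryCast is rendered as SAFE_CAST, and DOUBLE maps to FLOAT64.
print(sqlglot.transpile("SELECT TRY_CAST(x AS DOUBLE)", read="duckdb", write="bigquery")[0])
# SELECT SAFE_CAST(x AS FLOAT64)

# BigQuery's TIMESTAMP is timezone-aware, so the tokenizer maps it to
# TIMESTAMPTZ and it comes out as a timezone-aware type elsewhere.
print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP)", read="bigquery", write="duckdb")[0])
# SELECT CAST(x AS TIMESTAMPTZ)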
class BigQuery(sqlglot.dialects.dialect.Dialect):
@classmethod
def normalize_identifier(cls, expression: ~E) -> ~E:

Normalizes an unquoted identifier to either lower or upper case, making it effectively case-insensitive. If a dialect treats all identifiers as case-insensitive, they are normalized whether or not they are quoted.
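
A minimal sketch of BigQuery's heuristic (the names MyCte and MyTable are made-up examples):

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

# An unqualified identifier is assumed not to name a table, so it is lowercased.
print(BigQuery.normalize_identifier(exp.to_identifier("MyCte")).this)  # mycte

# The name of a qualified table is left alone, since table names are case-sensitive.
table = exp.to_table("my_project.my_dataset.MyTable")
print(BigQuery.normalize_identifier(table.this).this)  # MyTable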

class BigQuery.Tokenizer(sqlglot.tokens.Tokenizer):
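The Tokenizer covers BigQuery's lexical layer: backtick-quoted identifiers, # line comments, b/r string prefixes, and type keyword aliases such as INT64, FLOAT64 and BYTES. A small sketch of the effect (exact comment placement in the output may vary by sqlglot version):

import sqlglot

# Backticks quote identifiers and '#' opens a line comment in BigQuery.
sql = "SELECT `user id` FROM t # kept as a comment"
print(sqlglot.transpile(sql, read="bigquery", write="postgres")[0])
# roughly: SELECT "user id" FROM t /* kept as a comment */
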
class BigQuery.Parser(sqlglot.parser.Parser):

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
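
Two BigQuery-specific behaviors defined in this class are worth a sketch: dash-separated table names and PARSE_TIMESTAMP's format-first argument order. The project/dataset names below are hypothetical, and output formatting may vary by sqlglot version:

import sqlglot
from sqlglot import exp

# _parse_table_part stitches dash-separated tokens back into a single name,
# so unquoted GCP-style project ids parse cleanly.
ast = sqlglot.parse_one("SELECT * FROM my-project.my_dataset.my_table", read="bigquery")
print(ast.find(exp.Table).sql(dialect="bigquery"))  # my-project.my_dataset.my_table

# PARSE_TIMESTAMP takes the format string first; the FUNCTIONS entry swaps the
# arguments into exp.StrToTime so other dialects can render their own equivalent.
print(sqlglot.transpile("SELECT PARSE_TIMESTAMP('%Y-%m-%d', x)", read="bigquery", write="duckdb")[0])
# SELECT STRPTIME(x, '%Y-%m-%d')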
class BigQuery.Generator(sqlglot.generator.Generator):

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether or not to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are: False (default): only quote when the dialect makes it mandatory; True or 'always': always quote; 'safe': only quote identifiers that are case-insensitive.
  • normalize: Whether or not to normalize identifiers to lowercase. Default: False.
  • pad: Determines the pad size in a formatted string. Default: 2.
  • indent: Determines the indentation size in a formatted string. Default: 2.
  • normalize_functions: Whether or not to normalize function names. Possible values are: "upper" or True (default): convert names to uppercase; "lower": convert names to lowercase; False: disable function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
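
A sketch of two Generator behaviors defined in this class, the EXPLICIT_UNION set-operation qualifiers and the LIMIT_FETCH rewrite (exact output may vary by sqlglot version):

import sqlglot

# EXPLICIT_UNION and except_op force an explicit DISTINCT/ALL qualifier.
print(sqlglot.transpile("SELECT a FROM x EXCEPT SELECT a FROM y", read="postgres", write="bigquery")[0])
# SELECT a FROM x EXCEPT DISTINCT SELECT a FROM y

# LIMIT_FETCH = "LIMIT" rewrites FETCH clauses into LIMIT.
print(sqlglot.transpile("SELECT a FROM x FETCH FIRST 5 ROWS ONLY", read="postgres", write="bigquery")[0])
# SELECT a FROM x LIMIT 5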
def array_sql(self, expression: sqlglot.expressions.Array) -> str:
def transaction_sql(self, *_) -> str:
def commit_sql(self, *_) -> str:
def rollback_sql(self, *_) -> str:
def in_unnest_op(self, expression: sqlglot.expressions.Unnest) -> str:
def except_op(self, expression: sqlglot.expressions.Except) -> str:
def intersect_op(self, expression: sqlglot.expressions.Intersect) -> str:
def with_properties(self, properties: sqlglot.expressions.Properties) -> str:
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns True.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False

Checks if text can be identified given an identify option.

Arguments:
  • text: The text to check.
  • identify: "always" or True: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
  Whether or not the given text can be identified.
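
A short sketch of the options, assuming this version's case-sensitivity rules (RESOLVES_IDENTIFIERS_AS_UPPERCASE is None for BigQuery, so no character is treated as case-sensitive):

from sqlglot.dialects.bigquery import BigQuery

print(BigQuery.can_identify("Col", identify=True))      # True: always quote
print(BigQuery.can_identify("Col", identify="always"))  # True
print(BigQuery.can_identify("Col", identify="safe"))    # True for BigQuery, per the rule above
print(BigQuery.can_identify("Col", identify=False))     # False: never quote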
