sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot.dialects.dialect import ( 9 Dialect, 10 NormalizationStrategy, 11 arg_max_or_min_no_count, 12 binary_from_function, 13 date_add_interval_sql, 14 datestrtodate_sql, 15 format_time_lambda, 16 if_sql, 17 inline_array_sql, 18 max_or_greatest, 19 min_or_least, 20 no_ilike_sql, 21 parse_date_delta_with_interval, 22 regexp_replace_sql, 23 rename_func, 24 timestrtotime_sql, 25 ts_or_ds_add_cast, 26) 27from sqlglot.helper import seq_get, split_num_words 28from sqlglot.tokens import TokenType 29 30if t.TYPE_CHECKING: 31 from sqlglot._typing import E, Lit 32 33logger = logging.getLogger("sqlglot") 34 35 36def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 37 if not expression.find_ancestor(exp.From, exp.Join): 38 return self.values_sql(expression) 39 40 alias = expression.args.get("alias") 41 42 return self.unnest_sql( 43 exp.Unnest( 44 expressions=[ 45 exp.array( 46 *( 47 exp.Struct( 48 expressions=[ 49 exp.alias_(value, column_name) 50 for value, column_name in zip( 51 t.expressions, 52 ( 53 alias.columns 54 if alias and alias.columns 55 else (f"_c{i}" for i in range(len(t.expressions))) 56 ), 57 ) 58 ] 59 ) 60 for t in expression.find_all(exp.Tuple) 61 ), 62 copy=False, 63 ) 64 ] 65 ) 66 ) 67 68 69def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 70 this = expression.this 71 if isinstance(this, exp.Schema): 72 this = f"{this.this} <{self.expressions(this)}>" 73 else: 74 this = self.sql(this) 75 return f"RETURNS {this}" 76 77 78def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 79 kind = expression.args["kind"] 80 returns = expression.find(exp.ReturnsProperty) 81 82 if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"): 83 expression.set("kind", "TABLE FUNCTION") 84 85 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 86 expression.set("expression", expression.expression.this) 87 88 return self.create_sql(expression) 89 90 return self.create_sql(expression) 91 92 93def _unqualify_unnest(expression: exp.Expression) -> exp.Expression: 94 """Remove references to unnest table aliases since bigquery doesn't allow them. 95 96 These are added by the optimizer's qualify_column step. 
97 """ 98 from sqlglot.optimizer.scope import find_all_in_scope 99 100 if isinstance(expression, exp.Select): 101 unnest_aliases = { 102 unnest.alias 103 for unnest in find_all_in_scope(expression, exp.Unnest) 104 if isinstance(unnest.parent, (exp.From, exp.Join)) 105 } 106 if unnest_aliases: 107 for column in expression.find_all(exp.Column): 108 if column.table in unnest_aliases: 109 column.set("table", None) 110 elif column.db in unnest_aliases: 111 column.set("db", None) 112 113 return expression 114 115 116# https://issuetracker.google.com/issues/162294746 117# workaround for bigquery bug when grouping by an expression and then ordering 118# WITH x AS (SELECT 1 y) 119# SELECT y + 1 z 120# FROM x 121# GROUP BY x + 1 122# ORDER by z 123def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 124 if isinstance(expression, exp.Select): 125 group = expression.args.get("group") 126 order = expression.args.get("order") 127 128 if group and order: 129 aliases = { 130 select.this: select.args["alias"] 131 for select in expression.selects 132 if isinstance(select, exp.Alias) 133 } 134 135 for e in group.expressions: 136 alias = aliases.get(e) 137 138 if alias: 139 e.replace(exp.column(alias)) 140 141 return expression 142 143 144def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 145 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 146 if isinstance(expression, exp.CTE) and expression.alias_column_names: 147 cte_query = expression.this 148 149 if cte_query.is_star: 150 logger.warning( 151 "Can't push down CTE column names for star queries. Run the query through" 152 " the optimizer or use 'qualify' to expand the star projections first." 153 ) 154 return expression 155 156 column_names = expression.alias_column_names 157 expression.args["alias"].set("columns", None) 158 159 for name, select in zip(column_names, cte_query.selects): 160 to_replace = select 161 162 if isinstance(select, exp.Alias): 163 select = select.this 164 165 # Inner aliases are shadowed by the CTE column names 166 to_replace.replace(exp.alias_(select, name)) 167 168 return expression 169 170 171def _parse_parse_timestamp(args: t.List) -> exp.StrToTime: 172 this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 173 this.set("zone", seq_get(args, 2)) 174 return this 175 176 177def _parse_timestamp(args: t.List) -> exp.Timestamp: 178 timestamp = exp.Timestamp.from_arg_list(args) 179 timestamp.set("with_tz", True) 180 return timestamp 181 182 183def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts: 184 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 185 return expr_type.from_arg_list(args) 186 187 188def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5: 189 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 190 arg = seq_get(args, 0) 191 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg) 192 193 194def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 195 return self.sql( 196 exp.Exists( 197 this=exp.select("1") 198 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 199 .where(exp.column("_col").eq(expression.right)) 200 ) 201 ) 202 203 204def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 205 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 206 207 208def _ts_or_ds_diff_sql(self: 
BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 209 expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True)) 210 expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True)) 211 unit = expression.args.get("unit") or "DAY" 212 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 213 214 215def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 216 scale = expression.args.get("scale") 217 timestamp = self.sql(expression, "this") 218 if scale in (None, exp.UnixToTime.SECONDS): 219 return f"TIMESTAMP_SECONDS({timestamp})" 220 if scale == exp.UnixToTime.MILLIS: 221 return f"TIMESTAMP_MILLIS({timestamp})" 222 if scale == exp.UnixToTime.MICROS: 223 return f"TIMESTAMP_MICROS({timestamp})" 224 225 return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))" 226 227 228def _parse_time(args: t.List) -> exp.Func: 229 if len(args) == 1: 230 return exp.TsOrDsToTime(this=args[0]) 231 if len(args) == 3: 232 return exp.TimeFromParts.from_arg_list(args) 233 234 return exp.Anonymous(this="TIME", expressions=args) 235 236 237class BigQuery(Dialect): 238 WEEK_OFFSET = -1 239 UNNEST_COLUMN_ONLY = True 240 SUPPORTS_USER_DEFINED_TYPES = False 241 SUPPORTS_SEMI_ANTI_JOIN = False 242 LOG_BASE_FIRST = False 243 244 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 245 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 246 247 # bigquery udfs are case sensitive 248 NORMALIZE_FUNCTIONS = False 249 250 TIME_MAPPING = { 251 "%D": "%m/%d/%y", 252 } 253 254 ESCAPE_SEQUENCES = { 255 "\\a": "\a", 256 "\\b": "\b", 257 "\\f": "\f", 258 "\\n": "\n", 259 "\\r": "\r", 260 "\\t": "\t", 261 "\\v": "\v", 262 } 263 264 FORMAT_MAPPING = { 265 "DD": "%d", 266 "MM": "%m", 267 "MON": "%b", 268 "MONTH": "%B", 269 "YYYY": "%Y", 270 "YY": "%y", 271 "HH": "%I", 272 "HH12": "%I", 273 "HH24": "%H", 274 "MI": "%M", 275 "SS": "%S", 276 "SSSSS": "%f", 277 "TZH": "%z", 278 } 279 280 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 281 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 282 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 283 284 def normalize_identifier(self, expression: E) -> E: 285 if isinstance(expression, exp.Identifier): 286 parent = expression.parent 287 while isinstance(parent, exp.Dot): 288 parent = parent.parent 289 290 # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). 291 # The following check is essentially a heuristic to detect tables based on whether or 292 # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. 
293 if ( 294 not isinstance(parent, exp.UserDefinedFunction) 295 and not (isinstance(parent, exp.Table) and parent.db) 296 and not expression.meta.get("is_table") 297 ): 298 expression.set("this", expression.this.lower()) 299 300 return expression 301 302 class Tokenizer(tokens.Tokenizer): 303 QUOTES = ["'", '"', '"""', "'''"] 304 COMMENTS = ["--", "#", ("/*", "*/")] 305 IDENTIFIERS = ["`"] 306 STRING_ESCAPES = ["\\"] 307 308 HEX_STRINGS = [("0x", ""), ("0X", "")] 309 310 BYTE_STRINGS = [ 311 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 312 ] 313 314 RAW_STRINGS = [ 315 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 316 ] 317 318 KEYWORDS = { 319 **tokens.Tokenizer.KEYWORDS, 320 "ANY TYPE": TokenType.VARIANT, 321 "BEGIN": TokenType.COMMAND, 322 "BEGIN TRANSACTION": TokenType.BEGIN, 323 "BYTES": TokenType.BINARY, 324 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 325 "DECLARE": TokenType.COMMAND, 326 "FLOAT64": TokenType.DOUBLE, 327 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 328 "MODEL": TokenType.MODEL, 329 "NOT DETERMINISTIC": TokenType.VOLATILE, 330 "RECORD": TokenType.STRUCT, 331 "TIMESTAMP": TokenType.TIMESTAMPTZ, 332 } 333 KEYWORDS.pop("DIV") 334 KEYWORDS.pop("VALUES") 335 336 class Parser(parser.Parser): 337 PREFIXED_PIVOT_COLUMNS = True 338 339 LOG_DEFAULTS_TO_LN = True 340 341 FUNCTIONS = { 342 **parser.Parser.FUNCTIONS, 343 "DATE": _parse_date, 344 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 345 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 346 "DATE_TRUNC": lambda args: exp.DateTrunc( 347 unit=exp.Literal.string(str(seq_get(args, 1))), 348 this=seq_get(args, 0), 349 ), 350 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 351 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 352 "DIV": binary_from_function(exp.IntDiv), 353 "FORMAT_DATE": lambda args: exp.TimeToStr( 354 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 355 ), 356 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 357 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 358 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 359 ), 360 "MD5": exp.MD5Digest.from_arg_list, 361 "TO_HEX": _parse_to_hex, 362 "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")( 363 [seq_get(args, 1), seq_get(args, 0)] 364 ), 365 "PARSE_TIMESTAMP": _parse_parse_timestamp, 366 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 367 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 368 this=seq_get(args, 0), 369 expression=seq_get(args, 1), 370 position=seq_get(args, 2), 371 occurrence=seq_get(args, 3), 372 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 373 ), 374 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 375 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 376 "SPLIT": lambda args: exp.Split( 377 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 378 this=seq_get(args, 0), 379 expression=seq_get(args, 1) or exp.Literal.string(","), 380 ), 381 "TIME": _parse_time, 382 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 383 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 384 "TIMESTAMP": _parse_timestamp, 385 "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), 386 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 387 "TIMESTAMP_MICROS": lambda args: 
exp.UnixToTime( 388 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 389 ), 390 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 391 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 392 ), 393 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 394 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 395 } 396 397 FUNCTION_PARSERS = { 398 **parser.Parser.FUNCTION_PARSERS, 399 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 400 } 401 FUNCTION_PARSERS.pop("TRIM") 402 403 NO_PAREN_FUNCTIONS = { 404 **parser.Parser.NO_PAREN_FUNCTIONS, 405 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 406 } 407 408 NESTED_TYPE_TOKENS = { 409 *parser.Parser.NESTED_TYPE_TOKENS, 410 TokenType.TABLE, 411 } 412 413 PROPERTY_PARSERS = { 414 **parser.Parser.PROPERTY_PARSERS, 415 "NOT DETERMINISTIC": lambda self: self.expression( 416 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 417 ), 418 "OPTIONS": lambda self: self._parse_with_property(), 419 } 420 421 CONSTRAINT_PARSERS = { 422 **parser.Parser.CONSTRAINT_PARSERS, 423 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 424 } 425 426 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 427 RANGE_PARSERS.pop(TokenType.OVERLAPS, None) 428 429 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 430 431 STATEMENT_PARSERS = { 432 **parser.Parser.STATEMENT_PARSERS, 433 TokenType.END: lambda self: self._parse_as_command(self._prev), 434 TokenType.FOR: lambda self: self._parse_for_in(), 435 } 436 437 BRACKET_OFFSETS = { 438 "OFFSET": (0, False), 439 "ORDINAL": (1, False), 440 "SAFE_OFFSET": (0, True), 441 "SAFE_ORDINAL": (1, True), 442 } 443 444 def _parse_for_in(self) -> exp.ForIn: 445 this = self._parse_range() 446 self._match_text_seq("DO") 447 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 448 449 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 450 this = super()._parse_table_part(schema=schema) or self._parse_number() 451 452 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 453 if isinstance(this, exp.Identifier): 454 table_name = this.name 455 while self._match(TokenType.DASH, advance=False) and self._next: 456 self._advance(2) 457 table_name += f"-{self._prev.text}" 458 459 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 460 elif isinstance(this, exp.Literal): 461 table_name = this.name 462 463 if self._is_connected() and self._parse_var(any_token=True): 464 table_name += self._prev.text 465 466 this = exp.Identifier(this=table_name, quoted=True) 467 468 return this 469 470 def _parse_table_parts( 471 self, schema: bool = False, is_db_reference: bool = False 472 ) -> exp.Table: 473 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 474 if isinstance(table.this, exp.Identifier) and "." in table.name: 475 catalog, db, this, *rest = ( 476 t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) 477 for x in split_num_words(table.name, ".", 3) 478 ) 479 480 if rest and this: 481 this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) 482 483 table = exp.Table(this=this, db=db, catalog=catalog) 484 485 return table 486 487 @t.overload 488 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 489 ... 490 491 @t.overload 492 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 493 ... 
494 495 def _parse_json_object(self, agg=False): 496 json_object = super()._parse_json_object() 497 array_kv_pair = seq_get(json_object.expressions, 0) 498 499 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 500 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 501 if ( 502 array_kv_pair 503 and isinstance(array_kv_pair.this, exp.Array) 504 and isinstance(array_kv_pair.expression, exp.Array) 505 ): 506 keys = array_kv_pair.this.expressions 507 values = array_kv_pair.expression.expressions 508 509 json_object.set( 510 "expressions", 511 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 512 ) 513 514 return json_object 515 516 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 517 bracket = super()._parse_bracket(this) 518 519 if this is bracket: 520 return bracket 521 522 if isinstance(bracket, exp.Bracket): 523 for expression in bracket.expressions: 524 name = expression.name.upper() 525 526 if name not in self.BRACKET_OFFSETS: 527 break 528 529 offset, safe = self.BRACKET_OFFSETS[name] 530 bracket.set("offset", offset) 531 bracket.set("safe", safe) 532 expression.replace(expression.expressions[0]) 533 534 return bracket 535 536 class Generator(generator.Generator): 537 EXPLICIT_UNION = True 538 INTERVAL_ALLOWS_PLURAL_FORM = False 539 JOIN_HINTS = False 540 QUERY_HINTS = False 541 TABLE_HINTS = False 542 LIMIT_FETCH = "LIMIT" 543 RENAME_TABLE_WITH_DB = False 544 NVL2_SUPPORTED = False 545 UNNEST_WITH_ORDINALITY = False 546 COLLATE_IS_FUNC = True 547 LIMIT_ONLY_LITERALS = True 548 SUPPORTS_TABLE_ALIAS_COLUMNS = False 549 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 550 JSON_KEY_VALUE_PAIR_SEP = "," 551 NULL_ORDERING_SUPPORTED = False 552 IGNORE_NULLS_IN_FUNC = True 553 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 554 555 TRANSFORMS = { 556 **generator.Generator.TRANSFORMS, 557 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 558 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 559 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 560 exp.ArrayContains: _array_contains_sql, 561 exp.ArraySize: rename_func("ARRAY_LENGTH"), 562 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 563 exp.CollateProperty: lambda self, e: ( 564 f"DEFAULT COLLATE {self.sql(e, 'this')}" 565 if e.args.get("default") 566 else f"COLLATE {self.sql(e, 'this')}" 567 ), 568 exp.CountIf: rename_func("COUNTIF"), 569 exp.Create: _create_sql, 570 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 571 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 572 exp.DateDiff: lambda self, 573 e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 574 exp.DateFromParts: rename_func("DATE"), 575 exp.DateStrToDate: datestrtodate_sql, 576 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 577 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 578 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 579 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 580 exp.FromTimeZone: lambda self, e: self.func( 581 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 582 ), 583 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 584 exp.GroupConcat: rename_func("STRING_AGG"), 585 exp.Hex: rename_func("TO_HEX"), 586 exp.If: if_sql(false_value="NULL"), 587 exp.ILike: no_ilike_sql, 588 exp.IntDiv: rename_func("DIV"), 589 exp.JSONFormat: 
rename_func("TO_JSON_STRING"), 590 exp.Max: max_or_greatest, 591 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 592 exp.MD5Digest: rename_func("MD5"), 593 exp.Min: min_or_least, 594 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 595 exp.RegexpExtract: lambda self, e: self.func( 596 "REGEXP_EXTRACT", 597 e.this, 598 e.expression, 599 e.args.get("position"), 600 e.args.get("occurrence"), 601 ), 602 exp.RegexpReplace: regexp_replace_sql, 603 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 604 exp.ReturnsProperty: _returnsproperty_sql, 605 exp.Select: transforms.preprocess( 606 [ 607 transforms.explode_to_unnest(), 608 _unqualify_unnest, 609 transforms.eliminate_distinct_on, 610 _alias_ordered_group, 611 transforms.eliminate_semi_and_anti_joins, 612 ] 613 ), 614 exp.SHA2: lambda self, e: self.func( 615 "SHA256" if e.text("length") == "256" else "SHA512", e.this 616 ), 617 exp.StabilityProperty: lambda self, e: ( 618 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 619 ), 620 exp.StrToDate: lambda self, 621 e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", 622 exp.StrToTime: lambda self, e: self.func( 623 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 624 ), 625 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 626 exp.TimeFromParts: rename_func("TIME"), 627 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 628 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 629 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 630 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 631 exp.TimeStrToTime: timestrtotime_sql, 632 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 633 exp.TsOrDsAdd: _ts_or_ds_add_sql, 634 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 635 exp.TsOrDsToTime: rename_func("TIME"), 636 exp.Unhex: rename_func("FROM_HEX"), 637 exp.UnixDate: rename_func("UNIX_DATE"), 638 exp.UnixToTime: _unix_to_time_sql, 639 exp.Values: _derived_table_values_to_unnest, 640 exp.VariancePop: rename_func("VAR_POP"), 641 } 642 643 SUPPORTED_JSON_PATH_PARTS = { 644 exp.JSONPathKey, 645 exp.JSONPathRoot, 646 exp.JSONPathSubscript, 647 } 648 649 TYPE_MAPPING = { 650 **generator.Generator.TYPE_MAPPING, 651 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 652 exp.DataType.Type.BIGINT: "INT64", 653 exp.DataType.Type.BINARY: "BYTES", 654 exp.DataType.Type.BOOLEAN: "BOOL", 655 exp.DataType.Type.CHAR: "STRING", 656 exp.DataType.Type.DECIMAL: "NUMERIC", 657 exp.DataType.Type.DOUBLE: "FLOAT64", 658 exp.DataType.Type.FLOAT: "FLOAT64", 659 exp.DataType.Type.INT: "INT64", 660 exp.DataType.Type.NCHAR: "STRING", 661 exp.DataType.Type.NVARCHAR: "STRING", 662 exp.DataType.Type.SMALLINT: "INT64", 663 exp.DataType.Type.TEXT: "STRING", 664 exp.DataType.Type.TIMESTAMP: "DATETIME", 665 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 666 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 667 exp.DataType.Type.TINYINT: "INT64", 668 exp.DataType.Type.VARBINARY: "BYTES", 669 exp.DataType.Type.VARCHAR: "STRING", 670 exp.DataType.Type.VARIANT: "ANY TYPE", 671 } 672 673 PROPERTIES_LOCATION = { 674 **generator.Generator.PROPERTIES_LOCATION, 675 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 676 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 677 } 678 679 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 680 RESERVED_KEYWORDS = { 681 *generator.Generator.RESERVED_KEYWORDS, 682 "all", 683 "and", 684 "any", 685 "array", 686 "as", 687 "asc", 688 
"assert_rows_modified", 689 "at", 690 "between", 691 "by", 692 "case", 693 "cast", 694 "collate", 695 "contains", 696 "create", 697 "cross", 698 "cube", 699 "current", 700 "default", 701 "define", 702 "desc", 703 "distinct", 704 "else", 705 "end", 706 "enum", 707 "escape", 708 "except", 709 "exclude", 710 "exists", 711 "extract", 712 "false", 713 "fetch", 714 "following", 715 "for", 716 "from", 717 "full", 718 "group", 719 "grouping", 720 "groups", 721 "hash", 722 "having", 723 "if", 724 "ignore", 725 "in", 726 "inner", 727 "intersect", 728 "interval", 729 "into", 730 "is", 731 "join", 732 "lateral", 733 "left", 734 "like", 735 "limit", 736 "lookup", 737 "merge", 738 "natural", 739 "new", 740 "no", 741 "not", 742 "null", 743 "nulls", 744 "of", 745 "on", 746 "or", 747 "order", 748 "outer", 749 "over", 750 "partition", 751 "preceding", 752 "proto", 753 "qualify", 754 "range", 755 "recursive", 756 "respect", 757 "right", 758 "rollup", 759 "rows", 760 "select", 761 "set", 762 "some", 763 "struct", 764 "tablesample", 765 "then", 766 "to", 767 "treat", 768 "true", 769 "unbounded", 770 "union", 771 "unnest", 772 "using", 773 "when", 774 "where", 775 "window", 776 "with", 777 "within", 778 } 779 780 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 781 if isinstance(expression.this, exp.TsOrDsToDate): 782 this: exp.Expression = expression.this 783 else: 784 this = expression 785 786 return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})" 787 788 def struct_sql(self, expression: exp.Struct) -> str: 789 args = [] 790 for expr in expression.expressions: 791 if isinstance(expr, self.KEY_VALUE_DEFINITIONS): 792 arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}" 793 else: 794 arg = self.sql(expr) 795 796 args.append(arg) 797 798 return self.func("STRUCT", *args) 799 800 def eq_sql(self, expression: exp.EQ) -> str: 801 # Operands of = cannot be NULL in BigQuery 802 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 803 if not isinstance(expression.parent, exp.Update): 804 return "NULL" 805 806 return self.binary(expression, "=") 807 808 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 809 parent = expression.parent 810 811 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 812 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
813 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 814 return self.func( 815 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 816 ) 817 818 return super().attimezone_sql(expression) 819 820 def trycast_sql(self, expression: exp.TryCast) -> str: 821 return self.cast_sql(expression, safe_prefix="SAFE_") 822 823 def cte_sql(self, expression: exp.CTE) -> str: 824 if expression.alias_column_names: 825 self.unsupported("Column names in CTE definition are not supported.") 826 return super().cte_sql(expression) 827 828 def array_sql(self, expression: exp.Array) -> str: 829 first_arg = seq_get(expression.expressions, 0) 830 if isinstance(first_arg, exp.Subqueryable): 831 return f"ARRAY{self.wrap(self.sql(first_arg))}" 832 833 return inline_array_sql(self, expression) 834 835 def bracket_sql(self, expression: exp.Bracket) -> str: 836 this = self.sql(expression, "this") 837 expressions = expression.expressions 838 839 if len(expressions) == 1: 840 arg = expressions[0] 841 if arg.type is None: 842 from sqlglot.optimizer.annotate_types import annotate_types 843 844 arg = annotate_types(arg) 845 846 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 847 # BQ doesn't support bracket syntax with string values 848 return f"{this}.{arg.name}" 849 850 expressions_sql = ", ".join(self.sql(e) for e in expressions) 851 offset = expression.args.get("offset") 852 853 if offset == 0: 854 expressions_sql = f"OFFSET({expressions_sql})" 855 elif offset == 1: 856 expressions_sql = f"ORDINAL({expressions_sql})" 857 elif offset is not None: 858 self.unsupported(f"Unsupported array offset: {offset}") 859 860 if expression.args.get("safe"): 861 expressions_sql = f"SAFE_{expressions_sql}" 862 863 return f"{this}[{expressions_sql}]" 864 865 def transaction_sql(self, *_) -> str: 866 return "BEGIN TRANSACTION" 867 868 def commit_sql(self, *_) -> str: 869 return "COMMIT TRANSACTION" 870 871 def rollback_sql(self, *_) -> str: 872 return "ROLLBACK TRANSACTION" 873 874 def in_unnest_op(self, expression: exp.Unnest) -> str: 875 return self.sql(expression) 876 877 def except_op(self, expression: exp.Except) -> str: 878 if not expression.args.get("distinct", False): 879 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 880 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 881 882 def intersect_op(self, expression: exp.Intersect) -> str: 883 if not expression.args.get("distinct", False): 884 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 885 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 886 887 def with_properties(self, properties: exp.Properties) -> str: 888 return self.properties(properties, prefix=self.seg("OPTIONS")) 889 890 def version_sql(self, expression: exp.Version) -> str: 891 if expression.name == "TIMESTAMP": 892 expression.set("this", "SYSTEM_TIME") 893 return super().version_sql(expression)
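As a quick way to exercise the dialect defined above, the following sketch round-trips a BigQuery query through sqlglot's top-level transpile helper; DuckDB is an arbitrary target chosen purely for illustration.

import sqlglot

# Parse BigQuery SQL (backtick-quoted table, STRUCT/TO_JSON_STRING) and re-emit it
# for another engine. transpile() returns one SQL string per input statement.
print(
    sqlglot.transpile(
        "SELECT TO_JSON_STRING(STRUCT(1 AS a)) AS js FROM `my-project.my_dataset.my_table`",
        read="bigquery",
        write="duckdb",
    )[0]
)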
WEEK_OFFSET: Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.

UNNEST_COLUMN_ONLY: Determines whether or not UNNEST table aliases are treated as column aliases.
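For instance, since UNNEST_COLUMN_ONLY is set, the alias in FROM UNNEST(...) AS x names the unnested column rather than a table. A hedged sketch of how that surfaces when transpiling to a dialect with table-style UNNEST aliases (Presto here, as an arbitrary example):

import sqlglot

# In BigQuery the alias "x" refers to the unnested value itself; Presto-style engines
# expect a table alias with a column list, so the alias ends up attached differently.
print(
    sqlglot.transpile(
        "SELECT x FROM UNNEST([1, 2, 3]) AS x",
        read="bigquery",
        write="presto",
    )[0]
)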
SUPPORTS_USER_DEFINED_TYPES: Determines whether or not user-defined data types are supported.

NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.

TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime format.

ESCAPE_SEQUENCES: Mapping of an unescaped escape sequence to the corresponding character.
FORMAT_MAPPING: Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
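To see TIME_MAPPING and FORMAT_MAPPING at work, here is a small sketch that transpiles BigQuery date-format expressions; the exact output depends on the target dialect and sqlglot version, so treat it as indicative.

import sqlglot

# '%D' is translated through TIME_MAPPING; the FORMAT clause of CAST is parsed via FORMAT_MAPPING.
sql = """
SELECT
  PARSE_DATE('%D', '01/15/24') AS d1,
  CAST('2024-01-15' AS DATE FORMAT 'YYYY-MM-DD') AS d2
"""
for statement in sqlglot.transpile(sql, read="bigquery", write="duckdb"):
    print(statement)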
PSEUDOCOLUMNS: Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
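A rough way to observe this in practice is to run the optimizer's identifier normalization over a parsed BigQuery expression; the helper import path below matches recent sqlglot releases and may differ in older ones. Given the table heuristic described above, the qualified table name should be left alone while the unquoted column is lowercased.

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Parse a BigQuery query, then normalize identifiers according to the dialect's rules.
expression = sqlglot.parse_one("SELECT Col_A FROM My_Dataset.My_Table", read="bigquery")
print(normalize_identifiers(expression, dialect="bigquery").sql(dialect="bigquery"))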
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
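As a sketch of how these tokenizer settings play out (backtick identifiers, '#' line comments, raw-string prefixes), one can inspect the token stream via sqlglot's top-level tokenize helper; if that helper isn't exposed in a given version, the dialect's own tokenize method (listed under Inherited Members above) is the equivalent call.

import sqlglot

# Tokenize a BigQuery snippet: r'...' is a raw string, `...` is an identifier,
# and '#' starts a line comment per the COMMENTS setting above.
tokens = sqlglot.tokenize(
    "SELECT r'\\d+' AS pattern FROM `my_dataset.tbl`  # trailing comment",
    read="bigquery",
)
for token in tokens:
    print(token.token_type, repr(token.text))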
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True

    LOG_DEFAULTS_TO_LN = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _parse_date,
        "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _parse_to_hex,
        "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _parse_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _parse_time,
        "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _parse_timestamp,
        "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                self._advance(2)
                table_name += f"-{self._prev.text}"

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                for x in split_num_words(table.name, ".", 3)
            )

            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
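The FUNCTIONS overrides above map BigQuery-specific function names onto sqlglot's canonical expression nodes during parsing. A minimal sketch of that behaviour, using toy queries (the printed results are illustrative and may vary between sqlglot versions):

import sqlglot
from sqlglot import exp

# GENERATE_ARRAY is registered above to parse into the canonical exp.GenerateSeries node
ast = sqlglot.parse_one("SELECT GENERATE_ARRAY(1, 5)", read="bigquery")
print(ast.find(exp.GenerateSeries) is not None)  # True

# TIMESTAMP_MICROS is registered to parse into exp.UnixToTime (with a microsecond scale)
ast = sqlglot.parse_one("SELECT TIMESTAMP_MICROS(1672531200000000)", read="bigquery")
print(ast.find(exp.UnixToTime) is not None)  # True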
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
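A hedged usage sketch of the options documented above: they can be forwarded to this parser through the top-level sqlglot API (the query and option values below are only illustrative).

import sqlglot
from sqlglot.errors import ErrorLevel

# error_level and max_errors are forwarded to the dialect's Parser
expressions = sqlglot.parse(
    "SELECT 1; SELECT 2",
    read="bigquery",
    error_level=ErrorLevel.RAISE,  # raise a ParseError instead of collecting/logging errors
    max_errors=3,
)
print(len(expressions))  # 2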
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self,
        e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self,
        e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
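A small, hedged sketch of the Generator settings above in action: TYPE_MAPPING rewrites generic types (for example BIGINT to INT64) and TRANSFORMS rewrites constructs BigQuery lacks, such as ILIKE. The toy queries and the exact output strings shown in comments are indicative and may differ between sqlglot versions.

import sqlglot

# TYPE_MAPPING: BIGINT is rendered as INT64 for BigQuery
print(sqlglot.transpile("SELECT CAST(x AS BIGINT)", write="bigquery")[0])
# e.g. SELECT CAST(x AS INT64)

# TRANSFORMS: exp.ILike goes through no_ilike_sql, since BigQuery has no ILIKE operator
print(sqlglot.transpile("SELECT * FROM t WHERE name ILIKE '%a%'", write="bigquery")[0])
# e.g. SELECT * FROM t WHERE LOWER(name) LIKE '%a%'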
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
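A brief sketch of how the generator options listed above (pretty, identify, and so on) can be passed through sqlglot.transpile when targeting BigQuery; the query below is a toy example.

import sqlglot

sql = "select col_a, col_b from project.dataset.tbl where col_a > 1"
print(
    sqlglot.transpile(
        sql,
        read="bigquery",
        write="bigquery",
        pretty=True,    # format the output over multiple lines
        identify=True,  # quote every identifier (backticks in BigQuery)
    )[0]
)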
def struct_sql(self, expression: exp.Struct) -> str:
    args = []
    for expr in expression.expressions:
        if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
            arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
        else:
            arg = self.sql(expr)

        args.append(arg)

    return self.func("STRUCT", *args)
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = self.sql(expression, "this")
    expressions = expression.expressions

    if len(expressions) == 1:
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values
            return f"{this}.{arg.name}"

    expressions_sql = ", ".join(self.sql(e) for e in expressions)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{this}[{expressions_sql}]"
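To illustrate bracket_sql together with the parser's BRACKET_OFFSETS, a hedged sketch (the toy query and printed output are indicative): BigQuery array subscripts such as arr[SAFE_OFFSET(0)] keep their offset/safe metadata on the exp.Bracket node and round-trip back to the same syntax.

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery")
bracket = ast.find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True

# Generating BigQuery SQL again reapplies the OFFSET/ORDINAL and SAFE_ prefixes in bracket_sql
print(ast.sql(dialect="bigquery"))  # e.g. SELECT arr[SAFE_OFFSET(0)] FROM t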
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql