sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import re 4import typing as t 5 6from sqlglot import exp, generator, parser, tokens, transforms 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 datestrtodate_sql, 10 inline_array_sql, 11 max_or_greatest, 12 min_or_least, 13 no_ilike_sql, 14 parse_date_delta_with_interval, 15 rename_func, 16 timestrtotime_sql, 17 ts_or_ds_to_date_sql, 18) 19from sqlglot.helper import seq_get, split_num_words 20from sqlglot.tokens import TokenType 21 22 23def _date_add_sql( 24 data_type: str, kind: str 25) -> t.Callable[[generator.Generator, exp.Expression], str]: 26 def func(self, expression): 27 this = self.sql(expression, "this") 28 unit = expression.args.get("unit") 29 unit = exp.var(unit.name.upper() if unit else "DAY") 30 interval = exp.Interval(this=expression.expression, unit=unit) 31 return f"{data_type}_{kind}({this}, {self.sql(interval)})" 32 33 return func 34 35 36def _derived_table_values_to_unnest(self: generator.Generator, expression: exp.Values) -> str: 37 if not isinstance(expression.unnest().parent, exp.From): 38 return self.values_sql(expression) 39 40 alias = expression.args.get("alias") 41 42 structs = [ 43 exp.Struct( 44 expressions=[ 45 exp.alias_(value, column_name) 46 for value, column_name in zip( 47 t.expressions, 48 alias.columns 49 if alias and alias.columns 50 else (f"_c{i}" for i in range(len(t.expressions))), 51 ) 52 ] 53 ) 54 for t in expression.find_all(exp.Tuple) 55 ] 56 57 return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)])) 58 59 60def _returnsproperty_sql(self: generator.Generator, expression: exp.ReturnsProperty) -> str: 61 this = expression.this 62 if isinstance(this, exp.Schema): 63 this = f"{this.this} <{self.expressions(this)}>" 64 else: 65 this = self.sql(this) 66 return f"RETURNS {this}" 67 68 69def _create_sql(self: generator.Generator, expression: exp.Create) -> str: 70 kind = expression.args["kind"] 71 returns = expression.find(exp.ReturnsProperty) 72 if 
kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"): 73 expression = expression.copy() 74 expression.set("kind", "TABLE FUNCTION") 75 if isinstance( 76 expression.expression, 77 ( 78 exp.Subquery, 79 exp.Literal, 80 ), 81 ): 82 expression.set("expression", expression.expression.this) 83 84 return self.create_sql(expression) 85 86 return self.create_sql(expression) 87 88 89def _unqualify_unnest(expression: exp.Expression) -> exp.Expression: 90 """Remove references to unnest table aliases since bigquery doesn't allow them. 91 92 These are added by the optimizer's qualify_column step. 93 """ 94 if isinstance(expression, exp.Select): 95 for unnest in expression.find_all(exp.Unnest): 96 if isinstance(unnest.parent, (exp.From, exp.Join)) and unnest.alias: 97 for select in expression.selects: 98 for column in select.find_all(exp.Column): 99 if column.table == unnest.alias: 100 column.set("table", None) 101 102 return expression 103 104 105class BigQuery(Dialect): 106 unnest_column_only = True 107 time_mapping = { 108 "%M": "%-M", 109 "%d": "%-d", 110 "%m": "%-m", 111 "%y": "%-y", 112 "%H": "%-H", 113 "%I": "%-I", 114 "%S": "%-S", 115 "%j": "%-j", 116 } 117 118 class Tokenizer(tokens.Tokenizer): 119 QUOTES = ["'", '"', '"""', "'''"] 120 COMMENTS = ["--", "#", ("/*", "*/")] 121 IDENTIFIERS = ["`"] 122 STRING_ESCAPES = ["\\"] 123 124 HEX_STRINGS = [("0x", ""), ("0X", "")] 125 126 BYTE_STRINGS = [ 127 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 128 ] 129 130 RAW_STRINGS = [ 131 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 132 ] 133 134 KEYWORDS = { 135 **tokens.Tokenizer.KEYWORDS, 136 "ANY TYPE": TokenType.VARIANT, 137 "BEGIN": TokenType.COMMAND, 138 "BEGIN TRANSACTION": TokenType.BEGIN, 139 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 140 "DECLARE": TokenType.COMMAND, 141 "FLOAT64": TokenType.DOUBLE, 142 "INT64": TokenType.BIGINT, 143 "BYTES": TokenType.BINARY, 144 "NOT DETERMINISTIC": 
TokenType.VOLATILE, 145 "UNKNOWN": TokenType.NULL, 146 } 147 KEYWORDS.pop("DIV") 148 149 class Parser(parser.Parser): 150 PREFIXED_PIVOT_COLUMNS = True 151 152 LOG_BASE_FIRST = False 153 LOG_DEFAULTS_TO_LN = True 154 155 FUNCTIONS = { 156 **parser.Parser.FUNCTIONS, 157 "DATE_TRUNC": lambda args: exp.DateTrunc( 158 unit=exp.Literal.string(str(seq_get(args, 1))), 159 this=seq_get(args, 0), 160 ), 161 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 162 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 163 "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)), 164 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 165 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 166 this=seq_get(args, 0), 167 expression=seq_get(args, 1), 168 position=seq_get(args, 2), 169 occurrence=seq_get(args, 3), 170 group=exp.Literal.number(1) 171 if re.compile(str(seq_get(args, 1))).groups == 1 172 else None, 173 ), 174 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 175 "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), 176 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 177 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 178 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 179 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 180 "PARSE_TIMESTAMP": lambda args: exp.StrToTime( 181 this=seq_get(args, 1), format=seq_get(args, 0) 182 ), 183 } 184 185 FUNCTION_PARSERS = { 186 **parser.Parser.FUNCTION_PARSERS, 187 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 188 } 189 FUNCTION_PARSERS.pop("TRIM") 190 191 NO_PAREN_FUNCTIONS = { 192 **parser.Parser.NO_PAREN_FUNCTIONS, 193 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 194 } 195 196 NESTED_TYPE_TOKENS = { 197 *parser.Parser.NESTED_TYPE_TOKENS, 198 TokenType.TABLE, 199 } 200 201 ID_VAR_TOKENS = { 202 *parser.Parser.ID_VAR_TOKENS, 203 TokenType.VALUES, 204 } 205 206 
PROPERTY_PARSERS = { 207 **parser.Parser.PROPERTY_PARSERS, 208 "NOT DETERMINISTIC": lambda self: self.expression( 209 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 210 ), 211 "OPTIONS": lambda self: self._parse_with_property(), 212 } 213 214 CONSTRAINT_PARSERS = { 215 **parser.Parser.CONSTRAINT_PARSERS, 216 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 217 } 218 219 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 220 this = super()._parse_table_part(schema=schema) 221 222 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 223 if isinstance(this, exp.Identifier): 224 table_name = this.name 225 while self._match(TokenType.DASH, advance=False) and self._next: 226 self._advance(2) 227 table_name += f"-{self._prev.text}" 228 229 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 230 231 return this 232 233 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 234 table = super()._parse_table_parts(schema=schema) 235 if isinstance(table.this, exp.Identifier) and "." 
in table.name: 236 catalog, db, this, *rest = ( 237 t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) 238 for x in split_num_words(table.name, ".", 3) 239 ) 240 241 if rest and this: 242 this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) 243 244 table = exp.Table(this=this, db=db, catalog=catalog) 245 246 return table 247 248 class Generator(generator.Generator): 249 EXPLICIT_UNION = True 250 INTERVAL_ALLOWS_PLURAL_FORM = False 251 JOIN_HINTS = False 252 TABLE_HINTS = False 253 LIMIT_FETCH = "LIMIT" 254 RENAME_TABLE_WITH_DB = False 255 256 TRANSFORMS = { 257 **generator.Generator.TRANSFORMS, 258 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 259 exp.ArraySize: rename_func("ARRAY_LENGTH"), 260 exp.AtTimeZone: lambda self, e: self.func( 261 "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone")) 262 ), 263 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 264 exp.DateAdd: _date_add_sql("DATE", "ADD"), 265 exp.DateSub: _date_add_sql("DATE", "SUB"), 266 exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"), 267 exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"), 268 exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 269 exp.DateStrToDate: datestrtodate_sql, 270 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 271 exp.GroupConcat: rename_func("STRING_AGG"), 272 exp.ILike: no_ilike_sql, 273 exp.IntDiv: rename_func("DIV"), 274 exp.Max: max_or_greatest, 275 exp.Min: min_or_least, 276 exp.Select: transforms.preprocess( 277 [_unqualify_unnest, transforms.eliminate_distinct_on] 278 ), 279 exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})", 280 exp.TimeAdd: _date_add_sql("TIME", "ADD"), 281 exp.TimeSub: _date_add_sql("TIME", "SUB"), 282 exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"), 283 exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"), 284 
exp.TimeStrToTime: timestrtotime_sql, 285 exp.TryCast: lambda self, e: f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})", 286 exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"), 287 exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"), 288 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 289 exp.VariancePop: rename_func("VAR_POP"), 290 exp.Values: _derived_table_values_to_unnest, 291 exp.ReturnsProperty: _returnsproperty_sql, 292 exp.Create: _create_sql, 293 exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression), 294 exp.StabilityProperty: lambda self, e: f"DETERMINISTIC" 295 if e.name == "IMMUTABLE" 296 else "NOT DETERMINISTIC", 297 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 298 } 299 300 TYPE_MAPPING = { 301 **generator.Generator.TYPE_MAPPING, 302 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 303 exp.DataType.Type.BIGINT: "INT64", 304 exp.DataType.Type.BINARY: "BYTES", 305 exp.DataType.Type.BOOLEAN: "BOOL", 306 exp.DataType.Type.CHAR: "STRING", 307 exp.DataType.Type.DECIMAL: "NUMERIC", 308 exp.DataType.Type.DOUBLE: "FLOAT64", 309 exp.DataType.Type.FLOAT: "FLOAT64", 310 exp.DataType.Type.INT: "INT64", 311 exp.DataType.Type.NCHAR: "STRING", 312 exp.DataType.Type.NVARCHAR: "STRING", 313 exp.DataType.Type.SMALLINT: "INT64", 314 exp.DataType.Type.TEXT: "STRING", 315 exp.DataType.Type.TIMESTAMP: "DATETIME", 316 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 317 exp.DataType.Type.TINYINT: "INT64", 318 exp.DataType.Type.VARBINARY: "BYTES", 319 exp.DataType.Type.VARCHAR: "STRING", 320 exp.DataType.Type.VARIANT: "ANY TYPE", 321 } 322 323 PROPERTIES_LOCATION = { 324 **generator.Generator.PROPERTIES_LOCATION, 325 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 326 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 327 } 328 329 def array_sql(self, expression: exp.Array) -> str: 330 first_arg = seq_get(expression.expressions, 0) 331 if isinstance(first_arg, exp.Subqueryable): 332 return 
f"ARRAY{self.wrap(self.sql(first_arg))}" 333 334 return inline_array_sql(self, expression) 335 336 def transaction_sql(self, *_) -> str: 337 return "BEGIN TRANSACTION" 338 339 def commit_sql(self, *_) -> str: 340 return "COMMIT TRANSACTION" 341 342 def rollback_sql(self, *_) -> str: 343 return "ROLLBACK TRANSACTION" 344 345 def in_unnest_op(self, expression: exp.Unnest) -> str: 346 return self.sql(expression) 347 348 def except_op(self, expression: exp.Except) -> str: 349 if not expression.args.get("distinct", False): 350 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 351 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 352 353 def intersect_op(self, expression: exp.Intersect) -> str: 354 if not expression.args.get("distinct", False): 355 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 356 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 357 358 def with_properties(self, properties: exp.Properties) -> str: 359 return self.properties(properties, prefix=self.seg("OPTIONS"))
def _parse_regexp_extract(args: t.List) -> exp.RegexpExtract:
    """Build a RegexpExtract node from REGEXP_EXTRACT's argument list.

    ``group`` is set to 1 only when the pattern has exactly one capturing group.
    The pattern is user SQL and may not be a valid Python regex; in that case the
    group is left unset instead of letting ``re.error`` abort parsing.
    """
    pattern = seq_get(args, 1)
    try:
        single_group = re.compile(str(pattern)).groups == 1
    except re.error:
        single_group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=pattern,
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if single_group else None,
    )


class BigQuery(Dialect):
    """BigQuery dialect: tokenizer, parser and generator overrides."""

    unnest_column_only = True
    time_mapping = {
        "%M": "%-M",
        "%d": "%-d",
        "%m": "%-m",
        "%y": "%-y",
        "%H": "%-H",
        "%I": "%-I",
        "%S": "%-S",
        "%j": "%-j",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer overrides: quoting, comments, literal prefixes and keywords."""

        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        # Every quote style may be prefixed with b/B; the bare quote closes the literal.
        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        # Same construction for r/R-prefixed strings.
        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "INT64": TokenType.BIGINT,
            "BYTES": TokenType.BINARY,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "UNKNOWN": TokenType.NULL,
        }
        # Drop the inherited DIV keyword; Parser.FUNCTIONS handles DIV as a function call.
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        """Parser overrides: function builders, property parsers and table-name handling."""

        PREFIXED_PIVOT_COLUMNS = True

        LOG_BASE_FIRST = False
        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _parse_regexp_extract,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            # PARSE_TIMESTAMP takes the format first and the value second.
            "PARSE_TIMESTAMP": lambda args: exp.StrToTime(
                this=seq_get(args, 1), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            """Parse one table-name part, joining dash-separated tokens into one identifier."""
            this = super()._parse_table_part(schema=schema)

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    # Skip the dash and the token after it, then append that token's text.
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            """Parse a table reference, splitting a dotted identifier into catalog/db/table."""
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                # Any parts beyond the third are chained onto the table name with Dot.
                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

    class Generator(generator.Generator):
        """Generator overrides: function renames, type names and set-operation keywords."""

        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.AtTimeZone: lambda self, e: self.func(
                "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
            ),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.DateAdd: _date_add_sql("DATE", "ADD"),
            exp.DateSub: _date_add_sql("DATE", "SUB"),
            exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
            exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
            # `or` (not a .get default) so an explicit unit=None also falls back to DAY.
            exp.DateDiff: lambda self, e: (
                f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, "
                f"{self.sql(e.args.get('unit') or 'DAY')})"
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.Select: transforms.preprocess(
                [_unqualify_unnest, transforms.eliminate_distinct_on]
            ),
            exp.StrToTime: lambda self, e: (
                f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})"
            ),
            exp.TimeAdd: _date_add_sql("TIME", "ADD"),
            exp.TimeSub: _date_add_sql("TIME", "SUB"),
            exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TryCast: lambda self, e: (
                f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})"
            ),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
            exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.Values: _derived_table_values_to_unnest,
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Create: _create_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def array_sql(self, expression: exp.Array) -> str:
            """Render ``ARRAY(<subquery>)`` for subqueryable input, else an inline array."""
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def transaction_sql(self, *_) -> str:
            """Return the fixed ``BEGIN TRANSACTION`` statement."""
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            """Return the fixed ``COMMIT TRANSACTION`` statement."""
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            """Return the fixed ``ROLLBACK TRANSACTION`` statement."""
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            """Render the UNNEST operand of IN as the plain UNNEST expression."""
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            """Return the EXCEPT keyword, flagging the non-DISTINCT form as unsupported."""
            distinct = expression.args.get("distinct")
            if not distinct:
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if distinct else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            """Return the INTERSECT keyword, flagging the non-DISTINCT form as unsupported."""
            distinct = expression.args.get("distinct")
            if not distinct:
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if distinct else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties behind an ``OPTIONS`` prefix."""
            return self.properties(properties, prefix=self.seg("OPTIONS"))
class Tokenizer(tokens.Tokenizer):
    """Lexical settings for BigQuery: quotes, comments, literal prefixes and keywords."""

    QUOTES = ["'", '"', '"""', "'''"]
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]

    HEX_STRINGS = [("0x", ""), ("0X", "")]

    # (opening delimiter, closing delimiter) pairs: each quote style with a b/B marker.
    BYTE_STRINGS = [
        (marker + quote, quote)
        for quote in t.cast(t.List[str], QUOTES)
        for marker in ("b", "B")
    ]

    # Same pairing for the r/R marker.
    RAW_STRINGS = [
        (marker + quote, quote)
        for quote in t.cast(t.List[str], QUOTES)
        for marker in ("r", "R")
    ]

    # Start from a copy of the inherited keywords, then layer the overrides on top.
    KEYWORDS = dict(tokens.Tokenizer.KEYWORDS)
    KEYWORDS.update(
        {
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "INT64": TokenType.BIGINT,
            "BYTES": TokenType.BINARY,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "UNKNOWN": TokenType.NULL,
        }
    )
    # The inherited DIV keyword is removed (raises KeyError if the base ever lacks it).
    del KEYWORDS["DIV"]
Inherited Members
def _parse_regexp_extract(args: t.List) -> exp.RegexpExtract:
    """Build a RegexpExtract node from REGEXP_EXTRACT's argument list.

    ``group`` is set to 1 only when the pattern has exactly one capturing group.
    The pattern is user SQL and may not be a valid Python regex; in that case the
    group is left unset instead of letting ``re.error`` abort parsing.
    """
    pattern = seq_get(args, 1)
    try:
        single_group = re.compile(str(pattern)).groups == 1
    except re.error:
        single_group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=pattern,
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if single_group else None,
    )


class Parser(parser.Parser):
    """Parser overrides: function builders, property parsers and table-name handling."""

    PREFIXED_PIVOT_COLUMNS = True

    LOG_BASE_FIRST = False
    LOG_DEFAULTS_TO_LN = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
        "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
        "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": _parse_regexp_extract,
        "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
        "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
        "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
        "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
        "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
        # PARSE_TIMESTAMP takes the format first and the value second.
        "PARSE_TIMESTAMP": lambda args: exp.StrToTime(
            this=seq_get(args, 1), format=seq_get(args, 0)
        ),
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.VALUES,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one table-name part, joining dash-separated tokens into one identifier."""
        this = super()._parse_table_part(schema=schema)

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                # Skip the dash and the token after it, then append that token's text.
                self._advance(2)
                table_name += f"-{self._prev.text}"

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))

        return this

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a table reference, splitting a dotted identifier into catalog/db/table."""
        table = super()._parse_table_parts(schema=schema)
        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                for x in split_num_words(table.name, ".", 3)
            )

            # Any parts beyond the third are chained onto the table name with Dot.
            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)

        return table
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    """Generator overrides: function renames, type names and set-operation keywords."""

    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.AtTimeZone: lambda self, e: self.func(
            "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
        ),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.DateAdd: _date_add_sql("DATE", "ADD"),
        exp.DateSub: _date_add_sql("DATE", "SUB"),
        exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
        exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
        # `or` (not a .get default) so an explicit unit=None also falls back to DAY.
        exp.DateDiff: lambda self, e: (
            f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, "
            f"{self.sql(e.args.get('unit') or 'DAY')})"
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.Select: transforms.preprocess(
            [_unqualify_unnest, transforms.eliminate_distinct_on]
        ),
        exp.StrToTime: lambda self, e: (
            f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})"
        ),
        exp.TimeAdd: _date_add_sql("TIME", "ADD"),
        exp.TimeSub: _date_add_sql("TIME", "SUB"),
        exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TryCast: lambda self, e: (
            f"SAFE_CAST({self.sql(e, 'this')} AS {self.sql(e, 'to')})"
        ),
        exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
        exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.Values: _derived_table_values_to_unnest,
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Create: _create_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
        if e.name == "IMMUTABLE"
        else "NOT DETERMINISTIC",
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def array_sql(self, expression: exp.Array) -> str:
        """Render ``ARRAY(<subquery>)`` for subqueryable input, else an inline array."""
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def transaction_sql(self, *_) -> str:
        """Return the fixed ``BEGIN TRANSACTION`` statement."""
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        """Return the fixed ``COMMIT TRANSACTION`` statement."""
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        """Return the fixed ``ROLLBACK TRANSACTION`` statement."""
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        """Render the UNNEST operand of IN as the plain UNNEST expression."""
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        """Return the EXCEPT keyword, flagging the non-DISTINCT form as unsupported."""
        distinct = expression.args.get("distinct")
        if not distinct:
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if distinct else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        """Return the INTERSECT keyword, flagging the non-DISTINCT form as unsupported."""
        distinct = expression.args.get("distinct")
        if not distinct:
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if distinct else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        """Render properties behind an ``OPTIONS`` prefix."""
        return self.properties(properties, prefix=self.seg("OPTIONS"))
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the output the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- bit_start (str): specifies which starting character to use to delimit bit literals. Default: None.
- bit_end (str): specifies which ending character to use to delimit bit literals. Default: None.
- hex_start (str): specifies which starting character to use to delimit hex literals. Default: None.
- hex_end (str): specifies which ending character to use to delimit hex literals. Default: None.
- byte_start (str): specifies which starting character to use to delimit byte literals. Default: None.
- byte_end (str): specifies which ending character to use to delimit byte literals. Default: None.
- raw_start (str): specifies which starting character to use to delimit raw literals. Default: None.
- raw_end (str): specifies which ending character to use to delimit raw literals. Default: None.
- identify (bool | str): 'always': always quote identifiers; 'safe': quote identifiers only if they contain no uppercase characters; True is treated as 'always'.
- normalize (bool): if set to True, all identifiers will be lowercased.
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true, unnest table aliases are considered only as column aliases.
- normalize_functions (str): how to normalize function names: "upper", "lower", or None. Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): whether the comma is leading or trailing in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql