Supports BigQuery Standard SQL.
1"""Supports BigQuery Standard SQL.""" 2 3from __future__ import annotations 4 5import re 6import typing as t 7 8from sqlglot import exp, generator, parser, tokens, transforms 9from sqlglot.dialects.dialect import ( 10 Dialect, 11 datestrtodate_sql, 12 inline_array_sql, 13 max_or_greatest, 14 min_or_least, 15 no_ilike_sql, 16 parse_date_delta_with_interval, 17 rename_func, 18 timestrtotime_sql, 19 ts_or_ds_to_date_sql, 20) 21from sqlglot.helper import seq_get 22from sqlglot.tokens import TokenType 23 24E = t.TypeVar("E", bound=exp.Expression) 25 26 27def _date_add_sql( 28 data_type: str, kind: str 29) -> t.Callable[[generator.Generator, exp.Expression], str]: 30 def func(self, expression): 31 this = self.sql(expression, "this") 32 unit = expression.args.get("unit") 33 unit = exp.var(unit.name.upper() if unit else "DAY") 34 interval = exp.Interval(this=expression.expression, unit=unit) 35 return f"{data_type}_{kind}({this}, {self.sql(interval)})" 36 37 return func 38 39 40def _derived_table_values_to_unnest(self: generator.Generator, expression: exp.Values) -> str: 41 if not isinstance(expression.unnest().parent, exp.From): 42 return self.values_sql(expression) 43 44 structs = [ 45 exp.Struct( 46 expressions=[ 47 exp.alias_(value, column_name) 48 for value, column_name in zip( 49 t.expressions, expression.args["alias"].args["columns"] 50 ) 51 ] 52 ) 53 for t in expression.find_all(exp.Tuple) 54 ] 55 56 return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)])) 57 58 59def _returnsproperty_sql(self: generator.Generator, expression: exp.ReturnsProperty) -> str: 60 this = expression.this 61 if isinstance(this, exp.Schema): 62 this = f"{this.this} <{self.expressions(this)}>" 63 else: 64 this = self.sql(this) 65 return f"RETURNS {this}" 66 67 68def _create_sql(self: generator.Generator, expression: exp.Create) -> str: 69 kind = expression.args["kind"] 70 returns = expression.find(exp.ReturnsProperty) 71 if kind.upper() == "FUNCTION" and returns and 
returns.args.get("is_table"): 72 expression = expression.copy() 73 expression.set("kind", "TABLE FUNCTION") 74 if isinstance( 75 expression.expression, 76 ( 77 exp.Subquery, 78 exp.Literal, 79 ), 80 ): 81 expression.set("expression", expression.expression.this) 82 83 return self.create_sql(expression) 84 85 return self.create_sql(expression) 86 87 88def _unqualify_unnest(expression: exp.Expression) -> exp.Expression: 89 """Remove references to unnest table aliases since bigquery doesn't allow them. 90 91 These are added by the optimizer's qualify_column step. 92 """ 93 if isinstance(expression, exp.Select): 94 unnests = { 95 unnest.alias 96 for unnest in expression.args.get("from", exp.From(expressions=[])).expressions 97 if isinstance(unnest, exp.Unnest) and unnest.alias 98 } 99 100 if unnests: 101 expression = expression.copy() 102 103 for select in expression.expressions: 104 for column in select.find_all(exp.Column): 105 if column.table in unnests: 106 column.set("table", None) 107 108 return expression 109 110 111class BigQuery(Dialect): 112 unnest_column_only = True 113 time_mapping = { 114 "%M": "%-M", 115 "%d": "%-d", 116 "%m": "%-m", 117 "%y": "%-y", 118 "%H": "%-H", 119 "%I": "%-I", 120 "%S": "%-S", 121 "%j": "%-j", 122 } 123 124 class Tokenizer(tokens.Tokenizer): 125 QUOTES = [ 126 (prefix + quote, quote) if prefix else quote 127 for quote in ["'", '"', '"""', "'''"] 128 for prefix in ["", "r", "R"] 129 ] 130 COMMENTS = ["--", "#", ("/*", "*/")] 131 IDENTIFIERS = ["`"] 132 STRING_ESCAPES = ["\\"] 133 HEX_STRINGS = [("0x", ""), ("0X", "")] 134 135 KEYWORDS = { 136 **tokens.Tokenizer.KEYWORDS, 137 "ANY TYPE": TokenType.VARIANT, 138 "BEGIN": TokenType.COMMAND, 139 "BEGIN TRANSACTION": TokenType.BEGIN, 140 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 141 "DECLARE": TokenType.COMMAND, 142 "GEOGRAPHY": TokenType.GEOGRAPHY, 143 "FLOAT64": TokenType.DOUBLE, 144 "INT64": TokenType.BIGINT, 145 "BYTES": TokenType.BINARY, 146 "NOT DETERMINISTIC": 
TokenType.VOLATILE, 147 "UNKNOWN": TokenType.NULL, 148 } 149 KEYWORDS.pop("DIV") 150 151 class Parser(parser.Parser): 152 PREFIXED_PIVOT_COLUMNS = True 153 154 LOG_BASE_FIRST = False 155 LOG_DEFAULTS_TO_LN = True 156 157 FUNCTIONS = { 158 **parser.Parser.FUNCTIONS, # type: ignore 159 "DATE_TRUNC": lambda args: exp.DateTrunc( 160 unit=exp.Literal.string(seq_get(args, 1).name), # type: ignore 161 this=seq_get(args, 0), 162 ), 163 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 164 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 165 "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)), 166 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 167 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 168 this=seq_get(args, 0), 169 expression=seq_get(args, 1), 170 position=seq_get(args, 2), 171 occurrence=seq_get(args, 3), 172 group=exp.Literal.number(1) 173 if re.compile(str(seq_get(args, 1))).groups == 1 174 else None, 175 ), 176 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 177 "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), 178 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 179 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 180 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 181 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 182 "PARSE_TIMESTAMP": lambda args: exp.StrToTime( 183 this=seq_get(args, 1), format=seq_get(args, 0) 184 ), 185 } 186 187 FUNCTION_PARSERS = { 188 **parser.Parser.FUNCTION_PARSERS, # type: ignore 189 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 190 } 191 FUNCTION_PARSERS.pop("TRIM") 192 193 NO_PAREN_FUNCTIONS = { 194 **parser.Parser.NO_PAREN_FUNCTIONS, # type: ignore 195 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 196 } 197 198 NESTED_TYPE_TOKENS = { 199 *parser.Parser.NESTED_TYPE_TOKENS, # type: ignore 200 TokenType.TABLE, 201 } 202 203 ID_VAR_TOKENS = { 204 
*parser.Parser.ID_VAR_TOKENS, # type: ignore 205 TokenType.VALUES, 206 } 207 208 PROPERTY_PARSERS = { 209 **parser.Parser.PROPERTY_PARSERS, # type: ignore 210 "NOT DETERMINISTIC": lambda self: self.expression( 211 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 212 ), 213 "OPTIONS": lambda self: self._parse_with_property(), 214 } 215 216 CONSTRAINT_PARSERS = { 217 **parser.Parser.CONSTRAINT_PARSERS, # type: ignore 218 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 219 } 220 221 class Generator(generator.Generator): 222 EXPLICIT_UNION = True 223 INTERVAL_ALLOWS_PLURAL_FORM = False 224 JOIN_HINTS = False 225 TABLE_HINTS = False 226 LIMIT_FETCH = "LIMIT" 227 228 TRANSFORMS = { 229 **generator.Generator.TRANSFORMS, # type: ignore 230 exp.ArraySize: rename_func("ARRAY_LENGTH"), 231 exp.AtTimeZone: lambda self, e: self.func( 232 "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone")) 233 ), 234 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 235 exp.DateAdd: _date_add_sql("DATE", "ADD"), 236 exp.DateSub: _date_add_sql("DATE", "SUB"), 237 exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"), 238 exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"), 239 exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 240 exp.DateStrToDate: datestrtodate_sql, 241 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 242 exp.GroupConcat: rename_func("STRING_AGG"), 243 exp.ILike: no_ilike_sql, 244 exp.IntDiv: rename_func("DIV"), 245 exp.Max: max_or_greatest, 246 exp.Min: min_or_least, 247 exp.Select: transforms.preprocess( 248 [_unqualify_unnest, transforms.eliminate_distinct_on] 249 ), 250 exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})", 251 exp.TimeAdd: _date_add_sql("TIME", "ADD"), 252 exp.TimeSub: _date_add_sql("TIME", "SUB"), 253 exp.TimestampAdd: 
_date_add_sql("TIMESTAMP", "ADD"), 254 exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"), 255 exp.TimeStrToTime: timestrtotime_sql, 256 exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"), 257 exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"), 258 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 259 exp.VariancePop: rename_func("VAR_POP"), 260 exp.Values: _derived_table_values_to_unnest, 261 exp.ReturnsProperty: _returnsproperty_sql, 262 exp.Create: _create_sql, 263 exp.Trim: lambda self, e: self.func(f"TRIM", e.this, e.expression), 264 exp.StabilityProperty: lambda self, e: f"DETERMINISTIC" 265 if e.name == "IMMUTABLE" 266 else "NOT DETERMINISTIC", 267 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 268 } 269 270 TYPE_MAPPING = { 271 **generator.Generator.TYPE_MAPPING, # type: ignore 272 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 273 exp.DataType.Type.BIGINT: "INT64", 274 exp.DataType.Type.BINARY: "BYTES", 275 exp.DataType.Type.BOOLEAN: "BOOL", 276 exp.DataType.Type.CHAR: "STRING", 277 exp.DataType.Type.DECIMAL: "NUMERIC", 278 exp.DataType.Type.DOUBLE: "FLOAT64", 279 exp.DataType.Type.FLOAT: "FLOAT64", 280 exp.DataType.Type.INT: "INT64", 281 exp.DataType.Type.NCHAR: "STRING", 282 exp.DataType.Type.NVARCHAR: "STRING", 283 exp.DataType.Type.SMALLINT: "INT64", 284 exp.DataType.Type.TEXT: "STRING", 285 exp.DataType.Type.TIMESTAMP: "DATETIME", 286 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 287 exp.DataType.Type.TINYINT: "INT64", 288 exp.DataType.Type.VARBINARY: "BYTES", 289 exp.DataType.Type.VARCHAR: "STRING", 290 exp.DataType.Type.VARIANT: "ANY TYPE", 291 } 292 293 PROPERTIES_LOCATION = { 294 **generator.Generator.PROPERTIES_LOCATION, # type: ignore 295 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 296 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 297 } 298 299 def array_sql(self, expression: exp.Array) -> str: 300 first_arg = seq_get(expression.expressions, 0) 301 if isinstance(first_arg, exp.Subqueryable): 
302 return f"ARRAY{self.wrap(self.sql(first_arg))}" 303 304 return inline_array_sql(self, expression) 305 306 def transaction_sql(self, *_) -> str: 307 return "BEGIN TRANSACTION" 308 309 def commit_sql(self, *_) -> str: 310 return "COMMIT TRANSACTION" 311 312 def rollback_sql(self, *_) -> str: 313 return "ROLLBACK TRANSACTION" 314 315 def in_unnest_op(self, expression: exp.Unnest) -> str: 316 return self.sql(expression) 317 318 def except_op(self, expression: exp.Except) -> str: 319 if not expression.args.get("distinct", False): 320 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 321 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 322 323 def intersect_op(self, expression: exp.Intersect) -> str: 324 if not expression.args.get("distinct", False): 325 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 326 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 327 328 def with_properties(self, properties: exp.Properties) -> str: 329 return self.properties(properties, prefix=self.seg("OPTIONS"))
def _parse_regexp_extract(args):
    """Build a RegexpExtract node from REGEXP_EXTRACT's argument list.

    `group` is set to 1 when the pattern has exactly one capturing group.
    A pattern that Python's `re` module cannot compile leaves `group` unset
    instead of aborting the parse with `re.error`.
    """
    pattern = seq_get(args, 1)
    try:
        single_group = re.compile(str(pattern)).groups == 1
    except re.error:
        single_group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=pattern,
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if single_group else None,
    )


class BigQuery(Dialect):
    """Dialect definition for BigQuery: tokenizer, parser and generator overrides."""

    unnest_column_only = True
    time_mapping = {
        "%M": "%-M",
        "%d": "%-d",
        "%m": "%-m",
        "%y": "%-y",
        "%H": "%-H",
        "%I": "%-I",
        "%S": "%-S",
        "%j": "%-j",
    }

    class Tokenizer(tokens.Tokenizer):
        # Plain quotes plus "r"/"R"-prefixed variants; prefixed entries are
        # (start, end) pairs because the closing delimiter has no prefix.
        QUOTES = [
            (prefix + quote, quote) if prefix else quote
            for quote in ["'", '"', '"""', "'''"]
            for prefix in ["", "r", "R"]
        ]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        HEX_STRINGS = [("0x", ""), ("0X", "")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "GEOGRAPHY": TokenType.GEOGRAPHY,
            "FLOAT64": TokenType.DOUBLE,
            "INT64": TokenType.BIGINT,
            "BYTES": TokenType.BINARY,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "UNKNOWN": TokenType.NULL,
        }
        # DIV is registered as a function in Parser.FUNCTIONS instead.
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_BASE_FIRST = False
        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 1).name),  # type: ignore
                this=seq_get(args, 0),
            ),
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _parse_regexp_extract,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            # PARSE_TIMESTAMP takes the format first and the value second.
            "PARSE_TIMESTAMP": lambda args: exp.StrToTime(
                this=seq_get(args, 1), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,  # type: ignore
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,  # type: ignore
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,  # type: ignore
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,  # type: ignore
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,  # type: ignore
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,  # type: ignore
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.AtTimeZone: lambda self, e: self.func(
                "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone"))
            ),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.DateAdd: _date_add_sql("DATE", "ADD"),
            exp.DateSub: _date_add_sql("DATE", "SUB"),
            exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
            exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
            # `or 'DAY'` also covers a unit that is present but None.
            exp.DateDiff: lambda self, e: (
                f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, "
                f"{self.sql(e.args.get('unit') or 'DAY')})"
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.Select: transforms.preprocess(
                [_unqualify_unnest, transforms.eliminate_distinct_on]
            ),
            exp.StrToTime: lambda self, e: (
                f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})"
            ),
            exp.TimeAdd: _date_add_sql("TIME", "ADD"),
            exp.TimeSub: _date_add_sql("TIME", "SUB"),
            exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
            exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.Values: _derived_table_values_to_unnest,
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Create: _create_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def array_sql(self, expression: exp.Array) -> str:
            """Render ARRAY(<subquery>) for a subquery argument, else an inline array."""
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            """Render the EXCEPT operator, flagging the non-DISTINCT form as unsupported."""
            distinct = expression.args.get("distinct", False)
            if not distinct:
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if distinct else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            """Render the INTERSECT operator, flagging the non-DISTINCT form as unsupported."""
            distinct = expression.args.get("distinct", False)
            if not distinct:
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if distinct else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            """Render properties behind an OPTIONS prefix."""
            return self.properties(properties, prefix=self.seg("OPTIONS"))
    class Tokenizer(tokens.Tokenizer):
        """Lexical tables for BigQuery, layered over the base `tokens.Tokenizer`."""

        # Each quote style appears three times: bare, and with an "r" / "R" prefix.
        # Prefixed styles are (start, end) pairs because only the opening
        # delimiter carries the prefix.
        QUOTES = [
            (prefix + quote, quote) if prefix else quote
            for quote in ["'", '"', '"""', "'''"]
            for prefix in ["", "r", "R"]
        ]
        # "--" and "#" are listed as bare markers, "/* ... */" as a (start, end) pair.
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        # (prefix, suffix) pairs: a "0x" / "0X" prefix and an empty suffix.
        HEX_STRINGS = [("0x", ""), ("0X", "")]

        # Base keywords plus BigQuery spellings mapped onto existing token types.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "GEOGRAPHY": TokenType.GEOGRAPHY,
            "FLOAT64": TokenType.DOUBLE,
            "INT64": TokenType.BIGINT,
            "BYTES": TokenType.BINARY,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "UNKNOWN": TokenType.NULL,
        }
        # Remove the inherited DIV keyword entry.
        # NOTE(review): presumably so that DIV(...) reaches the "DIV" entry in
        # Parser.FUNCTIONS rather than being lexed as an operator - confirm.
        KEYWORDS.pop("DIV")
Inherited Members
152 class Parser(parser.Parser): 153 PREFIXED_PIVOT_COLUMNS = True 154 155 LOG_BASE_FIRST = False 156 LOG_DEFAULTS_TO_LN = True 157 158 FUNCTIONS = { 159 **parser.Parser.FUNCTIONS, # type: ignore 160 "DATE_TRUNC": lambda args: exp.DateTrunc( 161 unit=exp.Literal.string(seq_get(args, 1).name), # type: ignore 162 this=seq_get(args, 0), 163 ), 164 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 165 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 166 "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)), 167 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 168 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 169 this=seq_get(args, 0), 170 expression=seq_get(args, 1), 171 position=seq_get(args, 2), 172 occurrence=seq_get(args, 3), 173 group=exp.Literal.number(1) 174 if re.compile(str(seq_get(args, 1))).groups == 1 175 else None, 176 ), 177 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 178 "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), 179 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 180 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 181 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 182 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 183 "PARSE_TIMESTAMP": lambda args: exp.StrToTime( 184 this=seq_get(args, 1), format=seq_get(args, 0) 185 ), 186 } 187 188 FUNCTION_PARSERS = { 189 **parser.Parser.FUNCTION_PARSERS, # type: ignore 190 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 191 } 192 FUNCTION_PARSERS.pop("TRIM") 193 194 NO_PAREN_FUNCTIONS = { 195 **parser.Parser.NO_PAREN_FUNCTIONS, # type: ignore 196 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 197 } 198 199 NESTED_TYPE_TOKENS = { 200 *parser.Parser.NESTED_TYPE_TOKENS, # type: ignore 201 TokenType.TABLE, 202 } 203 204 ID_VAR_TOKENS = { 205 *parser.Parser.ID_VAR_TOKENS, # type: ignore 206 TokenType.VALUES, 207 } 208 209 
PROPERTY_PARSERS = { 210 **parser.Parser.PROPERTY_PARSERS, # type: ignore 211 "NOT DETERMINISTIC": lambda self: self.expression( 212 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 213 ), 214 "OPTIONS": lambda self: self._parse_with_property(), 215 } 216 217 CONSTRAINT_PARSERS = { 218 **parser.Parser.CONSTRAINT_PARSERS, # type: ignore 219 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 220 }
Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, i.e. whether the first element of a list is ARRAY[0] or ARRAY[1]. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
222 class Generator(generator.Generator): 223 EXPLICIT_UNION = True 224 INTERVAL_ALLOWS_PLURAL_FORM = False 225 JOIN_HINTS = False 226 TABLE_HINTS = False 227 LIMIT_FETCH = "LIMIT" 228 229 TRANSFORMS = { 230 **generator.Generator.TRANSFORMS, # type: ignore 231 exp.ArraySize: rename_func("ARRAY_LENGTH"), 232 exp.AtTimeZone: lambda self, e: self.func( 233 "TIMESTAMP", self.func("DATETIME", e.this, e.args.get("zone")) 234 ), 235 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 236 exp.DateAdd: _date_add_sql("DATE", "ADD"), 237 exp.DateSub: _date_add_sql("DATE", "SUB"), 238 exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"), 239 exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"), 240 exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 241 exp.DateStrToDate: datestrtodate_sql, 242 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 243 exp.GroupConcat: rename_func("STRING_AGG"), 244 exp.ILike: no_ilike_sql, 245 exp.IntDiv: rename_func("DIV"), 246 exp.Max: max_or_greatest, 247 exp.Min: min_or_least, 248 exp.Select: transforms.preprocess( 249 [_unqualify_unnest, transforms.eliminate_distinct_on] 250 ), 251 exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})", 252 exp.TimeAdd: _date_add_sql("TIME", "ADD"), 253 exp.TimeSub: _date_add_sql("TIME", "SUB"), 254 exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"), 255 exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"), 256 exp.TimeStrToTime: timestrtotime_sql, 257 exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"), 258 exp.TsOrDsAdd: _date_add_sql("DATE", "ADD"), 259 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 260 exp.VariancePop: rename_func("VAR_POP"), 261 exp.Values: _derived_table_values_to_unnest, 262 exp.ReturnsProperty: _returnsproperty_sql, 263 exp.Create: _create_sql, 264 exp.Trim: lambda self, e: 
self.func(f"TRIM", e.this, e.expression), 265 exp.StabilityProperty: lambda self, e: f"DETERMINISTIC" 266 if e.name == "IMMUTABLE" 267 else "NOT DETERMINISTIC", 268 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 269 } 270 271 TYPE_MAPPING = { 272 **generator.Generator.TYPE_MAPPING, # type: ignore 273 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 274 exp.DataType.Type.BIGINT: "INT64", 275 exp.DataType.Type.BINARY: "BYTES", 276 exp.DataType.Type.BOOLEAN: "BOOL", 277 exp.DataType.Type.CHAR: "STRING", 278 exp.DataType.Type.DECIMAL: "NUMERIC", 279 exp.DataType.Type.DOUBLE: "FLOAT64", 280 exp.DataType.Type.FLOAT: "FLOAT64", 281 exp.DataType.Type.INT: "INT64", 282 exp.DataType.Type.NCHAR: "STRING", 283 exp.DataType.Type.NVARCHAR: "STRING", 284 exp.DataType.Type.SMALLINT: "INT64", 285 exp.DataType.Type.TEXT: "STRING", 286 exp.DataType.Type.TIMESTAMP: "DATETIME", 287 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 288 exp.DataType.Type.TINYINT: "INT64", 289 exp.DataType.Type.VARBINARY: "BYTES", 290 exp.DataType.Type.VARCHAR: "STRING", 291 exp.DataType.Type.VARIANT: "ANY TYPE", 292 } 293 294 PROPERTIES_LOCATION = { 295 **generator.Generator.PROPERTIES_LOCATION, # type: ignore 296 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 297 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 298 } 299 300 def array_sql(self, expression: exp.Array) -> str: 301 first_arg = seq_get(expression.expressions, 0) 302 if isinstance(first_arg, exp.Subqueryable): 303 return f"ARRAY{self.wrap(self.sql(first_arg))}" 304 305 return inline_array_sql(self, expression) 306 307 def transaction_sql(self, *_) -> str: 308 return "BEGIN TRANSACTION" 309 310 def commit_sql(self, *_) -> str: 311 return "COMMIT TRANSACTION" 312 313 def rollback_sql(self, *_) -> str: 314 return "ROLLBACK TRANSACTION" 315 316 def in_unnest_op(self, expression: exp.Unnest) -> str: 317 return self.sql(expression) 318 319 def except_op(self, expression: exp.Except) -> str: 320 if not 
expression.args.get("distinct", False): 321 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 322 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 323 324 def intersect_op(self, expression: exp.Intersect) -> str: 325 if not expression.args.get("distinct", False): 326 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 327 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 328 329 def with_properties(self, properties: exp.Properties) -> str: 330 return self.properties(properties, prefix=self.seg("OPTIONS"))
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the output the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- identify (bool | str): 'always': always quote identifiers; 'safe': quote identifiers only if they don't contain an uppercase character; True is treated as 'always'.
- normalize (bool): if set to True, all identifiers will be lowercased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names, "upper", "lower", or None Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): whether the comma is leading (True) or trailing (False) in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- afterjournalproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- select_sql
- schema_sql
- star_sql
- structkwarg_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql