Supports BigQuery Standard SQL.
1"""Supports BigQuery Standard SQL.""" 2 3from __future__ import annotations 4 5import re 6import typing as t 7 8from sqlglot import exp, generator, parser, tokens, transforms 9from sqlglot.dialects.dialect import ( 10 Dialect, 11 datestrtodate_sql, 12 inline_array_sql, 13 no_ilike_sql, 14 rename_func, 15 timestrtotime_sql, 16 ts_or_ds_to_date_sql, 17) 18from sqlglot.helper import seq_get 19from sqlglot.tokens import TokenType 20 21E = t.TypeVar("E", bound=exp.Expression) 22 23 24def _date_add(expression_class: t.Type[E]) -> t.Callable[[t.Sequence], E]: 25 def func(args): 26 interval = seq_get(args, 1) 27 return expression_class( 28 this=seq_get(args, 0), 29 expression=interval.this, 30 unit=interval.args.get("unit"), 31 ) 32 33 return func 34 35 36def _date_add_sql( 37 data_type: str, kind: str 38) -> t.Callable[[generator.Generator, exp.Expression], str]: 39 def func(self, expression): 40 this = self.sql(expression, "this") 41 return f"{data_type}_{kind}({this}, {self.sql(exp.Interval(this=expression.expression, unit=expression.args.get('unit') or exp.Literal.string('day')))})" 42 43 return func 44 45 46def _derived_table_values_to_unnest(self: generator.Generator, expression: exp.Values) -> str: 47 if not isinstance(expression.unnest().parent, exp.From): 48 expression = t.cast(exp.Values, transforms.remove_precision_parameterized_types(expression)) 49 return self.values_sql(expression) 50 rows = [tuple_exp.expressions for tuple_exp in expression.find_all(exp.Tuple)] 51 structs = [] 52 for row in rows: 53 aliases = [ 54 exp.alias_(value, column_name) 55 for value, column_name in zip(row, expression.args["alias"].args["columns"]) 56 ] 57 structs.append(exp.Struct(expressions=aliases)) 58 unnest_exp = exp.Unnest(expressions=[exp.Array(expressions=structs)]) 59 return self.unnest_sql(unnest_exp) 60 61 62def _returnsproperty_sql(self: generator.Generator, expression: exp.ReturnsProperty) -> str: 63 this = expression.this 64 if isinstance(this, exp.Schema): 65 this = 
f"{this.this} <{self.expressions(this)}>" 66 else: 67 this = self.sql(this) 68 return f"RETURNS {this}" 69 70 71def _create_sql(self: generator.Generator, expression: exp.Create) -> str: 72 kind = expression.args["kind"] 73 returns = expression.find(exp.ReturnsProperty) 74 if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"): 75 expression = expression.copy() 76 expression.set("kind", "TABLE FUNCTION") 77 if isinstance( 78 expression.expression, 79 ( 80 exp.Subquery, 81 exp.Literal, 82 ), 83 ): 84 expression.set("expression", expression.expression.this) 85 86 return self.create_sql(expression) 87 88 return self.create_sql(expression) 89 90 91def _unqualify_unnest(expression: exp.Expression) -> exp.Expression: 92 """Remove references to unnest table aliases since bigquery doesn't allow them. 93 94 These are added by the optimizer's qualify_column step. 95 """ 96 if isinstance(expression, exp.Select): 97 unnests = { 98 unnest.alias 99 for unnest in expression.args.get("from", exp.From(expressions=[])).expressions 100 if isinstance(unnest, exp.Unnest) and unnest.alias 101 } 102 103 if unnests: 104 expression = expression.copy() 105 106 for select in expression.expressions: 107 for column in select.find_all(exp.Column): 108 if column.table in unnests: 109 column.set("table", None) 110 111 return expression 112 113 114class BigQuery(Dialect): 115 unnest_column_only = True 116 time_mapping = { 117 "%M": "%-M", 118 "%d": "%-d", 119 "%m": "%-m", 120 "%y": "%-y", 121 "%H": "%-H", 122 "%I": "%-I", 123 "%S": "%-S", 124 "%j": "%-j", 125 } 126 127 class Tokenizer(tokens.Tokenizer): 128 QUOTES = [ 129 (prefix + quote, quote) if prefix else quote 130 for quote in ["'", '"', '"""', "'''"] 131 for prefix in ["", "r", "R"] 132 ] 133 COMMENTS = ["--", "#", ("/*", "*/")] 134 IDENTIFIERS = ["`"] 135 STRING_ESCAPES = ["\\"] 136 HEX_STRINGS = [("0x", ""), ("0X", "")] 137 138 KEYWORDS = { 139 **tokens.Tokenizer.KEYWORDS, 140 "BEGIN": TokenType.COMMAND, 141 "BEGIN 
TRANSACTION": TokenType.BEGIN, 142 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 143 "CURRENT_TIME": TokenType.CURRENT_TIME, 144 "DECLARE": TokenType.COMMAND, 145 "GEOGRAPHY": TokenType.GEOGRAPHY, 146 "FLOAT64": TokenType.DOUBLE, 147 "INT64": TokenType.BIGINT, 148 "NOT DETERMINISTIC": TokenType.VOLATILE, 149 "UNKNOWN": TokenType.NULL, 150 } 151 KEYWORDS.pop("DIV") 152 153 class Parser(parser.Parser): 154 FUNCTIONS = { 155 **parser.Parser.FUNCTIONS, # type: ignore 156 "DATE_TRUNC": lambda args: exp.DateTrunc( 157 unit=exp.Literal.string(seq_get(args, 1).name), # type: ignore 158 this=seq_get(args, 0), 159 ), 160 "DATE_ADD": _date_add(exp.DateAdd), 161 "DATETIME_ADD": _date_add(exp.DatetimeAdd), 162 "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)), 163 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 164 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 165 this=seq_get(args, 0), 166 expression=seq_get(args, 1), 167 position=seq_get(args, 2), 168 occurrence=seq_get(args, 3), 169 group=exp.Literal.number(1) 170 if re.compile(str(seq_get(args, 1))).groups == 1 171 else None, 172 ), 173 "TIME_ADD": _date_add(exp.TimeAdd), 174 "TIMESTAMP_ADD": _date_add(exp.TimestampAdd), 175 "DATE_SUB": _date_add(exp.DateSub), 176 "DATETIME_SUB": _date_add(exp.DatetimeSub), 177 "TIME_SUB": _date_add(exp.TimeSub), 178 "TIMESTAMP_SUB": _date_add(exp.TimestampSub), 179 "PARSE_TIMESTAMP": lambda args: exp.StrToTime( 180 this=seq_get(args, 1), format=seq_get(args, 0) 181 ), 182 } 183 184 FUNCTION_PARSERS = { 185 **parser.Parser.FUNCTION_PARSERS, # type: ignore 186 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 187 } 188 FUNCTION_PARSERS.pop("TRIM") 189 190 NO_PAREN_FUNCTIONS = { 191 **parser.Parser.NO_PAREN_FUNCTIONS, # type: ignore 192 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 193 TokenType.CURRENT_TIME: exp.CurrentTime, 194 } 195 196 NESTED_TYPE_TOKENS = { 197 *parser.Parser.NESTED_TYPE_TOKENS, # type: 
ignore 198 TokenType.TABLE, 199 } 200 201 ID_VAR_TOKENS = { 202 *parser.Parser.ID_VAR_TOKENS, # type: ignore 203 TokenType.VALUES, 204 } 205 206 PROPERTY_PARSERS = { 207 **parser.Parser.PROPERTY_PARSERS, # type: ignore 208 "NOT DETERMINISTIC": lambda self: self.expression( 209 exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") 210 ), 211 } 212 213 class Generator(generator.Generator): 214 TRANSFORMS = { 215 **generator.Generator.TRANSFORMS, # type: ignore 216 **transforms.REMOVE_PRECISION_PARAMETERIZED_TYPES, # type: ignore 217 exp.ArraySize: rename_func("ARRAY_LENGTH"), 218 exp.DateAdd: _date_add_sql("DATE", "ADD"), 219 exp.DateSub: _date_add_sql("DATE", "SUB"), 220 exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"), 221 exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"), 222 exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 223 exp.DateStrToDate: datestrtodate_sql, 224 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 225 exp.GroupConcat: rename_func("STRING_AGG"), 226 exp.ILike: no_ilike_sql, 227 exp.IntDiv: rename_func("DIV"), 228 exp.Select: transforms.preprocess( 229 [_unqualify_unnest], transforms.delegate("select_sql") 230 ), 231 exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})", 232 exp.TimeAdd: _date_add_sql("TIME", "ADD"), 233 exp.TimeSub: _date_add_sql("TIME", "SUB"), 234 exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"), 235 exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"), 236 exp.TimeStrToTime: timestrtotime_sql, 237 exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"), 238 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 239 exp.VariancePop: rename_func("VAR_POP"), 240 exp.Values: _derived_table_values_to_unnest, 241 exp.ReturnsProperty: _returnsproperty_sql, 242 exp.Create: _create_sql, 243 exp.Trim: lambda self, e: self.func(f"TRIM", e.this, 
e.expression), 244 exp.VolatilityProperty: lambda self, e: f"DETERMINISTIC" 245 if e.name == "IMMUTABLE" 246 else "NOT DETERMINISTIC", 247 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 248 } 249 250 TYPE_MAPPING = { 251 **generator.Generator.TYPE_MAPPING, # type: ignore 252 exp.DataType.Type.TINYINT: "INT64", 253 exp.DataType.Type.SMALLINT: "INT64", 254 exp.DataType.Type.INT: "INT64", 255 exp.DataType.Type.BIGINT: "INT64", 256 exp.DataType.Type.DECIMAL: "NUMERIC", 257 exp.DataType.Type.FLOAT: "FLOAT64", 258 exp.DataType.Type.DOUBLE: "FLOAT64", 259 exp.DataType.Type.BOOLEAN: "BOOL", 260 exp.DataType.Type.TEXT: "STRING", 261 exp.DataType.Type.VARCHAR: "STRING", 262 exp.DataType.Type.NVARCHAR: "STRING", 263 } 264 PROPERTIES_LOCATION = { 265 **generator.Generator.PROPERTIES_LOCATION, # type: ignore 266 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 267 } 268 269 EXPLICIT_UNION = True 270 271 def array_sql(self, expression: exp.Array) -> str: 272 first_arg = seq_get(expression.expressions, 0) 273 if isinstance(first_arg, exp.Subqueryable): 274 return f"ARRAY{self.wrap(self.sql(first_arg))}" 275 276 return inline_array_sql(self, expression) 277 278 def transaction_sql(self, *_) -> str: 279 return "BEGIN TRANSACTION" 280 281 def commit_sql(self, *_) -> str: 282 return "COMMIT TRANSACTION" 283 284 def rollback_sql(self, *_) -> str: 285 return "ROLLBACK TRANSACTION" 286 287 def in_unnest_op(self, expression: exp.Unnest) -> str: 288 return self.sql(expression) 289 290 def except_op(self, expression: exp.Except) -> str: 291 if not expression.args.get("distinct", False): 292 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 293 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 294 295 def intersect_op(self, expression: exp.Intersect) -> str: 296 if not expression.args.get("distinct", False): 297 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 298 return f"INTERSECT{' DISTINCT' if 
expression.args.get('distinct') else ' ALL'}"
class BigQuery(Dialect):
    """BigQuery Standard SQL dialect: tokenizer, parser and generator overrides."""

    # When true, unnest table aliases are considered only as column aliases.
    unnest_column_only = True
    # Translation table for time-format specifiers.
    time_mapping = {
        "%M": "%-M",
        "%d": "%-d",
        "%m": "%-m",
        "%y": "%-y",
        "%H": "%-H",
        "%I": "%-I",
        "%S": "%-S",
        "%j": "%-j",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for BigQuery."""

        # Each quote delimiter, bare and with an r/R prefix; prefixed forms
        # are (start, end) pairs.
        QUOTES = [
            (prefix + quote, quote) if prefix else quote
            for quote in ["'", '"', '"""', "'''"]
            for prefix in ["", "r", "R"]
        ]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        HEX_STRINGS = [("0x", ""), ("0X", "")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "CURRENT_TIME": TokenType.CURRENT_TIME,
            "DECLARE": TokenType.COMMAND,
            "GEOGRAPHY": TokenType.GEOGRAPHY,
            "FLOAT64": TokenType.DOUBLE,
            "INT64": TokenType.BIGINT,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "UNKNOWN": TokenType.NULL,
        }
        # DIV is dropped from the keyword table; the parser below registers
        # DIV as a function instead.
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        """Parser settings for BigQuery."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # The second argument's name is stored as a string-literal unit.
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 1).name),  # type: ignore
                this=seq_get(args, 0),
            ),
            "DATE_ADD": _date_add(exp.DateAdd),
            "DATETIME_ADD": _date_add(exp.DatetimeAdd),
            "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            # `group` is set to 1 only when the pattern argument's string form
            # compiles to exactly one capturing group.
            # NOTE(review): re.compile raises for patterns Python's `re`
            # rejects - confirm this is acceptable at parse time.
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1)
                if re.compile(str(seq_get(args, 1))).groups == 1
                else None,
            ),
            "TIME_ADD": _date_add(exp.TimeAdd),
            "TIMESTAMP_ADD": _date_add(exp.TimestampAdd),
            "DATE_SUB": _date_add(exp.DateSub),
            "DATETIME_SUB": _date_add(exp.DatetimeSub),
            "TIME_SUB": _date_add(exp.TimeSub),
            "TIMESTAMP_SUB": _date_add(exp.TimestampSub),
            # Argument order is (format, value).
            "PARSE_TIMESTAMP": lambda args: exp.StrToTime(
                this=seq_get(args, 1), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,  # type: ignore
            # ARRAY(...) wraps a full parsed statement.
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        # Drop the inherited special-case TRIM parser.
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,  # type: ignore
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
            TokenType.CURRENT_TIME: exp.CurrentTime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,  # type: ignore
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,  # type: ignore
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,  # type: ignore
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
            ),
        }

    class Generator(generator.Generator):
        """SQL generator settings for BigQuery."""

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            **transforms.REMOVE_PRECISION_PARAMETERIZED_TYPES,  # type: ignore
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.DateAdd: _date_add_sql("DATE", "ADD"),
            exp.DateSub: _date_add_sql("DATE", "SUB"),
            exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
            exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
            # `or` (not a .get default) so a unit stored as None also falls
            # back to DAY instead of rendering an empty third argument.
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit') or 'DAY')})",
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            # Strip unnest table aliases from columns before select_sql runs.
            exp.Select: transforms.preprocess(
                [_unqualify_unnest], transforms.delegate("select_sql")
            ),
            exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.TimeAdd: _date_add_sql("TIME", "ADD"),
            exp.TimeSub: _date_add_sql("TIME", "SUB"),
            exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.Values: _derived_table_values_to_unnest,
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Create: _create_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            # Only IMMUTABLE maps to DETERMINISTIC; anything else is NOT DETERMINISTIC.
            exp.VolatilityProperty: lambda self, e: "DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
        }
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        }

        EXPLICIT_UNION = True

        def array_sql(self, expression: exp.Array) -> str:
            """Render ``ARRAY(<subquery>)`` when the first element is a query, else an inline array."""
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def transaction_sql(self, *_) -> str:
            """Return the fixed ``BEGIN TRANSACTION`` statement; arguments are ignored."""
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            """Return the fixed ``COMMIT TRANSACTION`` statement; arguments are ignored."""
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            """Return the fixed ``ROLLBACK TRANSACTION`` statement; arguments are ignored."""
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            """Return the UNNEST expression's own SQL, with nothing added around it."""
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            """Return ``EXCEPT DISTINCT`` / ``EXCEPT ALL``, flagging the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            """Return ``INTERSECT DISTINCT`` / ``INTERSECT ALL``, flagging the non-DISTINCT form as unsupported."""
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"
Inherited Members
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for BigQuery."""

    # Each quote delimiter, bare and with an r/R prefix; prefixed forms are
    # (start, end) pairs.
    QUOTES = [
        (prefix + quote, quote) if prefix else quote
        for quote in ["'", '"', '"""', "'''"]
        for prefix in ["", "r", "R"]
    ]
    # Line comments start with "--" or "#"; block comments use /* ... */.
    COMMENTS = ["--", "#", ("/*", "*/")]
    # Identifiers are delimited with backticks.
    IDENTIFIERS = ["`"]
    # Backslash is the string escape character.
    STRING_ESCAPES = ["\\"]
    # Hex literals are introduced by 0x / 0X and have no closing delimiter.
    HEX_STRINGS = [("0x", ""), ("0X", "")]

    # Base keyword table plus the BigQuery-specific entries below; later keys
    # override inherited ones.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
        "CURRENT_TIME": TokenType.CURRENT_TIME,
        "DECLARE": TokenType.COMMAND,
        "GEOGRAPHY": TokenType.GEOGRAPHY,
        "FLOAT64": TokenType.DOUBLE,
        "INT64": TokenType.BIGINT,
        "NOT DETERMINISTIC": TokenType.VOLATILE,
        "UNKNOWN": TokenType.NULL,
    }
    # DIV is dropped from the keyword table; the dialect's parser registers
    # DIV as a function instead.
    KEYWORDS.pop("DIV")
Inherited Members
class Parser(parser.Parser):
    """Parser settings for BigQuery."""

    # Function-name -> builder table; each builder receives the parsed
    # argument list.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,  # type: ignore
        # The second argument's name is stored as a string-literal unit.
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(seq_get(args, 1).name),  # type: ignore
            this=seq_get(args, 0),
        ),
        "DATE_ADD": _date_add(exp.DateAdd),
        "DATETIME_ADD": _date_add(exp.DatetimeAdd),
        "DIV": lambda args: exp.IntDiv(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        # `group` is set to 1 only when the pattern argument's string form
        # compiles to exactly one capturing group.
        # NOTE(review): re.compile raises for patterns Python's `re` rejects -
        # confirm this is acceptable at parse time.
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1)
            if re.compile(str(seq_get(args, 1))).groups == 1
            else None,
        ),
        "TIME_ADD": _date_add(exp.TimeAdd),
        "TIMESTAMP_ADD": _date_add(exp.TimestampAdd),
        "DATE_SUB": _date_add(exp.DateSub),
        "DATETIME_SUB": _date_add(exp.DatetimeSub),
        "TIME_SUB": _date_add(exp.TimeSub),
        "TIMESTAMP_SUB": _date_add(exp.TimestampSub),
        # Argument order is (format, value).
        "PARSE_TIMESTAMP": lambda args: exp.StrToTime(
            this=seq_get(args, 1), format=seq_get(args, 0)
        ),
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,  # type: ignore
        # ARRAY(...) wraps a full parsed statement.
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    # Drop the inherited special-case TRIM parser.
    FUNCTION_PARSERS.pop("TRIM")

    # Functions that may appear without parentheses.
    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,  # type: ignore
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
    }

    # TABLE is added to the inherited nested-type tokens.
    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,  # type: ignore
        TokenType.TABLE,
    }

    # VALUES is added to the inherited identifier/variable tokens.
    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,  # type: ignore
        TokenType.VALUES,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,  # type: ignore
        # NOT DETERMINISTIC is represented as a VOLATILE volatility property.
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.VolatilityProperty, this=exp.Literal.string("VOLATILE")
        ),
    }
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    """SQL generator settings for BigQuery."""

    # Expression-type -> SQL renderer table; later keys override inherited ones.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,  # type: ignore
        **transforms.REMOVE_PRECISION_PARAMETERIZED_TYPES,  # type: ignore
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.DateAdd: _date_add_sql("DATE", "ADD"),
        exp.DateSub: _date_add_sql("DATE", "SUB"),
        exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
        exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
        # `or` (not a .get default) so a unit stored as None also falls back
        # to DAY instead of rendering an empty third argument.
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit') or 'DAY')})",
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        # Strip unnest table aliases from columns before select_sql runs.
        exp.Select: transforms.preprocess(
            [_unqualify_unnest], transforms.delegate("select_sql")
        ),
        exp.StrToTime: lambda self, e: f"PARSE_TIMESTAMP({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.TimeAdd: _date_add_sql("TIME", "ADD"),
        exp.TimeSub: _date_add_sql("TIME", "SUB"),
        exp.TimestampAdd: _date_add_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: _date_add_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.Values: _derived_table_values_to_unnest,
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Create: _create_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        # Only IMMUTABLE maps to DETERMINISTIC; anything else is NOT DETERMINISTIC.
        exp.VolatilityProperty: lambda self, e: "DETERMINISTIC"
        if e.name == "IMMUTABLE"
        else "NOT DETERMINISTIC",
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
    }

    # All integer widths render as INT64, both float widths as FLOAT64, and
    # the text types as STRING.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,  # type: ignore
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
    }
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
    }

    EXPLICIT_UNION = True

    def array_sql(self, expression: exp.Array) -> str:
        """Render ``ARRAY(<subquery>)`` when the first element is a query, else an inline array."""
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def transaction_sql(self, *_) -> str:
        """Return the fixed ``BEGIN TRANSACTION`` statement; arguments are ignored."""
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        """Return the fixed ``COMMIT TRANSACTION`` statement; arguments are ignored."""
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        """Return the fixed ``ROLLBACK TRANSACTION`` statement; arguments are ignored."""
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        """Return the UNNEST expression's own SQL, with nothing added around it."""
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        """Return ``EXCEPT DISTINCT`` / ``EXCEPT ALL``, flagging the non-DISTINCT form as unsupported."""
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        """Return ``INTERSECT DISTINCT`` / ``INTERSECT ALL``, flagging the non-DISTINCT form as unsupported."""
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the output the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- identify (bool): if set to True all identifiers will be delimited by the corresponding character.
- normalize (bool): if set to True all identifiers will be lower cased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names, "upper", "lower", or None. Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- afterjournalproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- pseudotype_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- select_sql
- schema_sql
- star_sql
- structkwarg_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- window_spec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- is_sql
- like_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql