sqlglot.dialects.snowflake
from __future__ import annotations

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    datestrtodate_sql,
    format_time_lambda,
    inline_array_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_to_date_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import flatten, seq_get
from sqlglot.tokens import TokenType


def _check_int(s):
    if s[0] in ("-", "+"):
        return s[1:].isdigit()
    return s.isdigit()


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _snowflake_to_timestamp(args):
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return format_time_lambda(exp.StrToTime, "snowflake")(args)

        # case: <numeric_expr> [ , <scale> ]
        if second_arg.name not in ["0", "3", "9"]:
            raise ValueError(
                f"Scale for snowflake numeric timestamp is {second_arg}, but should be 0, 3, or 9"
            )

        if second_arg.name == "0":
            timescale = exp.UnixToTime.SECONDS
        elif second_arg.name == "3":
            timescale = exp.UnixToTime.MILLIS
        elif second_arg.name == "9":
            timescale = exp.UnixToTime.MICROS

        return exp.UnixToTime(this=first_arg, scale=timescale)

    first_arg = seq_get(args, 0)
    if not isinstance(first_arg, Literal):
        # case: <variant_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    if first_arg.is_string:
        if _check_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return format_time_lambda(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _unix_to_time_sql(self, expression):
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in [None, exp.UnixToTime.SECONDS]:
        return f"TO_TIMESTAMP({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TO_TIMESTAMP({timestamp}, 3)"
    if scale == exp.UnixToTime.MICROS:
        return f"TO_TIMESTAMP({timestamp}, 9)"

    raise ValueError("Improper scale for timestamp")


# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self):
    this = self._parse_var() or self._parse_type()
    self._match(TokenType.COMMA)
    expression = self._parse_bitwise()

    name = this.name.upper()
    if name.startswith("EPOCH"):
        if name.startswith("EPOCH_MILLISECOND"):
            scale = 10**3
        elif name.startswith("EPOCH_MICROSECOND"):
            scale = 10**6
        elif name.startswith("EPOCH_NANOSECOND"):
            scale = 10**9
        else:
            scale = None

        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
        to_unix = self.expression(exp.TimeToUnix, this=ts)

        if scale:
            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

        return to_unix

    return self.expression(exp.Extract, this=this, expression=expression)


def _datatype_sql(self, expression):
    if expression.this == exp.DataType.Type.ARRAY:
        return "ARRAY"
    elif expression.this == exp.DataType.Type.MAP:
        return "OBJECT"
    return self.datatype_sql(expression)


class Snowflake(Dialect):
    null_ordering = "nulls_are_large"
    time_format = "'yyyy-mm-dd hh24:mi:ss'"

    time_mapping = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%d",
        "d": "%-d",
        "DY": "%w",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    class Parser(parser.Parser):
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(seq_get(args, 0).name),  # type: ignore
                this=seq_get(args, 1),
            ),
            "IFF": exp.If.from_arg_list,
            "TO_TIMESTAMP": _snowflake_to_timestamp,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "DECODE": exp.Matches.from_arg_list,
            "OBJECT_CONSTRUCT": parser.parse_var_map,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": _parse_date_part,
        }
        FUNCTION_PARSERS.pop("TRIM")

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.RLIKE,
            TokenType.TABLE,
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,  # type: ignore
            TokenType.COLON: lambda self, this, path: self.expression(
                exp.Bracket,
                this=this,
                expressions=[path],
            ),
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,  # type: ignore
            TokenType.LIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.LikeAny, this=this, expression=self._parse_bitwise())
            ),
            TokenType.ILIKE_ANY: lambda self, this: self._parse_escape(
                self.expression(exp.ILikeAny, this=this, expression=self._parse_bitwise())
            ),
        }

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", "$$"]
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "PUT": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "MINUS": TokenType.EXCEPT,
            "SAMPLE": TokenType.TABLE_SAMPLE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.DateAdd: rename_func("DATEADD"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.If: rename_func("IFF"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Matches: rename_func("DECODE"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("snowflake"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,  # type: ignore
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def ilikeany_sql(self, expression: exp.ILikeAny) -> str:
            return self.binary(expression, "ILIKE ANY")

        def likeany_sql(self, expression: exp.LikeAny) -> str:
            return self.binary(expression, "LIKE ANY")

        def except_op(self, expression):
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression):
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def values_sql(self, expression: exp.Values) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted.

            We also want to make sure that after we find matches where we need to unquote a column that we prevent users
            from adding quotes to the column by using the `identify` argument when generating the SQL.
            """
            alias = expression.args.get("alias")
            if alias and alias.args.get("columns"):
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier)
                    and isinstance(node.parent, exp.TableAlias)
                    and node.arg_key == "columns"
                    else node,
                )
                return self.no_identify(lambda: super(self.__class__, self).values_sql(expression))
            return super().values_sql(expression)

        def select_sql(self, expression: exp.Select) -> str:
            """Due to a bug in Snowflake we want to make sure that all columns in a VALUES table alias are unquoted and also
            that all columns in a SELECT are unquoted. We also want to make sure that after we find matches where we need
            to unquote a column that we prevent users from adding quotes to the column by using the `identify` argument when
            generating the SQL.

            Note: We make an assumption that any columns referenced in a VALUES expression should be unquoted throughout the
            expression. This might not be true in a case where the same column name can be sourced from another table that can
            properly quote but should be true in most cases.
            """
            values_expressions = expression.find_all(exp.Values)
            values_identifiers = set(
                flatten(
                    v.args.get("alias", exp.Alias()).args.get("columns", [])
                    for v in values_expressions
                )
            )
            if values_identifiers:
                expression = expression.transform(
                    lambda node: exp.Identifier(**{**node.args, "quoted": False})
                    if isinstance(node, exp.Identifier) and node in values_identifiers
                    else node,
                )
                return self.no_identify(lambda: super(self.__class__, self).select_sql(expression))
            return super().select_sql(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            return f"DESCRIBE{kind}{this}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"
class Snowflake(sqlglot.dialects.dialect.Dialect)
class Snowflake.Parser(sqlglot.parser.Parser)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE.
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0.
- alias_post_tablesample: if the table alias comes after tablesample. Default: False.
- max_errors: maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3.
- null_ordering: indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small".
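A small sketch of two of the Snowflake-specific parser overrides in action, assuming sqlglot's public parse_one API (the table and column names are illustrative):

from sqlglot import exp, parse_one

# Snowflake's colon operator for semi-structured access is parsed into an
# exp.Bracket node via the COLUMN_OPERATORS override.
tree = parse_one("SELECT payload:city FROM events", read="snowflake")
assert tree.find(exp.Bracket) is not None

# IFF is registered in FUNCTIONS to parse as a generic exp.If node.
assert parse_one("SELECT IFF(x > 0, 1, 0) FROM t", read="snowflake").find(exp.If) is not None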
class Snowflake.Tokenizer(sqlglot.tokens.Tokenizer)
class Snowflake.Generator(sqlglot.generator.Generator)
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the value the target time format.
- time_trie (trie): a trie of the time_mapping keys.
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- identify (bool): if set to True all identifiers will be delimited by the corresponding character.
- normalize (bool): if set to True all identifiers will be lowercased.
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true, unnest table aliases are considered only as column aliases.
- normalize_functions (str): normalize function names, "upper", "lower", or None. Default: "upper".
- alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False.
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- null_ordering (str): indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small".
- max_unsupported (int): maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma (bool): if the comma is leading or trailing in SELECT statements. Default: False.
- max_text_width: the maximum number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: whether or not to preserve comments in the output SQL code. Default: True.
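These options are typically passed through sqlglot.transpile, which forwards them to the generator. A brief sketch with an illustrative query (the pretty-printed layout may differ between sqlglot versions):

import sqlglot

sql = "SELECT a FROM t WHERE a LIKE ANY ('%x%', '%y%')"

# pretty is a standard generation option; the Snowflake generator's
# likeany_sql hook renders the LIKE ANY operator on the way back out.
print(sqlglot.transpile(sql, read="snowflake", write="snowflake", pretty=True)[0])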
def values_sql(self, expression: exp.Values) -> str
Due to a bug in Snowflake, we make sure that all columns in a VALUES table alias are unquoted. Once the columns that need unquoting are found, we also prevent the identify generation option from re-quoting them.
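The unquoting itself is a plain AST rewrite. Below is a standalone sketch of the same transform using only public sqlglot APIs; unlike values_sql, it unquotes every identifier rather than only TableAlias columns, and the query is illustrative:

from sqlglot import exp, parse_one

tree = parse_one('SELECT "a" FROM (VALUES (1)) AS t("a")', read="snowflake")

# Rebuild every Identifier with quoted=False, leaving other nodes untouched;
# values_sql applies the same idea, restricted to VALUES alias columns.
unquoted = tree.transform(
    lambda node: exp.Identifier(**{**node.args, "quoted": False})
    if isinstance(node, exp.Identifier)
    else node,
)
print(unquoted.sql(dialect="snowflake"))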
def select_sql(self, expression: exp.Select) -> str
Due to a bug in Snowflake, we make sure that all columns in a VALUES table alias are unquoted, and likewise that every reference to those columns in the surrounding SELECT is unquoted. Once such columns are found, we also prevent the identify generation option from re-quoting them.

Note: we assume that any column referenced in a VALUES expression should be unquoted throughout the expression. This might not hold when the same column name is also sourced from another table that quotes properly, but it should be true in most cases.
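End to end, the intended effect is that even identify=True leaves VALUES alias columns (and references to them) unquoted. A hedged example; the exact quoting of the remaining identifiers depends on the sqlglot version:

import sqlglot

sql = "SELECT a FROM (VALUES (1)) AS t(a)"

# With identify=True most identifiers get quoted, but select_sql/values_sql
# deliberately leave the VALUES alias columns bare to work around the bug.
print(sqlglot.transpile(sql, read="snowflake", write="snowflake", identify=True)[0])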