sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# unit -> (function name, multiplier applied to the increment); see _add_date_sql.
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# unit -> SQL suffix appended to a UNIX_TIMESTAMP difference; see _date_diff_sql.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units for which _date_diff_sql emits MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _scaled_increment(increment: exp.Expression, multiplier: int) -> exp.Expression:
    """Return an expression equal to `increment * multiplier`.

    Integer literals are folded into a single number literal. Anything else is
    multiplied symbolically, and binary operands are wrapped in parentheses so
    the generated SQL keeps the intended precedence (`(a + b) * 7`, not
    `a + b * 7`).
    """
    if increment.is_number:
        try:
            return exp.Literal.number(int(increment.name) * multiplier)
        except ValueError:
            # Non-integer literal such as 1.5: fall through to the symbolic path
            # instead of failing during SQL generation.
            pass

    if multiplier == 1:
        return increment

    operand = exp.Paren(this=increment) if isinstance(increment, exp.Binary) else increment
    return exp.Mul(this=operand, expression=exp.Literal.number(multiplier))


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    """Generate DATE_ADD / ADD_MONTHS for a date addition or subtraction.

    The unit picks the function and a multiplier from DATE_DELTA_INTERVAL;
    units missing from that table fall back to `DATE_ADD` with multiplier 1.
    A DateSub is expressed by negating the multiplier.
    """
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    return self.func(func, expression.this, _scaled_increment(expression.expression, multiplier))


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    """Generate the SQL for a DateDiff expression.

    Units listed in TIME_DIFF_FACTOR are computed from a UNIX_TIMESTAMP
    difference; all other units use MONTHS_BETWEEN or DATEDIFF, divided by the
    unit's multiplier from DATE_DELTA_INTERVAL when it is greater than 1.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # An empty factor (SECOND) needs neither scaling nor parentheses.
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    """Generate TO_JSON, with a special case for a string literal cast to JSON."""
    this = expression.this
    if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    """Generate SORT_ARRAY; a comparator argument is reported as unsupported and dropped."""
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    """Generate a property as `'name'=value`, with the name single-quoted."""
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    """Generate UNIX_TIMESTAMP, passing the format only when _time_format returns one."""
    return self.func("UNIX_TIMESTAMP", expression.this, _time_format(self, expression))


def _str_to_temporal_sql(
    self: generator.Generator, expression: exp.StrToDate | exp.StrToTime, target_type: str
) -> str:
    """Shared body of _str_to_date_sql / _str_to_time_sql: CAST the string to `target_type`.

    When the format is neither Hive.TIME_FORMAT nor Hive.DATE_FORMAT, the string
    is first round-tripped through UNIX_TIMESTAMP / FROM_UNIXTIME with that format.
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS {target_type})"


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    """Generate a CAST(... AS DATE) for a StrToDate expression."""
    return _str_to_temporal_sql(self, expression, "DATE")


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    """Generate a CAST(... AS TIMESTAMP) for a StrToTime expression."""
    return _str_to_temporal_sql(self, expression, "TIMESTAMP")


def _time_format(
    self: generator.Generator, expression: exp.UnixToStr | exp.StrToUnix
) -> t.Optional[str]:
    """Return the expression's formatted time format, or None when it equals Hive.TIME_FORMAT."""
    time_format = self.format_time(expression)
    if time_format == Hive.TIME_FORMAT:
        return None
    return time_format


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    """Generate DATE_FORMAT(this, format) for a TimeToStr expression."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    """Generate TO_DATE, including the format only when it differs from Hive's defaults."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    """Hive dialect: tokenizer, parser and generator customizations."""

    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Hive time-format tokens mapped to strftime-style directives.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Format literals (quotes included) that the generator helpers compare against.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # NOTE(review): presumably numeric-literal suffixes (e.g. 10L) mapped to
        # the type they denote -- confirm against tokens.Tokenizer.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Hive function names mapped to builders of sqlglot expressions.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is represented as an addition of the increment times -1.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            parsed = super()._parse_types(check_func=check_func, schema=schema)

            # Nothing parsed, or a schema definition: CHAR/VARCHAR stay untouched.
            if not parsed or schema:
                return parsed

            def _char_to_text(node: exp.Expression) -> exp.Expression:
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                    return node.replace(exp.DataType.build("text"))
                return node

            return parsed.transform(_char_to_text, copy=False)

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"

        # Type names emitted for Hive on top of the base generator's mapping.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.BIT: "BOOLEAN",
        }

        # Expression type -> callable(generator, expression) producing Hive SQL.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Generate COLLECT_LIST; an Order wrapper around the argument is dropped."""
            argument = expression.this
            if isinstance(argument, exp.Order):
                argument = argument.this
            return self.func("COLLECT_LIST", argument)

        def with_properties(self, properties: exp.Properties) -> str:
            """Generate the properties prefixed with a TBLPROPERTIES segment."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Normalize a data type before delegating to the base generator.

            - VARCHAR / NVARCHAR without arguments becomes text.
            - Temporal types are rebuilt from their bare type.
            - A float with a size becomes float (size <= 32) or double.
            """
            kind = expression.this
            if (
                kind in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif kind in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(kind)
            elif expression.is_type("float"):
                size_node = expression.find(exp.DataTypeSize)
                if size_node:
                    size = int(size_node.name)
                    expression = exp.DataType.build("float" if size <= 32 else "double")

            return super().datatype_sql(expression)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
class Hive(Dialect):
    """Hive dialect: tokenizer, parser and generator customizations."""

    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Hive time-format tokens mapped to strftime-style directives.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Format literals, quotes included, referenced by the generator transforms.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # NOTE(review): presumably numeric-literal suffixes (e.g. 10L) mapped to
        # the type they denote -- confirm against tokens.Tokenizer.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Hive function names mapped to builders of sqlglot expressions.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is represented as an addition of the increment times -1.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            parsed = super()._parse_types(check_func=check_func, schema=schema)

            # Nothing parsed, or a schema definition: CHAR/VARCHAR stay untouched.
            if not parsed or schema:
                return parsed

            def _char_to_text(node: exp.Expression) -> exp.Expression:
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                    return node.replace(exp.DataType.build("text"))
                return node

            return parsed.transform(_char_to_text, copy=False)

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"

        # Type names emitted for Hive on top of the base generator's mapping.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.BIT: "BOOLEAN",
        }

        # Expression type -> callable(generator, expression) producing Hive SQL.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Generate COLLECT_LIST; an Order wrapper around the argument is dropped."""
            argument = expression.this
            if isinstance(argument, exp.Order):
                argument = argument.this
            return self.func("COLLECT_LIST", argument)

        def with_properties(self, properties: exp.Properties) -> str:
            """Generate the properties prefixed with a TBLPROPERTIES segment."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Normalize a data type before delegating to the base generator.

            - VARCHAR / NVARCHAR without arguments becomes text.
            - Temporal types are rebuilt from their bare type.
            - A float with a size becomes float (size <= 32) or double.
            """
            kind = expression.this
            if (
                kind in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif kind in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(kind)
            elif expression.is_type("float"):
                size_node = expression.find(exp.DataTypeSize)
                if size_node:
                    size = int(size_node.name)
                    expression = exp.DataType.build("float" if size <= 32 else "double")

            return super().datatype_sql(expression)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
parser_class =
<class 'sqlglot.dialects.hive.Hive.Parser'>
generator_class =
<class 'sqlglot.dialects.hive.Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for the Hive dialect."""

    QUOTES = ["'", '"']
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]
    ENCODE = "utf-8"

    # Base keywords, then the multi-word statements tokenized as commands,
    # then the SERDEPROPERTIES clause.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        **dict.fromkeys(
            (
                "ADD ARCHIVE",
                "ADD ARCHIVES",
                "ADD FILE",
                "ADD FILES",
                "ADD JAR",
                "ADD JARS",
                "MSCK REPAIR",
            ),
            TokenType.COMMAND,
        ),
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
    }

    # NOTE(review): presumably numeric-literal suffixes (e.g. 10L) mapped to
    # the type they denote -- confirm against tokens.Tokenizer.
    NUMERIC_LITERALS = dict(
        L="BIGINT",
        S="SMALLINT",
        Y="TINYINT",
        D="DOUBLE",
        F="FLOAT",
        BD="DECIMAL",
    )
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 
'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'IF': <TokenType.IF: 'IF'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 
'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NEXT VALUE FOR': <TokenType.NEXT_VALUE_FOR: 'NEXT_VALUE_FOR'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 
'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': 
<TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': 
<TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
class Parser(parser.Parser):
    """Hive-specific parser: extends the base parser's function and property tables and
    rewrites CHAR/VARCHAR types to TEXT outside of schema definitions."""

    # NOTE(review): presumably makes a single-argument LOG parse as a natural log — confirm
    # against parser.Parser, where this flag is consumed.
    LOG_DEFAULTS_TO_LN = True
    # NOTE(review): presumably makes CAST parse as a non-strict cast — confirm against
    # parser.Parser, where this flag is consumed.
    STRICT_CAST = False

    # Hive function names mapped to builders that receive the parsed argument list.
    # Entries listed after the spread override same-named entries of the base table.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        # DATE_ADD(date, n) is built with an explicit DAY unit.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        # The first argument is wrapped in TimeStrToTime before the format-aware builder runs.
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        # DATE_SUB(date, n) is expressed as an addition of (n * -1) days.
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        # Both operands are wrapped in TsOrDsToDate before the difference is built.
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        # The third positional argument is stored as the capture group.
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    # Adds the WITH SERDEPROPERTIES clause, parsed as a wrapped, comma-separated
    # list of properties, on top of the base property parsers.
    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse a type, replacing CHAR/VARCHAR with TEXT unless parsing a schema definition.

        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as
        casts to STRING everywhere except in schema definitions. In Spark v3.4.0,
        `select cast(1234 as varchar(2))` emits a CharVarcharUtils warning saying the cast
        operator does not support char/varchar and treats them as string type, and the
        query returns 1234.

        The value is therefore not truncated to '12', which is inconsistent with what other
        dialects (e.g. postgres) do, so the length has to be dropped to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html

        Args:
            check_func: Forwarded unchanged to the base implementation.
            schema: Forwarded to the base implementation; when truthy, the parsed type is
                returned without any rewriting.

        Returns:
            Whatever the base implementation parsed, with every CHAR/VARCHAR data type node
            replaced by a TEXT data type when `schema` is falsy.
        """
        parsed = super()._parse_types(check_func=check_func, schema=schema)

        # Nothing was parsed, or this is a schema definition where the types are kept as-is.
        if not parsed or schema:
            return parsed

        def _char_to_text(node: exp.Expression) -> exp.Expression:
            # Swap the node inside its parent and hand the replacement back to transform.
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        # copy=False: the tree that was just parsed is rewritten in place.
        return parsed.transform(_char_to_text, copy=False)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function 
Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function 
format_time_lambda.<locals>._format_time>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function 
Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- ADD_CONSTRAINT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    # Generator settings overridden for Hive. Their exact effect is defined by
    # the base generator, which is not shown here.
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"

    # Base type mapping extended with the type names emitted for Hive.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.BIT: "BOOLEAN",
    }

    # Base transforms extended with Hive-specific SQL generation per expression
    # type. Each value is a callable taking (generator, expression).
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        # CONCAT_WS receives the separator (e.expression) before the array (e.this).
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql,
        exp.ILike: no_ilike_sql,
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.VarMap: var_map_sql,
        exp.Create: create_with_partitions_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        # The delimiter is prefixed with a \Q marker via CONCAT before being
        # handed to SPLIT.
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, _time_format(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
    }

    # Base property locations with Hive overrides; VOLATILE is marked as
    # unsupported.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render an ARRAY_AGG expression as a COLLECT_LIST call.

        When the aggregated argument is wrapped in an Order node, only the
        ordered operand is passed on; the ordering itself is not emitted.
        """
        operand = expression.this
        if isinstance(operand, exp.Order):
            operand = operand.this
        return self.func("COLLECT_LIST", operand)

    def with_properties(self, properties: exp.Properties) -> str:
        """Render the given properties behind a TBLPROPERTIES prefix."""
        tblproperties_prefix = self.seg("TBLPROPERTIES")
        return self.properties(properties, prefix=tblproperties_prefix)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type after normalizing it.

        - VARCHAR / NVARCHAR without arguments is replaced by TEXT.
        - Temporal types are rebuilt from their bare type.
        - FLOAT with an explicit size becomes FLOAT when the size is at most
          32, and DOUBLE otherwise.

        The (possibly replaced) type is then rendered by the base generator.
        """
        kind = expression.this
        unsized_varchar_kinds = (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)

        if kind in unsized_varchar_kinds and not expression.expressions:
            expression = exp.DataType.build("text")
        elif kind in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(kind)
        elif expression.is_type("float"):
            size_node = expression.find(exp.DataTypeSize)
            if size_node:
                bits = int(size_node.name)
                expression = exp.DataType.build("float" if bits <= 32 else "double")

        return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.TEXT: 'TEXT'>: 'STRING', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.BIT: 'BIT'>: 'BOOLEAN'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function 
Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 
'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 
'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type after normalizing it.

    - VARCHAR / NVARCHAR without arguments is replaced by TEXT.
    - Temporal types are rebuilt from their bare type.
    - FLOAT with an explicit size becomes FLOAT when the size is at most 32,
      and DOUBLE otherwise.

    The (possibly replaced) type is then rendered by the parent implementation.
    """
    kind = expression.this
    unsized_varchar_kinds = (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)

    if kind in unsized_varchar_kinds and not expression.expressions:
        expression = exp.DataType.build("text")
    elif kind in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(kind)
    elif expression.is_type("float"):
        size_node = expression.find(exp.DataTypeSize)
        if size_node:
            bits = int(size_node.name)
            expression = exp.DataType.build("float" if bits <= 32 else "double")

    return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Decide whether `text` can be identified under the given `identify` option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: the answer is always True.
            "safe": the answer is True only when `cls.case_sensitive(text)`
            reports the text as not case-sensitive.
            Any other value: the answer is False.

    Returns:
        Whether or not the given text can be identified.
    """
    # Unconditional modes first.
    if identify is True or identify == "always":
        return True

    # Anything other than "safe" at this point never identifies.
    if identify != "safe":
        return False

    return not cls.case_sensitive(text)
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql