sqlglot.dialects.hive
"""Hive dialect for sqlglot.

Defines the :class:`Hive` dialect: a tokenizer (backtick identifiers, Hive
numeric-literal suffixes), a parser (Hive builtin functions, TRANSFORM,
SORT BY / DISTRIBUTE BY), and a generator that renders sqlglot expressions
back into HiveQL.
"""

from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
# Maps a date-delta unit to the Hive function used to apply it and the factor
# that converts one unit into that function's native unit (months or days).
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# Suffix appended to a UNIX_TIMESTAMP difference (in seconds) to express the
# result in the given unit.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units whose DATEDIFF is computed via MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self: Hive.Generator, expression: exp.Create) -> str:
    """Render CREATE, dropping UNIQUE constraints and mapping temporary
    CTAS to CREATE TEMPORARY VIEW (Hive has no temporary tables via CTAS)."""
    expression = expression.copy()

    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    """Render DATE_ADD/DATE_SUB as Hive ADD_MONTHS or DATE_ADD, scaling the
    increment by the unit's multiplier (negated for DateSub)."""
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        # Literal increment: fold the multiplier in at transpile time.
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        # Dynamic increment: emit an explicit multiplication when needed.
        modified_increment = expression.expression.copy()
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    """Render DATEDIFF: sub-day units via UNIX_TIMESTAMP arithmetic,
    month-based units via MONTHS_BETWEEN, otherwise Hive DATEDIFF (days)."""
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # Parenthesize only when a scaling suffix is appended.
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    """Render JSONFormat as TO_JSON, round-tripping literal JSON strings
    through FROM_JSON so they are re-serialized rather than double-quoted."""
    this = expression.this
    if is_parse_json(this) and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    """Render ArraySort as SORT_ARRAY; a custom comparator is unsupported."""
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    """Render a property as 'key'=value with the key forced to a string."""
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as UNIX_TIMESTAMP with a Hive-formatted time format."""
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    """Render StrToDate as CAST(... AS DATE), parsing through
    FROM_UNIXTIME(UNIX_TIMESTAMP(...)) for non-default formats."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    """Render StrToTime as CAST(... AS TIMESTAMP), parsing through
    FROM_UNIXTIME(UNIX_TIMESTAMP(...)) for non-default formats."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    """Render TimeToStr as DATE_FORMAT(this, format)."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render TsOrDsToDate as TO_DATE, passing an explicit format only when
    it differs from Hive's default time/date formats."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    """The Hive SQL dialect."""

    # Table aliases come after TABLESAMPLE in Hive.
    ALIAS_POST_TABLESAMPLE = True
    # Hive allows identifiers such as `1a`.
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Java SimpleDateFormat tokens -> strftime-style tokens.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
        }

        # Hive numeric literal suffixes, e.g. 1L (BIGINT), 1.5BD (DECIMAL).
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB(x, n) is modeled as TsOrDsAdd(x, n * -1, DAY).
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse Hive's TRANSFORM(...) [USING ...] clause.

            Returns a plain ``exp.Transform`` when there is no USING script,
            a full ``exp.QueryTransform`` when there is, and ``None`` (after
            rewinding the token stream) when TRANSFORM is not followed by a
            parenthesized argument list.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                # Not a TRANSFORM call; back up so TRANSFORM can be re-parsed
                # as an ordinary identifier.
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                # Outside schema definitions, rewrite CHAR/VARCHAR to TEXT.
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            """Parse window PARTITION BY / DISTRIBUTE BY plus ORDER BY / SORT BY."""
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            """Hook for dialects (e.g. Spark) that require a storage provider
            on CREATE TEMPORARY TABLE; a no-op for Hive."""
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            """Render a parameter as ${name}, except as a bare name on the
            left-hand side of a SET statement."""
            this = self.sql(expression, "this")
            parent = expression.parent

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            """Render a schema, dropping redundant explicit ASC markers."""
            expression = expression.copy()

            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            """Render a named constraint, stripping any attached properties."""
            expression = expression.copy()

            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ROW FORMAT SERDE with optional serde properties."""
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST, discarding any ORDER BY
            wrapper since COLLECT_LIST takes a bare expression."""
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render trailing properties under a TBLPROPERTIES clause."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type with Hive adjustments: sizeless VARCHAR ->
            STRING, temporal types lose their parameters, and parameterized
            FLOAT maps to FLOAT or DOUBLE depending on the bit size."""
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            """Render time-travel clauses without the FOR keyword, e.g.
            'FOR VERSION AS OF' -> 'VERSION AS OF'."""
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
178class Hive(Dialect): 179 ALIAS_POST_TABLESAMPLE = True 180 IDENTIFIERS_CAN_START_WITH_DIGIT = True 181 SUPPORTS_USER_DEFINED_TYPES = False 182 183 # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description 184 RESOLVES_IDENTIFIERS_AS_UPPERCASE = None 185 186 TIME_MAPPING = { 187 "y": "%Y", 188 "Y": "%Y", 189 "YYYY": "%Y", 190 "yyyy": "%Y", 191 "YY": "%y", 192 "yy": "%y", 193 "MMMM": "%B", 194 "MMM": "%b", 195 "MM": "%m", 196 "M": "%-m", 197 "dd": "%d", 198 "d": "%-d", 199 "HH": "%H", 200 "H": "%-H", 201 "hh": "%I", 202 "h": "%-I", 203 "mm": "%M", 204 "m": "%-M", 205 "ss": "%S", 206 "s": "%-S", 207 "SSSSSS": "%f", 208 "a": "%p", 209 "DD": "%j", 210 "D": "%-j", 211 "E": "%a", 212 "EE": "%a", 213 "EEE": "%a", 214 "EEEE": "%A", 215 } 216 217 DATE_FORMAT = "'yyyy-MM-dd'" 218 DATEINT_FORMAT = "'yyyyMMdd'" 219 TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" 220 221 class Tokenizer(tokens.Tokenizer): 222 QUOTES = ["'", '"'] 223 IDENTIFIERS = ["`"] 224 STRING_ESCAPES = ["\\"] 225 ENCODE = "utf-8" 226 227 KEYWORDS = { 228 **tokens.Tokenizer.KEYWORDS, 229 "ADD ARCHIVE": TokenType.COMMAND, 230 "ADD ARCHIVES": TokenType.COMMAND, 231 "ADD FILE": TokenType.COMMAND, 232 "ADD FILES": TokenType.COMMAND, 233 "ADD JAR": TokenType.COMMAND, 234 "ADD JARS": TokenType.COMMAND, 235 "MSCK REPAIR": TokenType.COMMAND, 236 "REFRESH": TokenType.COMMAND, 237 "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, 238 "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT, 239 "VERSION AS OF": TokenType.VERSION_SNAPSHOT, 240 } 241 242 NUMERIC_LITERALS = { 243 "L": "BIGINT", 244 "S": "SMALLINT", 245 "Y": "TINYINT", 246 "D": "DOUBLE", 247 "F": "FLOAT", 248 "BD": "DECIMAL", 249 } 250 251 class Parser(parser.Parser): 252 LOG_DEFAULTS_TO_LN = True 253 STRICT_CAST = False 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "BASE64": exp.ToBase64.from_arg_list, 258 "COLLECT_LIST": exp.ArrayAgg.from_arg_list, 259 "COLLECT_SET": exp.SetAgg.from_arg_list, 260 "DATE_ADD": lambda args: exp.TsOrDsAdd( 261 
this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY") 262 ), 263 "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")( 264 [ 265 exp.TimeStrToTime(this=seq_get(args, 0)), 266 seq_get(args, 1), 267 ] 268 ), 269 "DATE_SUB": lambda args: exp.TsOrDsAdd( 270 this=seq_get(args, 0), 271 expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)), 272 unit=exp.Literal.string("DAY"), 273 ), 274 "DATEDIFF": lambda args: exp.DateDiff( 275 this=exp.TsOrDsToDate(this=seq_get(args, 0)), 276 expression=exp.TsOrDsToDate(this=seq_get(args, 1)), 277 ), 278 "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), 279 "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), 280 "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, 281 "LOCATE": locate_to_strposition, 282 "MAP": parse_var_map, 283 "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), 284 "PERCENTILE": exp.Quantile.from_arg_list, 285 "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, 286 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 287 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 288 ), 289 "SIZE": exp.ArraySize.from_arg_list, 290 "SPLIT": exp.RegexpSplit.from_arg_list, 291 "STR_TO_MAP": lambda args: exp.StrToMap( 292 this=seq_get(args, 0), 293 pair_delim=seq_get(args, 1) or exp.Literal.string(","), 294 key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), 295 ), 296 "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), 297 "TO_JSON": exp.JSONFormat.from_arg_list, 298 "UNBASE64": exp.FromBase64.from_arg_list, 299 "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True), 300 "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), 301 } 302 303 NO_PAREN_FUNCTION_PARSERS = { 304 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 305 "TRANSFORM": lambda self: self._parse_transform(), 306 } 307 308 PROPERTY_PARSERS = { 309 
**parser.Parser.PROPERTY_PARSERS, 310 "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties( 311 expressions=self._parse_wrapped_csv(self._parse_property) 312 ), 313 } 314 315 def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: 316 if not self._match(TokenType.L_PAREN, advance=False): 317 self._retreat(self._index - 1) 318 return None 319 320 args = self._parse_wrapped_csv(self._parse_lambda) 321 row_format_before = self._parse_row_format(match_row=True) 322 323 record_writer = None 324 if self._match_text_seq("RECORDWRITER"): 325 record_writer = self._parse_string() 326 327 if not self._match(TokenType.USING): 328 return exp.Transform.from_arg_list(args) 329 330 command_script = self._parse_string() 331 332 self._match(TokenType.ALIAS) 333 schema = self._parse_schema() 334 335 row_format_after = self._parse_row_format(match_row=True) 336 record_reader = None 337 if self._match_text_seq("RECORDREADER"): 338 record_reader = self._parse_string() 339 340 return self.expression( 341 exp.QueryTransform, 342 expressions=args, 343 command_script=command_script, 344 schema=schema, 345 row_format_before=row_format_before, 346 record_writer=record_writer, 347 row_format_after=row_format_after, 348 record_reader=record_reader, 349 ) 350 351 def _parse_types( 352 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 353 ) -> t.Optional[exp.Expression]: 354 """ 355 Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to 356 STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: 357 358 spark-sql (default)> select cast(1234 as varchar(2)); 359 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support 360 char/varchar type and simply treats them as string type. Please use string type 361 directly to avoid confusion. 
Otherwise, you can set spark.sql.legacy.charVarcharAsString 362 to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 363 364 1234 365 Time taken: 4.265 seconds, Fetched 1 row(s) 366 367 This shows that Spark doesn't truncate the value into '12', which is inconsistent with 368 what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 369 370 Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html 371 """ 372 this = super()._parse_types( 373 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 374 ) 375 376 if this and not schema: 377 return this.transform( 378 lambda node: node.replace(exp.DataType.build("text")) 379 if isinstance(node, exp.DataType) and node.is_type("char", "varchar") 380 else node, 381 copy=False, 382 ) 383 384 return this 385 386 def _parse_partition_and_order( 387 self, 388 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 389 return ( 390 self._parse_csv(self._parse_conjunction) 391 if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) 392 else [], 393 super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), 394 ) 395 396 class Generator(generator.Generator): 397 LIMIT_FETCH = "LIMIT" 398 TABLESAMPLE_WITH_METHOD = False 399 TABLESAMPLE_SIZE_IS_PERCENT = True 400 JOIN_HINTS = False 401 TABLE_HINTS = False 402 QUERY_HINTS = False 403 INDEX_ON = "ON TABLE" 404 EXTRACT_ALLOWS_QUOTES = False 405 NVL2_SUPPORTED = False 406 407 TYPE_MAPPING = { 408 **generator.Generator.TYPE_MAPPING, 409 exp.DataType.Type.BIT: "BOOLEAN", 410 exp.DataType.Type.DATETIME: "TIMESTAMP", 411 exp.DataType.Type.TEXT: "STRING", 412 exp.DataType.Type.TIME: "TIMESTAMP", 413 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 414 exp.DataType.Type.VARBINARY: "BINARY", 415 } 416 417 TRANSFORMS = { 418 **generator.Generator.TRANSFORMS, 419 exp.Group: transforms.preprocess([transforms.unalias_group]), 420 exp.Select: transforms.preprocess( 421 [ 
422 transforms.eliminate_qualify, 423 transforms.eliminate_distinct_on, 424 transforms.unnest_to_explode, 425 ] 426 ), 427 exp.Property: _property_sql, 428 exp.AnyValue: rename_func("FIRST"), 429 exp.ApproxDistinct: approx_count_distinct_sql, 430 exp.ArrayConcat: rename_func("CONCAT"), 431 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 432 exp.ArraySize: rename_func("SIZE"), 433 exp.ArraySort: _array_sort_sql, 434 exp.With: no_recursive_cte_sql, 435 exp.DateAdd: _add_date_sql, 436 exp.DateDiff: _date_diff_sql, 437 exp.DateStrToDate: rename_func("TO_DATE"), 438 exp.DateSub: _add_date_sql, 439 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 440 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 441 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 442 exp.FromBase64: rename_func("UNBASE64"), 443 exp.If: if_sql(), 444 exp.ILike: no_ilike_sql, 445 exp.IsNan: rename_func("ISNAN"), 446 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 447 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 448 exp.JSONFormat: _json_format_sql, 449 exp.Left: left_to_substring_sql, 450 exp.Map: var_map_sql, 451 exp.Max: max_or_greatest, 452 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 453 exp.Min: min_or_least, 454 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 455 exp.NotNullColumnConstraint: lambda self, e: "" 456 if e.args.get("allow_null") 457 else "NOT NULL", 458 exp.VarMap: var_map_sql, 459 exp.Create: _create_sql, 460 exp.Quantile: rename_func("PERCENTILE"), 461 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 462 exp.RegexpExtract: regexp_extract_sql, 463 exp.RegexpReplace: regexp_replace_sql, 464 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 465 exp.RegexpSplit: 
rename_func("SPLIT"), 466 exp.Right: right_to_substring_sql, 467 exp.SafeDivide: no_safe_divide_sql, 468 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 469 exp.SetAgg: rename_func("COLLECT_SET"), 470 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 471 exp.StrPosition: strposition_to_locate_sql, 472 exp.StrToDate: _str_to_date_sql, 473 exp.StrToTime: _str_to_time_sql, 474 exp.StrToUnix: _str_to_unix_sql, 475 exp.StructExtract: struct_extract_sql, 476 exp.TimeStrToDate: rename_func("TO_DATE"), 477 exp.TimeStrToTime: timestrtotime_sql, 478 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 479 exp.TimeToStr: _time_to_str, 480 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 481 exp.ToBase64: rename_func("BASE64"), 482 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 483 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 484 exp.TsOrDsToDate: _to_date_sql, 485 exp.TryCast: no_trycast_sql, 486 exp.UnixToStr: lambda self, e: self.func( 487 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 488 ), 489 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 490 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 491 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 492 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 493 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 494 exp.LastDateOfMonth: rename_func("LAST_DAY"), 495 exp.National: lambda self, e: self.national_sql(e, prefix=""), 496 exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 497 exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 498 exp.NotForReplicationColumnConstraint: lambda self, e: "", 499 exp.OnProperty: lambda self, e: "", 500 exp.PrimaryKeyColumnConstraint: 
lambda self, e: "PRIMARY KEY",
}

# Where each property may legally appear in a Hive CREATE statement.
# Properties Hive cannot express are marked UNSUPPORTED so the generator errors/drops them.
PROPERTIES_LOCATION = {
    **generator.Generator.PROPERTIES_LOCATION,
    exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
    exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
    exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
}

def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
    """Hook for dialects that must attach a storage provider to temp tables.

    Hive has no temporary storage provider (there are hive settings though),
    so the expression is returned unchanged. Subclasses (e.g. Spark) override this.
    """
    return expression

def parameter_sql(self, expression: exp.Parameter) -> str:
    """Render a session parameter, using Hive's ${name} interpolation syntax."""
    this = self.sql(expression, "this")
    parent = expression.parent

    if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"

def schema_sql(self, expression: exp.Schema) -> str:
    """Render a schema, dropping explicit ASC markers (Hive's default ordering)."""
    expression = expression.copy()

    for ordered in expression.find_all(exp.Ordered):
        # desc=False means an explicit ASC; clearing it omits the redundant keyword.
        if ordered.args.get("desc") is False:
            ordered.set("desc", None)

    return super().schema_sql(expression)

def constraint_sql(self, expression: exp.Constraint) -> str:
    """Render a named constraint, stripping property clauses Hive does not accept."""
    expression = expression.copy()

    # Materialize the iterator before popping so we don't mutate while traversing.
    for prop in list(expression.find_all(exp.Properties)):
        prop.pop()

    this = self.sql(expression, "this")
    expressions = self.expressions(expression, sep=" ", flat=True)
    return f"CONSTRAINT {this} {expressions}"

def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
    """Render ROW FORMAT SERDE, with optional trailing serde properties."""
    serde_props = self.sql(expression, "serde_properties")
    serde_props = f" {serde_props}" if serde_props else ""
    return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    """Render ARRAY_AGG as COLLECT_LIST, discarding any ORDER BY wrapper
    (Hive's COLLECT_LIST does not support an ordering clause)."""
    return self.func(
        "COLLECT_LIST",
        expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
    )
# Hive.Generator methods (the enclosing class header is outside this view).

def with_properties(self, properties: exp.Properties) -> str:
    """Render table properties as a TBLPROPERTIES (...) clause."""
    prefix = self.seg("TBLPROPERTIES")
    return self.properties(properties, prefix=prefix)

def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, normalizing forms Hive does not support as-is.

    - Unsized VARCHAR/NVARCHAR collapses to STRING.
    - Temporal types drop their parameters.
    - FLOAT(p) maps to FLOAT when p <= 32, otherwise to DOUBLE.
    """
    is_unsized_varchar = (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    )

    if is_unsized_varchar:
        expression = exp.DataType.build("text")
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        # Rebuild from the bare type so any precision/scale params are dropped.
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        param = expression.find(exp.DataTypeParam)
        if param:
            target = "float" if int(param.name) <= 32 else "double"
            expression = exp.DataType.build(target)

    return super().datatype_sql(expression)

def version_sql(self, expression: exp.Version) -> str:
    """Render a time-travel version clause, dropping the leading FOR keyword."""
    rendered = super().version_sql(expression)
    return rendered.replace("FOR ", "", 1)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Tokenizer overrides for HiveQL."""

    # Hive accepts both single- and double-quoted string literals.
    QUOTES = ["'", '"']
    # Identifiers are quoted with backticks.
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]
    ENCODE = "utf-8"

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # Resource-management statements are tokenized as opaque commands.
        "ADD ARCHIVE": TokenType.COMMAND,
        "ADD ARCHIVES": TokenType.COMMAND,
        "ADD FILE": TokenType.COMMAND,
        "ADD FILES": TokenType.COMMAND,
        "ADD JAR": TokenType.COMMAND,
        "ADD JARS": TokenType.COMMAND,
        "MSCK REPAIR": TokenType.COMMAND,
        "REFRESH": TokenType.COMMAND,
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        # Time-travel clauses (e.g. Delta/Iceberg style snapshots).
        "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
        "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
    }

    # Hive numeric-literal suffixes mapped to their SQL types (e.g. 1Y is a TINYINT).
    NUMERIC_LITERALS = {
        "L": "BIGINT",
        "S": "SMALLINT",
        "Y": "TINYINT",
        "D": "DOUBLE",
        "F": "FLOAT",
        "BD": "DECIMAL",
    }
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': 
<TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 
'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 
'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': 
<TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': 
<TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>}
class Parser(parser.Parser):
    """Parser overrides for HiveQL."""

    # LOG(x) with one argument means natural log in Hive.
    LOG_DEFAULTS_TO_LN = True
    # Hive's CAST is lenient (returns NULL on failure rather than erroring).
    STRICT_CAST = False

    # Hive function names mapped to sqlglot expression builders.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        # Hive's DATE_ADD/DATE_SUB always operate in days on a date-or-string input.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        # DATE_SUB(d, n) is modeled as DATE_ADD(d, -n).
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        # Hive's SPLIT takes a regex delimiter.
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            # Hive defaults: "," between pairs, ":" between key and value.
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        """Parse TRANSFORM(...) — either the scalar function or Hive's
        SELECT TRANSFORM ... USING 'script' query-transform syntax.

        Returns None (after rewinding the cursor) when TRANSFORM is not
        followed by a parenthesized argument list.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Not a TRANSFORM(...) call: undo the keyword consumption and bail.
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            # No USING clause: this is the plain TRANSFORM(...) function.
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            # Outside schema definitions, rewrite any (N)CHAR/VARCHAR to plain STRING.
            return this.transform(
                lambda node: node.replace(exp.DataType.build("text"))
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                else node,
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse window partitioning/ordering, accepting Hive's DISTRIBUTE BY
        and SORT BY as aliases for PARTITION BY and ORDER BY."""
        return (
            self._parse_csv(self._parse_conjunction)
            if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
            else [],
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': 
<function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 
'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function 
Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- CLONE_KINDS
- OPCLASS_FOLLOW_KEYWORDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates Hive SQL strings from sqlglot syntax trees.

    Overrides the base generator with Hive-specific keywords, type
    mappings, and per-expression SQL transforms (e.g. COLLECT_LIST,
    TBLPROPERTIES, STORED AS).
    """

    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        """Return the CREATE node unchanged.

        Hive has no temporary-storage-provider clause (related behavior is
        controlled through Hive settings instead), so this is a no-op hook
        that dialects with such a clause can override.
        """
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        """Render a parameter as ``${name}``, except as a SET item key."""
        name = self.sql(expression, "this")
        parent = expression.parent

        # SET key = value must emit the bare key, not ${key}
        is_set_key = isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem)
        return name if is_set_key else f"${{{name}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        """Render a schema, dropping explicit ASC markers from ordered columns."""
        schema = expression.copy()

        for ordered in schema.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                # desc=False means an explicit ASC; clear it so nothing is emitted
                ordered.set("desc", None)

        return super().schema_sql(schema)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        """Render a CONSTRAINT clause with any nested property nodes removed."""
        constraint = expression.copy()

        # materialize before popping so we don't mutate while traversing
        for props in list(constraint.find_all(exp.Properties)):
            props.pop()

        name = self.sql(constraint, "this")
        body = self.expressions(constraint, sep=" ", flat=True)
        return f"CONSTRAINT {name} {body}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        """Render ``ROW FORMAT SERDE``, appending serde properties when present."""
        props = self.sql(expression, "serde_properties")
        suffix = f" {props}" if props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{suffix}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render ARRAY_AGG as COLLECT_LIST, unwrapping an ORDER BY operand."""
        operand = expression.this
        if isinstance(operand, exp.Order):
            operand = operand.this
        return self.func("COLLECT_LIST", operand)

    def with_properties(self, properties: exp.Properties) -> str:
        """Render trailing table properties under a TBLPROPERTIES clause."""
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Map types Hive does not support onto supported equivalents."""
        is_sizeless_varchar = (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        )

        if is_sizeless_varchar:
            # VARCHAR/NVARCHAR without a length becomes STRING
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            # temporal types take no parameters in Hive
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_param = expression.find(exp.DataTypeParam)
            if size_param:
                # FLOAT(p): up to 32 bits stays FLOAT, wider becomes DOUBLE
                target = "float" if int(size_param.name) <= 32 else "double"
                expression = exp.DataType.build(target)

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        """Render versioned-table syntax without the leading FOR keyword."""
        return super().version_sql(expression).replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 
'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function 
min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 
'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 
'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def temporary_storage_provider(self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:
def parameter_sql(self, expression: exp.Parameter) -> str:
    """Render a parameter as a Hive variable reference (``${name}``).

    Inside a ``SET key = value`` statement the bare key is emitted instead,
    because Hive expects ``SET key = value`` rather than ``SET ${key} = value``.
    """
    name = self.sql(expression, "this")
    parent = expression.parent

    is_set_assignment = isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem)
    if is_set_assignment:
        # SET statements take the raw key, not a ${...} reference.
        return name

    return f"${{{name}}}"
def constraint_sql(self, expression: exp.Constraint) -> str:
    """Generate a ``CONSTRAINT`` clause, stripping any nested properties.

    Properties attached to a constraint are not supported here, so every
    ``exp.Properties`` node is removed from a copy before rendering.
    """
    expression = expression.copy()

    # Materialize first: popping nodes while walking would mutate the tree mid-iteration.
    props = list(expression.find_all(exp.Properties))
    for node in props:
        node.pop()

    name_sql = self.sql(expression, "this")
    body_sql = self.expressions(expression, sep=" ", flat=True)
    return f"CONSTRAINT {name_sql} {body_sql}"
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, normalizing types Hive treats differently.

    - Un-parameterized VARCHAR/NVARCHAR collapse to the generic text type.
    - Temporal types are rebuilt bare, dropping any parameters.
    - FLOAT(n) maps to FLOAT when n <= 32, otherwise to DOUBLE.
    """
    is_bare_varchar = (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    )

    if is_bare_varchar:
        expression = exp.DataType.build("text")
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_param = expression.find(exp.DataTypeParam)
        if size_param:
            target = "float" if int(size_param.name) <= 32 else "double"
            expression = exp.DataType.build(target)

    return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
279 @classmethod 280 def can_identify(cls, text: str, identify: str | bool = "safe") -> bool: 281 """Checks if text can be identified given an identify option. 282 283 Args: 284 text: The text to check. 285 identify: 286 "always" or `True`: Always returns true. 287 "safe": True if the identifier is case-insensitive. 288 289 Returns: 290 Whether or not the given text can be identified. 291 """ 292 if identify is True or identify == "always": 293 return True 294 295 if identify == "safe": 296 return not cls.case_sensitive(text) 297 298 return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOG_BASE_FIRST
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql