sqlglot.dialects.hive
"""Hive dialect: tokenizer, parser and generator overrides for HiveQL."""

from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# Maps a date/time unit to the Hive function used to shift by that unit and
# the multiplier that converts one unit into that function's native step.
# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# Sub-day units are computed from a UNIX_TIMESTAMP difference (seconds);
# the value is the SQL suffix that rescales seconds into the target unit.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units whose diffs are expressed via MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    """Render DATE_ADD / DATE_SUB as Hive's ADD_MONTHS or DATE_ADD.

    The unit picks the target function and a multiplier (see
    DATE_DELTA_INTERVAL); DateSub is handled by negating the multiplier.
    """
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        # Literal increment: fold the multiplier into the literal directly.
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        # Non-literal increment: multiply at runtime (only when needed).
        modified_increment = expression.expression.copy()
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    """Render DATEDIFF for Hive.

    Sub-day units are computed as a UNIX_TIMESTAMP difference rescaled by
    TIME_DIFF_FACTOR; month-like units use MONTHS_BETWEEN; everything else
    falls back to DATEDIFF, divided by the unit's day multiplier if > 1.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # Only parenthesize when a rescaling suffix is appended.
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    """Render JSONFormat; normalizes CAST('json string' AS JSON) via FROM_JSON."""
    this = expression.this
    if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    """Render ArraySort as SORT_ARRAY; Hive cannot take a custom comparator."""
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    """Render a table property as Hive's 'key'=value form."""
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as UNIX_TIMESTAMP with a Hive-formatted time pattern."""
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    """Render StrToDate, round-tripping through UNIX_TIMESTAMP for custom formats."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    # Default-formatted strings cast directly; otherwise parse with the format first.
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    """Render StrToTime, round-tripping through UNIX_TIMESTAMP for custom formats."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    """Render TimeToStr as DATE_FORMAT with the dialect's time format."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render TsOrDsToDate as TO_DATE, passing the format only when non-default."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    """The Hive SQL dialect (also the base for Spark-like dialects)."""

    # Hive requires table-sample aliases to appear after the TABLESAMPLE clause.
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Java SimpleDateFormat-style tokens mapped to strftime-style directives.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
        }

        # Numeric literal suffix -> target type (e.g. 1L is a BIGINT).
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False
        SUPPORTS_USER_DEFINED_TYPES = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            # Hive's DATE_ADD/DATE_SUB always operate in days.
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is modeled as TsOrDsAdd with a negated increment.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            # STR_TO_MAP delimiters default to "," between pairs and ":" within them.
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse TRANSFORM(...), either as a plain function call or as Hive's
            full query-transform clause (ROW FORMAT / USING script / AS schema).

            Returns None (after backtracking) when TRANSFORM is not followed by
            a parenthesized argument list, i.e. it is being used as an identifier.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            # Without USING this is just the TRANSFORM(...) function.
            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                # Outside schema definitions, CHAR/VARCHAR collapse to STRING (see docstring).
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            """Parse PARTITION BY / DISTRIBUTE BY expressions and an optional
            (SORT BY-aware) ORDER clause for window/transform specs."""
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ROW FORMAT SERDE with optional trailing SERDEPROPERTIES."""
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST, dropping any ORDER BY wrapper
            since Hive's COLLECT_LIST does not support ordering."""
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render trailing table properties under a TBLPROPERTIES clause."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type with Hive adjustments: parameterless VARCHAR
            becomes STRING, temporal types lose their parameters, and FLOAT(n)
            is widened to DOUBLE when n > 32."""
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            """Render time-travel clauses, stripping the leading FOR that the
            base generator emits (Hive uses e.g. 'VERSION AS OF', not 'FOR VERSION AS OF')."""
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
151class Hive(Dialect): 152 ALIAS_POST_TABLESAMPLE = True 153 IDENTIFIERS_CAN_START_WITH_DIGIT = True 154 155 # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description 156 RESOLVES_IDENTIFIERS_AS_UPPERCASE = None 157 158 TIME_MAPPING = { 159 "y": "%Y", 160 "Y": "%Y", 161 "YYYY": "%Y", 162 "yyyy": "%Y", 163 "YY": "%y", 164 "yy": "%y", 165 "MMMM": "%B", 166 "MMM": "%b", 167 "MM": "%m", 168 "M": "%-m", 169 "dd": "%d", 170 "d": "%-d", 171 "HH": "%H", 172 "H": "%-H", 173 "hh": "%I", 174 "h": "%-I", 175 "mm": "%M", 176 "m": "%-M", 177 "ss": "%S", 178 "s": "%-S", 179 "SSSSSS": "%f", 180 "a": "%p", 181 "DD": "%j", 182 "D": "%-j", 183 "E": "%a", 184 "EE": "%a", 185 "EEE": "%a", 186 "EEEE": "%A", 187 } 188 189 DATE_FORMAT = "'yyyy-MM-dd'" 190 DATEINT_FORMAT = "'yyyyMMdd'" 191 TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" 192 193 class Tokenizer(tokens.Tokenizer): 194 QUOTES = ["'", '"'] 195 IDENTIFIERS = ["`"] 196 STRING_ESCAPES = ["\\"] 197 ENCODE = "utf-8" 198 199 KEYWORDS = { 200 **tokens.Tokenizer.KEYWORDS, 201 "ADD ARCHIVE": TokenType.COMMAND, 202 "ADD ARCHIVES": TokenType.COMMAND, 203 "ADD FILE": TokenType.COMMAND, 204 "ADD FILES": TokenType.COMMAND, 205 "ADD JAR": TokenType.COMMAND, 206 "ADD JARS": TokenType.COMMAND, 207 "MSCK REPAIR": TokenType.COMMAND, 208 "REFRESH": TokenType.COMMAND, 209 "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, 210 "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT, 211 "VERSION AS OF": TokenType.VERSION_SNAPSHOT, 212 } 213 214 NUMERIC_LITERALS = { 215 "L": "BIGINT", 216 "S": "SMALLINT", 217 "Y": "TINYINT", 218 "D": "DOUBLE", 219 "F": "FLOAT", 220 "BD": "DECIMAL", 221 } 222 223 class Parser(parser.Parser): 224 LOG_DEFAULTS_TO_LN = True 225 STRICT_CAST = False 226 SUPPORTS_USER_DEFINED_TYPES = False 227 228 FUNCTIONS = { 229 **parser.Parser.FUNCTIONS, 230 "BASE64": exp.ToBase64.from_arg_list, 231 "COLLECT_LIST": exp.ArrayAgg.from_arg_list, 232 "COLLECT_SET": exp.SetAgg.from_arg_list, 233 "DATE_ADD": lambda args: exp.TsOrDsAdd( 234 
this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY") 235 ), 236 "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")( 237 [ 238 exp.TimeStrToTime(this=seq_get(args, 0)), 239 seq_get(args, 1), 240 ] 241 ), 242 "DATE_SUB": lambda args: exp.TsOrDsAdd( 243 this=seq_get(args, 0), 244 expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)), 245 unit=exp.Literal.string("DAY"), 246 ), 247 "DATEDIFF": lambda args: exp.DateDiff( 248 this=exp.TsOrDsToDate(this=seq_get(args, 0)), 249 expression=exp.TsOrDsToDate(this=seq_get(args, 1)), 250 ), 251 "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), 252 "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), 253 "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, 254 "LOCATE": locate_to_strposition, 255 "MAP": parse_var_map, 256 "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), 257 "PERCENTILE": exp.Quantile.from_arg_list, 258 "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, 259 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 260 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 261 ), 262 "SIZE": exp.ArraySize.from_arg_list, 263 "SPLIT": exp.RegexpSplit.from_arg_list, 264 "STR_TO_MAP": lambda args: exp.StrToMap( 265 this=seq_get(args, 0), 266 pair_delim=seq_get(args, 1) or exp.Literal.string(","), 267 key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), 268 ), 269 "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), 270 "TO_JSON": exp.JSONFormat.from_arg_list, 271 "UNBASE64": exp.FromBase64.from_arg_list, 272 "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True), 273 "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), 274 } 275 276 NO_PAREN_FUNCTION_PARSERS = { 277 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 278 "TRANSFORM": lambda self: self._parse_transform(), 279 } 280 281 PROPERTY_PARSERS = { 282 
**parser.Parser.PROPERTY_PARSERS, 283 "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties( 284 expressions=self._parse_wrapped_csv(self._parse_property) 285 ), 286 } 287 288 def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: 289 if not self._match(TokenType.L_PAREN, advance=False): 290 self._retreat(self._index - 1) 291 return None 292 293 args = self._parse_wrapped_csv(self._parse_lambda) 294 row_format_before = self._parse_row_format(match_row=True) 295 296 record_writer = None 297 if self._match_text_seq("RECORDWRITER"): 298 record_writer = self._parse_string() 299 300 if not self._match(TokenType.USING): 301 return exp.Transform.from_arg_list(args) 302 303 command_script = self._parse_string() 304 305 self._match(TokenType.ALIAS) 306 schema = self._parse_schema() 307 308 row_format_after = self._parse_row_format(match_row=True) 309 record_reader = None 310 if self._match_text_seq("RECORDREADER"): 311 record_reader = self._parse_string() 312 313 return self.expression( 314 exp.QueryTransform, 315 expressions=args, 316 command_script=command_script, 317 schema=schema, 318 row_format_before=row_format_before, 319 record_writer=record_writer, 320 row_format_after=row_format_after, 321 record_reader=record_reader, 322 ) 323 324 def _parse_types( 325 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 326 ) -> t.Optional[exp.Expression]: 327 """ 328 Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to 329 STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: 330 331 spark-sql (default)> select cast(1234 as varchar(2)); 332 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support 333 char/varchar type and simply treats them as string type. Please use string type 334 directly to avoid confusion. 
Otherwise, you can set spark.sql.legacy.charVarcharAsString 335 to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 336 337 1234 338 Time taken: 4.265 seconds, Fetched 1 row(s) 339 340 This shows that Spark doesn't truncate the value into '12', which is inconsistent with 341 what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 342 343 Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html 344 """ 345 this = super()._parse_types( 346 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 347 ) 348 349 if this and not schema: 350 return this.transform( 351 lambda node: node.replace(exp.DataType.build("text")) 352 if isinstance(node, exp.DataType) and node.is_type("char", "varchar") 353 else node, 354 copy=False, 355 ) 356 357 return this 358 359 def _parse_partition_and_order( 360 self, 361 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 362 return ( 363 self._parse_csv(self._parse_conjunction) 364 if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) 365 else [], 366 super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), 367 ) 368 369 class Generator(generator.Generator): 370 LIMIT_FETCH = "LIMIT" 371 TABLESAMPLE_WITH_METHOD = False 372 TABLESAMPLE_SIZE_IS_PERCENT = True 373 JOIN_HINTS = False 374 TABLE_HINTS = False 375 QUERY_HINTS = False 376 INDEX_ON = "ON TABLE" 377 EXTRACT_ALLOWS_QUOTES = False 378 NVL2_SUPPORTED = False 379 380 TYPE_MAPPING = { 381 **generator.Generator.TYPE_MAPPING, 382 exp.DataType.Type.BIT: "BOOLEAN", 383 exp.DataType.Type.DATETIME: "TIMESTAMP", 384 exp.DataType.Type.TEXT: "STRING", 385 exp.DataType.Type.TIME: "TIMESTAMP", 386 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 387 exp.DataType.Type.VARBINARY: "BINARY", 388 } 389 390 TRANSFORMS = { 391 **generator.Generator.TRANSFORMS, 392 exp.Group: transforms.preprocess([transforms.unalias_group]), 393 exp.Select: transforms.preprocess( 394 [ 
395 transforms.eliminate_qualify, 396 transforms.eliminate_distinct_on, 397 transforms.unnest_to_explode, 398 ] 399 ), 400 exp.Property: _property_sql, 401 exp.AnyValue: rename_func("FIRST"), 402 exp.ApproxDistinct: approx_count_distinct_sql, 403 exp.ArrayConcat: rename_func("CONCAT"), 404 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 405 exp.ArraySize: rename_func("SIZE"), 406 exp.ArraySort: _array_sort_sql, 407 exp.With: no_recursive_cte_sql, 408 exp.DateAdd: _add_date_sql, 409 exp.DateDiff: _date_diff_sql, 410 exp.DateStrToDate: rename_func("TO_DATE"), 411 exp.DateSub: _add_date_sql, 412 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 413 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 414 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 415 exp.FromBase64: rename_func("UNBASE64"), 416 exp.If: if_sql, 417 exp.ILike: no_ilike_sql, 418 exp.IsNan: rename_func("ISNAN"), 419 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 420 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 421 exp.JSONFormat: _json_format_sql, 422 exp.Left: left_to_substring_sql, 423 exp.Map: var_map_sql, 424 exp.Max: max_or_greatest, 425 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 426 exp.Min: min_or_least, 427 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 428 exp.NotNullColumnConstraint: lambda self, e: "" 429 if e.args.get("allow_null") 430 else "NOT NULL", 431 exp.VarMap: var_map_sql, 432 exp.Create: create_with_partitions_sql, 433 exp.Quantile: rename_func("PERCENTILE"), 434 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 435 exp.RegexpExtract: regexp_extract_sql, 436 exp.RegexpReplace: regexp_replace_sql, 437 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 438 
exp.RegexpSplit: rename_func("SPLIT"), 439 exp.Right: right_to_substring_sql, 440 exp.SafeDivide: no_safe_divide_sql, 441 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 442 exp.SetAgg: rename_func("COLLECT_SET"), 443 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 444 exp.StrPosition: strposition_to_locate_sql, 445 exp.StrToDate: _str_to_date_sql, 446 exp.StrToTime: _str_to_time_sql, 447 exp.StrToUnix: _str_to_unix_sql, 448 exp.StructExtract: struct_extract_sql, 449 exp.TimeStrToDate: rename_func("TO_DATE"), 450 exp.TimeStrToTime: timestrtotime_sql, 451 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 452 exp.TimeToStr: _time_to_str, 453 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 454 exp.ToBase64: rename_func("BASE64"), 455 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 456 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 457 exp.TsOrDsToDate: _to_date_sql, 458 exp.TryCast: no_trycast_sql, 459 exp.UnixToStr: lambda self, e: self.func( 460 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 461 ), 462 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 463 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 464 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 465 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 466 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 467 exp.LastDateOfMonth: rename_func("LAST_DAY"), 468 exp.National: lambda self, e: self.national_sql(e, prefix=""), 469 } 470 471 PROPERTIES_LOCATION = { 472 **generator.Generator.PROPERTIES_LOCATION, 473 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 474 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 475 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 476 } 477 478 def rowformatserdeproperty_sql(self, 
expression: exp.RowFormatSerdeProperty) -> str: 479 serde_props = self.sql(expression, "serde_properties") 480 serde_props = f" {serde_props}" if serde_props else "" 481 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 482 483 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 484 return self.func( 485 "COLLECT_LIST", 486 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 487 ) 488 489 def with_properties(self, properties: exp.Properties) -> str: 490 return self.properties(properties, prefix=self.seg("TBLPROPERTIES")) 491 492 def datatype_sql(self, expression: exp.DataType) -> str: 493 if ( 494 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 495 and not expression.expressions 496 ): 497 expression = exp.DataType.build("text") 498 elif expression.this in exp.DataType.TEMPORAL_TYPES: 499 expression = exp.DataType.build(expression.this) 500 elif expression.is_type("float"): 501 size_expression = expression.find(exp.DataTypeParam) 502 if size_expression: 503 size = int(size_expression.name) 504 expression = ( 505 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 506 ) 507 508 return super().datatype_sql(expression) 509 510 def version_sql(self, expression: exp.Version) -> str: 511 sql = super().version_sql(expression) 512 return sql.replace("FOR ", "", 1)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
parser_class =
<class 'sqlglot.dialects.hive.Hive.Parser'>
generator_class =
<class 'sqlglot.dialects.hive.Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for HiveQL."""

    # Hive string literals may use single or double quotes.
    QUOTES = ["'", '"']
    # Identifiers (e.g. column names with spaces) are quoted with backticks.
    IDENTIFIERS = ["`"]
    # Backslash escapes characters inside string literals.
    STRING_ESCAPES = ["\\"]
    ENCODE = "utf-8"

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # Hive resource-management statements are passed through as opaque commands.
        "ADD ARCHIVE": TokenType.COMMAND,
        "ADD ARCHIVES": TokenType.COMMAND,
        "ADD FILE": TokenType.COMMAND,
        "ADD FILES": TokenType.COMMAND,
        "ADD JAR": TokenType.COMMAND,
        "ADD JARS": TokenType.COMMAND,
        "MSCK REPAIR": TokenType.COMMAND,
        "REFRESH": TokenType.COMMAND,
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        # Time-travel clauses (e.g. Delta/Iceberg tables accessed via Hive syntax).
        "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
        "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
    }

    # Suffixes that fix a numeric literal's type, e.g. 10L -> BIGINT, 1.5BD -> DECIMAL.
    NUMERIC_LITERALS = {
        "L": "BIGINT",
        "S": "SMALLINT",
        "Y": "TINYINT",
        "D": "DOUBLE",
        "F": "FLOAT",
        "BD": "DECIMAL",
    }
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': 
<TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 
'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 
'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 
'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': 
<TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>}
class Parser(parser.Parser):
    """Parser settings and overrides for HiveQL."""

    # LOG(x) with a single argument means natural log in Hive.
    LOG_DEFAULTS_TO_LN = True
    # Hive casts are lenient (invalid casts yield NULL rather than erroring).
    STRICT_CAST = False
    SUPPORTS_USER_DEFINED_TYPES = False

    # Hive-specific function-name -> expression-builder overrides.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        # Hive DATE_ADD/DATE_SUB always operate in days on a date-or-string value.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        # DATE_SUB(x, n) is modeled as DATE_ADD(x, -n).
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        # SPLIT's delimiter is a regular expression in Hive.
        "SPLIT": exp.RegexpSplit.from_arg_list,
        # STR_TO_MAP defaults: ',' between pairs, ':' between key and value.
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        """Parse Hive's TRANSFORM(...) [USING 'script' [AS schema]] construct.

        Returns a plain exp.Transform when there is no USING clause, an
        exp.QueryTransform for the full script-transform syntax, or None when
        TRANSFORM is not followed by '(' (so it can parse as an identifier).
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Not the TRANSFORM(...) construct; rewind so the token can be re-parsed.
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            # No script: this is just the scalar TRANSFORM function.
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        # Optional `AS (col1 type, ...)` output schema.
        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            # Outside schema definitions, rewrite CHAR/VARCHAR (of any length) to STRING.
            return this.transform(
                lambda node: node.replace(exp.DataType.build("text"))
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                else node,
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse a window spec's partition/order, accepting Hive's DISTRIBUTE BY
        and SORT BY as synonyms for PARTITION BY and ORDER BY."""
        return (
            self._parse_csv(self._parse_conjunction)
            if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
            else [],
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 
'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function 
Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates Hive-flavored SQL from a sqlglot syntax tree.

    Extends the base ``generator.Generator`` with Hive-specific type names
    (``TYPE_MAPPING``), per-expression renderers (``TRANSFORMS``), property
    placement rules (``PROPERTIES_LOCATION``), and a few method overrides.
    """

    LIMIT_FETCH = "LIMIT"  # emit LIMIT rather than FETCH
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True  # TABLESAMPLE sizes are percentages
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"  # CREATE INDEX ... ON TABLE <name>
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False

    # Hive spellings for data types it does not name natively.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    # Expression-node -> SQL renderer; entries here override the base class.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        # CONCAT_WS takes the separator first, so this swaps the arguments.
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql,
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: create_with_partitions_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        # The \\Q prefix makes the delimiter literal (quoted) in SPLIT's regex.
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
    }

    # Where each property is placed within a rendered CREATE statement.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        """Render ``ROW FORMAT SERDE <serde>``, appending any serde properties."""
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render ARRAY_AGG as COLLECT_LIST, unwrapping an ORDER BY node if present."""
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        """Render properties under a TBLPROPERTIES prefix."""
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type after normalizing a few type families for Hive."""
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            # Unsized VARCHAR/NVARCHAR is rebuilt as TEXT (rendered STRING via
            # TYPE_MAPPING above).
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            # Rebuild temporal types from the bare type, dropping any parameters.
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                # FLOAT(p): keep FLOAT for p <= 32, otherwise widen to DOUBLE.
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        """Render a versioned-table clause, stripping the first "FOR " emitted
        by the base implementation."""
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalDayToSecondSpan'>: 'DAY TO SECOND', <class 'sqlglot.expressions.IntervalYearToMonthSpan'>: 'YEAR TO MONTH', <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 
'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 
'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
492 def datatype_sql(self, expression: exp.DataType) -> str: 493 if ( 494 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 495 and not expression.expressions 496 ): 497 expression = exp.DataType.build("text") 498 elif expression.this in exp.DataType.TEMPORAL_TYPES: 499 expression = exp.DataType.build(expression.this) 500 elif expression.is_type("float"): 501 size_expression = expression.find(exp.DataTypeParam) 502 if size_expression: 503 size = int(size_expression.name) 504 expression = ( 505 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 506 ) 507 508 return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(cls, text: str, identify: str | bool = 'safe') -> bool:
256 @classmethod 257 def can_identify(cls, text: str, identify: str | bool = "safe") -> bool: 258 """Checks if text can be identified given an identify option. 259 260 Args: 261 text: The text to check. 262 identify: 263 "always" or `True`: Always returns true. 264 "safe": True if the identifier is case-insensitive. 265 266 Returns: 267 Whether or not the given text can be identified. 268 """ 269 if identify is True or identify == "always": 270 return True 271 272 if identify == "safe": 273 return not cls.case_sensitive(text) 274 275 return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
TOKENIZER_CLASS =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql