sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
# Keyed by upper-cased unit name; consumed by _add_date_sql and _date_diff_sql.
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# SQL suffix that _date_diff_sql appends to a UNIX_TIMESTAMP difference for each unit.
# The empty string for "SECOND" means the difference is used as-is.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units for which _date_diff_sql emits MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    """Render a ``DateAdd``/``DateSub`` node as a date-arithmetic function call.

    The upper-cased unit selects the function name and a multiplier from
    ``DATE_DELTA_INTERVAL``; units not listed there fall back to ``("DATE_ADD", 1)``.
    For ``DateSub`` the multiplier's sign is flipped.

    Returns:
        The SQL produced by ``self.func(func, expression.this, increment)``.
    """
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        # Numeric literal: fold the multiplier in now. The literal text goes through int().
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        # Non-literal amount: emit `<amount> * multiplier`, unless the multiplier is 1.
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    """Render a ``DateDiff`` node.

    Units listed in ``TIME_DIFF_FACTOR`` are computed as
    ``UNIX_TIMESTAMP(this) - UNIX_TIMESTAMP(expression)`` with the unit's factor applied.
    Any other unit uses ``MONTHS_BETWEEN`` (for YEAR/QUARTER/MONTH) or ``DATEDIFF``,
    divided by the unit's multiplier from ``DATE_DELTA_INTERVAL`` when it is greater than 1.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # An empty factor ("SECOND") returns the bare difference without parentheses.
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    # Only the multiplier is needed here; unknown units default to 1 (no division).
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    """Render a ``JSONFormat`` node as ``TO_JSON``.

    When the argument is a cast of a string literal to JSON, the string is round-tripped
    through ``FROM_JSON``/``TO_JSON`` inside a dummy array (see the inline comments).
    Otherwise ``TO_JSON`` is called with the argument and the node's ``options`` arg.
    """
    this = expression.this
    if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    """Render ``ArraySort`` as ``SORT_ARRAY(this)``.

    A comparator (``expression.expression``) is reported through ``self.unsupported``
    and is not included in the generated SQL.
    """
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    """Render a property as ``'<name>'=<value>``, with the name in single quotes."""
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    """Render ``StrToUnix`` as ``UNIX_TIMESTAMP``, passing the format from ``_time_format``.

    ``_time_format`` yields ``None`` for the default ``Hive.TIME_FORMAT``.
    NOTE(review): presumably ``self.func`` omits ``None`` arguments -- confirm.
    """
    return self.func("UNIX_TIMESTAMP", expression.this, _time_format(self, expression))


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    """Render ``StrToDate`` as ``CAST(... AS DATE)``.

    If the format is neither ``Hive.TIME_FORMAT`` nor ``Hive.DATE_FORMAT``, the operand is
    first wrapped in ``FROM_UNIXTIME(UNIX_TIMESTAMP(<this>, <format>))``.
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    """Render ``StrToTime`` as ``CAST(... AS TIMESTAMP)``.

    If the format is neither ``Hive.TIME_FORMAT`` nor ``Hive.DATE_FORMAT``, the operand is
    first wrapped in ``FROM_UNIXTIME(UNIX_TIMESTAMP(<this>, <format>))``.
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_format(
    self: generator.Generator, expression: exp.UnixToStr | exp.StrToUnix
) -> t.Optional[str]:
    """Return the expression's formatted time format, or ``None`` if it equals ``Hive.TIME_FORMAT``."""
    time_format = self.format_time(expression)
    if time_format == Hive.TIME_FORMAT:
        return None
    return time_format


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    """Render ``TimeToStr`` as ``DATE_FORMAT(<this>, <format>)``."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render ``TsOrDsToDate`` as ``TO_DATE``.

    The format argument is emitted only when one is present and it is neither
    ``Hive.TIME_FORMAT`` nor ``Hive.DATE_FORMAT``.
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    """Hive dialect: groups the tokenizer, parser and generator customizations defined below."""

    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Maps time-format tokens used in Hive format strings to `%`-style directives.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Default formats, stored with their single quotes: the module-level helpers compare
    # them against self.format_time(...) output and the generator interpolates
    # DATEINT_FORMAT straight into SQL text.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for the Hive dialect."""

        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        # Base keywords plus multi-word phrases mapped to COMMAND / SERDE_PROPERTIES tokens.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Numeric-literal suffix -> type name.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        """Parser overrides for the Hive dialect."""

        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Function name -> callable building the expression from the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            # The first argument is wrapped in TimeStrToTime before building TimeToStr.
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is represented as TsOrDsAdd with the amount multiplied by -1.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> exp.Transform | exp.QueryTransform:
            """Parse the arguments and trailing clauses of a ``TRANSFORM`` call.

            Returns:
                ``exp.Transform`` built from the arguments when no ``USING`` token follows;
                otherwise ``exp.QueryTransform`` carrying the arguments, the command script,
                the schema, and the row-format / ``RECORDWRITER`` / ``RECORDREADER`` values
                parsed before and after ``USING``.
            """
            args = self._parse_csv(self._parse_lambda)
            self._match_r_paren()

            # NOTE(review): presumably None when no row-format clause is present -- confirm.
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # The alias token before the schema is optional: the match result is ignored.
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                # Outside schema definitions, swap each CHAR/VARCHAR DataType node that
                # transform() visits for a TEXT type; the tree is modified in place (copy=False).
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        """SQL generator overrides for the Hive dialect."""

        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False

        # Data types that are rendered under a different type name.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        # Expression type -> callable(self, expression) returning SQL; extends the base table.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # CONCAT_WS takes the separator (e.expression) before the array (e.this).
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with a \Q marker via CONCAT before being passed to SPLIT.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # Only `this` and `expression` are rendered; the node's unit is not emitted.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ``ROW FORMAT SERDE <this>``, followed by the serde properties when present."""
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ``ArrayAgg`` as ``COLLECT_LIST``.

            When the argument is an ``exp.Order`` node, only the expression it wraps is
            passed on, so the ordering does not appear in the output.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the given properties with a ``TBLPROPERTIES`` prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Normalize a data type, then delegate to the base generator.

            - VARCHAR/NVARCHAR without type arguments is replaced by a ``text`` type.
            - Temporal types are rebuilt from their type alone.
            - FLOAT with a size becomes ``float`` when the size is <= 32, else ``double``.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeSize)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
class Hive(Dialect):
    """Hive dialect: groups the tokenizer, parser and generator customizations defined below."""

    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Maps time-format tokens used in Hive format strings to `%`-style directives.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Default formats, stored with their single quotes: the generator interpolates
    # DATEINT_FORMAT straight into SQL text (see DateToDi / DiToDate below).
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for the Hive dialect."""

        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        # Base keywords plus multi-word phrases mapped to COMMAND / SERDE_PROPERTIES tokens.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Numeric-literal suffix -> type name.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        """Parser overrides for the Hive dialect."""

        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Function name -> callable building the expression from the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            # The first argument is wrapped in TimeStrToTime before building TimeToStr.
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is represented as TsOrDsAdd with the amount multiplied by -1.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> exp.Transform | exp.QueryTransform:
            """Parse the arguments and trailing clauses of a ``TRANSFORM`` call.

            Returns:
                ``exp.Transform`` built from the arguments when no ``USING`` token follows;
                otherwise ``exp.QueryTransform`` carrying the arguments, the command script,
                the schema, and the row-format / ``RECORDWRITER`` / ``RECORDREADER`` values
                parsed before and after ``USING``.
            """
            args = self._parse_csv(self._parse_lambda)
            self._match_r_paren()

            # NOTE(review): presumably None when no row-format clause is present -- confirm.
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # The alias token before the schema is optional: the match result is ignored.
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                # Outside schema definitions, swap each CHAR/VARCHAR DataType node that
                # transform() visits for a TEXT type; the tree is modified in place (copy=False).
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        """SQL generator overrides for the Hive dialect."""

        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False

        # Data types that are rendered under a different type name.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        # Expression type -> callable(self, expression) returning SQL; extends the base table.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # CONCAT_WS takes the separator (e.expression) before the array (e.this).
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with a \Q marker via CONCAT before being passed to SPLIT.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # Only `this` and `expression` are rendered; the node's unit is not emitted.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ``ROW FORMAT SERDE <this>``, followed by the serde properties when present."""
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ``ArrayAgg`` as ``COLLECT_LIST``.

            When the argument is an ``exp.Order`` node, only the expression it wraps is
            passed on, so the ordering does not appear in the output.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the given properties with a ``TBLPROPERTIES`` prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Normalize a data type, then delegate to the base generator.

            - VARCHAR/NVARCHAR without type arguments is replaced by a ``text`` type.
            - Temporal types are rebuilt from their type alone.
            - FLOAT with a size becomes ``float`` when the size is <= 32, else ``double``.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeSize)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
parser_class =
<class 'sqlglot.dialects.hive.Hive.Parser'>
generator_class =
<class 'sqlglot.dialects.hive.Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for the Hive dialect."""

    # String literals may use either quote character; identifiers use backticks.
    QUOTES = ["'", '"']
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]
    # NOTE(review): presumably the encoding used for byte-level handling of
    # strings -- confirm against tokens.Tokenizer.
    ENCODE = "utf-8"

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # Hive statements that are tokenized as opaque commands.
        **dict.fromkeys(
            (
                "ADD ARCHIVE",
                "ADD ARCHIVES",
                "ADD FILE",
                "ADD FILES",
                "ADD JAR",
                "ADD JARS",
                "MSCK REPAIR",
                "REFRESH",
            ),
            TokenType.COMMAND,
        ),
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
    }

    # Maps a literal suffix to a type name (e.g. "L" -> "BIGINT").
    # NOTE(review): presumably the suffix follows a numeric literal, as the
    # attribute name suggests -- confirm against tokens.Tokenizer.
    NUMERIC_LITERALS = dict(
        L="BIGINT",
        S="SMALLINT",
        Y="TINYINT",
        D="DOUBLE",
        F="FLOAT",
        BD="DECIMAL",
    )
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 
'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'IF': <TokenType.IF: 'IF'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 
'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NEXT VALUE FOR': <TokenType.NEXT_VALUE_FOR: 'NEXT_VALUE_FOR'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 
'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': 
<TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': 
<TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
class Parser(parser.Parser):
    """Parser for the Hive dialect.

    Extends the base parser's lookup tables with Hive function names and adds
    parsing for TRANSFORM(...) clauses and Hive-specific type handling.
    """

    # NOTE(review): flag names suggest single-argument LOG is read as LN and
    # that CAST is parsed non-strictly -- confirm against parser.Parser.
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    # Hive function name -> callable that builds the expression node from the
    # parsed argument list.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        # DATE_ADD / DATE_SUB are both built as a TsOrDsAdd with a DAY unit;
        # DATE_SUB multiplies its increment by -1.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            unit=exp.Literal.string("DAY"),
        ),
        # The first argument is wrapped in TimeStrToTime before formatting.
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [exp.TimeStrToTime(this=seq_get(args, 0)), seq_get(args, 1)]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(
                this=seq_get(args, 1),
                expression=exp.Literal.number(-1),
            ),
            unit=exp.Literal.string("DAY"),
        ),
        # Both operands are wrapped in TsOrDsToDate.
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            group=seq_get(args, 2),
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    # TRANSFORM is handled by a dedicated method instead of a plain builder.
    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> exp.Transform | exp.QueryTransform:
        """Parse the remainder of a TRANSFORM(...) call.

        Returns:
            A plain `exp.Transform` built from the arguments when no USING
            token follows; otherwise an `exp.QueryTransform` carrying the
            command script, the schema, and the optional row formats and
            record writer/reader that were parsed.
        """
        transform_args = self._parse_csv(self._parse_lambda)
        self._match_r_paren()

        # Optional clauses that may appear before USING.
        input_row_format = self._parse_row_format(match_row=True)
        writer = self._parse_string() if self._match_text_seq("RECORDWRITER") else None

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(transform_args)

        script = self._parse_string()

        # An alias token is consumed if present; the schema is parsed either way.
        self._match(TokenType.ALIAS)
        output_schema = self._parse_schema()

        # Optional clauses that may appear after the schema.
        output_row_format = self._parse_row_format(match_row=True)
        reader = self._parse_string() if self._match_text_seq("RECORDREADER") else None

        return self.expression(
            exp.QueryTransform,
            expressions=transform_args,
            command_script=script,
            schema=output_schema,
            row_format_before=input_row_format,
            record_writer=writer,
            row_format_after=output_row_format,
            record_reader=reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse a type, replacing CHAR/VARCHAR with TEXT outside of schema definitions.

        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as
        casts to STRING in all contexts except for schema definitions. For example, this
        is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent
        with what other dialects (e.g. postgres) do, so the length is dropped in order to
        transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        parsed = super()._parse_types(check_func=check_func, schema=schema)

        # Nothing was parsed, or this is a schema definition: leave it untouched.
        if not parsed or schema:
            return parsed

        def _char_to_text(node: exp.Expression) -> exp.Expression:
            # Swap CHAR/VARCHAR data types for TEXT; keep every other node.
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        return parsed.transform(_char_to_text, copy=False)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 
'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 
'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
FUNCTION_PARSERS =
{'ANY_VALUE': <function Parser.<lambda>>, 'CAST': <function Parser.<lambda>>, 'CONCAT': <function Parser.<lambda>>, 'CONVERT': <function Parser.<lambda>>, 'DECODE': <function Parser.<lambda>>, 'EXTRACT': <function Parser.<lambda>>, 'JSON_OBJECT': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATCH': <function Parser.<lambda>>, 'OPENJSON': <function Parser.<lambda>>, 'POSITION': <function Parser.<lambda>>, 'SAFE_CAST': <function Parser.<lambda>>, 'STRING_AGG': <function Parser.<lambda>>, 'SUBSTRING': <function Parser.<lambda>>, 'TRIM': <function Parser.<lambda>>, 'TRY_CAST': <function Parser.<lambda>>, 'TRY_CONVERT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function 
Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- ADD_CONSTRAINT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """SQL generator for the Hive dialect.

    Specializes the base ``generator.Generator`` by overriding feature flags,
    the data type name mapping, the per-expression SQL transforms and the
    placement of table properties, and by customizing how a few specific
    expressions (row format SERDE, ARRAY_AGG, table properties, data types)
    are rendered.
    """

    # Feature flags / keywords overriding the base generator's values.
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False

    # Base type mapping plus the types that are emitted under a different name here.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    # Base transforms plus overrides: each entry maps an expression class to the
    # callable (taking the generator and the expression) that produces its SQL.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        # SELECT is rewritten by these transforms, in this order, before generation.
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        # Note the swapped argument order: `expression` is emitted first, then `this`.
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        # DateSub shares the DateAdd helper, which negates the increment for DateSub.
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        # An InputOutputFormat child is rendered as SQL; anything else is emitted
        # as its upper-cased name.
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql,
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        # Both JSON extraction expressions are emitted as the same function.
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.VarMap: var_map_sql,
        exp.Create: create_with_partitions_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        # The delimiter is prefixed with \Q via CONCAT; presumably this makes the
        # regex-based SPLIT treat it literally -- TODO confirm.
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        # NOTE(review): only `this` and `expression` are rendered here; if the
        # node carries a unit it is not emitted -- confirm this is intended.
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, _time_format(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        # National strings are emitted with an empty prefix.
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
    }

    # Base property locations plus overrides; VolatileProperty is marked unsupported.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        """Render ``ROW FORMAT SERDE <this>``, followed by the serde properties if any.

        Args:
            expression: The row format SERDE property to render.

        Returns:
            The generated SQL; the ``serde_properties`` SQL is appended after a
            single space only when it is non-empty.
        """
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render an ArrayAgg expression as a ``COLLECT_LIST`` call.

        Args:
            expression: The ArrayAgg expression to render.

        Returns:
            The generated function call. When the argument is an ``exp.Order``
            node, only the expression being ordered is passed on, so the
            ordering itself is not emitted.
        """
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        """Render the given properties prefixed by a ``TBLPROPERTIES`` segment.

        Args:
            properties: The properties to render.

        Returns:
            The generated SQL.
        """
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Normalize a data type, then defer to the base generator to render it.

        Args:
            expression: The data type to render.

        Returns:
            The SQL produced by the base generator for the (possibly replaced)
            data type.
        """
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            # VARCHAR / NVARCHAR without any arguments is rebuilt as "text".
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            # Temporal types are rebuilt from the bare type only; presumably this
            # drops arguments such as a precision -- TODO confirm.
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeSize)
            if size_expression:
                # A sized FLOAT becomes a plain "float" up to size 32, else "double".
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function 
Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, 
<class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 
'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 
'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    """Normalize a data type node and render it through the base generator.

    Args:
        expression: The data type to render.

    Returns:
        The SQL the parent generator produces for the normalized type:
        argument-less VARCHAR / NVARCHAR is rebuilt as "text", temporal
        types are rebuilt from their bare type, and a FLOAT carrying a size
        is rebuilt as "float" (size <= 32) or "double" (larger sizes).
        Every other type is passed through unchanged.
    """
    dtype = expression.this
    is_bare_varchar = (
        dtype in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    )

    if is_bare_varchar:
        expression = exp.DataType.build("text")
    elif dtype in exp.DataType.TEMPORAL_TYPES:
        # Rebuild from the type alone, leaving out whatever else the node held.
        expression = exp.DataType.build(dtype)
    elif expression.is_type("float"):
        size_node = expression.find(exp.DataTypeSize)
        if size_node:
            target = "float" if int(size_node.name) <= 32 else "double"
            expression = exp.DataType.build(target)

    return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Report whether `text` can be identified under the given `identify` option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: the result is always true.
            "safe": the result is true when `cls.case_sensitive(text)` is false.
            Any other value: the result is false.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify == "safe":
        return not cls.case_sensitive(text)

    return identify is True or identify == "always"
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or True: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- VALUES_AS_TABLE
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql