sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# Maps a date unit to (function name, multiplier): the function used by
# _add_date_sql to shift a date by that unit, and the number of that
# function's base steps (months or days) that make up one unit.
# _date_diff_sql reuses only the multiplier, as a divisor.
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# SQL suffix appended by _date_diff_sql to a difference of two
# UNIX_TIMESTAMP(...) calls to express it in the given unit. The empty string
# (SECOND) means the difference is emitted as-is.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units for which _date_diff_sql emits MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    """Render a DateAdd / DateSub node as an ADD_MONTHS or DATE_ADD call.

    The function name and multiplier come from DATE_DELTA_INTERVAL, keyed by
    the upper-cased unit; units missing from the table fall back to
    ("DATE_ADD", 1). For DateSub the multiplier is negated, so a subtraction
    is emitted as an addition of a negative amount.
    """
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        # Numeric increments are folded into a single literal at generation time.
        # NOTE(review): int() raises ValueError for text such as "1.5" --
        # confirm non-integer numeric increments cannot reach this branch.
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        # Non-literal increments are multiplied in the generated SQL instead,
        # and only when the multiplier actually changes the value.
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    """Render a DateDiff node according to its unit.

    Units listed in TIME_DIFF_FACTOR become a difference of two
    UNIX_TIMESTAMP calls followed by that unit's factor. Every other unit
    becomes MONTHS_BETWEEN (for units in DIFF_MONTH_SWITCH) or DATEDIFF,
    divided by the unit's DATE_DELTA_INTERVAL multiplier when it exceeds 1.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # An empty factor (SECOND) needs neither the suffix nor the parentheses.
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    # Only the multiplier is needed; unknown units default to 1 (no division).
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    """Render a JSONFormat node as TO_JSON.

    When the argument is a cast of a string literal to JSON, the literal is
    wrapped in a dummy array, round-tripped through FROM_JSON / TO_JSON, and
    the array delimiters are stripped again with REGEXP_EXTRACT. Any other
    argument is passed to TO_JSON together with the optional "options" arg.
    """
    this = expression.this
    if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    """Render an ArraySort node as SORT_ARRAY(<array>).

    A comparator (the node's "expression" arg) is reported through
    self.unsupported and left out of the generated SQL.
    """
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    """Render a Property node as '<name>'=<value>, with the name single-quoted."""
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    """Render a StrToUnix node as UNIX_TIMESTAMP(<value>, <format>).

    The format argument is None when it equals Hive.TIME_FORMAT (see
    _time_format); presumably self.func then omits it -- verify against
    generator.Generator.func.
    """
    return self.func("UNIX_TIMESTAMP", expression.this, _time_format(self, expression))


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    """Render a StrToDate node as CAST(<value> AS DATE).

    When the format is neither Hive.TIME_FORMAT nor Hive.DATE_FORMAT, the
    value is first wrapped in FROM_UNIXTIME(UNIX_TIMESTAMP(<value>, <format>)).
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    """Render a StrToTime node as CAST(<value> AS TIMESTAMP).

    Uses the same format handling as _str_to_date_sql: formats other than
    Hive.TIME_FORMAT / Hive.DATE_FORMAT add a
    FROM_UNIXTIME(UNIX_TIMESTAMP(<value>, <format>)) wrapper.
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_format(
    self: generator.Generator, expression: exp.UnixToStr | exp.StrToUnix
) -> t.Optional[str]:
    """Return the node's formatted time format, or None if it is Hive.TIME_FORMAT."""
    time_format = self.format_time(expression)
    if time_format == Hive.TIME_FORMAT:
        return None
    return time_format


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    """Render a TimeToStr node as DATE_FORMAT(<value>, <format>)."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render a TsOrDsToDate node as TO_DATE.

    The format argument is emitted only when one is present and it is neither
    Hive.TIME_FORMAT nor Hive.DATE_FORMAT.
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    """Hive dialect: tokenizer, parser and generator customizations.

    The nested Tokenizer, Parser and Generator classes extend the generic
    sqlglot ones with Hive-specific keywords, function parsers and SQL
    rendering rules.
    """

    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Hive time-format tokens mapped to strftime-style directives.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Format strings as they appear in generated SQL (single quotes included).
    # The module-level helpers compare formatted time strings against them.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        # Base keywords plus multi-word Hive statements tokenized as COMMAND
        # and the WITH SERDEPROPERTIES clause.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Numeric literal suffixes and the type name each one stands for.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Function-name -> expression builders, layered over the base parser's.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            # DATE_SUB is parsed as an addition of the increment multiplied by -1.
            # NOTE(review): the increment is not wrapped in parentheses here --
            # confirm compound increments (e.g. `a + b`) keep their precedence.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        # Extra query modifiers: CLUSTER BY, DISTRIBUTE BY and SORT BY.
        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            "cluster": lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
            "distribute": lambda self: self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
            "sort": lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            # Outside schema definitions, replace every CHAR / VARCHAR data type
            # node in the parsed type (in place, copy=False) with a TEXT type.
            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        INDEX_ON = "ON TABLE"

        # Type names that differ from the base generator's mapping.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.BIT: "BOOLEAN",
        }

        # Expression type -> SQL renderer, layered over the base generator's.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # CONCAT_WS takes the separator first, hence the swapped arguments.
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with \Q in the generated SQL; presumably
            # so that SPLIT treats it as a literal rather than a regex -- confirm.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # The unit arg is not rendered: DATE_ADD is emitted with two arguments.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST(<arg>).

            When the argument is an exp.Order node, only the expression being
            ordered is emitted, so the ordering itself is dropped.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the given properties with a TBLPROPERTIES segment as prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type after normalizing it.

            VARCHAR / NVARCHAR without any type expressions are rebuilt as
            "text". Temporal types are rebuilt from their bare type, presumably
            discarding any type parameters -- confirm against
            exp.DataType.build.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)

            return super().datatype_sql(expression)

        def after_having_modifiers(self, expression: exp.Expression) -> t.List[str]:
            """Extend the parent's modifiers with distribute, sort and cluster, in that order."""
            return super().after_having_modifiers(expression) + [
                self.sql(expression, "distribute"),
                self.sql(expression, "sort"),
                self.sql(expression, "cluster"),
            ]
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
class Hive(Dialect):
    """Hive dialect: tokenizer, parser and generator customizations.

    The nested Tokenizer, Parser and Generator classes extend the generic
    sqlglot ones with Hive-specific keywords, function parsers and SQL
    rendering rules.
    """

    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Hive time-format tokens mapped to strftime-style directives.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Format strings as they appear in generated SQL (single quotes included).
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        # Base keywords plus multi-word Hive statements tokenized as COMMAND
        # and the WITH SERDEPROPERTIES clause.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Numeric literal suffixes and the type name each one stands for.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Function-name -> expression builders, layered over the base parser's.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            # DATE_SUB is parsed as an addition of the increment multiplied by -1.
            # NOTE(review): the increment is not wrapped in parentheses here --
            # confirm compound increments (e.g. `a + b`) keep their precedence.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        # Extra query modifiers: CLUSTER BY, DISTRIBUTE BY and SORT BY.
        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            "cluster": lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
            "distribute": lambda self: self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
            "sort": lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            # Outside schema definitions, replace every CHAR / VARCHAR data type
            # node in the parsed type (in place, copy=False) with a TEXT type.
            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        INDEX_ON = "ON TABLE"

        # Type names that differ from the base generator's mapping.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.BIT: "BOOLEAN",
        }

        # Expression type -> SQL renderer, layered over the base generator's.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # CONCAT_WS takes the separator first, hence the swapped arguments.
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with \Q in the generated SQL; presumably
            # so that SPLIT treats it as a literal rather than a regex -- confirm.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # The unit arg is not rendered: DATE_ADD is emitted with two arguments.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST(<arg>).

            When the argument is an exp.Order node, only the expression being
            ordered is emitted, so the ordering itself is dropped.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the given properties with a TBLPROPERTIES segment as prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type after normalizing it.

            VARCHAR / NVARCHAR without any type expressions are rebuilt as
            "text". Temporal types are rebuilt from their bare type, presumably
            discarding any type parameters -- confirm against
            exp.DataType.build.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)

            return super().datatype_sql(expression)

        def after_having_modifiers(self, expression: exp.Expression) -> t.List[str]:
            """Extend the parent's modifiers with distribute, sort and cluster, in that order."""
            return super().after_having_modifiers(expression) + [
                self.sql(expression, "distribute"),
                self.sql(expression, "sort"),
                self.sql(expression, "cluster"),
            ]
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
parser_class =
<class 'sqlglot.dialects.hive.Hive.Parser'>
generator_class =
<class 'sqlglot.dialects.hive.Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for the Hive dialect, layered over the base tokenizer."""

    # Single and double quotes delimit strings; backticks delimit identifiers.
    QUOTES = ["'", '"']
    IDENTIFIERS = ["`"]
    # Backslash is the escape character inside string literals.
    STRING_ESCAPES = ["\\"]
    ENCODE = "utf-8"

    # Base keywords plus multi-word Hive statements tokenized as COMMAND
    # and the WITH SERDEPROPERTIES clause.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ADD ARCHIVE": TokenType.COMMAND,
        "ADD ARCHIVES": TokenType.COMMAND,
        "ADD FILE": TokenType.COMMAND,
        "ADD FILES": TokenType.COMMAND,
        "ADD JAR": TokenType.COMMAND,
        "ADD JARS": TokenType.COMMAND,
        "MSCK REPAIR": TokenType.COMMAND,
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
    }

    # Numeric literal suffixes and the type name each one stands for.
    NUMERIC_LITERALS = {
        "L": "BIGINT",
        "S": "SMALLINT",
        "Y": "TINYINT",
        "D": "DOUBLE",
        "F": "FLOAT",
        "BD": "DECIMAL",
    }
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 
'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'IF': <TokenType.IF: 'IF'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 
'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NEXT VALUE FOR': <TokenType.NEXT_VALUE_FOR: 'NEXT_VALUE_FOR'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': 
<TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 
'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 
'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
class Parser(parser.Parser):
    """
    Parser for the Hive dialect.

    Extends the base parser's FUNCTIONS, PROPERTY_PARSERS and QUERY_MODIFIER_PARSERS
    tables with Hive-specific entries and overrides `_parse_types` so that CHAR/VARCHAR
    types are rewritten to TEXT outside of schema definitions.
    """

    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    # Function name -> builder that receives the parsed argument list and returns
    # the expression node. Entries here take precedence over the base parser's
    # entries with the same name because they come after the `**` unpacking.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        # DATE_ADD(x, n) is built with an explicit 'DAY' unit.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            unit=exp.Literal.string("DAY"),
        ),
        # Both DATEDIFF operands are wrapped in TsOrDsToDate.
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        # DATE_SUB(x, n) is represented as an addition of n * -1 days.
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(
                this=seq_get(args, 1),
                expression=exp.Literal.number(-1),
            ),
            unit=exp.Literal.string("DAY"),
        ),
        # The first argument is wrapped in TimeStrToTime before the format is applied.
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    # Adds WITH SERDEPROPERTIES (...), parsed as a wrapped, comma-separated
    # list of properties.
    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # Adds the CLUSTER BY / DISTRIBUTE BY / SORT BY query modifiers.
    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        "cluster": lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        "distribute": lambda self: self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        "sort": lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
    }

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse a type, rewriting CHAR/VARCHAR to TEXT unless parsing a schema definition.

        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as
        casts to STRING everywhere except in schema definitions. For example, in Spark
        v3.4.0 `select cast(1234 as varchar(2))` emits a warning that the cast operator
        does not support char/varchar and treats them as string, and returns 1234 rather
        than the truncated '12'. Other dialects (e.g. postgres) do truncate, so the
        length has to be dropped in order to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html

        Args:
            check_func: forwarded unchanged to the base implementation.
            schema: forwarded to the base implementation; when truthy the parsed
                type is returned without any rewriting.

        Returns:
            Whatever the base implementation returned, with every CHAR/VARCHAR
            DataType node replaced by a TEXT DataType when `schema` is falsy.
        """
        parsed = super()._parse_types(check_func=check_func, schema=schema)

        # Nothing was parsed, or we are inside a schema definition: leave as is.
        if not parsed or schema:
            return parsed

        def _char_to_text(node: exp.Expression) -> exp.Expression:
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        # copy=False: the parsed tree is rewritten in place rather than duplicated.
        return parsed.transform(_char_to_text, copy=False)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpExtract'>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 
'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function 
Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
QUERY_MODIFIER_PARSERS =
{'joins': <function Parser.<lambda>>, 'laterals': <function Parser.<lambda>>, 'match': <function Parser.<lambda>>, 'where': <function Parser.<lambda>>, 'group': <function Parser.<lambda>>, 'having': <function Parser.<lambda>>, 'qualify': <function Parser.<lambda>>, 'windows': <function Parser.<lambda>>, 'order': <function Parser.<lambda>>, 'limit': <function Parser.<lambda>>, 'offset': <function Parser.<lambda>>, 'locks': <function Parser.<lambda>>, 'sample': <function Parser.<lambda>>, 'cluster': <function Hive.Parser.<lambda>>, 'distribute': <function Hive.Parser.<lambda>>, 'sort': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- ADD_CONSTRAINT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    # SQL generator for the Hive dialect. It extends generator.Generator with
    # dialect-level settings, Hive type names, per-expression transforms,
    # property placement rules and a handful of method overrides.

    # Dialect-level settings defined on this generator.
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    INDEX_ON = "ON TABLE"

    # Base type names, with the Hive spellings layered on top.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.BIT: "BOOLEAN",
    }

    # Expression class -> callable(self, expression) producing the SQL text.
    # Entries listed here take precedence over the inherited ones.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        # Tree rewrites handed to transforms.preprocess for SELECT statements.
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        # CONCAT_WS takes the separator first, hence the swapped arguments.
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        # An InputOutputFormat node is rendered as SQL; anything else is
        # emitted as its upper-cased name.
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql,
        exp.ILike: no_ilike_sql,
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.VarMap: var_map_sql,
        exp.Create: create_with_partitions_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        # The delimiter is prefixed with a \Q marker through CONCAT.
        # NOTE(review): presumably so the regex-based SPLIT treats the
        # delimiter literally -- confirm against the Hive documentation.
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        # NOTE(review): only "this" and "expression" are rendered here; a
        # unit argument, if present, is not consulted.
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, _time_format(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
    }

    # Where each property kind is placed in a CREATE statement; VOLATILE is
    # flagged as unsupported for this dialect.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render an array aggregation as ``COLLECT_LIST(...)``.

        When the aggregated argument is wrapped in an ``exp.Order`` node, only
        the expression inside it is passed on, so the ordering is not emitted.
        """
        collected = expression.this
        if isinstance(collected, exp.Order):
            collected = collected.this
        return self.func("COLLECT_LIST", collected)

    def with_properties(self, properties: exp.Properties) -> str:
        """Render ``properties`` behind a ``TBLPROPERTIES`` segment prefix."""
        tblproperties_prefix = self.seg("TBLPROPERTIES")
        return self.properties(properties, prefix=tblproperties_prefix)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type after normalizing it for Hive.

        * ``VARCHAR``/``NVARCHAR`` without any arguments is rebuilt as
          ``"text"`` (which ``TYPE_MAPPING`` above spells ``STRING``).
        * Temporal types are rebuilt from the bare type alone.
          NOTE(review): presumably this drops size/precision arguments --
          confirm against ``exp.DataType.build``.

        Every other type is passed to the base implementation unchanged.
        """
        kind = expression.this
        is_bare_varchar = (
            kind in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        )

        if is_bare_varchar:
            normalized = exp.DataType.build("text")
        elif kind in exp.DataType.TEMPORAL_TYPES:
            normalized = exp.DataType.build(kind)
        else:
            normalized = expression

        return super().datatype_sql(normalized)

    def after_having_modifiers(self, expression: exp.Expression) -> t.List[str]:
        """Return the base modifiers followed by the rendered ``distribute``,
        ``sort`` and ``cluster`` args of ``expression``, in that order."""
        inherited = super().after_having_modifiers(expression)
        hive_specific = [
            self.sql(expression, arg_name) for arg_name in ("distribute", "sort", "cluster")
        ]
        return inherited + hive_specific
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether or not to preserve comments in the output SQL code. Default: True.
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.TEXT: 'TEXT'>: 'STRING', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.BIT: 'BIT'>: 'BOOLEAN'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function 
Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 
'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 
'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type after normalizing it for Hive.

    * ``VARCHAR``/``NVARCHAR`` without any arguments is rebuilt as ``"text"``.
    * Temporal types are rebuilt from the bare type alone.
      NOTE(review): presumably this drops size/precision arguments --
      confirm against ``exp.DataType.build``.

    Every other type is passed to the base implementation unchanged.
    """
    kind = expression.this
    is_bare_varchar = (
        kind in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    )

    if is_bare_varchar:
        normalized = exp.DataType.build("text")
    elif kind in exp.DataType.TEMPORAL_TYPES:
        normalized = exp.DataType.build(kind)
    else:
        normalized = expression

    return super().datatype_sql(normalized)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Decide whether ``text`` can be identified under the ``identify`` option.

    Args:
        text: The text to check.
        identify:
            ``True`` or "always": the answer is always True.
            "safe": the answer is True only when ``cls.case_sensitive(text)``
            is falsy, i.e. the identifier is case-insensitive.
            Any other value: the answer is False.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        verdict = True
    elif identify == "safe":
        # Defer to the dialect's own notion of case sensitivity.
        verdict = not cls.case_sensitive(text)
    else:
        verdict = False

    return verdict
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or True: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- IS_BOOL_ALLOWED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql