sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this
    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(this)

    if this.type.is_type("json"):
        return self.sql(this)
    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, _time_format(self, expression))


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_format(
    self: generator.Generator, expression: exp.UnixToStr | exp.StrToUnix
) -> t.Optional[str]:
    time_format = self.format_time(expression)
    if time_format == Hive.TIME_FORMAT:
        return None
    return time_format


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            "cluster": lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
            "distribute": lambda self: self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
            "sort": lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        INDEX_ON = "ON TABLE"

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.BIT: "BOOLEAN",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)

            return super().datatype_sql(expression)

        def after_having_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_having_modifiers(expression) + [
                self.sql(expression, "distribute"),
                self.sql(expression, "sort"),
                self.sql(expression, "cluster"),
            ]
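The helpers above are easiest to see end to end through sqlglot's public transpile API. A minimal sketch; the outputs in the comments follow the DATE_DELTA_INTERVAL and TYPE_MAPPING tables above, but the exact strings may vary between sqlglot versions:

import sqlglot

# WEEK has no native Hive unit, so _add_date_sql rewrites it as DATE_ADD
# with a multiplier of 7; DateSub flips the sign of the multiplier.
print(sqlglot.transpile(
    "SELECT DATE_ADD(x, INTERVAL 1 WEEK), DATE_SUB(x, INTERVAL 2 MONTH)",
    read="mysql",
    write="hive",
)[0])
# Expected: SELECT DATE_ADD(x, 7), ADD_MONTHS(x, -2)

# TYPE_MAPPING sends TEXT to STRING when generating Hive SQL.
print(sqlglot.transpile("SELECT CAST(x AS TEXT)", read="postgres", write="hive")[0])
# Expected: SELECT CAST(x AS STRING)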
Hive.Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
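These options are normally threaded through sqlglot's top-level entry points rather than by constructing Hive.Parser directly. A minimal sketch (parse_one and ErrorLevel are sqlglot's public surface, not specific to this module):

import sqlglot
from sqlglot import exp
from sqlglot.errors import ErrorLevel

# The FUNCTIONS table above normalizes Hive's LOCATE into the generic
# StrPosition expression during parsing.
ast = sqlglot.parse_one("SELECT LOCATE('a', col) FROM t", read="hive")
assert ast.find(exp.StrPosition) is not None

# Parser options such as error_level are forwarded as keyword arguments.
sqlglot.parse_one("SELECT 1", read="hive", error_level=ErrorLevel.RAISE)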
Hive.Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
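As a sketch, the generator options listed above can be passed through sqlglot.transpile, which forwards them to Hive.Generator; the exact pretty-printed layout depends on the pad and indent defaults and the sqlglot version:

import sqlglot

# pretty and identify are Generator options; identify=True quotes all
# identifiers with Hive's backtick identifier character.
print(sqlglot.transpile(
    "SELECT a, COLLECT_LIST(b) FROM t GROUP BY a",
    read="hive",
    write="hive",
    pretty=True,
    identify=True,
)[0])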