Edit on GitHub

sqlglot.dialects.hive

View Source

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens, transforms
  6from sqlglot.dialects.dialect import (
  7    DATE_ADD_OR_SUB,
  8    Dialect,
  9    NormalizationStrategy,
 10    approx_count_distinct_sql,
 11    arg_max_or_min_no_count,
 12    datestrtodate_sql,
 13    build_formatted_time,
 14    if_sql,
 15    is_parse_json,
 16    left_to_substring_sql,
 17    locate_to_strposition,
 18    max_or_greatest,
 19    min_or_least,
 20    no_ilike_sql,
 21    no_recursive_cte_sql,
 22    no_safe_divide_sql,
 23    no_trycast_sql,
 24    regexp_extract_sql,
 25    regexp_replace_sql,
 26    rename_func,
 27    right_to_substring_sql,
 28    strposition_to_locate_sql,
 29    struct_extract_sql,
 30    time_format,
 31    timestrtotime_sql,
 32    var_map_sql,
 33)
 34from sqlglot.transforms import (
 35    remove_unique_constraints,
 36    ctas_with_tmp_tables_to_create_tmp_view,
 37    preprocess,
 38    move_schema_columns_to_partitioned_by,
 39)
 40from sqlglot.helper import seq_get
 41from sqlglot.tokens import TokenType
 42
# Maps an upper-cased date unit to a (FuncType, Multiplier) pair: the Hive
# function name to emit and the factor applied to the increment (or used as a
# divisor by _date_diff_sql). Units missing from this table fall back to a
# default chosen by the caller.
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# Maps a sub-day unit to the SQL suffix that _date_diff_sql appends to a
# difference of two UNIX_TIMESTAMP(...) calls. An empty suffix means the
# difference is emitted unchanged.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units for which _date_diff_sql emits MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")
 60
 61
 62def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
 63    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
 64        return self.func("DATE_ADD", expression.this, expression.expression)
 65
 66    unit = expression.text("unit").upper()
 67    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))
 68
 69    if isinstance(expression, exp.DateSub):
 70        multiplier *= -1
 71
 72    if expression.expression.is_number:
 73        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
 74    else:
 75        modified_increment = expression.expression
 76        if multiplier != 1:
 77            modified_increment = exp.Mul(  # type: ignore
 78                this=modified_increment, expression=exp.Literal.number(multiplier)
 79            )
 80
 81    return self.func(func, expression.this, modified_increment)
 82
 83
 84def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
 85    unit = expression.text("unit").upper()
 86
 87    factor = TIME_DIFF_FACTOR.get(unit)
 88    if factor is not None:
 89        left = self.sql(expression, "this")
 90        right = self.sql(expression, "expression")
 91        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
 92        return f"({sec_diff}){factor}" if factor else sec_diff
 93
 94    months_between = unit in DIFF_MONTH_SWITCH
 95    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
 96    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
 97    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
 98    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"
 99
100    if months_between or multiplier_sql:
101        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
102        # For the same reason, we want to truncate if there's a divisor present.
103        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"
104
105    return diff_sql
106
107
def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    """Render `JSONFormat` as TO_JSON, with special handling for parsed JSON input.

    When `is_parse_json` accepts the argument and its payload is a string, the
    string is round-tripped through FROM_JSON/TO_JSON inside a dummy array;
    when the payload is not a string the argument is rendered as-is.
    """
    payload = expression.this

    if not is_parse_json(payload):
        return self.func("TO_JSON", payload, expression.args.get("options"))

    if not payload.this.is_string:
        return self.sql(payload)

    # FROM_JSON requires a nested type, so the JSON string is always wrapped in
    # an array; that way "naked" strings like "'a'" are handled correctly.
    array_literal = exp.Literal.string(f"[{payload.this.name}]")
    schema = self.func("SCHEMA_OF_JSON", array_literal)
    parsed = self.func("FROM_JSON", array_literal, schema)
    serialized = self.func("TO_JSON", parsed)

    # Strip the [ and ] delimiters of the dummy array printed by TO_JSON.
    return self.func("REGEXP_EXTRACT", serialized, "'^.(.*).$'", "1")
127
128
def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    """Render `ArraySort` as SORT_ARRAY, flagging any comparator as unsupported."""
    comparator = expression.expression
    if comparator:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")

    return self.func("SORT_ARRAY", expression.this)
133
134
def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    """Render a property as `<name>=<value>`, requesting a string key for the name."""
    key = self.property_name(expression, string_key=True)
    value = self.sql(expression, "value")
    return f"{key}={value}"
137
138
def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    """Render `StrToUnix` as UNIX_TIMESTAMP with the format produced by `time_format("hive")`."""
    fmt = time_format("hive")(self, expression)
    return self.func("UNIX_TIMESTAMP", expression.this, fmt)
141
142
def _unix_to_time_sql(self: Hive.Generator, expression: exp.UnixToTime) -> str:
    """Render `UnixToTime` as FROM_UNIXTIME.

    With no scale, or a scale equal to `exp.UnixToTime.SECONDS`, the expression
    is simply renamed to FROM_UNIXTIME. Otherwise the timestamp is divided by
    `POW(10, scale)` first.
    """
    timestamp = self.sql(expression, "this")
    scale = expression.args.get("scale")

    in_seconds = scale is None or scale == exp.UnixToTime.SECONDS
    if not in_seconds:
        return f"FROM_UNIXTIME({timestamp} / POW(10, {scale}))"

    return rename_func("FROM_UNIXTIME")(self, expression)
150
151
def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    """Render `StrToDate` as a CAST to DATE.

    When the format is neither `Hive.TIME_FORMAT` nor `Hive.DATE_FORMAT`, the
    value is first normalised through FROM_UNIXTIME(UNIX_TIMESTAMP(value, format)).
    """
    value = self.sql(expression, "this")
    fmt = self.format_time(expression)

    if fmt in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"CAST({value} AS DATE)"

    return f"CAST(FROM_UNIXTIME(UNIX_TIMESTAMP({value}, {fmt})) AS DATE)"
158
159
def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    """Render `StrToTime` as a CAST to TIMESTAMP.

    When the format is neither `Hive.TIME_FORMAT` nor `Hive.DATE_FORMAT`, the
    value is first normalised through FROM_UNIXTIME(UNIX_TIMESTAMP(value, format)).
    """
    value = self.sql(expression, "this")
    fmt = self.format_time(expression)

    if fmt in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"CAST({value} AS TIMESTAMP)"

    return f"CAST(FROM_UNIXTIME(UNIX_TIMESTAMP({value}, {fmt})) AS TIMESTAMP)"
166
167
def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render `TsOrDsToDate` as TO_DATE.

    The format argument is emitted only when it is set and differs from both
    `Hive.TIME_FORMAT` and `Hive.DATE_FORMAT`. Without such a format, an
    argument that is itself a `TsOrDsToDate` is rendered on its own, so the
    conversion is not applied twice.
    """
    fmt = self.format_time(expression)
    default_formats = (Hive.TIME_FORMAT, Hive.DATE_FORMAT)

    if fmt and fmt not in default_formats:
        return self.func("TO_DATE", expression.this, fmt)

    inner = expression.this
    if isinstance(inner, exp.TsOrDsToDate):
        return self.sql(expression, "this")

    return self.func("TO_DATE", inner)
177
178
def _build_with_ignore_nulls(
    exp_class: t.Type[exp.Expression],
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
    """Return an argument-list builder for `exp_class`.

    The builder constructs `exp_class` from the first argument and wraps it in
    `exp.IgnoreNulls` when the second argument equals `exp.true()`.
    """

    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
        node = exp_class(this=seq_get(args, 0))
        ignore_nulls = seq_get(args, 1) == exp.true()
        return exp.IgnoreNulls(this=node) if ignore_nulls else node

    return _parse
189
190
191class Hive(Dialect):
192    ALIAS_POST_TABLESAMPLE = True
193    IDENTIFIERS_CAN_START_WITH_DIGIT = True
194    SUPPORTS_USER_DEFINED_TYPES = False
195    SAFE_DIVISION = True
196
197    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
198    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
199
200    TIME_MAPPING = {
201        "y": "%Y",
202        "Y": "%Y",
203        "YYYY": "%Y",
204        "yyyy": "%Y",
205        "YY": "%y",
206        "yy": "%y",
207        "MMMM": "%B",
208        "MMM": "%b",
209        "MM": "%m",
210        "M": "%-m",
211        "dd": "%d",
212        "d": "%-d",
213        "HH": "%H",
214        "H": "%-H",
215        "hh": "%I",
216        "h": "%-I",
217        "mm": "%M",
218        "m": "%-M",
219        "ss": "%S",
220        "s": "%-S",
221        "SSSSSS": "%f",
222        "a": "%p",
223        "DD": "%j",
224        "D": "%-j",
225        "E": "%a",
226        "EE": "%a",
227        "EEE": "%a",
228        "EEEE": "%A",
229    }
230
231    DATE_FORMAT = "'yyyy-MM-dd'"
232    DATEINT_FORMAT = "'yyyyMMdd'"
233    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"
234
    # Tokenizer configuration for the Hive dialect. The tables below extend or
    # override the corresponding tables of the base `tokens.Tokenizer`.
    class Tokenizer(tokens.Tokenizer):
        # Both single and double quotes are listed as quote characters.
        QUOTES = ["'", '"']
        # Backticks are the only identifier delimiter listed.
        IDENTIFIERS = ["`"]
        # Backslash is the only string escape character listed.
        STRING_ESCAPES = ["\\"]

        # Base single-character tokens, plus "$" mapped to a PARAMETER token.
        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        # Base keywords, plus Hive-specific entries. The ADD ... and MSCK REPAIR
        # phrases are all mapped to the generic COMMAND token type.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Maps a literal suffix to a type name.
        # NOTE(review): presumably applied to suffixed numeric literals such as
        # 10L or 1.5BD - confirm against the base tokenizer's handling.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }
268
269    class Parser(parser.Parser):
270        LOG_DEFAULTS_TO_LN = True
271        STRICT_CAST = False
272        VALUES_FOLLOWED_BY_PAREN = False
273
274        FUNCTIONS = {
275            **parser.Parser.FUNCTIONS,
276            "BASE64": exp.ToBase64.from_arg_list,
277            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
278            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
279            "DATE_ADD": lambda args: exp.TsOrDsAdd(
280                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
281            ),
282            "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
283                [
284                    exp.TimeStrToTime(this=seq_get(args, 0)),
285                    seq_get(args, 1),
286                ]
287            ),
288            "DATE_SUB": lambda args: exp.TsOrDsAdd(
289                this=seq_get(args, 0),
290                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
291                unit=exp.Literal.string("DAY"),
292            ),
293            "DATEDIFF": lambda args: exp.DateDiff(
294                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
295                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
296            ),
297            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
298            "FIRST": _build_with_ignore_nulls(exp.First),
299            "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
300            "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
301            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
302            "LAST": _build_with_ignore_nulls(exp.Last),
303            "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
304            "LOCATE": locate_to_strposition,
305            "MAP": parser.build_var_map,
306            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
307            "PERCENTILE": exp.Quantile.from_arg_list,
308            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
309            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
310                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
311            ),
312            "SIZE": exp.ArraySize.from_arg_list,
313            "SPLIT": exp.RegexpSplit.from_arg_list,
314            "STR_TO_MAP": lambda args: exp.StrToMap(
315                this=seq_get(args, 0),
316                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
317                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
318            ),
319            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
320            "TO_JSON": exp.JSONFormat.from_arg_list,
321            "UNBASE64": exp.FromBase64.from_arg_list,
322            "UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)(
323                args or [exp.CurrentTimestamp()]
324            ),
325            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
326        }
327
328        NO_PAREN_FUNCTION_PARSERS = {
329            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
330            "TRANSFORM": lambda self: self._parse_transform(),
331        }
332
333        PROPERTY_PARSERS = {
334            **parser.Parser.PROPERTY_PARSERS,
335            "SERDEPROPERTIES": lambda self: exp.SerdeProperties(
336                expressions=self._parse_wrapped_csv(self._parse_property)
337            ),
338        }
339
340        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
341            if not self._match(TokenType.L_PAREN, advance=False):
342                self._retreat(self._index - 1)
343                return None
344
345            args = self._parse_wrapped_csv(self._parse_lambda)
346            row_format_before = self._parse_row_format(match_row=True)
347
348            record_writer = None
349            if self._match_text_seq("RECORDWRITER"):
350                record_writer = self._parse_string()
351
352            if not self._match(TokenType.USING):
353                return exp.Transform.from_arg_list(args)
354
355            command_script = self._parse_string()
356
357            self._match(TokenType.ALIAS)
358            schema = self._parse_schema()
359
360            row_format_after = self._parse_row_format(match_row=True)
361            record_reader = None
362            if self._match_text_seq("RECORDREADER"):
363                record_reader = self._parse_string()
364
365            return self.expression(
366                exp.QueryTransform,
367                expressions=args,
368                command_script=command_script,
369                schema=schema,
370                row_format_before=row_format_before,
371                record_writer=record_writer,
372                row_format_after=row_format_after,
373                record_reader=record_reader,
374            )
375
376        def _parse_types(
377            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
378        ) -> t.Optional[exp.Expression]:
379            """
380            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
381            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:
382
383                spark-sql (default)> select cast(1234 as varchar(2));
384                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
385                char/varchar type and simply treats them as string type. Please use string type
386                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
387                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier
388
389                1234
390                Time taken: 4.265 seconds, Fetched 1 row(s)
391
392            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
393            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.
394
395            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
396            """
397            this = super()._parse_types(
398                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
399            )
400
401            if this and not schema:
402                return this.transform(
403                    lambda node: (
404                        node.replace(exp.DataType.build("text"))
405                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
406                        else node
407                    ),
408                    copy=False,
409                )
410
411            return this
412
413        def _parse_partition_and_order(
414            self,
415        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
416            return (
417                (
418                    self._parse_csv(self._parse_conjunction)
419                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
420                    else []
421                ),
422                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
423            )
424
425        def _parse_parameter(self) -> exp.Parameter:
426            self._match(TokenType.L_BRACE)
427            this = self._parse_identifier() or self._parse_primary_or_var()
428            expression = self._match(TokenType.COLON) and (
429                self._parse_identifier() or self._parse_primary_or_var()
430            )
431            self._match(TokenType.R_BRACE)
432            return self.expression(exp.Parameter, this=this, expression=expression)
433
434    class Generator(generator.Generator):
435        LIMIT_FETCH = "LIMIT"
436        TABLESAMPLE_WITH_METHOD = False
437        JOIN_HINTS = False
438        TABLE_HINTS = False
439        QUERY_HINTS = False
440        INDEX_ON = "ON TABLE"
441        EXTRACT_ALLOWS_QUOTES = False
442        NVL2_SUPPORTED = False
443        LAST_DAY_SUPPORTS_DATE_PART = False
444        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
445        SUPPORTS_TO_NUMBER = False
446        WITH_PROPERTIES_PREFIX = "TBLPROPERTIES"
447
448        EXPRESSIONS_WITHOUT_NESTED_CTES = {
449            exp.Insert,
450            exp.Select,
451            exp.Subquery,
452            exp.Union,
453        }
454
455        SUPPORTED_JSON_PATH_PARTS = {
456            exp.JSONPathKey,
457            exp.JSONPathRoot,
458            exp.JSONPathSubscript,
459            exp.JSONPathWildcard,
460        }
461
462        TYPE_MAPPING = {
463            **generator.Generator.TYPE_MAPPING,
464            exp.DataType.Type.BIT: "BOOLEAN",
465            exp.DataType.Type.DATETIME: "TIMESTAMP",
466            exp.DataType.Type.ROWVERSION: "BINARY",
467            exp.DataType.Type.TEXT: "STRING",
468            exp.DataType.Type.TIME: "TIMESTAMP",
469            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
470            exp.DataType.Type.UTINYINT: "SMALLINT",
471            exp.DataType.Type.VARBINARY: "BINARY",
472        }
473
474        TRANSFORMS = {
475            **generator.Generator.TRANSFORMS,
476            exp.Group: transforms.preprocess([transforms.unalias_group]),
477            exp.Select: transforms.preprocess(
478                [
479                    transforms.eliminate_qualify,
480                    transforms.eliminate_distinct_on,
481                    transforms.unnest_to_explode,
482                ]
483            ),
484            exp.Property: _property_sql,
485            exp.AnyValue: rename_func("FIRST"),
486            exp.ApproxDistinct: approx_count_distinct_sql,
487            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
488            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
489            exp.ArrayConcat: rename_func("CONCAT"),
490            exp.ArrayToString: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
491            exp.ArraySize: rename_func("SIZE"),
492            exp.ArraySort: _array_sort_sql,
493            exp.With: no_recursive_cte_sql,
494            exp.DateAdd: _add_date_sql,
495            exp.DateDiff: _date_diff_sql,
496            exp.DateStrToDate: datestrtodate_sql,
497            exp.DateSub: _add_date_sql,
498            exp.DateToDi: lambda self,
499            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
500            exp.DiToDate: lambda self,
501            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
502            exp.FileFormatProperty: lambda self,
503            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
504            exp.FromBase64: rename_func("UNBASE64"),
505            exp.If: if_sql(),
506            exp.ILike: no_ilike_sql,
507            exp.IsNan: rename_func("ISNAN"),
508            exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
509            exp.JSONExtractScalar: lambda self, e: self.func(
510                "GET_JSON_OBJECT", e.this, e.expression
511            ),
512            exp.JSONFormat: _json_format_sql,
513            exp.Left: left_to_substring_sql,
514            exp.Map: var_map_sql,
515            exp.Max: max_or_greatest,
516            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
517            exp.Min: min_or_least,
518            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
519            exp.NotNullColumnConstraint: lambda _, e: (
520                "" if e.args.get("allow_null") else "NOT NULL"
521            ),
522            exp.VarMap: var_map_sql,
523            exp.Create: preprocess(
524                [
525                    remove_unique_constraints,
526                    ctas_with_tmp_tables_to_create_tmp_view,
527                    move_schema_columns_to_partitioned_by,
528                ]
529            ),
530            exp.Quantile: rename_func("PERCENTILE"),
531            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
532            exp.RegexpExtract: regexp_extract_sql,
533            exp.RegexpReplace: regexp_replace_sql,
534            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
535            exp.RegexpSplit: rename_func("SPLIT"),
536            exp.Right: right_to_substring_sql,
537            exp.SafeDivide: no_safe_divide_sql,
538            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
539            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
540            exp.Split: lambda self, e: self.func(
541                "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
542            ),
543            exp.StrPosition: strposition_to_locate_sql,
544            exp.StrToDate: _str_to_date_sql,
545            exp.StrToTime: _str_to_time_sql,
546            exp.StrToUnix: _str_to_unix_sql,
547            exp.StructExtract: struct_extract_sql,
548            exp.TimeStrToDate: rename_func("TO_DATE"),
549            exp.TimeStrToTime: timestrtotime_sql,
550            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
551            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
552            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
553            exp.ToBase64: rename_func("BASE64"),
554            exp.TsOrDiToDi: lambda self,
555            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
556            exp.TsOrDsAdd: _add_date_sql,
557            exp.TsOrDsDiff: _date_diff_sql,
558            exp.TsOrDsToDate: _to_date_sql,
559            exp.TryCast: no_trycast_sql,
560            exp.UnixToStr: lambda self, e: self.func(
561                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
562            ),
563            exp.UnixToTime: _unix_to_time_sql,
564            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
565            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
566            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
567            exp.National: lambda self, e: self.national_sql(e, prefix=""),
568            exp.ClusteredColumnConstraint: lambda self,
569            e: f"({self.expressions(e, 'this', indent=False)})",
570            exp.NonClusteredColumnConstraint: lambda self,
571            e: f"({self.expressions(e, 'this', indent=False)})",
572            exp.NotForReplicationColumnConstraint: lambda *_: "",
573            exp.OnProperty: lambda *_: "",
574            exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
575            exp.ParseJSON: lambda self, e: self.sql(e.this),
576            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
577            exp.DayOfMonth: rename_func("DAYOFMONTH"),
578            exp.DayOfWeek: rename_func("DAYOFWEEK"),
579        }
580
581        PROPERTIES_LOCATION = {
582            **generator.Generator.PROPERTIES_LOCATION,
583            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
584            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
585            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
586            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
587        }
588
589        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
590            if isinstance(expression.this, exp.JSONPathWildcard):
591                self.unsupported("Unsupported wildcard in JSONPathKey expression")
592                return ""
593
594            return super()._jsonpathkey_sql(expression)
595
596        def parameter_sql(self, expression: exp.Parameter) -> str:
597            this = self.sql(expression, "this")
598            expression_sql = self.sql(expression, "expression")
599
600            parent = expression.parent
601            this = f"{this}:{expression_sql}" if expression_sql else this
602
603            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
604                # We need to produce SET key = value instead of SET ${key} = value
605                return this
606
607            return f"${{{this}}}"
608
609        def schema_sql(self, expression: exp.Schema) -> str:
610            for ordered in expression.find_all(exp.Ordered):
611                if ordered.args.get("desc") is False:
612                    ordered.set("desc", None)
613
614            return super().schema_sql(expression)
615
616        def constraint_sql(self, expression: exp.Constraint) -> str:
617            for prop in list(expression.find_all(exp.Properties)):
618                prop.pop()
619
620            this = self.sql(expression, "this")
621            expressions = self.expressions(expression, sep=" ", flat=True)
622            return f"CONSTRAINT {this} {expressions}"
623
624        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
625            serde_props = self.sql(expression, "serde_properties")
626            serde_props = f" {serde_props}" if serde_props else ""
627            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
628
629        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
630            return self.func(
631                "COLLECT_LIST",
632                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
633            )
634
635        def datatype_sql(self, expression: exp.DataType) -> str:
636            if expression.this in self.PARAMETERIZABLE_TEXT_TYPES and (
637                not expression.expressions or expression.expressions[0].name == "MAX"
638            ):
639                expression = exp.DataType.build("text")
640            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
641                expression.set("this", exp.DataType.Type.VARCHAR)
642            elif expression.this in exp.DataType.TEMPORAL_TYPES:
643                expression = exp.DataType.build(expression.this)
644            elif expression.is_type("float"):
645                size_expression = expression.find(exp.DataTypeParam)
646                if size_expression:
647                    size = int(size_expression.name)
648                    expression = (
649                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
650                    )
651
652            return super().datatype_sql(expression)
653
654        def version_sql(self, expression: exp.Version) -> str:
655            sql = super().version_sql(expression)
656            return sql.replace("FOR ", "", 1)
657
658        def struct_sql(self, expression: exp.Struct) -> str:
659            values = []
660
661            for i, e in enumerate(expression.expressions):
662                if isinstance(e, exp.PropertyEQ):
663                    self.unsupported("Hive does not support named structs.")
664                    values.append(e.expression)
665                else:
666                    values.append(e)
667
668            return self.func("STRUCT", *values)
669
670        def alterset_sql(self, expression: exp.AlterSet) -> str:
671            exprs = self.expressions(expression, flat=True)
672            exprs = f" {exprs}" if exprs else ""
673            location = self.sql(expression, "location")
674            location = f" LOCATION {location}" if location else ""
675            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
676            file_format = f" FILEFORMAT {file_format}" if file_format else ""
677            serde = self.sql(expression, "serde")
678            serde = f" SERDE {serde}" if serde else ""
679            tags = self.expressions(expression, key="tag", flat=True, sep="")
680            tags = f" TAGS {tags}" if tags else ""
681
682            return f"SET{serde}{exprs}{location}{file_format}{tags}"
683
684        def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str:
685            prefix = "WITH " if expression.args.get("with") else ""
686            exprs = self.expressions(expression, flat=True)
687
688            return f"{prefix}SERDEPROPERTIES ({exprs})"

DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}

TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}

DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')

class Hive(sqlglot.dialects.dialect.Dialect): View Source

class Hive(Dialect):
    """Hive SQL dialect.

    Bundles the dialect-level flags, the time-format mapping and default format
    literals, and the Hive-specific ``Tokenizer``, ``Parser`` and ``Generator``
    subclasses.
    """

    # Whether the table alias comes after tablesample.
    ALIAS_POST_TABLESAMPLE = True
    # Whether an unquoted identifier can start with a digit.
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    # Whether user-defined data types are supported.
    SUPPORTS_USER_DEFINED_TYPES = False
    # Whether division by zero throws an error (False) or returns NULL (True).
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # Associates this dialect's time format tokens with their Python strftime
    # equivalents. Several Hive tokens map to the same strftime directive.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Format literals; the embedded single quotes are part of the value, so the
    # constants can be interpolated directly into generated SQL (see the
    # DateToDi / DiToDate transforms below).
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    # Tokenizer settings layered on top of the base tokens.Tokenizer.
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        # Base single-character tokens plus "$", which is tokenized as PARAMETER.
        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        # Base keywords plus Hive additions; the ADD ... and MSCK REPAIR
        # statements are tokenized as generic COMMAND tokens.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Short marker -> type name.
        # NOTE(review): presumably these are numeric literal suffixes (e.g. 10L);
        # confirm against how tokens.Tokenizer consumes NUMERIC_LITERALS.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    # Parser customizations layered on top of the base parser.Parser.
    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False
        VALUES_FOLLOWED_BY_PAREN = False

        # Function name -> builder turning the parsed argument list into an
        # expression node. Entries here extend/override the base parser's table.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is represented as an addition of the amount multiplied by -1.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FIRST": _build_with_ignore_nulls(exp.First),
            "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
            "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LAST": _build_with_ignore_nulls(exp.Last),
            "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
            "LOCATE": locate_to_strposition,
            "MAP": parser.build_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            # Missing delimiters fall back to "," (pairs) and ":" (key/value).
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            # With no arguments, UNIX_TIMESTAMP() is built over CurrentTimestamp().
            "UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)(
                args or [exp.CurrentTimestamp()]
            ),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse what follows a ``TRANSFORM`` keyword.

            Returns ``None`` (after retreating one token) when the next token is
            not an opening parenthesis. When no ``USING`` clause follows the
            argument list, the arguments are turned into a plain ``exp.Transform``;
            otherwise an ``exp.QueryTransform`` is built from the arguments, the
            command script, the schema and the optional row formats and
            ``RECORDWRITER`` / ``RECORDREADER`` strings.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                # Not a call: step back so the keyword can be re-parsed by the caller.
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # The alias keyword before the schema is optional; the match result is ignored.
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            # Outside schema definitions, every CHAR/VARCHAR node in the parsed
            # type is replaced (in place) with a plain "text" type.
            if this and not schema:
                return this.transform(
                    lambda node: (
                        node.replace(exp.DataType.build("text"))
                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                        else node
                    ),
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            """Parse a partition list and an ordering.

            The partition list is parsed when either a PARTITION BY or a
            DISTRIBUTE BY token is matched (empty list otherwise). The ordering is
            delegated to the base ``_parse_order``; ``skip_order_token`` is set to
            the result of matching a SORT BY token.
            """
            return (
                (
                    self._parse_csv(self._parse_conjunction)
                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                    else []
                ),
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

        def _parse_parameter(self) -> exp.Parameter:
            """Parse a parameter body of the form ``name`` or ``name:expression``.

            Surrounding braces are consumed when present; their match results are
            ignored, so they are optional.
            """
            self._match(TokenType.L_BRACE)
            this = self._parse_identifier() or self._parse_primary_or_var()
            # Only parse the second part when a colon follows the name.
            expression = self._match(TokenType.COLON) and (
                self._parse_identifier() or self._parse_primary_or_var()
            )
            self._match(TokenType.R_BRACE)
            return self.expression(exp.Parameter, this=this, expression=expression)

    # Generator customizations layered on top of the base generator.Generator.
    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        SUPPORTS_TO_NUMBER = False
        WITH_PROPERTIES_PREFIX = "TBLPROPERTIES"

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        # Type enum -> type name emitted for Hive, on top of the base mapping.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.ROWVERSION: "BINARY",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.UTINYINT: "SMALLINT",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        # Expression type -> SQL rendering callable, on top of the base table.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            # CONCAT_WS takes the separator first, hence the swapped argument order.
            exp.ArrayToString: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: (
                f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)"
            ),
            exp.DiToDate: lambda self, e: (
                f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})"
            ),
            exp.FileFormatProperty: lambda self, e: (
                f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}"
            ),
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "GET_JSON_OBJECT", e.this, e.expression
            ),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda _, e: (
                "" if e.args.get("allow_null") else "NOT NULL"
            ),
            exp.VarMap: var_map_sql,
            exp.Create: preprocess(
                [
                    remove_unique_constraints,
                    ctas_with_tmp_tables_to_create_tmp_view,
                    move_schema_columns_to_partitioned_by,
                ]
            ),
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with '\\Q' via CONCAT before being passed to SPLIT.
            exp.Split: lambda self, e: self.func(
                "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
            ),
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: (
                f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)"
            ),
            exp.TsOrDsAdd: _add_date_sql,
            exp.TsOrDsDiff: _date_diff_sql,
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: (
                f"({self.expressions(e, 'this', indent=False)})"
            ),
            exp.NonClusteredColumnConstraint: lambda self, e: (
                f"({self.expressions(e, 'this', indent=False)})"
            ),
            # These constructs render to an empty string.
            exp.NotForReplicationColumnConstraint: lambda *_: "",
            exp.OnProperty: lambda *_: "",
            exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
            exp.ParseJSON: lambda self, e: self.sql(e.this),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
        }

        # Placement of properties in generated DDL; UNSUPPORTED marks properties
        # that have no Hive rendering.
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
            """Render a JSON path key; a wildcard key is reported as unsupported
            and rendered as an empty string."""
            if isinstance(expression.this, exp.JSONPathWildcard):
                self.unsupported("Unsupported wildcard in JSONPathKey expression")
                return ""

            return super()._jsonpathkey_sql(expression)

        def parameter_sql(self, expression: exp.Parameter) -> str:
            """Render a parameter as ``${this}`` or ``${this:expression}``.

            When the parameter is the direct child of an ``exp.EQ`` that itself
            sits under an ``exp.SetItem``, the bare ``this[:expression]`` text is
            returned without the ``${...}`` wrapper.
            """
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            """Render a schema after resetting every ``desc=False`` on nested
            ``exp.Ordered`` nodes to ``None``.

            Note: the ``Ordered`` nodes are modified in place.
            """
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            """Render ``CONSTRAINT <this> <expressions>``.

            Every ``exp.Properties`` node found under the constraint is popped
            from the tree (in place) before rendering.
            """
            # Materialize the search results first: popping while iterating the
            # live generator would mutate the tree being walked.
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ``ROW FORMAT SERDE <this>``, followed by the serde
            properties when they render to a non-empty string."""
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render an array aggregation as ``COLLECT_LIST(...)``.

            If the aggregated argument is wrapped in an ``exp.Order``, only the
            ordered operand is passed to ``COLLECT_LIST``.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Normalize a data type before delegating to the base generator.

            - A type in ``PARAMETERIZABLE_TEXT_TYPES`` with no parameters, or whose
              first parameter is named ``MAX``, is rebuilt as ``text``.
            - A ``TEXT`` type that carries parameters has its type switched to
              ``VARCHAR`` (the node is modified in place).
            - A temporal type is rebuilt from its bare type.
            - A ``float`` with a ``DataTypeParam`` becomes ``float`` when the
              parameter is at most 32 and ``double`` otherwise.
            """
            if expression.this in self.PARAMETERIZABLE_TEXT_TYPES and (
                not expression.expressions or expression.expressions[0].name == "MAX"
            ):
                expression = exp.DataType.build("text")
            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
                expression.set("this", exp.DataType.Type.VARCHAR)
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            """Render a version clause, removing the first ``"FOR "`` from the
            base generator's output."""
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct as a positional ``STRUCT(...)`` call.

            Fields given as ``exp.PropertyEQ`` (name/value pairs) are reported
            through ``self.unsupported`` and only their value side is kept.
            """
            values = []

            # The field position is not needed, so iterate directly.
            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    self.unsupported("Hive does not support named structs.")
                    values.append(e.expression)
                else:
                    values.append(e)

            return self.func("STRUCT", *values)

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render the ``SET ...`` part of an ALTER statement.

            Each optional piece contributes text only when it renders to a
            non-empty string; the output order is SERDE, expressions, LOCATION,
            FILEFORMAT, TAGS.
            """
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            location = self.sql(expression, "location")
            location = f" LOCATION {location}" if location else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" FILEFORMAT {file_format}" if file_format else ""
            serde = self.sql(expression, "serde")
            serde = f" SERDE {serde}" if serde else ""
            tags = self.expressions(expression, key="tag", flat=True, sep="")
            tags = f" TAGS {tags}" if tags else ""

            return f"SET{serde}{exprs}{location}{file_format}{tags}"

        def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str:
            """Render ``SERDEPROPERTIES (...)``, prefixed by ``WITH `` when the
            expression's ``with`` arg is truthy."""
            prefix = "WITH " if expression.args.get("with") else ""
            exprs = self.expressions(expression, flat=True)

            return f"{prefix}SERDEPROPERTIES ({exprs})"

ALIAS_POST_TABLESAMPLE = True

Whether the table alias comes after tablesample.

IDENTIFIERS_CAN_START_WITH_DIGIT = True

Whether an unquoted identifier can start with a digit.

SUPPORTS_USER_DEFINED_TYPES = False

Whether user-defined data types are supported.

SAFE_DIVISION = True

Whether division by zero throws an error (False) or returns NULL (True).

NORMALIZATION_STRATEGY = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>

Specifies the strategy according to which identifiers should be normalized.

TIME_MAPPING: Dict[str, str] = {'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}

Associates this dialect's time formats with their equivalent Python strftime formats.

DATE_FORMAT = "'yyyy-MM-dd'"

DATEINT_FORMAT = "'yyyyMMdd'"

TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

UNESCAPED_SEQUENCES: Dict[str, str] = {'\\a': '\x07', '\\b': '\x08', '\\f': '\x0c', '\\n': '\n', '\\r': '\r', '\\t': '\t', '\\v': '\x0b', '\\\\': '\\'}

Mapping of an escaped sequence (`\n`, i.e. a backslash followed by `n`) to its unescaped version (the actual newline character).

tokenizer_class = <class 'Hive.Tokenizer'>

parser_class = <class 'Hive.Parser'>

generator_class = <class 'Hive.Generator'>

TIME_TRIE: Dict = {'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}

FORMAT_TRIE: Dict = {'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}

INVERSE_TIME_MAPPING: Dict[str, str] = {'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}

INVERSE_TIME_TRIE: Dict = {'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}

ESCAPED_SEQUENCES: Dict[str, str] = {'\x07': '\\a', '\x08': '\\b', '\x0c': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t', '\x0b': '\\v', '\\': '\\\\'}

QUOTE_START = "'"

QUOTE_END = "'"

IDENTIFIER_START = '`'

IDENTIFIER_END = '`'

BIT_START: Optional[str] = None

BIT_END: Optional[str] = None

HEX_START: Optional[str] = None

HEX_END: Optional[str] = None

BYTE_START: Optional[str] = None

BYTE_END: Optional[str] = None

UNICODE_START: Optional[str] = None

UNICODE_END: Optional[str] = None

Inherited Members

sqlglot.dialects.dialect.Dialect: Dialect; INDEX_OFFSET; WEEK_OFFSET; UNNEST_COLUMN_ONLY; TABLESAMPLE_SIZE_IS_PERCENT; DPIPE_IS_STRING_CONCAT; STRICT_STRING_CONCAT; SUPPORTS_SEMI_ANTI_JOIN; NORMALIZE_FUNCTIONS; LOG_BASE_FIRST; NULL_ORDERING; TYPED_DIVISION; CONCAT_COALESCE; HEX_LOWERCASE; FORMAT_MAPPING; PSEUDOCOLUMNS; PREFER_CTE_ALIAS_COLUMN; COPY_PARAMS_ARE_CSV; get_or_raise; format_time; normalize_identifier; case_sensitive; can_identify; quote_identifier; to_json_path; parse; parse_into; generate; transpile; tokenize; tokenizer; parser; generator

class Hive.Tokenizer(sqlglot.tokens.Tokenizer): View Source

236    class Tokenizer(tokens.Tokenizer):
           # Hive tokenizer settings layered on top of the base tokens.Tokenizer.
           # Quote, identifier and escape characters recognised by this tokenizer.
237        QUOTES = ["'", '"']
238        IDENTIFIERS = ["`"]
239        STRING_ESCAPES = ["\\"]
240
           # Base single-character tokens plus "$", which is tokenized as PARAMETER.
241        SINGLE_TOKENS = {
242            **tokens.Tokenizer.SINGLE_TOKENS,
243            "$": TokenType.PARAMETER,
244        }
245
           # Base keywords plus Hive additions; the ADD ... and MSCK REPAIR
           # statements are tokenized as generic COMMAND tokens.
246        KEYWORDS = {
247            **tokens.Tokenizer.KEYWORDS,
248            "ADD ARCHIVE": TokenType.COMMAND,
249            "ADD ARCHIVES": TokenType.COMMAND,
250            "ADD FILE": TokenType.COMMAND,
251            "ADD FILES": TokenType.COMMAND,
252            "ADD JAR": TokenType.COMMAND,
253            "ADD JARS": TokenType.COMMAND,
254            "MSCK REPAIR": TokenType.COMMAND,
255            "REFRESH": TokenType.REFRESH,
256            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
257            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
258            "SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
259        }
260
           # Short marker -> type name.
           # NOTE(review): presumably these are numeric literal suffixes (e.g. 10L);
           # confirm against how tokens.Tokenizer consumes NUMERIC_LITERALS.
261        NUMERIC_LITERALS = {
262            "L": "BIGINT",
263            "S": "SMALLINT",
264            "Y": "TINYINT",
265            "D": "DOUBLE",
266            "F": "FLOAT",
267            "BD": "DECIMAL",
268        }

QUOTES = ["'", '"']

IDENTIFIERS = ['`']

STRING_ESCAPES = ['\\']

SINGLE_TOKENS = {'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, '#': <TokenType.HASH: 'HASH'>, "'": <TokenType.UNKNOWN: 'UNKNOWN'>, '`': <TokenType.UNKNOWN: 'UNKNOWN'>, '"': <TokenType.UNKNOWN: 'UNKNOWN'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}

KEYWORDS = {'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': 
<TokenType.CONSTRAINT: 'CONSTRAINT'>, 'COPY': <TokenType.COPY: 'COPY'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ENUM': <TokenType.ENUM: 'ENUM'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 
'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 
'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'TRUNCATE': <TokenType.TRUNCATE: 'TRUNCATE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 
'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'UINT': <TokenType.UINT: 'UINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'JSONB': <TokenType.JSONB: 'JSONB'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 
'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'TIMESTAMP_LTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'TIMESTAMPNTZ': <TokenType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>, 'TIMESTAMP_NTZ': <TokenType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'SEQUENCE': <TokenType.SEQUENCE: 'SEQUENCE'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 
'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}

NUMERIC_LITERALS = {'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}

Inherited Members

sqlglot.tokens.Tokenizer: Tokenizer; BIT_STRINGS; BYTE_STRINGS; HEX_STRINGS; RAW_STRINGS; HEREDOC_STRINGS; UNICODE_STRINGS; IDENTIFIER_ESCAPES; VAR_SINGLE_TOKENS; HEREDOC_TAG_IS_IDENTIFIER; HEREDOC_STRING_ALTERNATIVE; WHITE_SPACE; COMMANDS; COMMAND_PREFIX_TOKENS; COMMENTS; dialect; reset; tokenize; tokenize_rs; size; sql; tokens

class Hive.Parser(sqlglot.parser.Parser): View Source

class Parser(parser.Parser):
    """Parser for the Hive dialect.

    Extends the base parser's function builders, adds a paren-less
    ``TRANSFORM`` entry and a ``SERDEPROPERTIES`` property parser, and
    overrides a handful of parsing hooks (types, partition/order clauses and
    parameters).
    """

    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False
    VALUES_FOLLOWED_BY_PAREN = False

    # Builders keyed by upper-case function name; entries listed here take
    # precedence over the ones inherited from the base parser.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
        # DATE_ADD is always built with an explicit DAY unit.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            unit=exp.Literal.string("DAY"),
        ),
        # The first argument is wrapped in TimeStrToTime before formatting.
        "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
            [exp.TimeStrToTime(this=seq_get(args, 0)), seq_get(args, 1)]
        ),
        # DATE_SUB is represented as adding the amount multiplied by -1.
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(
                this=seq_get(args, 1),
                expression=exp.Literal.number(-1),
            ),
            unit=exp.Literal.string("DAY"),
        ),
        # Both operands are wrapped in TsOrDsToDate.
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FIRST": _build_with_ignore_nulls(exp.First),
        "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
        "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LAST": _build_with_ignore_nulls(exp.Last),
        "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
        "LOCATE": locate_to_strposition,
        "MAP": parser.build_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            group=seq_get(args, 2),
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        # Missing delimiters fall back to "," (pairs) and ":" (key/value).
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        # With no arguments, CurrentTimestamp() is supplied as the operand.
        "UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)(
            args or [exp.CurrentTimestamp()]
        ),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        """Parse what follows a ``TRANSFORM`` keyword.

        Returns:
            ``None`` (after stepping back one token) when no opening
            parenthesis follows; an ``exp.Transform`` built from the
            parenthesised arguments when no ``USING`` token follows them;
            otherwise an ``exp.QueryTransform`` carrying the arguments, the
            command script, the schema and the optional row formats and
            record writer/reader.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Step back one token before giving up on this parse.
            self._retreat(self._index - 1)
            return None

        transform_args = self._parse_wrapped_csv(self._parse_lambda)
        input_row_format = self._parse_row_format(match_row=True)
        writer = self._parse_string() if self._match_text_seq("RECORDWRITER") else None

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(transform_args)

        script = self._parse_string()

        # The alias token in front of the schema is optional.
        self._match(TokenType.ALIAS)
        output_schema = self._parse_schema()

        output_row_format = self._parse_row_format(match_row=True)
        reader = self._parse_string() if self._match_text_seq("RECORDREADER") else None

        return self.expression(
            exp.QueryTransform,
            expressions=transform_args,
            command_script=script,
            schema=output_schema,
            row_format_before=input_row_format,
            record_writer=writer,
            row_format_after=output_row_format,
            record_reader=reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a type, replacing CHAR/VARCHAR with TEXT outside of schemas.

        Spark (and most likely Hive) treats casts to CHAR(length) and
        VARCHAR(length) as casts to STRING in every context except schema
        definitions. For example, in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        The value is not truncated to '12', unlike in other dialects (e.g.
        postgres), so the length has to be dropped for the result to
        transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        parsed = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        # Nothing parsed, or a schema definition: hand the result back untouched.
        if not parsed or schema:
            return parsed

        def _char_to_text(node):
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        return parsed.transform(_char_to_text, copy=False)

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse optional partition and order clauses.

        PARTITION_BY and DISTRIBUTE_BY tokens both introduce the partition
        list; a SORT_BY token, when matched, is passed to the base order
        parser as ``skip_order_token``.

        Returns:
            A ``(partition_expressions, order)`` pair; the list is empty when
            no partition token was matched.
        """
        if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}):
            partition = self._parse_csv(self._parse_conjunction)
        else:
            partition = []

        order = super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY))
        return partition, order

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter, optionally wrapped in braces.

        A second part is parsed only when a colon follows the first one;
        both the opening and the closing brace are optional matches.
        """
        self._match(TokenType.L_BRACE)
        name = self._parse_identifier() or self._parse_primary_or_var()
        suffix = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=name, expression=suffix)

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:

error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3

LOG_DEFAULTS_TO_LN = True

STRICT_CAST = False

VALUES_FOLLOWED_BY_PAREN = False

FUNCTIONS = {'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ADD_MONTHS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AddMonths'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONSTRUCT_COMPACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConstructCompact'>>, 'ARRAY_CONTAINS': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'ARRAY_HAS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'ARRAY_CONTAINS_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContainsAll'>>, 'ARRAY_HAS_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContainsAll'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_OVERLAPS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayOverlaps'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_TO_STRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayToString'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayToString'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CBRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cbrt'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'CONNECT_BY_ROOT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConnectByRoot'>>, 'CONVERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Convert'>>, 'CORR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Corr'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COVAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CovarPop'>>, 'COVAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CovarSamp'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <function _build_with_ignore_nulls.<locals>._parse>, 'FIRST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_DATE_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateDateArray'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <function build_hex>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'IIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <function build_extract_json_with_path.<locals>._builder>, 'JSON_EXTRACT_SCALAR': <function build_extract_json_with_path.<locals>._builder>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lag'>>, 'LAST': <function _build_with_ignore_nulls.<locals>._parse>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'LEAD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lead'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <function build_logarithm>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <function build_lower>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LOWER_HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LowerHex'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function build_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NTH_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NthValue'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'QUARTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quarter'>>, 'RAND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SIGN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sign'>>, 'SIGNUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sign'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRING_TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StringToArray'>>, 'SPLIT_BY_STRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StringToArray'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 
'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMPDIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TO_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToMap'>>, 'TO_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToNumber'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Try'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'TS_OR_DS_TO_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTimestamp'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <function build_upper>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function build_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'JSON_EXTRACT_PATH_TEXT': <function build_extract_json_with_path.<locals>._builder>, 'LIKE': <function build_like>, 'LOG2': <function Parser.<lambda>>, 'LOG10': <function Parser.<lambda>>, 'MOD': <function build_mod>, 'TO_HEX': <function build_hex>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function build_formatted_time.<locals>._builder>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function build_formatted_time.<locals>._builder>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function Hive.Parser.<lambda>>}

NO_PAREN_FUNCTION_PARSERS = {'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}

PROPERTY_PARSERS = {'ALLOWED_VALUES': <function Parser.<lambda>>, 'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BACKUP': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DATA_DELETION': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'GLOBAL': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'ICEBERG': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function 
Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'STRICT': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SHARING': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'UNLOGGED': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'SERDEPROPERTIES': <function Hive.Parser.<lambda>>}

SHOW_TRIE: Dict = {}

SET_TRIE: Dict = {'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}

Inherited Members

sqlglot.parser.Parser: Parser; NO_PAREN_FUNCTIONS; STRUCT_TYPE_TOKENS; NESTED_TYPE_TOKENS; ENUM_TYPE_TOKENS; AGGREGATE_TYPE_TOKENS; TYPE_TOKENS; SIGNED_TO_UNSIGNED_TYPE_TOKEN; SUBQUERY_PREDICATES; RESERVED_TOKENS; DB_CREATABLES; CREATABLES; ID_VAR_TOKENS; INTERVAL_VARS; TABLE_ALIAS_TOKENS; ALIAS_TOKENS; COMMENT_TABLE_ALIAS_TOKENS; UPDATE_ALIAS_TOKENS; TRIM_TYPES; FUNC_TOKENS; CONJUNCTION; EQUALITY; COMPARISON; BITWISE; TERM; FACTOR; EXPONENT; TIMES; TIMESTAMPS; SET_OPERATIONS; JOIN_METHODS; JOIN_SIDES; JOIN_KINDS; JOIN_HINTS; LAMBDAS; COLUMN_OPERATORS; EXPRESSION_PARSERS; STATEMENT_PARSERS; UNARY_PARSERS; STRING_PARSERS; NUMERIC_PARSERS; PRIMARY_PARSERS; PLACEHOLDER_PARSERS; RANGE_PARSERS; CONSTRAINT_PARSERS; ALTER_PARSERS; ALTER_ALTER_PARSERS; SCHEMA_UNNAMED_CONSTRAINTS; INVALID_FUNC_NAME_TOKENS; FUNCTIONS_WITH_ALIASED_ARGS; KEY_VALUE_DEFINITIONS; FUNCTION_PARSERS; QUERY_MODIFIER_PARSERS; SET_PARSERS; SHOW_PARSERS; TYPE_LITERAL_PARSERS; TYPE_CONVERTER; DDL_SELECT_TOKENS; PRE_VOLATILE_TOKENS; TRANSACTION_KIND; TRANSACTION_CHARACTERISTICS; CONFLICT_ACTIONS; CREATE_SEQUENCE; ISOLATED_LOADING_OPTIONS; USABLES; CAST_ACTIONS; INSERT_ALTERNATIVES; CLONE_KEYWORDS; HISTORICAL_DATA_KIND; OPCLASS_FOLLOW_KEYWORDS; OPTYPE_FOLLOW_TOKENS; TABLE_INDEX_HINT_TOKENS; VIEW_ATTRIBUTES; WINDOW_ALIAS_TOKENS; WINDOW_BEFORE_PAREN_TOKENS; WINDOW_SIDES; JSON_KEY_VALUE_SEPARATOR_TOKENS; FETCH_TOKENS; ADD_CONSTRAINT_TOKENS; DISTINCT_TOKENS; NULL_TOKENS; UNNEST_OFFSET_ALIAS_TOKENS; SELECT_START_TOKENS; PREFIXED_PIVOT_COLUMNS; IDENTIFY_PIVOT_STRINGS; ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN; TABLESAMPLE_CSV; DEFAULT_SAMPLING_METHOD; SET_REQUIRES_ASSIGNMENT_DELIMITER; TRIM_PATTERN_FIRST; STRING_ALIASES; MODIFIERS_ATTACHED_TO_UNION; UNION_MODIFIERS; NO_PAREN_IF_COMMANDS; JSON_ARROWS_REQUIRE_JSON_TYPE; COLON_IS_JSON_EXTRACT; SUPPORTS_IMPLICIT_UNNEST; INTERVAL_SPANS; SUPPORTS_PARTITION_SELECTION; error_level; error_message_context; max_errors; dialect; reset; parse; parse_into; check_errors; 
raise_error; expression; validate_expression; errors; sql