Edit on GitHub

sqlglot.dialects.dialect

  1from __future__ import annotations
  2
  3import typing as t
  4from enum import Enum
  5
  6from sqlglot import exp
  7from sqlglot.generator import Generator
  8from sqlglot.helper import flatten, seq_get
  9from sqlglot.parser import Parser
 10from sqlglot.time import format_time
 11from sqlglot.tokens import Tokenizer
 12from sqlglot.trie import new_trie
 13
 14E = t.TypeVar("E", bound=exp.Expression)
 15
 16
 17class Dialects(str, Enum):
 18    DIALECT = ""
 19
 20    BIGQUERY = "bigquery"
 21    CLICKHOUSE = "clickhouse"
 22    DUCKDB = "duckdb"
 23    HIVE = "hive"
 24    MYSQL = "mysql"
 25    ORACLE = "oracle"
 26    POSTGRES = "postgres"
 27    PRESTO = "presto"
 28    REDSHIFT = "redshift"
 29    SNOWFLAKE = "snowflake"
 30    SPARK = "spark"
 31    SQLITE = "sqlite"
 32    STARROCKS = "starrocks"
 33    TABLEAU = "tableau"
 34    TRINO = "trino"
 35    TSQL = "tsql"
 36    DATABRICKS = "databricks"
 37    DRILL = "drill"
 38    TERADATA = "teradata"
 39
 40
 41class _Dialect(type):
 42    classes: t.Dict[str, t.Type[Dialect]] = {}
 43
 44    @classmethod
 45    def __getitem__(cls, key: str) -> t.Type[Dialect]:
 46        return cls.classes[key]
 47
 48    @classmethod
 49    def get(
 50        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
 51    ) -> t.Optional[t.Type[Dialect]]:
 52        return cls.classes.get(key, default)
 53
 54    def __new__(cls, clsname, bases, attrs):
 55        klass = super().__new__(cls, clsname, bases, attrs)
 56        enum = Dialects.__members__.get(clsname.upper())
 57        cls.classes[enum.value if enum is not None else clsname.lower()] = klass
 58
 59        klass.time_trie = new_trie(klass.time_mapping)
 60        klass.inverse_time_mapping = {v: k for k, v in klass.time_mapping.items()}
 61        klass.inverse_time_trie = new_trie(klass.inverse_time_mapping)
 62
 63        klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer)
 64        klass.parser_class = getattr(klass, "Parser", Parser)
 65        klass.generator_class = getattr(klass, "Generator", Generator)
 66
 67        klass.quote_start, klass.quote_end = list(klass.tokenizer_class._QUOTES.items())[0]
 68        klass.identifier_start, klass.identifier_end = list(
 69            klass.tokenizer_class._IDENTIFIERS.items()
 70        )[0]
 71
 72        if (
 73            klass.tokenizer_class._BIT_STRINGS
 74            and exp.BitString not in klass.generator_class.TRANSFORMS
 75        ):
 76            bs_start, bs_end = list(klass.tokenizer_class._BIT_STRINGS.items())[0]
 77            klass.generator_class.TRANSFORMS[
 78                exp.BitString
 79            ] = lambda self, e: f"{bs_start}{int(self.sql(e, 'this')):b}{bs_end}"
 80        if (
 81            klass.tokenizer_class._HEX_STRINGS
 82            and exp.HexString not in klass.generator_class.TRANSFORMS
 83        ):
 84            hs_start, hs_end = list(klass.tokenizer_class._HEX_STRINGS.items())[0]
 85            klass.generator_class.TRANSFORMS[
 86                exp.HexString
 87            ] = lambda self, e: f"{hs_start}{int(self.sql(e, 'this')):X}{hs_end}"
 88        if (
 89            klass.tokenizer_class._BYTE_STRINGS
 90            and exp.ByteString not in klass.generator_class.TRANSFORMS
 91        ):
 92            be_start, be_end = list(klass.tokenizer_class._BYTE_STRINGS.items())[0]
 93            klass.generator_class.TRANSFORMS[
 94                exp.ByteString
 95            ] = lambda self, e: f"{be_start}{self.sql(e, 'this')}{be_end}"
 96
 97        return klass
 98
 99
class Dialect(metaclass=_Dialect):
    """Base class describing how a SQL dialect is tokenized, parsed and generated.

    Subclasses override the class-level knobs below (and optionally nest their
    own ``Tokenizer``/``Parser``/``Generator`` classes); the ``_Dialect``
    metaclass registers each subclass and autofills the derived attributes.
    """

    # Parsing / generation behavior knobs.
    index_offset = 0
    unnest_column_only = False
    alias_post_tablesample = False
    normalize_functions: t.Optional[str] = "upper"
    null_ordering = "nulls_are_small"

    # Default date/time literal formats.
    date_format = "'%Y-%m-%d'"
    dateint_format = "'%Y%m%d'"
    time_format = "'%Y-%m-%d %H:%M:%S'"
    time_mapping: t.Dict[str, str] = {}

    # Autofilled by the _Dialect metaclass.
    quote_start = None
    quote_end = None
    identifier_start = None
    identifier_end = None

    time_trie = None
    inverse_time_mapping = None
    inverse_time_trie = None
    tokenizer_class = None
    parser_class = None
    generator_class = None

    @classmethod
    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
        """Resolve *dialect* (name string, class or instance) to a Dialect class.

        Raises:
            ValueError: if *dialect* is a string not found in the registry.
        """
        if not dialect:
            return cls
        if isinstance(dialect, _Dialect):
            return dialect
        if isinstance(dialect, Dialect):
            return dialect.__class__

        result = cls.get(dialect)
        if not result:
            raise ValueError(f"Unknown dialect '{dialect}'")
        return result

    @classmethod
    def format_time(
        cls, expression: t.Optional[str | exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Map a time-format string or string literal through this dialect's
        time mapping, returning a string literal (or *expression* unchanged)."""
        if isinstance(expression, str):
            # The time formats arrive quoted; strip the surrounding quotes.
            return exp.Literal.string(
                format_time(expression[1:-1], cls.time_mapping, cls.time_trie)
            )
        if expression and expression.is_string:
            return exp.Literal.string(
                format_time(expression.this, cls.time_mapping, cls.time_trie)
            )
        return expression

    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
        """Tokenize and parse *sql* into a list of expression trees."""
        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)

    def parse_into(
        self, expression_type: exp.IntoType, sql: str, **opts
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse *sql*, coercing the result into *expression_type*."""
        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)

    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
        """Render an expression tree back into SQL for this dialect."""
        return self.generator(**opts).generate(expression)

    def transpile(self, sql: str, **opts) -> t.List[str]:
        """Parse *sql* and regenerate each statement in this dialect."""
        return [self.generate(expression, **opts) for expression in self.parse(sql)]

    @property
    def tokenizer(self) -> Tokenizer:
        # Lazily instantiate and cache a tokenizer per dialect instance.
        try:
            return self._tokenizer
        except AttributeError:
            self._tokenizer = self.tokenizer_class()  # type: ignore
            return self._tokenizer

    def parser(self, **opts) -> Parser:
        """Build a parser configured with this dialect's settings; *opts* win."""
        settings = {
            "index_offset": self.index_offset,
            "unnest_column_only": self.unnest_column_only,
            "alias_post_tablesample": self.alias_post_tablesample,
            "null_ordering": self.null_ordering,
        }
        return self.parser_class(**{**settings, **opts})  # type: ignore

    def generator(self, **opts) -> Generator:
        """Build a generator configured with this dialect's settings; *opts* win."""
        settings = {
            "quote_start": self.quote_start,
            "quote_end": self.quote_end,
            "identifier_start": self.identifier_start,
            "identifier_end": self.identifier_end,
            "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
            "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
            "index_offset": self.index_offset,
            "time_mapping": self.inverse_time_mapping,
            "time_trie": self.inverse_time_trie,
            "unnest_column_only": self.unnest_column_only,
            "alias_post_tablesample": self.alias_post_tablesample,
            "normalize_functions": self.normalize_functions,
            "null_ordering": self.null_ordering,
        }
        return self.generator_class(**{**settings, **opts})  # type: ignore
212
213
214DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
215
216
def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
    """Return a generator transform that renders an expression as a call to *name*,
    forwarding all of the expression's argument values."""

    def _rename(self, expression):
        flattened = flatten(expression.args.values())
        return f"{self.normalize_func(name)}({self.format_args(*flattened)})"

    return _rename
223
224
def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
    """Render APPROX_DISTINCT as APPROX_COUNT_DISTINCT, warning when an
    accuracy argument is present (it cannot be expressed and is dropped)."""
    if expression.args.get("accuracy"):
        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")

    return f"APPROX_COUNT_DISTINCT({self.format_args(expression.this)})"
229
230
def if_sql(self: Generator, expression: exp.If) -> str:
    """Render an If expression as an IF(condition, true, false) call."""
    args = self.format_args(
        expression.this,
        expression.args.get("true"),
        expression.args.get("false"),
    )
    return f"IF({args})"
236
237
def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
    """Render JSON(B) extraction with the arrow operator (->)."""
    return self.binary(expression, "->")
240
241
def arrow_json_extract_scalar_sql(
    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
) -> str:
    """Render scalar JSON(B) extraction with the double-arrow operator (->>)."""
    return self.binary(expression, "->>")
246
247
def inline_array_sql(self: Generator, expression: exp.Array) -> str:
    """Render an Array as an inline bracketed literal: [e1, e2, ...]."""
    return f"[{self.expressions(expression)}]"
250
251
def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
    """Emulate ILIKE for dialects without it: LOWER(this) LIKE pattern.

    NOTE(review): only the left side is lowered here; the pattern is passed
    through unchanged — confirm callers rely on that behavior.
    """
    lowered = exp.Lower(this=expression.this)
    like = exp.Like(this=lowered, expression=expression.args["expression"])
    return self.like_sql(like)
259
260
def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE without parentheses, with an optional time zone."""
    zone = self.sql(expression, "this")
    if zone:
        return f"CURRENT_DATE AT TIME ZONE {zone}"
    return "CURRENT_DATE"
264
265
def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
    """Strip the RECURSIVE flag (with a warning) for dialects without
    recursive CTE support, then render the WITH clause normally."""
    if expression.args.get("recursive"):
        self.unsupported("Recursive CTEs are unsupported")
        # Mutate in place so the generated WITH clause omits RECURSIVE.
        expression.args["recursive"] = False
    return self.with_sql(expression)
271
272
def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
    """Emulate SAFE_DIVIDE with IF: NULL when the denominator is zero."""
    numerator = self.sql(expression, "this")
    denominator = self.sql(expression, "expression")
    return f"IF({denominator} <> 0, {numerator} / {denominator}, NULL)"
277
278
def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
    """Drop TABLESAMPLE (with a warning) and render only the sampled table."""
    self.unsupported("TABLESAMPLE unsupported")
    return self.sql(expression.this)
282
283
def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
    """Warn that PIVOT is unsupported, then render the expression anyway.

    NOTE(review): this renders the Pivot node itself (not ``expression.this``);
    if the generator routes Pivot back through this transform it could recurse —
    verify against the Generator's dispatch.
    """
    self.unsupported("PIVOT unsupported")
    return self.sql(expression)
287
288
def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
    """Render TRY_CAST as a plain CAST for dialects without TRY_CAST."""
    return self.cast_sql(expression)
291
292
def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
    """Warn and emit nothing for dialects that support no table properties."""
    self.unsupported("Properties unsupported")
    return ""
296
297
def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as STRPOS, emulating the optional start position
    by searching a substring and re-offsetting the result."""
    this = self.sql(expression, "this")
    substr = self.sql(expression, "substr")
    position = self.sql(expression, "position")

    if not position:
        return f"STRPOS({this}, {substr})"
    # Search from `position` onward, then shift the index back to the
    # coordinates of the full string (1-based).
    return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
305
306
def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
    """Render struct member access as dotted notation with a quoted key."""
    this = self.sql(expression, "this")
    key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
    return f"{this}.{key}"
311
312
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a Map/VarMap as MAP(k1, v1, k2, v2, ...) with interleaved
    key/value arguments; falls back to MAP(keys, values) with a warning when
    the keys/values are not literal arrays."""
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return f"{map_func_name}({self.format_args(keys, values)})"

    # Interleave key1, value1, key2, value2, ...
    interleaved = [
        self.sql(item)
        for pair in zip(keys.expressions, values.expressions)
        for item in pair
    ]
    return f"{map_func_name}({self.format_args(*interleaved)})"
328
329
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        # Use the explicit format argument when given; otherwise fall back to
        # the dialect's time format (default=True) or the supplied default.
        fallback = Dialect[dialect].time_format if default is True else default or None
        fmt = seq_get(args, 1) or fallback
        return exp_class(this=seq_get(args, 0), format=Dialect[dialect].format_time(fmt))

    return _format_time
354
355
def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's
    schema. When the PARTITIONED BY value is an array of column names, they are
    transformed into a schema, and the corresponding columns are removed from the
    create statement.
    """
    if isinstance(expression.this, exp.Schema) and expression.args.get("kind") in (
        "TABLE",
        "VIEW",
    ):
        # Work on a copy so the caller's tree is not mutated.
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        partition_value = prop and prop.this
        if prop and not isinstance(partition_value, exp.Schema):
            schema = expression.this
            partition_names = {v.name.upper() for v in partition_value.expressions}
            partitions = [col for col in schema.expressions if col.name.upper() in partition_names]
            # Move the partition columns out of the table schema into the property.
            schema.set("expressions", [e for e in schema.expressions if e not in partitions])
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partitions)))
            expression.set("this", schema)

    return self.create_sql(expression)
378
379
def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date-delta functions (e.g. DATE_ADD/DATEDIFF).

    Two calling conventions are supported:
      - 3 args: (unit, expression, this)
      - 2 args: (this, expression) with the unit defaulting to DAY

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional lowercase-unit-name -> canonical-unit remapping.

    Returns:
        A callable turning an argument sequence into an *exp_class* node.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The middle argument is the delta in both conventions; the original
        # `seq_get(args, 1) if unit_based else seq_get(args, 1)` was a dead
        # conditional with identical branches.
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
392
393
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE-style arguments (substr, this[, position]) into a
    StrPosition node, which uses (this, substr[, position]) order."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )
400
401
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as LOCATE(substr, this[, position])."""
    formatted = self.format_args(
        expression.args.get("substr"),
        expression.this,
        expression.args.get("position"),
    )
    return f"LOCATE({formatted})"
407
408
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as a CAST to TIMESTAMP."""
    return f"CAST({self.sql(expression, 'this')} AS TIMESTAMP)"
411
412
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as a CAST to DATE."""
    return f"CAST({self.sql(expression, 'this')} AS DATE)"
415
416
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render a Trim expression using the verbose
    TRIM([position] [chars] [FROM] target [COLLATE ...]) syntax, deferring to
    the generator's plain TRIM/LTRIM/RTRIM rendering when there is nothing
    dialect-specific to express."""
    target = self.sql(expression, "this")
    trim_type = self.sql(expression, "position")
    remove_chars = self.sql(expression, "expression")
    collation = self.sql(expression, "collation")

    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific.
    if not remove_chars and not collation:
        return self.trim_sql(expression)

    trim_type = f"{trim_type} " if trim_type else ""
    remove_chars = f"{remove_chars} " if remove_chars else ""
    from_part = "FROM " if trim_type or remove_chars else ""
    collation = f" COLLATE {collation}" if collation else ""
    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
class Dialects(builtins.str, enum.Enum):
18class Dialects(str, Enum):
19    DIALECT = ""
20
21    BIGQUERY = "bigquery"
22    CLICKHOUSE = "clickhouse"
23    DUCKDB = "duckdb"
24    HIVE = "hive"
25    MYSQL = "mysql"
26    ORACLE = "oracle"
27    POSTGRES = "postgres"
28    PRESTO = "presto"
29    REDSHIFT = "redshift"
30    SNOWFLAKE = "snowflake"
31    SPARK = "spark"
32    SQLITE = "sqlite"
33    STARROCKS = "starrocks"
34    TABLEAU = "tableau"
35    TRINO = "trino"
36    TSQL = "tsql"
37    DATABRICKS = "databricks"
38    DRILL = "drill"
39    TERADATA = "teradata"

An enumeration.

DIALECT = <Dialects.DIALECT: ''>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DRILL = <Dialects.DRILL: 'drill'>
TERADATA = <Dialects.TERADATA: 'teradata'>
Inherited Members
enum.Enum
name
value
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
class Dialect:
101class Dialect(metaclass=_Dialect):
102    index_offset = 0
103    unnest_column_only = False
104    alias_post_tablesample = False
105    normalize_functions: t.Optional[str] = "upper"
106    null_ordering = "nulls_are_small"
107
108    date_format = "'%Y-%m-%d'"
109    dateint_format = "'%Y%m%d'"
110    time_format = "'%Y-%m-%d %H:%M:%S'"
111    time_mapping: t.Dict[str, str] = {}
112
113    # autofilled
114    quote_start = None
115    quote_end = None
116    identifier_start = None
117    identifier_end = None
118
119    time_trie = None
120    inverse_time_mapping = None
121    inverse_time_trie = None
122    tokenizer_class = None
123    parser_class = None
124    generator_class = None
125
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
140
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
162
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
165
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
170
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
173
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
176
177    @property
178    def tokenizer(self) -> Tokenizer:
179        if not hasattr(self, "_tokenizer"):
180            self._tokenizer = self.tokenizer_class()  # type: ignore
181        return self._tokenizer
182
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
193
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
Dialect()
@classmethod
def get_or_raise( cls, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType]) -> Type[sqlglot.dialects.dialect.Dialect]:
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
def generate( self, expression: Optional[sqlglot.expressions.Expression], **opts) -> str:
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
def transpile(self, sql: str, **opts) -> List[str]:
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
def parser(self, **opts) -> sqlglot.parser.Parser:
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
def generator(self, **opts) -> sqlglot.generator.Generator:
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
218def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
219    def _rename(self, expression):
220        args = flatten(expression.args.values())
221        return f"{self.normalize_func(name)}({self.format_args(*args)})"
222
223    return _rename
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
226def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
227    if expression.args.get("accuracy"):
228        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
229    return f"APPROX_COUNT_DISTINCT({self.format_args(expression.this)})"
def if_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.If) -> str:
232def if_sql(self: Generator, expression: exp.If) -> str:
233    expressions = self.format_args(
234        expression.this, expression.args.get("true"), expression.args.get("false")
235    )
236    return f"IF({expressions})"
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtract | sqlglot.expressions.JSONBExtract) -> str:
239def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
240    return self.binary(expression, "->")
def arrow_json_extract_scalar_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtractScalar | sqlglot.expressions.JSONBExtractScalar) -> str:
243def arrow_json_extract_scalar_sql(
244    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
245) -> str:
246    return self.binary(expression, "->>")
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
249def inline_array_sql(self: Generator, expression: exp.Array) -> str:
250    return f"[{self.expressions(expression)}]"
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
253def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
254    return self.like_sql(
255        exp.Like(
256            this=exp.Lower(this=expression.this),
257            expression=expression.args["expression"],
258        )
259    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
262def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
263    zone = self.sql(expression, "this")
264    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
267def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
268    if expression.args.get("recursive"):
269        self.unsupported("Recursive CTEs are unsupported")
270        expression.args["recursive"] = False
271    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
274def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
275    n = self.sql(expression, "this")
276    d = self.sql(expression, "expression")
277    return f"IF({d} <> 0, {n} / {d}, NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
280def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
281    self.unsupported("TABLESAMPLE unsupported")
282    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
285def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
286    self.unsupported("PIVOT unsupported")
287    return self.sql(expression)
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
290def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
291    return self.cast_sql(expression)
def no_properties_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Properties) -> str:
294def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
295    self.unsupported("Properties unsupported")
296    return ""
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
299def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
300    this = self.sql(expression, "this")
301    substr = self.sql(expression, "substr")
302    position = self.sql(expression, "position")
303    if position:
304        return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
305    return f"STRPOS({this}, {substr})"
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
308def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
309    this = self.sql(expression, "this")
310    struct_key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
311    return f"{this}.{struct_key}"
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a map built from key/value arrays as MAP(k1, v1, k2, v2, ...).

    If either side is not an array literal (e.g. a column reference), the
    arrays cannot be interleaved, so they are passed through as-is with a
    warning.
    """
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return f"{map_func_name}({self.format_args(keys, values)})"

    interleaved = []
    for pair in zip(keys.expressions, values.expressions):
        interleaved.extend(self.sql(item) for item in pair)
    return f"{map_func_name}({self.format_args(*interleaved)})"
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        # Use the explicit format argument when present; otherwise fall back to
        # the dialect's time format (default is True) or the given default.
        fmt = seq_get(args, 1)
        if not fmt:
            fmt = Dialect[dialect].time_format if default is True else default or None
        return exp_class(this=seq_get(args, 0), format=Dialect[dialect].format_time(fmt))

    return _format_time

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format; pass True to fall back to the dialect's default time format.
Returns:

A callable that can be used to return the appropriately formatted time expression.

def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's
    schema. When the PARTITIONED BY value is an array of column names, they are
    transformed into a schema. The corresponding columns are removed from the create
    statement.
    """
    if isinstance(expression.this, exp.Schema) and expression.args.get("kind") in (
        "TABLE",
        "VIEW",
    ):
        # Work on a copy so the caller's tree is left untouched.
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        partition_spec = prop and prop.this
        if prop and not isinstance(partition_spec, exp.Schema):
            schema = expression.this
            partition_names = {col.name.upper() for col in partition_spec.expressions}
            partition_cols = [
                col for col in schema.expressions if col.name.upper() in partition_names
            ]
            # Move the partition columns out of the schema and into the property.
            schema.set(
                "expressions",
                [col for col in schema.expressions if col not in partition_cols],
            )
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partition_cols)))
            expression.set("this", schema)

    return self.create_sql(expression)

In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding columns are removed from the create statement.

def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date-delta functions (e.g. DATE_ADD / DATEADD).

    Handles both the two-argument form ``FUNC(this, expression)`` and the
    unit-based three-argument form ``FUNC(unit, expression, this)``.

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional mapping used to normalize unit names.

    Returns:
        A callable that converts a parsed argument list into ``exp_class``.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The delta amount is the second argument in both forms, so the
        # original `if unit_based` conditional (with identical branches)
        # was redundant.
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE-style arguments (substr, this, position) into StrPosition."""
    substr, this, position = (seq_get(args, i) for i in range(3))
    return exp.StrPosition(this=this, substr=substr, position=position)
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition with LOCATE(substr, this[, position]) argument order."""
    substr = expression.args.get("substr")
    position = expression.args.get("position")
    formatted = self.format_args(substr, expression.this, position)
    return f"LOCATE({formatted})"
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as a CAST to TIMESTAMP."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS TIMESTAMP)"
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as a CAST to DATE."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS DATE)"
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render TRIM, delegating to the generic generator unless dialect-specific
    pieces (characters to remove or a collation) require the long form."""
    target = self.sql(expression, "this")
    position = self.sql(expression, "position")
    chars = self.sql(expression, "expression")
    collate = self.sql(expression, "collation")

    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
    if not chars and not collate:
        return self.trim_sql(expression)

    parts = []
    if position:
        parts.append(f"{position} ")
    if chars:
        parts.append(f"{chars} ")
    if parts:
        # FROM only appears when a position and/or removal chars were given.
        parts.append("FROM ")
    parts.append(target)
    if collate:
        parts.append(f" COLLATE {collate}")
    return f"TRIM({''.join(parts)})"