Edit on GitHub

sqlglot.dialects.dialect

  1from __future__ import annotations
  2
  3import typing as t
  4from enum import Enum
  5
  6from sqlglot import exp
  7from sqlglot.generator import Generator
  8from sqlglot.helper import flatten, seq_get
  9from sqlglot.parser import Parser
 10from sqlglot.time import format_time
 11from sqlglot.tokens import Tokenizer
 12from sqlglot.trie import new_trie
 13
# Type variable for helpers that construct and return a specific Expression subclass.
E = t.TypeVar("E", bound=exp.Expression)
 15
 16
class Dialects(str, Enum):
    """Enumeration of the SQL dialects supported by sqlglot.

    Mixing in ``str`` lets members compare equal to their lowercase string
    values (e.g. ``Dialects.HIVE == "hive"``).
    """

    DIALECT = ""  # the base, dialect-agnostic behavior

    BIGQUERY = "bigquery"
    CLICKHOUSE = "clickhouse"
    DUCKDB = "duckdb"
    HIVE = "hive"
    MYSQL = "mysql"
    ORACLE = "oracle"
    POSTGRES = "postgres"
    PRESTO = "presto"
    REDSHIFT = "redshift"
    SNOWFLAKE = "snowflake"
    SPARK = "spark"
    SQLITE = "sqlite"
    STARROCKS = "starrocks"
    TABLEAU = "tableau"
    TRINO = "trino"
    TSQL = "tsql"
    DATABRICKS = "databricks"
    DRILL = "drill"
    TERADATA = "teradata"
 39
 40
class _Dialect(type):
    # Metaclass for Dialect. Every subclass is registered in `classes`,
    # keyed by its Dialects enum value (or lowercased class name), and its
    # derived attributes (time tries, tokenizer/parser/generator classes,
    # quote/identifier delimiters, literal transforms) are autofilled here.
    classes: t.Dict[str, t.Type[Dialect]] = {}

    @classmethod
    def __getitem__(cls, key: str) -> t.Type[Dialect]:
        # Enables `Dialect["hive"]`-style lookup of registered dialect classes.
        return cls.classes[key]

    @classmethod
    def get(
        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
    ) -> t.Optional[t.Type[Dialect]]:
        # dict.get-style lookup that returns `default` for unknown names.
        return cls.classes.get(key, default)

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        # Register under the matching Dialects enum value when one exists,
        # otherwise under the lowercased class name.
        enum = Dialects.__members__.get(clsname.upper())
        cls.classes[enum.value if enum is not None else clsname.lower()] = klass

        # Precompute tries over the time-format mapping in both directions,
        # for fast token matching when parsing and when generating.
        klass.time_trie = new_trie(klass.time_mapping)
        klass.inverse_time_mapping = {v: k for k, v in klass.time_mapping.items()}
        klass.inverse_time_trie = new_trie(klass.inverse_time_mapping)

        # A dialect may override any of these with a nested class of the
        # same name; the base implementations are used otherwise.
        klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer)
        klass.parser_class = getattr(klass, "Parser", Parser)
        klass.generator_class = getattr(klass, "Generator", Generator)

        # The first configured delimiter pair is treated as canonical.
        klass.quote_start, klass.quote_end = list(klass.tokenizer_class._QUOTES.items())[0]
        klass.identifier_start, klass.identifier_end = list(
            klass.tokenizer_class._IDENTIFIERS.items()
        )[0]

        # Install default generator transforms for bit/hex/byte string
        # literals, derived from the tokenizer's delimiters, unless the
        # dialect's generator already defines its own transform.
        if (
            klass.tokenizer_class._BIT_STRINGS
            and exp.BitString not in klass.generator_class.TRANSFORMS
        ):
            bs_start, bs_end = list(klass.tokenizer_class._BIT_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.BitString
            ] = lambda self, e: f"{bs_start}{int(self.sql(e, 'this')):b}{bs_end}"
        if (
            klass.tokenizer_class._HEX_STRINGS
            and exp.HexString not in klass.generator_class.TRANSFORMS
        ):
            hs_start, hs_end = list(klass.tokenizer_class._HEX_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.HexString
            ] = lambda self, e: f"{hs_start}{int(self.sql(e, 'this')):X}{hs_end}"
        if (
            klass.tokenizer_class._BYTE_STRINGS
            and exp.ByteString not in klass.generator_class.TRANSFORMS
        ):
            be_start, be_end = list(klass.tokenizer_class._BYTE_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.ByteString
            ] = lambda self, e: f"{be_start}{self.sql(e, 'this')}{be_end}"

        return klass
 98
 99
class Dialect(metaclass=_Dialect):
    """Base class for SQL dialects.

    Concrete dialects subclass this; the `_Dialect` metaclass registers them
    and fills in the attributes marked "autofilled" below.
    """

    # Offset of the first array element (1 for 1-indexed dialects).
    index_offset = 0
    # Whether UNNEST table aliases are treated as column aliases.
    unnest_column_only = False
    # Whether the table alias is written after TABLESAMPLE.
    alias_post_tablesample = False
    # Casing applied to function names when generating: "upper", "lower", or None.
    normalize_functions: t.Optional[str] = "upper"
    # Default NULL ordering behavior for this dialect.
    null_ordering = "nulls_are_small"

    date_format = "'%Y-%m-%d'"
    dateint_format = "'%Y%m%d'"
    time_format = "'%Y-%m-%d %H:%M:%S'"
    # Dialect time-format tokens mapped to python strftime-style tokens.
    time_mapping: t.Dict[str, str] = {}

    # autofilled by the _Dialect metaclass
    quote_start = None
    quote_end = None
    identifier_start = None
    identifier_end = None

    time_trie = None
    inverse_time_mapping = None
    inverse_time_trie = None
    tokenizer_class = None
    parser_class = None
    generator_class = None

    @classmethod
    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
        """Resolve `dialect` (name, Dialect class, or instance) to a Dialect class.

        Raises:
            ValueError: if `dialect` is a name with no registered dialect.
        """
        if not dialect:
            return cls
        if isinstance(dialect, _Dialect):
            return dialect
        if isinstance(dialect, Dialect):
            return dialect.__class__

        result = cls.get(dialect)
        if not result:
            raise ValueError(f"Unknown dialect '{dialect}'")

        return result

    @classmethod
    def format_time(
        cls, expression: t.Optional[str | exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Convert a dialect time-format string/literal into a string literal
        using this dialect's `time_mapping`; anything else passes through."""
        if isinstance(expression, str):
            return exp.Literal.string(
                format_time(
                    expression[1:-1],  # the time formats are quoted
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        if expression and expression.is_string:
            return exp.Literal.string(
                format_time(
                    expression.this,
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        return expression

    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
        """Tokenize and parse `sql` into a list of syntax trees."""
        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)

    def parse_into(
        self, expression_type: exp.IntoType, sql: str, **opts
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse `sql` into syntax trees of the given `expression_type`."""
        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)

    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
        """Generate SQL text for a syntax tree in this dialect."""
        return self.generator(**opts).generate(expression)

    def transpile(self, sql: str, **opts) -> t.List[str]:
        """Parse `sql` and regenerate it in this dialect, one string per statement."""
        return [self.generate(expression, **opts) for expression in self.parse(sql)]

    @property
    def tokenizer(self) -> Tokenizer:
        # Lazily instantiate and cache one tokenizer per Dialect instance.
        if not hasattr(self, "_tokenizer"):
            self._tokenizer = self.tokenizer_class()  # type: ignore
        return self._tokenizer

    def parser(self, **opts) -> Parser:
        """Build a Parser configured with this dialect's settings; `opts` override them."""
        return self.parser_class(  # type: ignore
            **{
                "index_offset": self.index_offset,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "null_ordering": self.null_ordering,
                **opts,
            },
        )

    def generator(self, **opts) -> Generator:
        """Build a Generator configured with this dialect's settings; `opts` override them."""
        return self.generator_class(  # type: ignore
            **{
                "quote_start": self.quote_start,
                "quote_end": self.quote_end,
                "identifier_start": self.identifier_start,
                "identifier_end": self.identifier_end,
                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
                "index_offset": self.index_offset,
                "time_mapping": self.inverse_time_mapping,
                "time_trie": self.inverse_time_trie,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "normalize_functions": self.normalize_functions,
                "null_ordering": self.null_ordering,
                **opts,
            }
        )
212
213
# Anything Dialect.get_or_raise accepts: a dialect name, an instance, a
# Dialect subclass, or None (meaning the base dialect).
DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
215
216
def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
    """Build a generator transform that renders an expression as a call to
    ``name``, passing all of the expression's arguments, flattened, in order."""

    def _rename(self: Generator, expression: exp.Expression) -> str:
        return self.func(name, *flatten(expression.args.values()))

    return _rename
219
220
def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
    """Render APPROX_COUNT_DISTINCT, warning when an ``accuracy`` argument is
    present since it cannot be carried over."""
    accuracy = expression.args.get("accuracy")
    if accuracy:
        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
    return self.func("APPROX_COUNT_DISTINCT", expression.this)
225
226
def if_sql(self: Generator, expression: exp.If) -> str:
    """Render a conditional as an ``IF(condition, true_value, false_value)`` call."""
    branches = (expression.args.get("true"), expression.args.get("false"))
    return self.func("IF", expression.this, *branches)
231
232
def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
    # Render JSON/JSONB extraction with the arrow operator: <this> -> <path>.
    return self.binary(expression, "->")
235
236
def arrow_json_extract_scalar_sql(
    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
) -> str:
    # Render scalar JSON/JSONB extraction with the double-arrow operator: <this> ->> <path>.
    return self.binary(expression, "->>")
241
242
def inline_array_sql(self: Generator, expression: exp.Array) -> str:
    """Render an array literal with bracket syntax: ``[a, b, ...]``."""
    elements = self.expressions(expression)
    return "[" + elements + "]"
245
246
def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
    """Emulate ILIKE for dialects without it: lowercase the left-hand side
    and emit a plain LIKE comparison."""
    lowered = exp.Lower(this=expression.this)
    like = exp.Like(this=lowered, expression=expression.args["expression"])
    return self.like_sql(like)
254
255
def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE without parentheses, adding an AT TIME ZONE clause
    when a zone argument is present."""
    zone = self.sql(expression, "this")
    if zone:
        return f"CURRENT_DATE AT TIME ZONE {zone}"
    return "CURRENT_DATE"
259
260
def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
    """Render a WITH clause, dropping the RECURSIVE flag (with a warning) for
    dialects that cannot express it. Mutates the expression's args in place."""
    recursive = expression.args.get("recursive")
    if recursive:
        self.unsupported("Recursive CTEs are unsupported")
        expression.args["recursive"] = False
    return self.with_sql(expression)
266
267
def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
    """Emulate SAFE_DIVIDE with an IF guard that yields NULL when the
    denominator is zero."""
    numerator = self.sql(expression, "this")
    denominator = self.sql(expression, "expression")
    return f"IF({denominator} <> 0, {numerator} / {denominator}, NULL)"
272
273
def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
    """Warn that TABLESAMPLE cannot be expressed and render only the sampled table."""
    self.unsupported("TABLESAMPLE unsupported")
    return self.sql(expression.this)
277
278
def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
    """Warn that PIVOT cannot be expressed, then fall through to the default rendering."""
    self.unsupported("PIVOT unsupported")
    return self.sql(expression)
282
283
def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
    # For dialects without TRY_CAST: render as a plain CAST.
    # NOTE(review): unlike TRY_CAST this presumably errors (rather than
    # returning NULL) on invalid input at execution time — accepted tradeoff.
    return self.cast_sql(expression)
286
287
def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
    """Warn that table properties cannot be expressed and emit nothing."""
    self.unsupported("Properties unsupported")
    return ""
291
292
def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as STRPOS, emulating a start ``position`` by
    searching a SUBSTR of the haystack and shifting the result back so it
    stays relative to the full string (1-based)."""
    this = self.sql(expression, "this")
    substr = self.sql(expression, "substr")
    position = self.sql(expression, "position")
    if not position:
        return f"STRPOS({this}, {substr})"
    return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
300
301
def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
    """Render struct field access as ``<struct>."<key>"`` with a quoted key."""
    key_sql = self.sql(exp.Identifier(this=expression.expression, quoted=True))
    return f"{self.sql(expression, 'this')}.{key_sql}"
306
307
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a map constructor as ``MAP(k1, v1, k2, v2, ...)``.

    When the keys/values are not array literals they cannot be interleaved,
    so they are passed through verbatim with a warning.
    """
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not (isinstance(keys, exp.Array) and isinstance(values, exp.Array)):
        self.unsupported("Cannot convert array columns into map.")
        return self.func(map_func_name, keys, values)

    # Interleave keys and values: k1, v1, k2, v2, ...
    interleaved = [
        self.sql(item)
        for pair in zip(keys.expressions, values.expressions)
        for item in pair
    ]
    return self.func(map_func_name, *interleaved)
323
324
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        target = Dialect[dialect]
        fmt = seq_get(args, 1)
        if not fmt:
            # Fall back to the dialect's time format (default=True) or the
            # explicitly supplied default format string.
            fmt = target.time_format if default is True else default or None
        return exp_class(this=seq_get(args, 0), format=target.format_time(fmt))

    return _format_time
349
350
def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the
    PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding
    columns are removed from the create statement.
    """
    has_schema = isinstance(expression.this, exp.Schema)
    is_partitionable = expression.args.get("kind") in ("TABLE", "VIEW")

    if has_schema and is_partitionable:
        # Work on a copy so the caller's expression tree is left untouched.
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        this = prop and prop.this
        # Only rewrite when PARTITIONED BY holds bare column names (not
        # already a Schema with typed column definitions).
        if prop and not isinstance(this, exp.Schema):
            schema = expression.this
            # Column names are matched case-insensitively against the schema.
            columns = {v.name.upper() for v in this.expressions}
            partitions = [col for col in schema.expressions if col.name.upper() in columns]
            # Move the partition columns out of the main schema and into a
            # Schema node under the PARTITIONED BY property.
            schema.set("expressions", [e for e in schema.expressions if e not in partitions])
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partitions)))
            expression.set("this", schema)

    return self.create_sql(expression)
373
374
def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date add/sub style functions.

    The returned callable accepts either ``(this, expression)`` or the
    unit-first form ``(unit, expression, this)`` and instantiates
    ``exp_class`` accordingly, defaulting the unit to ``'DAY'``.

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional mapping used to normalize unit names.

    Returns:
        A callable that builds the expression from a parsed argument list.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The delta is the second argument in both calling conventions, so
        # no conditional is needed (the original had identical branches).
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
387
388
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE(substr, haystack[, position]) arguments into a
    StrPosition expression, which puts the haystack first."""
    substr, haystack, position = (seq_get(args, i) for i in range(3))
    return exp.StrPosition(this=haystack, substr=substr, position=position)
395
396
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as LOCATE, whose argument order is
    (substr, haystack[, position])."""
    substr = expression.args.get("substr")
    position = expression.args.get("position")
    return self.func("LOCATE", substr, expression.this, position)
401
402
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as an explicit CAST to TIMESTAMP."""
    value = self.sql(expression, "this")
    return f"CAST({value} AS TIMESTAMP)"
405
406
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as an explicit CAST to DATE."""
    value = self.sql(expression, "this")
    return f"CAST({value} AS DATE)"
409
410
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render TRIM using the ANSI ``TRIM([position] [chars] FROM target [COLLATE c])``
    form when custom characters or a collation are present; otherwise defer to
    the generator's plain TRIM rendering."""
    target = self.sql(expression, "this")
    trim_type = self.sql(expression, "position")
    remove_chars = self.sql(expression, "expression")
    collation = self.sql(expression, "collation")

    if not (remove_chars or collation):
        # Nothing dialect-specific to spell out: use the generator's rendering.
        return self.trim_sql(expression)

    parts = []
    if trim_type:
        parts.append(f"{trim_type} ")
    if remove_chars:
        parts.append(f"{remove_chars} ")
    if parts:
        # FROM is only needed when a position and/or char set precedes the target.
        parts.append("FROM ")
    parts.append(target)
    if collation:
        parts.append(f" COLLATE {collation}")
    return "TRIM(" + "".join(parts) + ")"
class Dialects(builtins.str, enum.Enum):
18class Dialects(str, Enum):
19    DIALECT = ""
20
21    BIGQUERY = "bigquery"
22    CLICKHOUSE = "clickhouse"
23    DUCKDB = "duckdb"
24    HIVE = "hive"
25    MYSQL = "mysql"
26    ORACLE = "oracle"
27    POSTGRES = "postgres"
28    PRESTO = "presto"
29    REDSHIFT = "redshift"
30    SNOWFLAKE = "snowflake"
31    SPARK = "spark"
32    SQLITE = "sqlite"
33    STARROCKS = "starrocks"
34    TABLEAU = "tableau"
35    TRINO = "trino"
36    TSQL = "tsql"
37    DATABRICKS = "databricks"
38    DRILL = "drill"
39    TERADATA = "teradata"

An enumeration of the SQL dialects supported by sqlglot; each member's value is the lowercase dialect name.

DIALECT = <Dialects.DIALECT: ''>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DRILL = <Dialects.DRILL: 'drill'>
TERADATA = <Dialects.TERADATA: 'teradata'>
Inherited Members
enum.Enum
name
value
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
class Dialect:
101class Dialect(metaclass=_Dialect):
102    index_offset = 0
103    unnest_column_only = False
104    alias_post_tablesample = False
105    normalize_functions: t.Optional[str] = "upper"
106    null_ordering = "nulls_are_small"
107
108    date_format = "'%Y-%m-%d'"
109    dateint_format = "'%Y%m%d'"
110    time_format = "'%Y-%m-%d %H:%M:%S'"
111    time_mapping: t.Dict[str, str] = {}
112
113    # autofilled
114    quote_start = None
115    quote_end = None
116    identifier_start = None
117    identifier_end = None
118
119    time_trie = None
120    inverse_time_mapping = None
121    inverse_time_trie = None
122    tokenizer_class = None
123    parser_class = None
124    generator_class = None
125
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
140
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
162
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
165
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
170
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
173
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
176
177    @property
178    def tokenizer(self) -> Tokenizer:
179        if not hasattr(self, "_tokenizer"):
180            self._tokenizer = self.tokenizer_class()  # type: ignore
181        return self._tokenizer
182
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
193
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
Dialect()
@classmethod
def get_or_raise( cls, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType]) -> Type[sqlglot.dialects.dialect.Dialect]:
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
def generate( self, expression: Optional[sqlglot.expressions.Expression], **opts) -> str:
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
def transpile(self, sql: str, **opts) -> List[str]:
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
def parser(self, **opts) -> sqlglot.parser.Parser:
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
def generator(self, **opts) -> sqlglot.generator.Generator:
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
218def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
219    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
222def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
223    if expression.args.get("accuracy"):
224        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
225    return self.func("APPROX_COUNT_DISTINCT", expression.this)
def if_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.If) -> str:
228def if_sql(self: Generator, expression: exp.If) -> str:
229    return self.func(
230        "IF", expression.this, expression.args.get("true"), expression.args.get("false")
231    )
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtract | sqlglot.expressions.JSONBExtract) -> str:
234def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
235    return self.binary(expression, "->")
def arrow_json_extract_scalar_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtractScalar | sqlglot.expressions.JSONBExtractScalar) -> str:
238def arrow_json_extract_scalar_sql(
239    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
240) -> str:
241    return self.binary(expression, "->>")
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
244def inline_array_sql(self: Generator, expression: exp.Array) -> str:
245    return f"[{self.expressions(expression)}]"
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
248def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
249    return self.like_sql(
250        exp.Like(
251            this=exp.Lower(this=expression.this),
252            expression=expression.args["expression"],
253        )
254    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
257def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
258    zone = self.sql(expression, "this")
259    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
262def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
263    if expression.args.get("recursive"):
264        self.unsupported("Recursive CTEs are unsupported")
265        expression.args["recursive"] = False
266    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
269def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
270    n = self.sql(expression, "this")
271    d = self.sql(expression, "expression")
272    return f"IF({d} <> 0, {n} / {d}, NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
275def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
276    self.unsupported("TABLESAMPLE unsupported")
277    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
280def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
281    self.unsupported("PIVOT unsupported")
282    return self.sql(expression)
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
285def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
286    return self.cast_sql(expression)
def no_properties_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Properties) -> str:
289def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
290    self.unsupported("Properties unsupported")
291    return ""
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
294def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
295    this = self.sql(expression, "this")
296    substr = self.sql(expression, "substr")
297    position = self.sql(expression, "position")
298    if position:
299        return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
300    return f"STRPOS({this}, {substr})"
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
303def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
304    this = self.sql(expression, "this")
305    struct_key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
306    return f"{this}.{struct_key}"
def var_map_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Map | sqlglot.expressions.VarMap, map_func_name: str = 'MAP') -> str:
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a map constructor as MAP(k1, v1, k2, v2, ...).

    When either side is not an array literal the key/value pairs cannot be
    interleaved, so we warn and fall back to MAP(keys, values).
    """
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not (isinstance(keys, exp.Array) and isinstance(values, exp.Array)):
        self.unsupported("Cannot convert array columns into map.")
        return self.func(map_func_name, keys, values)

    # Interleave keys and values: [k1, v1, k2, v2, ...]
    interleaved = [
        self.sql(item)
        for pair in zip(keys.expressions, values.expressions)
        for item in pair
    ]
    return self.func(map_func_name, *interleaved)
def format_time_lambda( exp_class: Type[~E], dialect: str, default: Union[bool, str, NoneType] = None) -> Callable[[Sequence], ~E]:
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        target_dialect = Dialect[dialect]
        fmt = seq_get(args, 1)
        if not fmt:
            # No explicit format: use the dialect's time format when default
            # is True, otherwise fall back to the given default (or None).
            fmt = target_dialect.time_format if default is True else default or None
        return exp_class(this=seq_get(args, 0), format=target_dialect.format_time(fmt))

    return _format_time

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format, True being time.
Returns: A callable that can be used to return the appropriately formatted time expression.

def create_with_partitions_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Create) -> str:
def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the
    PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding
    columns are removed from the create statement.
    """
    if isinstance(expression.this, exp.Schema) and expression.args.get("kind") in (
        "TABLE",
        "VIEW",
    ):
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        if prop and not isinstance(prop.this, exp.Schema):
            schema = expression.this
            partition_names = {col.name.upper() for col in prop.this.expressions}
            partition_cols = [c for c in schema.expressions if c.name.upper() in partition_names]
            remaining_cols = [c for c in schema.expressions if c not in partition_cols]
            schema.set("expressions", remaining_cols)
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partition_cols)))
            expression.set("this", schema)

    return self.create_sql(expression)

In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding columns are removed from the create statement.

def parse_date_delta( exp_class: Type[~E], unit_mapping: Optional[Dict[str, str]] = None) -> Callable[[Sequence], ~E]:
def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date-delta functions such as DATE_ADD / DATE_SUB.

    Supports both the two-argument form ``(this, delta)`` with an implicit DAY
    unit and the three-argument form ``(unit, delta, this)``.

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional mapping used to normalize unit names.

    Returns:
        A callable that turns a function-argument list into an `exp_class` node.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The delta amount sits at index 1 in both the 2- and 3-argument forms,
        # so no conditional is needed (the original ternary had identical branches).
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
def locate_to_strposition(args: Sequence) -> sqlglot.expressions.Expression:
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE-style arguments (substr, string, position) into StrPosition."""
    substr, haystack, position = (seq_get(args, i) for i in range(3))
    return exp.StrPosition(this=haystack, substr=substr, position=position)
def strposition_to_locate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition using LOCATE(substr, string[, position])."""
    arg_map = expression.args
    return self.func("LOCATE", arg_map.get("substr"), expression.this, arg_map.get("position"))
def timestrtotime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TimeStrToTime) -> str:
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as an explicit CAST to TIMESTAMP."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS TIMESTAMP)"
def datestrtodate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.DateStrToDate) -> str:
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as an explicit CAST to DATE."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS DATE)"
def trim_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Trim) -> str:
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render TRIM, using the verbose SQL form only when required.

    Simple trims are delegated back to the generator's default TRIM/LTRIM/RTRIM
    handling; the extended TRIM(<type> <chars> FROM <target> COLLATE ...) form
    is emitted only when custom characters or a collation are present.
    """
    target = self.sql(expression, "this")
    trim_type = self.sql(expression, "position")
    remove_chars = self.sql(expression, "expression")
    collation = self.sql(expression, "collation")

    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
    if not (remove_chars or collation):
        return self.trim_sql(expression)

    pieces = [
        f"{trim_type} " if trim_type else "",
        f"{remove_chars} " if remove_chars else "",
        "FROM " if trim_type or remove_chars else "",
        target,
        f" COLLATE {collation}" if collation else "",
    ]
    body = "".join(pieces)
    return f"TRIM({body})"