Edit on GitHub

sqlglot.dialects.dialect

  1from __future__ import annotations
  2
  3import typing as t
  4from enum import Enum
  5
  6from sqlglot import exp
  7from sqlglot.generator import Generator
  8from sqlglot.helper import flatten, seq_get
  9from sqlglot.parser import Parser
 10from sqlglot.time import format_time
 11from sqlglot.tokens import Tokenizer
 12from sqlglot.trie import new_trie
 13
 14E = t.TypeVar("E", bound=exp.Expression)
 15
 16
 17class Dialects(str, Enum):
 18    DIALECT = ""
 19
 20    BIGQUERY = "bigquery"
 21    CLICKHOUSE = "clickhouse"
 22    DUCKDB = "duckdb"
 23    HIVE = "hive"
 24    MYSQL = "mysql"
 25    ORACLE = "oracle"
 26    POSTGRES = "postgres"
 27    PRESTO = "presto"
 28    REDSHIFT = "redshift"
 29    SNOWFLAKE = "snowflake"
 30    SPARK = "spark"
 31    SQLITE = "sqlite"
 32    STARROCKS = "starrocks"
 33    TABLEAU = "tableau"
 34    TRINO = "trino"
 35    TSQL = "tsql"
 36    DATABRICKS = "databricks"
 37    DRILL = "drill"
 38    TERADATA = "teradata"
 39
 40
 41class _Dialect(type):
 42    classes: t.Dict[str, t.Type[Dialect]] = {}
 43
 44    @classmethod
 45    def __getitem__(cls, key: str) -> t.Type[Dialect]:
 46        return cls.classes[key]
 47
 48    @classmethod
 49    def get(
 50        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
 51    ) -> t.Optional[t.Type[Dialect]]:
 52        return cls.classes.get(key, default)
 53
 54    def __new__(cls, clsname, bases, attrs):
 55        klass = super().__new__(cls, clsname, bases, attrs)
 56        enum = Dialects.__members__.get(clsname.upper())
 57        cls.classes[enum.value if enum is not None else clsname.lower()] = klass
 58
 59        klass.time_trie = new_trie(klass.time_mapping)
 60        klass.inverse_time_mapping = {v: k for k, v in klass.time_mapping.items()}
 61        klass.inverse_time_trie = new_trie(klass.inverse_time_mapping)
 62
 63        klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer)
 64        klass.parser_class = getattr(klass, "Parser", Parser)
 65        klass.generator_class = getattr(klass, "Generator", Generator)
 66
 67        klass.quote_start, klass.quote_end = list(klass.tokenizer_class._QUOTES.items())[0]
 68        klass.identifier_start, klass.identifier_end = list(
 69            klass.tokenizer_class._IDENTIFIERS.items()
 70        )[0]
 71
 72        if (
 73            klass.tokenizer_class._BIT_STRINGS
 74            and exp.BitString not in klass.generator_class.TRANSFORMS
 75        ):
 76            bs_start, bs_end = list(klass.tokenizer_class._BIT_STRINGS.items())[0]
 77            klass.generator_class.TRANSFORMS[
 78                exp.BitString
 79            ] = lambda self, e: f"{bs_start}{int(self.sql(e, 'this')):b}{bs_end}"
 80        if (
 81            klass.tokenizer_class._HEX_STRINGS
 82            and exp.HexString not in klass.generator_class.TRANSFORMS
 83        ):
 84            hs_start, hs_end = list(klass.tokenizer_class._HEX_STRINGS.items())[0]
 85            klass.generator_class.TRANSFORMS[
 86                exp.HexString
 87            ] = lambda self, e: f"{hs_start}{int(self.sql(e, 'this')):X}{hs_end}"
 88        if (
 89            klass.tokenizer_class._BYTE_STRINGS
 90            and exp.ByteString not in klass.generator_class.TRANSFORMS
 91        ):
 92            be_start, be_end = list(klass.tokenizer_class._BYTE_STRINGS.items())[0]
 93            klass.generator_class.TRANSFORMS[
 94                exp.ByteString
 95            ] = lambda self, e: f"{be_start}{self.sql(e, 'this')}{be_end}"
 96
 97        return klass
 98
 99
class Dialect(metaclass=_Dialect):
    """Base class describing how a SQL dialect is tokenized, parsed and generated.

    Subclasses override the class-level knobs below (and optionally nest their
    own ``Tokenizer``/``Parser``/``Generator`` classes); the ``_Dialect``
    metaclass registers each subclass and autofills the derived attributes.
    """

    # Parsing / generation behavior knobs.
    index_offset = 0
    unnest_column_only = False
    alias_post_tablesample = False
    normalize_functions: t.Optional[str] = "upper"
    null_ordering = "nulls_are_small"

    # Default date/time literal formats.
    date_format = "'%Y-%m-%d'"
    dateint_format = "'%Y%m%d'"
    time_format = "'%Y-%m-%d %H:%M:%S'"
    time_mapping: t.Dict[str, str] = {}

    # Autofilled by the _Dialect metaclass.
    quote_start = None
    quote_end = None
    identifier_start = None
    identifier_end = None

    time_trie = None
    inverse_time_mapping = None
    inverse_time_trie = None
    tokenizer_class = None
    parser_class = None
    generator_class = None

    @classmethod
    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
        """Resolve *dialect* (name string, class or instance) to a Dialect class.

        Raises:
            ValueError: if *dialect* is a string not found in the registry.
        """
        if not dialect:
            return cls
        if isinstance(dialect, _Dialect):
            return dialect
        if isinstance(dialect, Dialect):
            return dialect.__class__

        result = cls.get(dialect)
        if not result:
            raise ValueError(f"Unknown dialect '{dialect}'")
        return result

    @classmethod
    def format_time(
        cls, expression: t.Optional[str | exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Map a time-format string or string literal through this dialect's
        time mapping, returning a string literal (or *expression* unchanged)."""
        if isinstance(expression, str):
            # The time formats arrive quoted; strip the surrounding quotes.
            return exp.Literal.string(
                format_time(expression[1:-1], cls.time_mapping, cls.time_trie)
            )
        if expression and expression.is_string:
            return exp.Literal.string(
                format_time(expression.this, cls.time_mapping, cls.time_trie)
            )
        return expression

    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
        """Tokenize and parse *sql* into a list of expression trees."""
        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)

    def parse_into(
        self, expression_type: exp.IntoType, sql: str, **opts
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse *sql*, coercing the result into *expression_type*."""
        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)

    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
        """Render an expression tree back into SQL for this dialect."""
        return self.generator(**opts).generate(expression)

    def transpile(self, sql: str, **opts) -> t.List[str]:
        """Parse *sql* and regenerate each statement in this dialect."""
        return [self.generate(expression, **opts) for expression in self.parse(sql)]

    @property
    def tokenizer(self) -> Tokenizer:
        # Lazily instantiate and cache a tokenizer per dialect instance.
        try:
            return self._tokenizer
        except AttributeError:
            self._tokenizer = self.tokenizer_class()  # type: ignore
            return self._tokenizer

    def parser(self, **opts) -> Parser:
        """Build a parser configured with this dialect's settings; *opts* win."""
        settings = {
            "index_offset": self.index_offset,
            "unnest_column_only": self.unnest_column_only,
            "alias_post_tablesample": self.alias_post_tablesample,
            "null_ordering": self.null_ordering,
        }
        return self.parser_class(**{**settings, **opts})  # type: ignore

    def generator(self, **opts) -> Generator:
        """Build a generator configured with this dialect's settings; *opts* win."""
        settings = {
            "quote_start": self.quote_start,
            "quote_end": self.quote_end,
            "identifier_start": self.identifier_start,
            "identifier_end": self.identifier_end,
            "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
            "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
            "index_offset": self.index_offset,
            "time_mapping": self.inverse_time_mapping,
            "time_trie": self.inverse_time_trie,
            "unnest_column_only": self.unnest_column_only,
            "alias_post_tablesample": self.alias_post_tablesample,
            "normalize_functions": self.normalize_functions,
            "null_ordering": self.null_ordering,
        }
        return self.generator_class(**{**settings, **opts})  # type: ignore
212
213
214DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
215
216
def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
    """Return a generator transform that renders an expression as a call to *name*,
    forwarding all of the expression's argument values."""

    def _rename(self, expression):
        flattened = flatten(expression.args.values())
        return f"{self.normalize_func(name)}({self.format_args(*flattened)})"

    return _rename
223
224
def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
    """Render APPROX_DISTINCT as APPROX_COUNT_DISTINCT, warning when an
    accuracy argument is present (it cannot be expressed and is dropped)."""
    if expression.args.get("accuracy"):
        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")

    return f"APPROX_COUNT_DISTINCT({self.format_args(expression.this)})"
229
230
def if_sql(self: Generator, expression: exp.If) -> str:
    """Render an If expression as an IF(condition, true, false) call."""
    args = self.format_args(
        expression.this,
        expression.args.get("true"),
        expression.args.get("false"),
    )
    return f"IF({args})"
236
237
def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
    """Render JSON(B) extraction with the arrow operator (->)."""
    return self.binary(expression, "->")
240
241
def arrow_json_extract_scalar_sql(
    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
) -> str:
    """Render scalar JSON(B) extraction with the double-arrow operator (->>)."""
    return self.binary(expression, "->>")
246
247
def inline_array_sql(self: Generator, expression: exp.Array) -> str:
    """Render an Array as an inline bracketed literal: [e1, e2, ...]."""
    return f"[{self.expressions(expression)}]"
250
251
def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
    """Emulate ILIKE for dialects without it: LOWER(this) LIKE pattern.

    NOTE(review): only the left side is lowered here; the pattern is passed
    through unchanged — confirm callers rely on that behavior.
    """
    lowered = exp.Lower(this=expression.this)
    like = exp.Like(this=lowered, expression=expression.args["expression"])
    return self.like_sql(like)
259
260
def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE without parentheses, with an optional time zone."""
    zone = self.sql(expression, "this")
    if zone:
        return f"CURRENT_DATE AT TIME ZONE {zone}"
    return "CURRENT_DATE"
264
265
def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
    """Strip the RECURSIVE flag (with a warning) for dialects without
    recursive CTE support, then render the WITH clause normally."""
    if expression.args.get("recursive"):
        self.unsupported("Recursive CTEs are unsupported")
        # Mutate in place so the generated WITH clause omits RECURSIVE.
        expression.args["recursive"] = False
    return self.with_sql(expression)
271
272
def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
    """Emulate SAFE_DIVIDE with IF: NULL when the denominator is zero."""
    numerator = self.sql(expression, "this")
    denominator = self.sql(expression, "expression")
    return f"IF({denominator} <> 0, {numerator} / {denominator}, NULL)"
277
278
def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
    """Drop TABLESAMPLE (with a warning) and render only the sampled table."""
    self.unsupported("TABLESAMPLE unsupported")
    return self.sql(expression.this)
282
283
def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
    """Warn that PIVOT is unsupported, then render the expression anyway.

    NOTE(review): this renders the Pivot node itself (not ``expression.this``);
    if the generator routes Pivot back through this transform it could recurse —
    verify against the Generator's dispatch.
    """
    self.unsupported("PIVOT unsupported")
    return self.sql(expression)
287
288
def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
    """Render TRY_CAST as a plain CAST for dialects without TRY_CAST."""
    return self.cast_sql(expression)
291
292
def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
    """Warn and emit nothing for dialects that support no table properties."""
    self.unsupported("Properties unsupported")
    return ""
296
297
def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as STRPOS, emulating the optional start position
    by searching a substring and re-offsetting the result."""
    this = self.sql(expression, "this")
    substr = self.sql(expression, "substr")
    position = self.sql(expression, "position")

    if not position:
        return f"STRPOS({this}, {substr})"
    # Search from `position` onward, then shift the index back to the
    # coordinates of the full string (1-based).
    return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
305
306
def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
    """Render struct member access as dotted notation with a quoted key."""
    this = self.sql(expression, "this")
    key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
    return f"{this}.{key}"
311
312
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a Map/VarMap as MAP(k1, v1, k2, v2, ...) with interleaved
    key/value arguments; falls back to MAP(keys, values) with a warning when
    the keys/values are not literal arrays."""
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return f"{map_func_name}({self.format_args(keys, values)})"

    # Interleave key1, value1, key2, value2, ...
    interleaved = [
        self.sql(item)
        for pair in zip(keys.expressions, values.expressions)
        for item in pair
    ]
    return f"{map_func_name}({self.format_args(*interleaved)})"
328
329
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        # Use the explicit format argument when given; otherwise fall back to
        # the dialect's time format (default=True) or the supplied default.
        fallback = Dialect[dialect].time_format if default is True else default or None
        fmt = seq_get(args, 1) or fallback
        return exp_class(this=seq_get(args, 0), format=Dialect[dialect].format_time(fmt))

    return _format_time
354
355
def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's
    schema. When the PARTITIONED BY value is an array of column names, they are
    transformed into a schema, and the corresponding columns are removed from the
    create statement.
    """
    if isinstance(expression.this, exp.Schema) and expression.args.get("kind") in (
        "TABLE",
        "VIEW",
    ):
        # Work on a copy so the caller's tree is not mutated.
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        partition_value = prop and prop.this
        if prop and not isinstance(partition_value, exp.Schema):
            schema = expression.this
            partition_names = {v.name.upper() for v in partition_value.expressions}
            partitions = [col for col in schema.expressions if col.name.upper() in partition_names]
            # Move the partition columns out of the table schema into the property.
            schema.set("expressions", [e for e in schema.expressions if e not in partitions])
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partitions)))
            expression.set("this", schema)

    return self.create_sql(expression)
378
379
def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date-delta functions (e.g. DATE_ADD/DATEDIFF).

    Two calling conventions are supported:
      - 3 args: (unit, expression, this)
      - 2 args: (this, expression) with the unit defaulting to DAY

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional lowercase-unit-name -> canonical-unit remapping.

    Returns:
        A callable turning an argument sequence into an *exp_class* node.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The middle argument is the delta in both conventions; the original
        # `seq_get(args, 1) if unit_based else seq_get(args, 1)` was a dead
        # conditional with identical branches.
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
392
393
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE-style arguments (substr, this[, position]) into a
    StrPosition node, which uses (this, substr[, position]) order."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )
400
401
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as LOCATE(substr, this[, position])."""
    formatted = self.format_args(
        expression.args.get("substr"),
        expression.this,
        expression.args.get("position"),
    )
    return f"LOCATE({formatted})"
407
408
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as a CAST to TIMESTAMP."""
    return f"CAST({self.sql(expression, 'this')} AS TIMESTAMP)"
411
412
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as a CAST to DATE."""
    return f"CAST({self.sql(expression, 'this')} AS DATE)"
415
416
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render a Trim expression using the verbose
    TRIM([position] [chars] [FROM] target [COLLATE ...]) syntax, deferring to
    the generator's plain TRIM/LTRIM/RTRIM rendering when there is nothing
    dialect-specific to express."""
    target = self.sql(expression, "this")
    trim_type = self.sql(expression, "position")
    remove_chars = self.sql(expression, "expression")
    collation = self.sql(expression, "collation")

    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific.
    if not remove_chars and not collation:
        return self.trim_sql(expression)

    trim_type = f"{trim_type} " if trim_type else ""
    remove_chars = f"{remove_chars} " if remove_chars else ""
    from_part = "FROM " if trim_type or remove_chars else ""
    collation = f" COLLATE {collation}" if collation else ""
    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
class Dialects(builtins.str, enum.Enum):
18class Dialects(str, Enum):
19    DIALECT = ""
20
21    BIGQUERY = "bigquery"
22    CLICKHOUSE = "clickhouse"
23    DUCKDB = "duckdb"
24    HIVE = "hive"
25    MYSQL = "mysql"
26    ORACLE = "oracle"
27    POSTGRES = "postgres"
28    PRESTO = "presto"
29    REDSHIFT = "redshift"
30    SNOWFLAKE = "snowflake"
31    SPARK = "spark"
32    SQLITE = "sqlite"
33    STARROCKS = "starrocks"
34    TABLEAU = "tableau"
35    TRINO = "trino"
36    TSQL = "tsql"
37    DATABRICKS = "databricks"
38    DRILL = "drill"
39    TERADATA = "teradata"

An enumeration.

DIALECT = <Dialects.DIALECT: ''>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DRILL = <Dialects.DRILL: 'drill'>
TERADATA = <Dialects.TERADATA: 'teradata'>
Inherited Members
enum.Enum
name
value
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
class Dialect:
101class Dialect(metaclass=_Dialect):
102    index_offset = 0
103    unnest_column_only = False
104    alias_post_tablesample = False
105    normalize_functions: t.Optional[str] = "upper"
106    null_ordering = "nulls_are_small"
107
108    date_format = "'%Y-%m-%d'"
109    dateint_format = "'%Y%m%d'"
110    time_format = "'%Y-%m-%d %H:%M:%S'"
111    time_mapping: t.Dict[str, str] = {}
112
113    # autofilled
114    quote_start = None
115    quote_end = None
116    identifier_start = None
117    identifier_end = None
118
119    time_trie = None
120    inverse_time_mapping = None
121    inverse_time_trie = None
122    tokenizer_class = None
123    parser_class = None
124    generator_class = None
125
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
140
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
162
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
165
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
170
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
173
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
176
177    @property
178    def tokenizer(self) -> Tokenizer:
179        if not hasattr(self, "_tokenizer"):
180            self._tokenizer = self.tokenizer_class()  # type: ignore
181        return self._tokenizer
182
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
193
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
Dialect()
@classmethod
def get_or_raise( cls, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType]) -> Type[sqlglot.dialects.dialect.Dialect]:
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
def generate( self, expression: Optional[sqlglot.expressions.Expression], **opts) -> str:
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
def transpile(self, sql: str, **opts) -> List[str]:
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
def parser(self, **opts) -> sqlglot.parser.Parser:
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
def generator(self, **opts) -> sqlglot.generator.Generator:
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
218def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
219    def _rename(self, expression):
220        args = flatten(expression.args.values())
221        return f"{self.normalize_func(name)}({self.format_args(*args)})"
222
223    return _rename
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
226def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
227    if expression.args.get("accuracy"):
228        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
229    return f"APPROX_COUNT_DISTINCT({self.format_args(expression.this)})"
def if_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.If) -> str:
232def if_sql(self: Generator, expression: exp.If) -> str:
233    expressions = self.format_args(
234        expression.this, expression.args.get("true"), expression.args.get("false")
235    )
236    return f"IF({expressions})"
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtract | sqlglot.expressions.JSONBExtract) -> str:
239def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
240    return self.binary(expression, "->")
def arrow_json_extract_scalar_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtractScalar | sqlglot.expressions.JSONBExtractScalar) -> str:
243def arrow_json_extract_scalar_sql(
244    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
245) -> str:
246    return self.binary(expression, "->>")
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
249def inline_array_sql(self: Generator, expression: exp.Array) -> str:
250    return f"[{self.expressions(expression)}]"
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
253def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
254    return self.like_sql(
255        exp.Like(
256            this=exp.Lower(this=expression.this),
257            expression=expression.args["expression"],
258        )
259    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
262def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
263    zone = self.sql(expression, "this")
264    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
267def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
268    if expression.args.get("recursive"):
269        self.unsupported("Recursive CTEs are unsupported")
270        expression.args["recursive"] = False
271    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
274def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
275    n = self.sql(expression, "this")
276    d = self.sql(expression, "expression")
277    return f"IF({d} <> 0, {n} / {d}, NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
280def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
281    self.unsupported("TABLESAMPLE unsupported")
282    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
285def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
286    self.unsupported("PIVOT unsupported")
287    return self.sql(expression)
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
290def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
291    return self.cast_sql(expression)
def no_properties_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Properties) -> str:
294def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
295    self.unsupported("Properties unsupported")
296    return ""
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
299def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
300    this = self.sql(expression, "this")
301    substr = self.sql(expression, "substr")
302    position = self.sql(expression, "position")
303    if position:
304        return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
305    return f"STRPOS({this}, {substr})"
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
308def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
309    this = self.sql(expression, "this")
310    struct_key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
311    return f"{this}.{struct_key}"
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a map built from key/value arrays as MAP(k1, v1, k2, v2, ...).

    If either side is not an array literal (e.g. a column reference), the
    arrays cannot be interleaved, so they are passed through as-is with a
    warning.
    """
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return f"{map_func_name}({self.format_args(keys, values)})"

    interleaved = []
    for pair in zip(keys.expressions, values.expressions):
        interleaved.extend(self.sql(item) for item in pair)
    return f"{map_func_name}({self.format_args(*interleaved)})"
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        # Use the explicit format argument when present; otherwise fall back to
        # the dialect's time format (default is True) or the given default.
        fmt = seq_get(args, 1)
        if not fmt:
            fmt = Dialect[dialect].time_format if default is True else default or None
        return exp_class(this=seq_get(args, 0), format=Dialect[dialect].format_time(fmt))

    return _format_time

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format; pass True to fall back to the dialect's default time format.
Returns:

A callable that can be used to return the appropriately formatted time expression.

def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's
    schema. When the PARTITIONED BY value is an array of column names, they are
    transformed into a schema. The corresponding columns are removed from the create
    statement.
    """
    if isinstance(expression.this, exp.Schema) and expression.args.get("kind") in (
        "TABLE",
        "VIEW",
    ):
        # Work on a copy so the caller's tree is left untouched.
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        partition_spec = prop and prop.this
        if prop and not isinstance(partition_spec, exp.Schema):
            schema = expression.this
            partition_names = {col.name.upper() for col in partition_spec.expressions}
            partition_cols = [
                col for col in schema.expressions if col.name.upper() in partition_names
            ]
            # Move the partition columns out of the schema and into the property.
            schema.set(
                "expressions",
                [col for col in schema.expressions if col not in partition_cols],
            )
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partition_cols)))
            expression.set("this", schema)

    return self.create_sql(expression)

In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding columns are removed from the create statement.

def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date-delta functions (e.g. DATE_ADD / DATEADD).

    Handles both the two-argument form ``FUNC(this, expression)`` and the
    unit-based three-argument form ``FUNC(unit, expression, this)``.

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional mapping used to normalize unit names.

    Returns:
        A callable that converts a parsed argument list into ``exp_class``.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The delta amount is the second argument in both forms, so the
        # original `if unit_based` conditional (with identical branches)
        # was redundant.
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE-style arguments (substr, this, position) into StrPosition."""
    substr, this, position = (seq_get(args, i) for i in range(3))
    return exp.StrPosition(this=this, substr=substr, position=position)
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition with LOCATE(substr, this[, position]) argument order."""
    substr = expression.args.get("substr")
    position = expression.args.get("position")
    formatted = self.format_args(substr, expression.this, position)
    return f"LOCATE({formatted})"
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as a CAST to TIMESTAMP."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS TIMESTAMP)"
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as a CAST to DATE."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS DATE)"
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render TRIM, delegating to the generic generator unless dialect-specific
    pieces (characters to remove or a collation) require the long form."""
    target = self.sql(expression, "this")
    position = self.sql(expression, "position")
    chars = self.sql(expression, "expression")
    collate = self.sql(expression, "collation")

    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
    if not chars and not collate:
        return self.trim_sql(expression)

    parts = []
    if position:
        parts.append(f"{position} ")
    if chars:
        parts.append(f"{chars} ")
    if parts:
        # FROM only appears when a position and/or removal chars were given.
        parts.append("FROM ")
    parts.append(target)
    if collate:
        parts.append(f" COLLATE {collate}")
    return f"TRIM({''.join(parts)})"