Edit on GitHub

sqlglot.dialects.dialect

  1from __future__ import annotations
  2
  3import typing as t
  4from enum import Enum
  5
  6from sqlglot import exp
  7from sqlglot.generator import Generator
  8from sqlglot.helper import flatten, seq_get
  9from sqlglot.parser import Parser
 10from sqlglot.time import format_time
 11from sqlglot.tokens import Tokenizer
 12from sqlglot.trie import new_trie
 13
# Type variable for helpers that construct and return a specific Expression subclass.
E = t.TypeVar("E", bound=exp.Expression)
 15
 16
class Dialects(str, Enum):
    """Enumeration of the SQL dialects supported by sqlglot.

    Mixing in ``str`` lets members compare equal to their lowercase string
    values (e.g. ``Dialects.HIVE == "hive"``).
    """

    DIALECT = ""  # the base, dialect-agnostic behavior

    BIGQUERY = "bigquery"
    CLICKHOUSE = "clickhouse"
    DUCKDB = "duckdb"
    HIVE = "hive"
    MYSQL = "mysql"
    ORACLE = "oracle"
    POSTGRES = "postgres"
    PRESTO = "presto"
    REDSHIFT = "redshift"
    SNOWFLAKE = "snowflake"
    SPARK = "spark"
    SQLITE = "sqlite"
    STARROCKS = "starrocks"
    TABLEAU = "tableau"
    TRINO = "trino"
    TSQL = "tsql"
    DATABRICKS = "databricks"
    DRILL = "drill"
    TERADATA = "teradata"
 39
 40
class _Dialect(type):
    # Metaclass for Dialect. Every subclass is registered in `classes`,
    # keyed by its Dialects enum value (or lowercased class name), and its
    # derived attributes (time tries, tokenizer/parser/generator classes,
    # quote/identifier delimiters, literal transforms) are autofilled here.
    classes: t.Dict[str, t.Type[Dialect]] = {}

    @classmethod
    def __getitem__(cls, key: str) -> t.Type[Dialect]:
        # Enables `Dialect["hive"]`-style lookup of registered dialect classes.
        return cls.classes[key]

    @classmethod
    def get(
        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
    ) -> t.Optional[t.Type[Dialect]]:
        # dict.get-style lookup that returns `default` for unknown names.
        return cls.classes.get(key, default)

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        # Register under the matching Dialects enum value when one exists,
        # otherwise under the lowercased class name.
        enum = Dialects.__members__.get(clsname.upper())
        cls.classes[enum.value if enum is not None else clsname.lower()] = klass

        # Precompute tries over the time-format mapping in both directions,
        # for fast token matching when parsing and when generating.
        klass.time_trie = new_trie(klass.time_mapping)
        klass.inverse_time_mapping = {v: k for k, v in klass.time_mapping.items()}
        klass.inverse_time_trie = new_trie(klass.inverse_time_mapping)

        # A dialect may override any of these with a nested class of the
        # same name; the base implementations are used otherwise.
        klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer)
        klass.parser_class = getattr(klass, "Parser", Parser)
        klass.generator_class = getattr(klass, "Generator", Generator)

        # The first configured delimiter pair is treated as canonical.
        klass.quote_start, klass.quote_end = list(klass.tokenizer_class._QUOTES.items())[0]
        klass.identifier_start, klass.identifier_end = list(
            klass.tokenizer_class._IDENTIFIERS.items()
        )[0]

        # Install default generator transforms for bit/hex/byte string
        # literals, derived from the tokenizer's delimiters, unless the
        # dialect's generator already defines its own transform.
        if (
            klass.tokenizer_class._BIT_STRINGS
            and exp.BitString not in klass.generator_class.TRANSFORMS
        ):
            bs_start, bs_end = list(klass.tokenizer_class._BIT_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.BitString
            ] = lambda self, e: f"{bs_start}{int(self.sql(e, 'this')):b}{bs_end}"
        if (
            klass.tokenizer_class._HEX_STRINGS
            and exp.HexString not in klass.generator_class.TRANSFORMS
        ):
            hs_start, hs_end = list(klass.tokenizer_class._HEX_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.HexString
            ] = lambda self, e: f"{hs_start}{int(self.sql(e, 'this')):X}{hs_end}"
        if (
            klass.tokenizer_class._BYTE_STRINGS
            and exp.ByteString not in klass.generator_class.TRANSFORMS
        ):
            be_start, be_end = list(klass.tokenizer_class._BYTE_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.ByteString
            ] = lambda self, e: f"{be_start}{self.sql(e, 'this')}{be_end}"

        return klass
 98
 99
class Dialect(metaclass=_Dialect):
    """Base class for SQL dialects.

    Concrete dialects subclass this; the `_Dialect` metaclass registers them
    and fills in the attributes marked "autofilled" below.
    """

    # Offset of the first array element (1 for 1-indexed dialects).
    index_offset = 0
    # Whether UNNEST table aliases are treated as column aliases.
    unnest_column_only = False
    # Whether the table alias is written after TABLESAMPLE.
    alias_post_tablesample = False
    # Casing applied to function names when generating: "upper", "lower", or None.
    normalize_functions: t.Optional[str] = "upper"
    # Default NULL ordering behavior for this dialect.
    null_ordering = "nulls_are_small"

    date_format = "'%Y-%m-%d'"
    dateint_format = "'%Y%m%d'"
    time_format = "'%Y-%m-%d %H:%M:%S'"
    # Dialect time-format tokens mapped to python strftime-style tokens.
    time_mapping: t.Dict[str, str] = {}

    # autofilled by the _Dialect metaclass
    quote_start = None
    quote_end = None
    identifier_start = None
    identifier_end = None

    time_trie = None
    inverse_time_mapping = None
    inverse_time_trie = None
    tokenizer_class = None
    parser_class = None
    generator_class = None

    @classmethod
    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
        """Resolve `dialect` (name, Dialect class, or instance) to a Dialect class.

        Raises:
            ValueError: if `dialect` is a name with no registered dialect.
        """
        if not dialect:
            return cls
        if isinstance(dialect, _Dialect):
            return dialect
        if isinstance(dialect, Dialect):
            return dialect.__class__

        result = cls.get(dialect)
        if not result:
            raise ValueError(f"Unknown dialect '{dialect}'")

        return result

    @classmethod
    def format_time(
        cls, expression: t.Optional[str | exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Convert a dialect time-format string/literal into a string literal
        using this dialect's `time_mapping`; anything else passes through."""
        if isinstance(expression, str):
            return exp.Literal.string(
                format_time(
                    expression[1:-1],  # the time formats are quoted
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        if expression and expression.is_string:
            return exp.Literal.string(
                format_time(
                    expression.this,
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        return expression

    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
        """Tokenize and parse `sql` into a list of syntax trees."""
        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)

    def parse_into(
        self, expression_type: exp.IntoType, sql: str, **opts
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse `sql` into syntax trees of the given `expression_type`."""
        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)

    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
        """Generate SQL text for a syntax tree in this dialect."""
        return self.generator(**opts).generate(expression)

    def transpile(self, sql: str, **opts) -> t.List[str]:
        """Parse `sql` and regenerate it in this dialect, one string per statement."""
        return [self.generate(expression, **opts) for expression in self.parse(sql)]

    @property
    def tokenizer(self) -> Tokenizer:
        # Lazily instantiate and cache one tokenizer per Dialect instance.
        if not hasattr(self, "_tokenizer"):
            self._tokenizer = self.tokenizer_class()  # type: ignore
        return self._tokenizer

    def parser(self, **opts) -> Parser:
        """Build a Parser configured with this dialect's settings; `opts` override them."""
        return self.parser_class(  # type: ignore
            **{
                "index_offset": self.index_offset,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "null_ordering": self.null_ordering,
                **opts,
            },
        )

    def generator(self, **opts) -> Generator:
        """Build a Generator configured with this dialect's settings; `opts` override them."""
        return self.generator_class(  # type: ignore
            **{
                "quote_start": self.quote_start,
                "quote_end": self.quote_end,
                "identifier_start": self.identifier_start,
                "identifier_end": self.identifier_end,
                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
                "index_offset": self.index_offset,
                "time_mapping": self.inverse_time_mapping,
                "time_trie": self.inverse_time_trie,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "normalize_functions": self.normalize_functions,
                "null_ordering": self.null_ordering,
                **opts,
            }
        )
212
213
# Anything Dialect.get_or_raise accepts: a dialect name, an instance, a
# Dialect subclass, or None (meaning the base dialect).
DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
215
216
def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
    """Build a generator transform that renders an expression as a call to
    ``name``, passing all of the expression's arguments, flattened, in order."""

    def _rename(self: Generator, expression: exp.Expression) -> str:
        return self.func(name, *flatten(expression.args.values()))

    return _rename
219
220
def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
    """Render APPROX_COUNT_DISTINCT, warning when an ``accuracy`` argument is
    present since it cannot be carried over."""
    accuracy = expression.args.get("accuracy")
    if accuracy:
        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
    return self.func("APPROX_COUNT_DISTINCT", expression.this)
225
226
def if_sql(self: Generator, expression: exp.If) -> str:
    """Render a conditional as an ``IF(condition, true_value, false_value)`` call."""
    branches = (expression.args.get("true"), expression.args.get("false"))
    return self.func("IF", expression.this, *branches)
231
232
def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
    # Render JSON/JSONB extraction with the arrow operator: <this> -> <path>.
    return self.binary(expression, "->")
235
236
def arrow_json_extract_scalar_sql(
    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
) -> str:
    # Render scalar JSON/JSONB extraction with the double-arrow operator: <this> ->> <path>.
    return self.binary(expression, "->>")
241
242
def inline_array_sql(self: Generator, expression: exp.Array) -> str:
    """Render an array literal with bracket syntax: ``[a, b, ...]``."""
    elements = self.expressions(expression)
    return "[" + elements + "]"
245
246
def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
    """Emulate ILIKE for dialects without it: lowercase the left-hand side
    and emit a plain LIKE comparison."""
    lowered = exp.Lower(this=expression.this)
    like = exp.Like(this=lowered, expression=expression.args["expression"])
    return self.like_sql(like)
254
255
def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE without parentheses, adding an AT TIME ZONE clause
    when a zone argument is present."""
    zone = self.sql(expression, "this")
    if zone:
        return f"CURRENT_DATE AT TIME ZONE {zone}"
    return "CURRENT_DATE"
259
260
def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
    """Render a WITH clause, dropping the RECURSIVE flag (with a warning) for
    dialects that cannot express it. Mutates the expression's args in place."""
    recursive = expression.args.get("recursive")
    if recursive:
        self.unsupported("Recursive CTEs are unsupported")
        expression.args["recursive"] = False
    return self.with_sql(expression)
266
267
def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
    """Emulate SAFE_DIVIDE with an IF guard that yields NULL when the
    denominator is zero."""
    numerator = self.sql(expression, "this")
    denominator = self.sql(expression, "expression")
    return f"IF({denominator} <> 0, {numerator} / {denominator}, NULL)"
272
273
def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
    """Warn that TABLESAMPLE cannot be expressed and render only the sampled table."""
    self.unsupported("TABLESAMPLE unsupported")
    return self.sql(expression.this)
277
278
def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
    """Warn that PIVOT cannot be expressed, then fall through to the default rendering."""
    self.unsupported("PIVOT unsupported")
    return self.sql(expression)
282
283
def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
    # For dialects without TRY_CAST: render as a plain CAST.
    # NOTE(review): unlike TRY_CAST this presumably errors (rather than
    # returning NULL) on invalid input at execution time — accepted tradeoff.
    return self.cast_sql(expression)
286
287
def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
    """Warn that table properties cannot be expressed and emit nothing."""
    self.unsupported("Properties unsupported")
    return ""
291
292
def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as STRPOS, emulating a start ``position`` by
    searching a SUBSTR of the haystack and shifting the result back so it
    stays relative to the full string (1-based)."""
    this = self.sql(expression, "this")
    substr = self.sql(expression, "substr")
    position = self.sql(expression, "position")
    if not position:
        return f"STRPOS({this}, {substr})"
    return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
300
301
def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
    """Render struct field access as ``<struct>."<key>"`` with a quoted key."""
    key_sql = self.sql(exp.Identifier(this=expression.expression, quoted=True))
    return f"{self.sql(expression, 'this')}.{key_sql}"
306
307
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a map constructor as ``MAP(k1, v1, k2, v2, ...)``.

    When the keys/values are not array literals they cannot be interleaved,
    so they are passed through verbatim with a warning.
    """
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not (isinstance(keys, exp.Array) and isinstance(values, exp.Array)):
        self.unsupported("Cannot convert array columns into map.")
        return self.func(map_func_name, keys, values)

    # Interleave keys and values: k1, v1, k2, v2, ...
    interleaved = [
        self.sql(item)
        for pair in zip(keys.expressions, values.expressions)
        for item in pair
    ]
    return self.func(map_func_name, *interleaved)
323
324
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        target = Dialect[dialect]
        fmt = seq_get(args, 1)
        if not fmt:
            # Fall back to the dialect's time format (default=True) or the
            # explicitly supplied default format string.
            fmt = target.time_format if default is True else default or None
        return exp_class(this=seq_get(args, 0), format=target.format_time(fmt))

    return _format_time
349
350
def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the
    PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding
    columns are removed from the create statement.
    """
    has_schema = isinstance(expression.this, exp.Schema)
    is_partitionable = expression.args.get("kind") in ("TABLE", "VIEW")

    if has_schema and is_partitionable:
        # Work on a copy so the caller's expression tree is left untouched.
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        this = prop and prop.this
        # Only rewrite when PARTITIONED BY holds bare column names (not
        # already a Schema with typed column definitions).
        if prop and not isinstance(this, exp.Schema):
            schema = expression.this
            # Column names are matched case-insensitively against the schema.
            columns = {v.name.upper() for v in this.expressions}
            partitions = [col for col in schema.expressions if col.name.upper() in columns]
            # Move the partition columns out of the main schema and into a
            # Schema node under the PARTITIONED BY property.
            schema.set("expressions", [e for e in schema.expressions if e not in partitions])
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partitions)))
            expression.set("this", schema)

    return self.create_sql(expression)
373
374
def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date add/sub style functions.

    The returned callable accepts either ``(this, expression)`` or the
    unit-first form ``(unit, expression, this)`` and instantiates
    ``exp_class`` accordingly, defaulting the unit to ``'DAY'``.

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional mapping used to normalize unit names.

    Returns:
        A callable that builds the expression from a parsed argument list.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The delta is the second argument in both calling conventions, so
        # no conditional is needed (the original had identical branches).
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
387
388
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE(substr, haystack[, position]) arguments into a
    StrPosition expression, which puts the haystack first."""
    substr, haystack, position = (seq_get(args, i) for i in range(3))
    return exp.StrPosition(this=haystack, substr=substr, position=position)
395
396
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as LOCATE, whose argument order is
    (substr, haystack[, position])."""
    substr = expression.args.get("substr")
    position = expression.args.get("position")
    return self.func("LOCATE", substr, expression.this, position)
401
402
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as an explicit CAST to TIMESTAMP."""
    value = self.sql(expression, "this")
    return f"CAST({value} AS TIMESTAMP)"
405
406
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as an explicit CAST to DATE."""
    value = self.sql(expression, "this")
    return f"CAST({value} AS DATE)"
409
410
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render TRIM using the ANSI ``TRIM([position] [chars] FROM target [COLLATE c])``
    form when custom characters or a collation are present; otherwise defer to
    the generator's plain TRIM rendering."""
    target = self.sql(expression, "this")
    trim_type = self.sql(expression, "position")
    remove_chars = self.sql(expression, "expression")
    collation = self.sql(expression, "collation")

    if not (remove_chars or collation):
        # Nothing dialect-specific to spell out: use the generator's rendering.
        return self.trim_sql(expression)

    parts = []
    if trim_type:
        parts.append(f"{trim_type} ")
    if remove_chars:
        parts.append(f"{remove_chars} ")
    if parts:
        # FROM is only needed when a position and/or char set precedes the target.
        parts.append("FROM ")
    parts.append(target)
    if collation:
        parts.append(f" COLLATE {collation}")
    return "TRIM(" + "".join(parts) + ")"
class Dialects(builtins.str, enum.Enum):
18class Dialects(str, Enum):
19    DIALECT = ""
20
21    BIGQUERY = "bigquery"
22    CLICKHOUSE = "clickhouse"
23    DUCKDB = "duckdb"
24    HIVE = "hive"
25    MYSQL = "mysql"
26    ORACLE = "oracle"
27    POSTGRES = "postgres"
28    PRESTO = "presto"
29    REDSHIFT = "redshift"
30    SNOWFLAKE = "snowflake"
31    SPARK = "spark"
32    SQLITE = "sqlite"
33    STARROCKS = "starrocks"
34    TABLEAU = "tableau"
35    TRINO = "trino"
36    TSQL = "tsql"
37    DATABRICKS = "databricks"
38    DRILL = "drill"
39    TERADATA = "teradata"

An enumeration of the SQL dialects supported by sqlglot; each member's value is the lowercase dialect name.

DIALECT = <Dialects.DIALECT: ''>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DRILL = <Dialects.DRILL: 'drill'>
TERADATA = <Dialects.TERADATA: 'teradata'>
Inherited Members
enum.Enum
name
value
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
class Dialect:
101class Dialect(metaclass=_Dialect):
102    index_offset = 0
103    unnest_column_only = False
104    alias_post_tablesample = False
105    normalize_functions: t.Optional[str] = "upper"
106    null_ordering = "nulls_are_small"
107
108    date_format = "'%Y-%m-%d'"
109    dateint_format = "'%Y%m%d'"
110    time_format = "'%Y-%m-%d %H:%M:%S'"
111    time_mapping: t.Dict[str, str] = {}
112
113    # autofilled
114    quote_start = None
115    quote_end = None
116    identifier_start = None
117    identifier_end = None
118
119    time_trie = None
120    inverse_time_mapping = None
121    inverse_time_trie = None
122    tokenizer_class = None
123    parser_class = None
124    generator_class = None
125
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
140
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
162
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
165
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
170
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
173
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
176
177    @property
178    def tokenizer(self) -> Tokenizer:
179        if not hasattr(self, "_tokenizer"):
180            self._tokenizer = self.tokenizer_class()  # type: ignore
181        return self._tokenizer
182
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
193
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
Dialect()
@classmethod
def get_or_raise( cls, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType]) -> Type[sqlglot.dialects.dialect.Dialect]:
126    @classmethod
127    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
128        if not dialect:
129            return cls
130        if isinstance(dialect, _Dialect):
131            return dialect
132        if isinstance(dialect, Dialect):
133            return dialect.__class__
134
135        result = cls.get(dialect)
136        if not result:
137            raise ValueError(f"Unknown dialect '{dialect}'")
138
139        return result
@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
141    @classmethod
142    def format_time(
143        cls, expression: t.Optional[str | exp.Expression]
144    ) -> t.Optional[exp.Expression]:
145        if isinstance(expression, str):
146            return exp.Literal.string(
147                format_time(
148                    expression[1:-1],  # the time formats are quoted
149                    cls.time_mapping,
150                    cls.time_trie,
151                )
152            )
153        if expression and expression.is_string:
154            return exp.Literal.string(
155                format_time(
156                    expression.this,
157                    cls.time_mapping,
158                    cls.time_trie,
159                )
160            )
161        return expression
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
163    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
164        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
166    def parse_into(
167        self, expression_type: exp.IntoType, sql: str, **opts
168    ) -> t.List[t.Optional[exp.Expression]]:
169        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)
def generate( self, expression: Optional[sqlglot.expressions.Expression], **opts) -> str:
171    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
172        return self.generator(**opts).generate(expression)
def transpile(self, sql: str, **opts) -> List[str]:
174    def transpile(self, sql: str, **opts) -> t.List[str]:
175        return [self.generate(expression, **opts) for expression in self.parse(sql)]
def parser(self, **opts) -> sqlglot.parser.Parser:
183    def parser(self, **opts) -> Parser:
184        return self.parser_class(  # type: ignore
185            **{
186                "index_offset": self.index_offset,
187                "unnest_column_only": self.unnest_column_only,
188                "alias_post_tablesample": self.alias_post_tablesample,
189                "null_ordering": self.null_ordering,
190                **opts,
191            },
192        )
def generator(self, **opts) -> sqlglot.generator.Generator:
194    def generator(self, **opts) -> Generator:
195        return self.generator_class(  # type: ignore
196            **{
197                "quote_start": self.quote_start,
198                "quote_end": self.quote_end,
199                "identifier_start": self.identifier_start,
200                "identifier_end": self.identifier_end,
201                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
202                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
203                "index_offset": self.index_offset,
204                "time_mapping": self.inverse_time_mapping,
205                "time_trie": self.inverse_time_trie,
206                "unnest_column_only": self.unnest_column_only,
207                "alias_post_tablesample": self.alias_post_tablesample,
208                "normalize_functions": self.normalize_functions,
209                "null_ordering": self.null_ordering,
210                **opts,
211            }
212        )
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
218def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
219    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
222def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
223    if expression.args.get("accuracy"):
224        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
225    return self.func("APPROX_COUNT_DISTINCT", expression.this)
def if_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.If) -> str:
228def if_sql(self: Generator, expression: exp.If) -> str:
229    return self.func(
230        "IF", expression.this, expression.args.get("true"), expression.args.get("false")
231    )
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtract | sqlglot.expressions.JSONBExtract) -> str:
234def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
235    return self.binary(expression, "->")
def arrow_json_extract_scalar_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtractScalar | sqlglot.expressions.JSONBExtractScalar) -> str:
238def arrow_json_extract_scalar_sql(
239    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
240) -> str:
241    return self.binary(expression, "->>")
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
244def inline_array_sql(self: Generator, expression: exp.Array) -> str:
245    return f"[{self.expressions(expression)}]"
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
248def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
249    return self.like_sql(
250        exp.Like(
251            this=exp.Lower(this=expression.this),
252            expression=expression.args["expression"],
253        )
254    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
257def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
258    zone = self.sql(expression, "this")
259    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
262def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
263    if expression.args.get("recursive"):
264        self.unsupported("Recursive CTEs are unsupported")
265        expression.args["recursive"] = False
266    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
269def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
270    n = self.sql(expression, "this")
271    d = self.sql(expression, "expression")
272    return f"IF({d} <> 0, {n} / {d}, NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
275def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
276    self.unsupported("TABLESAMPLE unsupported")
277    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
280def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
281    self.unsupported("PIVOT unsupported")
282    return self.sql(expression)
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
285def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
286    return self.cast_sql(expression)
def no_properties_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Properties) -> str:
289def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
290    self.unsupported("Properties unsupported")
291    return ""
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
294def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
295    this = self.sql(expression, "this")
296    substr = self.sql(expression, "substr")
297    position = self.sql(expression, "position")
298    if position:
299        return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
300    return f"STRPOS({this}, {substr})"
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
303def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
304    this = self.sql(expression, "this")
305    struct_key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
306    return f"{this}.{struct_key}"
def var_map_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Map | sqlglot.expressions.VarMap, map_func_name: str = 'MAP') -> str:
def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a map constructor as MAP(k1, v1, k2, v2, ...).

    When either side is not an array literal the key/value pairs cannot be
    interleaved, so we warn and fall back to MAP(keys, values).
    """
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not (isinstance(keys, exp.Array) and isinstance(values, exp.Array)):
        self.unsupported("Cannot convert array columns into map.")
        return self.func(map_func_name, keys, values)

    # Interleave keys and values: [k1, v1, k2, v2, ...]
    interleaved = [
        self.sql(item)
        for pair in zip(keys.expressions, values.expressions)
        for item in pair
    ]
    return self.func(map_func_name, *interleaved)
def format_time_lambda( exp_class: Type[~E], dialect: str, default: Union[bool, str, NoneType] = None) -> Callable[[Sequence], ~E]:
def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        target_dialect = Dialect[dialect]
        fmt = seq_get(args, 1)
        if not fmt:
            # No explicit format: use the dialect's time format when default
            # is True, otherwise fall back to the given default (or None).
            fmt = target_dialect.time_format if default is True else default or None
        return exp_class(this=seq_get(args, 0), format=target_dialect.format_time(fmt))

    return _format_time

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format, True being time.
Returns: A callable that can be used to return the appropriately formatted time expression.

def create_with_partitions_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Create) -> str:
def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the
    PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding
    columns are removed from the create statement.
    """
    if isinstance(expression.this, exp.Schema) and expression.args.get("kind") in (
        "TABLE",
        "VIEW",
    ):
        expression = expression.copy()
        prop = expression.find(exp.PartitionedByProperty)
        if prop and not isinstance(prop.this, exp.Schema):
            schema = expression.this
            partition_names = {col.name.upper() for col in prop.this.expressions}
            partition_cols = [c for c in schema.expressions if c.name.upper() in partition_names]
            remaining_cols = [c for c in schema.expressions if c not in partition_cols]
            schema.set("expressions", remaining_cols)
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partition_cols)))
            expression.set("this", schema)

    return self.create_sql(expression)

In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding columns are removed from the create statement.

def parse_date_delta( exp_class: Type[~E], unit_mapping: Optional[Dict[str, str]] = None) -> Callable[[Sequence], ~E]:
def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for date-delta functions such as DATE_ADD / DATE_SUB.

    Supports both the two-argument form ``(this, delta)`` with an implicit DAY
    unit and the three-argument form ``(unit, delta, this)``.

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional mapping used to normalize unit names.

    Returns:
        A callable that turns a function-argument list into an `exp_class` node.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The delta amount sits at index 1 in both the 2- and 3-argument forms,
        # so no conditional is needed (the original ternary had identical branches).
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func
def locate_to_strposition(args: Sequence) -> sqlglot.expressions.Expression:
def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Convert LOCATE-style arguments (substr, string, position) into StrPosition."""
    substr, haystack, position = (seq_get(args, i) for i in range(3))
    return exp.StrPosition(this=haystack, substr=substr, position=position)
def strposition_to_locate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition using LOCATE(substr, string[, position])."""
    arg_map = expression.args
    return self.func("LOCATE", arg_map.get("substr"), expression.this, arg_map.get("position"))
def timestrtotime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TimeStrToTime) -> str:
def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as an explicit CAST to TIMESTAMP."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS TIMESTAMP)"
def datestrtodate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.DateStrToDate) -> str:
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate as an explicit CAST to DATE."""
    inner = self.sql(expression, "this")
    return f"CAST({inner} AS DATE)"
def trim_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Trim) -> str:
def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render TRIM, using the verbose SQL form only when required.

    Simple trims are delegated back to the generator's default TRIM/LTRIM/RTRIM
    handling; the extended TRIM(<type> <chars> FROM <target> COLLATE ...) form
    is emitted only when custom characters or a collation are present.
    """
    target = self.sql(expression, "this")
    trim_type = self.sql(expression, "position")
    remove_chars = self.sql(expression, "expression")
    collation = self.sql(expression, "collation")

    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
    if not (remove_chars or collation):
        return self.trim_sql(expression)

    pieces = [
        f"{trim_type} " if trim_type else "",
        f"{remove_chars} " if remove_chars else "",
        "FROM " if trim_type or remove_chars else "",
        target,
        f" COLLATE {collation}" if collation else "",
    ]
    body = "".join(pieces)
    return f"TRIM({body})"