sqlglot.dialects.dialect
from __future__ import annotations

import typing as t
from enum import Enum

from sqlglot import exp
from sqlglot.generator import Generator
from sqlglot.helper import flatten, seq_get
from sqlglot.parser import Parser
from sqlglot.time import format_time
from sqlglot.tokens import Tokenizer
from sqlglot.trie import new_trie

# Type variable for helpers that construct a specific Expression subclass.
E = t.TypeVar("E", bound=exp.Expression)


class Dialects(str, Enum):
    """Canonical names of the SQL dialects shipped with sqlglot."""

    DIALECT = ""

    BIGQUERY = "bigquery"
    CLICKHOUSE = "clickhouse"
    DUCKDB = "duckdb"
    HIVE = "hive"
    MYSQL = "mysql"
    ORACLE = "oracle"
    POSTGRES = "postgres"
    PRESTO = "presto"
    REDSHIFT = "redshift"
    SNOWFLAKE = "snowflake"
    SPARK = "spark"
    SQLITE = "sqlite"
    STARROCKS = "starrocks"
    TABLEAU = "tableau"
    TRINO = "trino"
    TSQL = "tsql"
    DATABRICKS = "databricks"
    DRILL = "drill"
    TERADATA = "teradata"


class _Dialect(type):
    """Metaclass for Dialect.

    Registers every subclass in ``classes`` (keyed by its Dialects enum value
    when one matches, else by the lowercased class name) and autofills the
    attributes marked "autofilled" on Dialect: time tries, the
    tokenizer/parser/generator classes, quote/identifier delimiters, and
    default generator transforms for bit/hex/byte string literals.
    """

    classes: t.Dict[str, t.Type[Dialect]] = {}

    @classmethod
    def __getitem__(cls, key: str) -> t.Type[Dialect]:
        return cls.classes[key]

    @classmethod
    def get(
        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
    ) -> t.Optional[t.Type[Dialect]]:
        return cls.classes.get(key, default)

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Register under the matching enum value when one exists, otherwise
        # under the lowercased class name.
        enum = Dialects.__members__.get(clsname.upper())
        cls.classes[enum.value if enum is not None else clsname.lower()] = klass

        # Precompute the lookup structures used for time-format translation.
        klass.time_trie = new_trie(klass.time_mapping)
        klass.inverse_time_mapping = {v: k for k, v in klass.time_mapping.items()}
        klass.inverse_time_trie = new_trie(klass.inverse_time_mapping)

        # A dialect may override any of these with a nested class of the
        # same name; otherwise the base implementation is used.
        klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer)
        klass.parser_class = getattr(klass, "Parser", Parser)
        klass.generator_class = getattr(klass, "Generator", Generator)

        # The first configured pair is treated as the canonical delimiters.
        klass.quote_start, klass.quote_end = list(klass.tokenizer_class._QUOTES.items())[0]
        klass.identifier_start, klass.identifier_end = list(
            klass.tokenizer_class._IDENTIFIERS.items()
        )[0]

        # Derive default generator transforms for bit/hex/byte string
        # literals from the tokenizer's delimiters, unless the dialect's
        # generator already defines its own transform for them.
        if (
            klass.tokenizer_class._BIT_STRINGS
            and exp.BitString not in klass.generator_class.TRANSFORMS
        ):
            bs_start, bs_end = list(klass.tokenizer_class._BIT_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.BitString
            ] = lambda self, e: f"{bs_start}{int(self.sql(e, 'this')):b}{bs_end}"
        if (
            klass.tokenizer_class._HEX_STRINGS
            and exp.HexString not in klass.generator_class.TRANSFORMS
        ):
            hs_start, hs_end = list(klass.tokenizer_class._HEX_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.HexString
            ] = lambda self, e: f"{hs_start}{int(self.sql(e, 'this')):X}{hs_end}"
        if (
            klass.tokenizer_class._BYTE_STRINGS
            and exp.ByteString not in klass.generator_class.TRANSFORMS
        ):
            be_start, be_end = list(klass.tokenizer_class._BYTE_STRINGS.items())[0]
            klass.generator_class.TRANSFORMS[
                exp.ByteString
            ] = lambda self, e: f"{be_start}{self.sql(e, 'this')}{be_end}"

        return klass


class Dialect(metaclass=_Dialect):
    """Base class for SQL dialects.

    Subclasses are automatically registered by the _Dialect metaclass and can
    be looked up by name via ``Dialect["name"]`` / ``Dialect.get(...)``.
    """

    # Parsing/generation behavior knobs that dialects override.
    index_offset = 0
    unnest_column_only = False
    alias_post_tablesample = False
    normalize_functions: t.Optional[str] = "upper"
    null_ordering = "nulls_are_small"

    # Default literal formats for this dialect.
    date_format = "'%Y-%m-%d'"
    dateint_format = "'%Y%m%d'"
    time_format = "'%Y-%m-%d %H:%M:%S'"
    time_mapping: t.Dict[str, str] = {}

    # autofilled by the _Dialect metaclass
    quote_start = None
    quote_end = None
    identifier_start = None
    identifier_end = None

    time_trie = None
    inverse_time_mapping = None
    inverse_time_trie = None
    tokenizer_class = None
    parser_class = None
    generator_class = None

    @classmethod
    def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]:
        """Resolve *dialect* (name, class, or instance) to a Dialect class.

        Raises:
            ValueError: if *dialect* is an unrecognized dialect name.
        """
        if not dialect:
            return cls
        if isinstance(dialect, _Dialect):
            return dialect
        if isinstance(dialect, Dialect):
            return dialect.__class__

        result = cls.get(dialect)
        if not result:
            raise ValueError(f"Unknown dialect '{dialect}'")

        return result

    @classmethod
    def format_time(
        cls, expression: t.Optional[str | exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Translate a time-format string (or string literal expression) into
        this dialect's format, returning a string Literal expression.

        Non-string expressions (and None) are returned unchanged.
        """
        if isinstance(expression, str):
            return exp.Literal.string(
                format_time(
                    expression[1:-1],  # the time formats are quoted
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        if expression and expression.is_string:
            return exp.Literal.string(
                format_time(
                    expression.this,
                    cls.time_mapping,
                    cls.time_trie,
                )
            )
        return expression

    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
        """Tokenize and parse *sql* into a list of expression trees."""
        return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql)

    def parse_into(
        self, expression_type: exp.IntoType, sql: str, **opts
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse *sql* into the given expression type(s)."""
        return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql)

    def generate(self, expression: t.Optional[exp.Expression], **opts) -> str:
        """Render an expression tree as SQL text in this dialect."""
        return self.generator(**opts).generate(expression)

    def transpile(self, sql: str, **opts) -> t.List[str]:
        """Parse *sql* and re-generate each statement in this dialect."""
        return [self.generate(expression, **opts) for expression in self.parse(sql)]

    @property
    def tokenizer(self) -> Tokenizer:
        # Lazily instantiated and cached per Dialect instance.
        if not hasattr(self, "_tokenizer"):
            self._tokenizer = self.tokenizer_class()  # type: ignore
        return self._tokenizer

    def parser(self, **opts) -> Parser:
        """Build a Parser configured with this dialect's settings; *opts* win."""
        return self.parser_class(  # type: ignore
            **{
                "index_offset": self.index_offset,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "null_ordering": self.null_ordering,
                **opts,
            },
        )

    def generator(self, **opts) -> Generator:
        """Build a Generator configured with this dialect's settings; *opts* win."""
        return self.generator_class(  # type: ignore
            **{
                "quote_start": self.quote_start,
                "quote_end": self.quote_end,
                "identifier_start": self.identifier_start,
                "identifier_end": self.identifier_end,
                "string_escape": self.tokenizer_class.STRING_ESCAPES[0],
                "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0],
                "index_offset": self.index_offset,
                "time_mapping": self.inverse_time_mapping,
                "time_trie": self.inverse_time_trie,
                "unnest_column_only": self.unnest_column_only,
                "alias_post_tablesample": self.alias_post_tablesample,
                "normalize_functions": self.normalize_functions,
                "null_ordering": self.null_ordering,
                **opts,
            }
        )


# A dialect may be referenced by name, instance, class, or omitted entirely.
DialectType = t.Union[str, Dialect, t.Type[Dialect], None]


def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
    """Return a transform that renders an expression as ``NAME(args...)``."""

    def _rename(self, expression):
        args = flatten(expression.args.values())
        return f"{self.normalize_func(name)}({self.format_args(*args)})"

    return _rename


def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
    """Render APPROX_COUNT_DISTINCT, warning if an accuracy arg is present."""
    if expression.args.get("accuracy"):
        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
    return f"APPROX_COUNT_DISTINCT({self.format_args(expression.this)})"


def if_sql(self: Generator, expression: exp.If) -> str:
    """Render an If expression as ``IF(condition, true, false)``."""
    expressions = self.format_args(
        expression.this, expression.args.get("true"), expression.args.get("false")
    )
    return f"IF({expressions})"


def arrow_json_extract_sql(self: Generator, expression: exp.JSONExtract | exp.JSONBExtract) -> str:
    """Render JSON extraction with the ``->`` operator."""
    return self.binary(expression, "->")


def arrow_json_extract_scalar_sql(
    self: Generator, expression: exp.JSONExtractScalar | exp.JSONBExtractScalar
) -> str:
    """Render scalar JSON extraction with the ``->>`` operator."""
    return self.binary(expression, "->>")


def inline_array_sql(self: Generator, expression: exp.Array) -> str:
    """Render an array literal inline as ``[...]``."""
    return f"[{self.expressions(expression)}]"


def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
    """Rewrite ILIKE as ``LOWER(this) LIKE pattern`` and render that LIKE."""
    return self.like_sql(
        exp.Like(
            this=exp.Lower(this=expression.this),
            expression=expression.args["expression"],
        )
    )
def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE without parentheses, with an optional time zone."""
    zone = self.sql(expression, "this")
    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"


def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
    """Drop the RECURSIVE keyword (with a warning) for dialects without it."""
    if expression.args.get("recursive"):
        self.unsupported("Recursive CTEs are unsupported")
        expression.args["recursive"] = False  # mutates the node in place
    return self.with_sql(expression)


def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
    """Emulate SAFE_DIVIDE with an IF guarding against a zero denominator."""
    n = self.sql(expression, "this")
    d = self.sql(expression, "expression")
    return f"IF({d} <> 0, {n} / {d}, NULL)"


def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
    """Drop TABLESAMPLE (with a warning), rendering only the sampled table."""
    self.unsupported("TABLESAMPLE unsupported")
    return self.sql(expression.this)


def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
    """Warn that PIVOT is unsupported and render the node via self.sql."""
    self.unsupported("PIVOT unsupported")
    # NOTE(review): self.sql(expression) re-dispatches generation for this
    # same node — confirm this cannot loop back into this transform.
    return self.sql(expression)


def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
    """Render TRY_CAST as a plain CAST."""
    return self.cast_sql(expression)


def no_properties_sql(self: Generator, expression: exp.Properties) -> str:
    """Drop properties entirely (with a warning), emitting nothing."""
    self.unsupported("Properties unsupported")
    return ""


def str_position_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as STRPOS, emulating the optional start position
    with SUBSTR plus offset arithmetic."""
    this = self.sql(expression, "this")
    substr = self.sql(expression, "substr")
    position = self.sql(expression, "position")
    if position:
        return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1"
    return f"STRPOS({this}, {substr})"


def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
    """Render struct field access as ``this.key`` with a quoted key identifier."""
    this = self.sql(expression, "this")
    struct_key = self.sql(exp.Identifier(this=expression.expression, quoted=True))
    return f"{this}.{struct_key}"


def var_map_sql(
    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
) -> str:
    """Render a map constructor as ``MAP(k1, v1, k2, v2, ...)``.

    Falls back to ``MAP(keys, values)`` (with a warning) when keys/values are
    not array literals and therefore cannot be interleaved pairwise.
    """
    keys = expression.args["keys"]
    values = expression.args["values"]

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return f"{map_func_name}({self.format_args(keys, values)})"

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(self.sql(key))
        args.append(self.sql(value))
    return f"{map_func_name}({self.format_args(*args)})"


def format_time_lambda(
    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
) -> t.Callable[[t.Sequence], E]:
    """Helper used for time expressions.

    Args:
        exp_class: the expression class to instantiate.
        dialect: target sql dialect.
        default: the default format, True being time.

    Returns:
        A callable that can be used to return the appropriately formatted time expression.
    """

    def _format_time(args: t.Sequence):
        return exp_class(
            this=seq_get(args, 0),
            format=Dialect[dialect].format_time(
                seq_get(args, 1)
                or (Dialect[dialect].time_format if default is True else default or None)
            ),
        )

    return _format_time


def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str:
    """
    In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the
    PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding
    columns are removed from the create statement.
    """
    has_schema = isinstance(expression.this, exp.Schema)
    is_partitionable = expression.args.get("kind") in ("TABLE", "VIEW")

    if has_schema and is_partitionable:
        expression = expression.copy()  # don't mutate the caller's tree
        prop = expression.find(exp.PartitionedByProperty)
        this = prop and prop.this
        if prop and not isinstance(this, exp.Schema):
            # Move the partition columns out of the table schema and into a
            # Schema node on the PARTITIONED BY property itself.
            schema = expression.this
            columns = {v.name.upper() for v in this.expressions}
            partitions = [col for col in schema.expressions if col.name.upper() in columns]
            schema.set("expressions", [e for e in schema.expressions if e not in partitions])
            prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partitions)))
            expression.set("this", schema)

    return self.create_sql(expression)


def parse_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
) -> t.Callable[[t.Sequence], E]:
    """Build a parser for DATE_ADD / DATE_SUB style functions.

    Handles both the unit-based 3-arg form ``(unit, expression, this)`` and
    the 2-arg form ``(this, expression)``, which defaults the unit to DAY.

    Args:
        exp_class: the expression class to instantiate.
        unit_mapping: optional lowercase-name mapping used to normalize the unit.
    """

    def inner_func(args: t.Sequence) -> E:
        unit_based = len(args) == 3
        this = seq_get(args, 2) if unit_based else seq_get(args, 0)
        # The interval expression sits at index 1 in both forms; the previous
        # conditional here had identical branches and was simplified.
        expression = seq_get(args, 1)
        unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY")
        unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit  # type: ignore
        return exp_class(this=this, expression=expression, unit=unit)

    return inner_func


def locate_to_strposition(args: t.Sequence) -> exp.Expression:
    """Map LOCATE(substr, this[, position]) arguments onto StrPosition."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
    """Render StrPosition as ``LOCATE(substr, this[, position])``."""
    args = self.format_args(
        expression.args.get("substr"), expression.this, expression.args.get("position")
    )
    return f"LOCATE({args})"


def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as a CAST to TIMESTAMP."""
    return f"CAST({self.sql(expression, 'this')} AS TIMESTAMP)"
def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
    """Render DateStrToDate by casting the underlying value to DATE."""
    value = self.sql(expression, "this")
    return f"CAST({value} AS DATE)"


def trim_sql(self: Generator, expression: exp.Trim) -> str:
    """Render a Trim expression.

    Falls back to the generator's own TRIM/LTRIM/RTRIM rendering when there
    is nothing dialect-specific to express (no removal characters and no
    collation); otherwise emits the verbose
    ``TRIM([position ][chars ]FROM target[ COLLATE c])`` form.
    """
    subject = self.sql(expression, "this")
    position = self.sql(expression, "position")
    chars = self.sql(expression, "expression")
    collate = self.sql(expression, "collation")

    # Nothing database-specific: delegate to the default TRIM syntax.
    if not chars and not collate:
        return self.trim_sql(expression)

    parts = []
    if position:
        parts.append(f"{position} ")
    if chars:
        parts.append(f"{chars} ")
    if parts:
        parts.append("FROM ")
    parts.append(subject)
    if collate:
        parts.append(f" COLLATE {collate}")
    body = "".join(parts)
    return f"TRIM({body})"
class
Dialects(builtins.str, enum.Enum):
18class Dialects(str, Enum): 19 DIALECT = "" 20 21 BIGQUERY = "bigquery" 22 CLICKHOUSE = "clickhouse" 23 DUCKDB = "duckdb" 24 HIVE = "hive" 25 MYSQL = "mysql" 26 ORACLE = "oracle" 27 POSTGRES = "postgres" 28 PRESTO = "presto" 29 REDSHIFT = "redshift" 30 SNOWFLAKE = "snowflake" 31 SPARK = "spark" 32 SQLITE = "sqlite" 33 STARROCKS = "starrocks" 34 TABLEAU = "tableau" 35 TRINO = "trino" 36 TSQL = "tsql" 37 DATABRICKS = "databricks" 38 DRILL = "drill" 39 TERADATA = "teradata"
An enumeration of the SQL dialect names supported by sqlglot; each member's value is the lowercase key used to look the dialect up (the empty-valued DIALECT member denotes the default base dialect).
DIALECT = <Dialects.DIALECT: ''>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DRILL = <Dialects.DRILL: 'drill'>
TERADATA = <Dialects.TERADATA: 'teradata'>
Inherited Members
- enum.Enum
- name
- value
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
class
Dialect:
101class Dialect(metaclass=_Dialect): 102 index_offset = 0 103 unnest_column_only = False 104 alias_post_tablesample = False 105 normalize_functions: t.Optional[str] = "upper" 106 null_ordering = "nulls_are_small" 107 108 date_format = "'%Y-%m-%d'" 109 dateint_format = "'%Y%m%d'" 110 time_format = "'%Y-%m-%d %H:%M:%S'" 111 time_mapping: t.Dict[str, str] = {} 112 113 # autofilled 114 quote_start = None 115 quote_end = None 116 identifier_start = None 117 identifier_end = None 118 119 time_trie = None 120 inverse_time_mapping = None 121 inverse_time_trie = None 122 tokenizer_class = None 123 parser_class = None 124 generator_class = None 125 126 @classmethod 127 def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]: 128 if not dialect: 129 return cls 130 if isinstance(dialect, _Dialect): 131 return dialect 132 if isinstance(dialect, Dialect): 133 return dialect.__class__ 134 135 result = cls.get(dialect) 136 if not result: 137 raise ValueError(f"Unknown dialect '{dialect}'") 138 139 return result 140 141 @classmethod 142 def format_time( 143 cls, expression: t.Optional[str | exp.Expression] 144 ) -> t.Optional[exp.Expression]: 145 if isinstance(expression, str): 146 return exp.Literal.string( 147 format_time( 148 expression[1:-1], # the time formats are quoted 149 cls.time_mapping, 150 cls.time_trie, 151 ) 152 ) 153 if expression and expression.is_string: 154 return exp.Literal.string( 155 format_time( 156 expression.this, 157 cls.time_mapping, 158 cls.time_trie, 159 ) 160 ) 161 return expression 162 163 def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: 164 return self.parser(**opts).parse(self.tokenizer.tokenize(sql), sql) 165 166 def parse_into( 167 self, expression_type: exp.IntoType, sql: str, **opts 168 ) -> t.List[t.Optional[exp.Expression]]: 169 return self.parser(**opts).parse_into(expression_type, self.tokenizer.tokenize(sql), sql) 170 171 def generate(self, expression: t.Optional[exp.Expression], **opts) -> str: 172 return 
self.generator(**opts).generate(expression) 173 174 def transpile(self, sql: str, **opts) -> t.List[str]: 175 return [self.generate(expression, **opts) for expression in self.parse(sql)] 176 177 @property 178 def tokenizer(self) -> Tokenizer: 179 if not hasattr(self, "_tokenizer"): 180 self._tokenizer = self.tokenizer_class() # type: ignore 181 return self._tokenizer 182 183 def parser(self, **opts) -> Parser: 184 return self.parser_class( # type: ignore 185 **{ 186 "index_offset": self.index_offset, 187 "unnest_column_only": self.unnest_column_only, 188 "alias_post_tablesample": self.alias_post_tablesample, 189 "null_ordering": self.null_ordering, 190 **opts, 191 }, 192 ) 193 194 def generator(self, **opts) -> Generator: 195 return self.generator_class( # type: ignore 196 **{ 197 "quote_start": self.quote_start, 198 "quote_end": self.quote_end, 199 "identifier_start": self.identifier_start, 200 "identifier_end": self.identifier_end, 201 "string_escape": self.tokenizer_class.STRING_ESCAPES[0], 202 "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0], 203 "index_offset": self.index_offset, 204 "time_mapping": self.inverse_time_mapping, 205 "time_trie": self.inverse_time_trie, 206 "unnest_column_only": self.unnest_column_only, 207 "alias_post_tablesample": self.alias_post_tablesample, 208 "normalize_functions": self.normalize_functions, 209 "null_ordering": self.null_ordering, 210 **opts, 211 } 212 )
@classmethod
def
get_or_raise( cls, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType]) -> Type[sqlglot.dialects.dialect.Dialect]:
126 @classmethod 127 def get_or_raise(cls, dialect: DialectType) -> t.Type[Dialect]: 128 if not dialect: 129 return cls 130 if isinstance(dialect, _Dialect): 131 return dialect 132 if isinstance(dialect, Dialect): 133 return dialect.__class__ 134 135 result = cls.get(dialect) 136 if not result: 137 raise ValueError(f"Unknown dialect '{dialect}'") 138 139 return result
@classmethod
def
format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
141 @classmethod 142 def format_time( 143 cls, expression: t.Optional[str | exp.Expression] 144 ) -> t.Optional[exp.Expression]: 145 if isinstance(expression, str): 146 return exp.Literal.string( 147 format_time( 148 expression[1:-1], # the time formats are quoted 149 cls.time_mapping, 150 cls.time_trie, 151 ) 152 ) 153 if expression and expression.is_string: 154 return exp.Literal.string( 155 format_time( 156 expression.this, 157 cls.time_mapping, 158 cls.time_trie, 159 ) 160 ) 161 return expression
def
parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
183 def parser(self, **opts) -> Parser: 184 return self.parser_class( # type: ignore 185 **{ 186 "index_offset": self.index_offset, 187 "unnest_column_only": self.unnest_column_only, 188 "alias_post_tablesample": self.alias_post_tablesample, 189 "null_ordering": self.null_ordering, 190 **opts, 191 }, 192 )
194 def generator(self, **opts) -> Generator: 195 return self.generator_class( # type: ignore 196 **{ 197 "quote_start": self.quote_start, 198 "quote_end": self.quote_end, 199 "identifier_start": self.identifier_start, 200 "identifier_end": self.identifier_end, 201 "string_escape": self.tokenizer_class.STRING_ESCAPES[0], 202 "identifier_escape": self.tokenizer_class.IDENTIFIER_ESCAPES[0], 203 "index_offset": self.index_offset, 204 "time_mapping": self.inverse_time_mapping, 205 "time_trie": self.inverse_time_trie, 206 "unnest_column_only": self.unnest_column_only, 207 "alias_post_tablesample": self.alias_post_tablesample, 208 "normalize_functions": self.normalize_functions, 209 "null_ordering": self.null_ordering, 210 **opts, 211 } 212 )
def
rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
def
approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
def
arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtract | sqlglot.expressions.JSONBExtract) -> str:
def
arrow_json_extract_scalar_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtractScalar | sqlglot.expressions.JSONBExtractScalar) -> str:
def
inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
def
no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
def
no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
def
no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
def
no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
def
no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
def
no_properties_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Properties) -> str:
def
str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
299def str_position_sql(self: Generator, expression: exp.StrPosition) -> str: 300 this = self.sql(expression, "this") 301 substr = self.sql(expression, "substr") 302 position = self.sql(expression, "position") 303 if position: 304 return f"STRPOS(SUBSTR({this}, {position}), {substr}) + {position} - 1" 305 return f"STRPOS({this}, {substr})"
def
struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
def
var_map_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Map | sqlglot.expressions.VarMap, map_func_name: str = 'MAP') -> str:
314def var_map_sql( 315 self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP" 316) -> str: 317 keys = expression.args["keys"] 318 values = expression.args["values"] 319 320 if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): 321 self.unsupported("Cannot convert array columns into map.") 322 return f"{map_func_name}({self.format_args(keys, values)})" 323 324 args = [] 325 for key, value in zip(keys.expressions, values.expressions): 326 args.append(self.sql(key)) 327 args.append(self.sql(value)) 328 return f"{map_func_name}({self.format_args(*args)})"
def
format_time_lambda( exp_class: Type[~E], dialect: str, default: Union[bool, str, NoneType] = None) -> Callable[[Sequence], ~E]:
331def format_time_lambda( 332 exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None 333) -> t.Callable[[t.Sequence], E]: 334 """Helper used for time expressions. 335 336 Args: 337 exp_class: the expression class to instantiate. 338 dialect: target sql dialect. 339 default: the default format, True being time. 340 341 Returns: 342 A callable that can be used to return the appropriately formatted time expression. 343 """ 344 345 def _format_time(args: t.Sequence): 346 return exp_class( 347 this=seq_get(args, 0), 348 format=Dialect[dialect].format_time( 349 seq_get(args, 1) 350 or (Dialect[dialect].time_format if default is True else default or None) 351 ), 352 ) 353 354 return _format_time
Helper used for time expressions.
Arguments:
- exp_class: the expression class to instantiate.
- dialect: target sql dialect.
- default: the default format, True being time.
Returns:
A callable that can be used to return the appropriately formatted time expression.
def
create_with_partitions_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Create) -> str:
357def create_with_partitions_sql(self: Generator, expression: exp.Create) -> str: 358 """ 359 In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the 360 PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding 361 columns are removed from the create statement. 362 """ 363 has_schema = isinstance(expression.this, exp.Schema) 364 is_partitionable = expression.args.get("kind") in ("TABLE", "VIEW") 365 366 if has_schema and is_partitionable: 367 expression = expression.copy() 368 prop = expression.find(exp.PartitionedByProperty) 369 this = prop and prop.this 370 if prop and not isinstance(this, exp.Schema): 371 schema = expression.this 372 columns = {v.name.upper() for v in this.expressions} 373 partitions = [col for col in schema.expressions if col.name.upper() in columns] 374 schema.set("expressions", [e for e in schema.expressions if e not in partitions]) 375 prop.replace(exp.PartitionedByProperty(this=exp.Schema(expressions=partitions))) 376 expression.set("this", schema) 377 378 return self.create_sql(expression)
In Hive and Spark, the PARTITIONED BY property acts as an extension of a table's schema. When the PARTITIONED BY value is an array of column names, they are transformed into a schema. The corresponding columns are removed from the create statement.
def
parse_date_delta( exp_class: Type[~E], unit_mapping: Optional[Dict[str, str]] = None) -> Callable[[Sequence], ~E]:
381def parse_date_delta( 382 exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None 383) -> t.Callable[[t.Sequence], E]: 384 def inner_func(args: t.Sequence) -> E: 385 unit_based = len(args) == 3 386 this = seq_get(args, 2) if unit_based else seq_get(args, 0) 387 expression = seq_get(args, 1) if unit_based else seq_get(args, 1) 388 unit = seq_get(args, 0) if unit_based else exp.Literal.string("DAY") 389 unit = unit_mapping.get(unit.name.lower(), unit) if unit_mapping else unit # type: ignore 390 return exp_class(this=this, expression=expression, unit=unit) 391 392 return inner_func
def
strposition_to_locate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
def
timestrtotime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TimeStrToTime) -> str:
def
datestrtodate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.DateStrToDate) -> str:
418def trim_sql(self: Generator, expression: exp.Trim) -> str: 419 target = self.sql(expression, "this") 420 trim_type = self.sql(expression, "position") 421 remove_chars = self.sql(expression, "expression") 422 collation = self.sql(expression, "collation") 423 424 # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific 425 if not remove_chars and not collation: 426 return self.trim_sql(expression) 427 428 trim_type = f"{trim_type} " if trim_type else "" 429 remove_chars = f"{remove_chars} " if remove_chars else "" 430 from_part = "FROM " if trim_type or remove_chars else "" 431 collation = f" COLLATE {collation}" if collation else "" 432 return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"