From 8fd7374bf370b99577a40d4de1716ad990d5a34b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 12 Feb 2024 07:15:14 +0100 Subject: Adding upstream version 21.0.2. Signed-off-by: Daniel Baumann --- docs/sqlglot/dialects/dialect.html | 2338 ++++++++++++++++++------------------ 1 file changed, 1192 insertions(+), 1146 deletions(-) (limited to 'docs/sqlglot/dialects/dialect.html') diff --git a/docs/sqlglot/dialects/dialect.html b/docs/sqlglot/dialects/dialect.html index 37bf8ce..7b67d8a 100644 --- a/docs/sqlglot/dialects/dialect.html +++ b/docs/sqlglot/dialects/dialect.html @@ -490,7 +490,10 @@ parse_json_extract_path
  • - json_path_segments + json_extract_segments +
  • +
  • + json_path_key_only_name
  • @@ -536,507 +539,507 @@ 19DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub] 20 21if t.TYPE_CHECKING: - 22 from sqlglot._typing import B, E + 22 from sqlglot._typing import B, E, F 23 - 24logger = logging.getLogger("sqlglot") + 24 JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar] 25 - 26 - 27class Dialects(str, Enum): - 28 """Dialects supported by SQLGLot.""" - 29 - 30 DIALECT = "" + 26logger = logging.getLogger("sqlglot") + 27 + 28 + 29class Dialects(str, Enum): + 30 """Dialects supported by SQLGLot.""" 31 - 32 BIGQUERY = "bigquery" - 33 CLICKHOUSE = "clickhouse" - 34 DATABRICKS = "databricks" - 35 DORIS = "doris" - 36 DRILL = "drill" - 37 DUCKDB = "duckdb" - 38 HIVE = "hive" - 39 MYSQL = "mysql" - 40 ORACLE = "oracle" - 41 POSTGRES = "postgres" - 42 PRESTO = "presto" - 43 REDSHIFT = "redshift" - 44 SNOWFLAKE = "snowflake" - 45 SPARK = "spark" - 46 SPARK2 = "spark2" - 47 SQLITE = "sqlite" - 48 STARROCKS = "starrocks" - 49 TABLEAU = "tableau" - 50 TERADATA = "teradata" - 51 TRINO = "trino" - 52 TSQL = "tsql" - 53 - 54 - 55class NormalizationStrategy(str, AutoName): - 56 """Specifies the strategy according to which identifiers should be normalized.""" - 57 - 58 LOWERCASE = auto() - 59 """Unquoted identifiers are lowercased.""" - 60 - 61 UPPERCASE = auto() - 62 """Unquoted identifiers are uppercased.""" - 63 - 64 CASE_SENSITIVE = auto() - 65 """Always case-sensitive, regardless of quotes.""" - 66 - 67 CASE_INSENSITIVE = auto() - 68 """Always case-insensitive, regardless of quotes.""" - 69 - 70 - 71class _Dialect(type): - 72 classes: t.Dict[str, t.Type[Dialect]] = {} - 73 - 74 def __eq__(cls, other: t.Any) -> bool: - 75 if cls is other: - 76 return True - 77 if isinstance(other, str): - 78 return cls is cls.get(other) - 79 if isinstance(other, Dialect): - 80 return cls is type(other) - 81 - 82 return False + 32 DIALECT = "" + 33 + 34 BIGQUERY = "bigquery" + 35 CLICKHOUSE = "clickhouse" + 36 DATABRICKS = "databricks" + 37 DORIS = "doris" + 38 DRILL = "drill" + 39 DUCKDB = "duckdb" + 40 HIVE = "hive" + 41 MYSQL = "mysql" + 42 ORACLE = "oracle" + 43 POSTGRES = "postgres" + 44 PRESTO = "presto" + 45 REDSHIFT = "redshift" + 46 SNOWFLAKE = "snowflake" + 47 SPARK = "spark" + 48 SPARK2 = "spark2" + 49 SQLITE = "sqlite" + 50 STARROCKS = "starrocks" + 51 TABLEAU = "tableau" + 52 TERADATA = "teradata" + 53 TRINO = "trino" + 54 TSQL = "tsql" + 55 + 56 + 57class NormalizationStrategy(str, AutoName): + 58 """Specifies the strategy according to which identifiers should be normalized.""" + 59 + 60 LOWERCASE = auto() + 61 """Unquoted identifiers are lowercased.""" + 62 + 63 UPPERCASE = auto() + 64 """Unquoted identifiers are uppercased.""" + 65 + 66 CASE_SENSITIVE = auto() + 67 """Always case-sensitive, regardless of quotes.""" + 68 + 69 CASE_INSENSITIVE = auto() + 70 """Always case-insensitive, regardless of quotes.""" + 71 + 72 + 73class _Dialect(type): + 74 classes: t.Dict[str, t.Type[Dialect]] = {} + 75 + 76 def __eq__(cls, other: t.Any) -> bool: + 77 if cls is other: + 78 return True + 79 if isinstance(other, str): + 80 return cls is cls.get(other) + 81 if isinstance(other, Dialect): + 82 return cls is type(other) 83 - 84 def __hash__(cls) -> int: - 85 return hash(cls.__name__.lower()) - 86 - 87 @classmethod - 88 def __getitem__(cls, key: str) -> t.Type[Dialect]: - 89 return cls.classes[key] - 90 - 91 @classmethod - 92 def get( - 93 cls, key: str, default: t.Optional[t.Type[Dialect]] = None - 94 ) -> t.Optional[t.Type[Dialect]]: - 95 return cls.classes.get(key, 
default) - 96 - 97 def __new__(cls, clsname, bases, attrs): - 98 klass = super().__new__(cls, clsname, bases, attrs) - 99 enum = Dialects.__members__.get(clsname.upper()) - 100 cls.classes[enum.value if enum is not None else clsname.lower()] = klass - 101 - 102 klass.TIME_TRIE = new_trie(klass.TIME_MAPPING) - 103 klass.FORMAT_TRIE = ( - 104 new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE - 105 ) - 106 klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()} - 107 klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING) - 108 - 109 klass.INVERSE_ESCAPE_SEQUENCES = {v: k for k, v in klass.ESCAPE_SEQUENCES.items()} + 84 return False + 85 + 86 def __hash__(cls) -> int: + 87 return hash(cls.__name__.lower()) + 88 + 89 @classmethod + 90 def __getitem__(cls, key: str) -> t.Type[Dialect]: + 91 return cls.classes[key] + 92 + 93 @classmethod + 94 def get( + 95 cls, key: str, default: t.Optional[t.Type[Dialect]] = None + 96 ) -> t.Optional[t.Type[Dialect]]: + 97 return cls.classes.get(key, default) + 98 + 99 def __new__(cls, clsname, bases, attrs): + 100 klass = super().__new__(cls, clsname, bases, attrs) + 101 enum = Dialects.__members__.get(clsname.upper()) + 102 cls.classes[enum.value if enum is not None else clsname.lower()] = klass + 103 + 104 klass.TIME_TRIE = new_trie(klass.TIME_MAPPING) + 105 klass.FORMAT_TRIE = ( + 106 new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE + 107 ) + 108 klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()} + 109 klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING) 110 - 111 klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer) - 112 klass.parser_class = getattr(klass, "Parser", Parser) - 113 klass.generator_class = getattr(klass, "Generator", Generator) - 114 - 115 klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0] - 116 klass.IDENTIFIER_START, klass.IDENTIFIER_END = list( - 117 klass.tokenizer_class._IDENTIFIERS.items() - 118 )[0] - 119 - 120 def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]: - 121 return next( - 122 ( - 123 (s, e) - 124 for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items() - 125 if t == token_type - 126 ), - 127 (None, None), - 128 ) - 129 - 130 klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING) - 131 klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING) - 132 klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING) - 133 klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING) - 134 - 135 if enum not in ("", "bigquery"): - 136 klass.generator_class.SELECT_KINDS = () - 137 - 138 if not klass.SUPPORTS_SEMI_ANTI_JOIN: - 139 klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | { - 140 TokenType.ANTI, - 141 TokenType.SEMI, - 142 } - 143 - 144 return klass + 111 klass.INVERSE_ESCAPE_SEQUENCES = {v: k for k, v in klass.ESCAPE_SEQUENCES.items()} + 112 + 113 klass.tokenizer_class = getattr(klass, "Tokenizer", Tokenizer) + 114 klass.parser_class = getattr(klass, "Parser", Parser) + 115 klass.generator_class = getattr(klass, "Generator", Generator) + 116 + 117 klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0] + 118 klass.IDENTIFIER_START, klass.IDENTIFIER_END = list( + 119 klass.tokenizer_class._IDENTIFIERS.items() + 120 )[0] + 121 + 122 def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]: + 123 
return next( + 124 ( + 125 (s, e) + 126 for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items() + 127 if t == token_type + 128 ), + 129 (None, None), + 130 ) + 131 + 132 klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING) + 133 klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING) + 134 klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING) + 135 klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING) + 136 + 137 if enum not in ("", "bigquery"): + 138 klass.generator_class.SELECT_KINDS = () + 139 + 140 if not klass.SUPPORTS_SEMI_ANTI_JOIN: + 141 klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | { + 142 TokenType.ANTI, + 143 TokenType.SEMI, + 144 } 145 - 146 - 147class Dialect(metaclass=_Dialect): - 148 INDEX_OFFSET = 0 - 149 """Determines the base index offset for arrays.""" - 150 - 151 WEEK_OFFSET = 0 - 152 """Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.""" - 153 - 154 UNNEST_COLUMN_ONLY = False - 155 """Determines whether or not `UNNEST` table aliases are treated as column aliases.""" - 156 - 157 ALIAS_POST_TABLESAMPLE = False - 158 """Determines whether or not the table alias comes after tablesample.""" - 159 - 160 TABLESAMPLE_SIZE_IS_PERCENT = False - 161 """Determines whether or not a size in the table sample clause represents percentage.""" - 162 - 163 NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE - 164 """Specifies the strategy according to which identifiers should be normalized.""" - 165 - 166 IDENTIFIERS_CAN_START_WITH_DIGIT = False - 167 """Determines whether or not an unquoted identifier can start with a digit.""" - 168 - 169 DPIPE_IS_STRING_CONCAT = True - 170 """Determines whether or not the DPIPE token (`||`) is a string concatenation operator.""" - 171 - 172 STRICT_STRING_CONCAT = False - 173 """Determines whether or not `CONCAT`'s arguments must be strings.""" - 174 - 175 SUPPORTS_USER_DEFINED_TYPES = True - 176 """Determines whether or not user-defined data types are supported.""" - 177 - 178 SUPPORTS_SEMI_ANTI_JOIN = True - 179 """Determines whether or not `SEMI` or `ANTI` joins are supported.""" - 180 - 181 NORMALIZE_FUNCTIONS: bool | str = "upper" - 182 """Determines how function names are going to be normalized.""" - 183 - 184 LOG_BASE_FIRST = True - 185 """Determines whether the base comes first in the `LOG` function.""" - 186 - 187 NULL_ORDERING = "nulls_are_small" - 188 """ - 189 Indicates the default `NULL` ordering method to use if not explicitly set. - 190 Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"` - 191 """ - 192 - 193 TYPED_DIVISION = False - 194 """ - 195 Whether the behavior of `a / b` depends on the types of `a` and `b`. - 196 False means `a / b` is always float division. - 197 True means `a / b` is integer division if both `a` and `b` are integers. 
- 198 """ - 199 - 200 SAFE_DIVISION = False - 201 """Determines whether division by zero throws an error (`False`) or returns NULL (`True`).""" - 202 - 203 CONCAT_COALESCE = False - 204 """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string.""" - 205 - 206 DATE_FORMAT = "'%Y-%m-%d'" - 207 DATEINT_FORMAT = "'%Y%m%d'" - 208 TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'" - 209 - 210 TIME_MAPPING: t.Dict[str, str] = {} - 211 """Associates this dialect's time formats with their equivalent Python `strftime` format.""" - 212 - 213 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time - 214 # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE - 215 FORMAT_MAPPING: t.Dict[str, str] = {} - 216 """ - 217 Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. - 218 If empty, the corresponding trie will be constructed off of `TIME_MAPPING`. - 219 """ - 220 - 221 ESCAPE_SEQUENCES: t.Dict[str, str] = {} - 222 """Mapping of an unescaped escape sequence to the corresponding character.""" - 223 - 224 PSEUDOCOLUMNS: t.Set[str] = set() - 225 """ - 226 Columns that are auto-generated by the engine corresponding to this dialect. - 227 For example, such columns may be excluded from `SELECT *` queries. - 228 """ - 229 - 230 PREFER_CTE_ALIAS_COLUMN = False - 231 """ - 232 Some dialects, such as Snowflake, allow you to reference a CTE column alias in the - 233 HAVING clause of the CTE. This flag will cause the CTE alias columns to override - 234 any projection aliases in the subquery. - 235 - 236 For example, - 237 WITH y(c) AS ( - 238 SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 - 239 ) SELECT c FROM y; - 240 - 241 will be rewritten as + 146 return klass + 147 + 148 + 149class Dialect(metaclass=_Dialect): + 150 INDEX_OFFSET = 0 + 151 """Determines the base index offset for arrays.""" + 152 + 153 WEEK_OFFSET = 0 + 154 """Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). 
-1 would be Sunday.""" + 155 + 156 UNNEST_COLUMN_ONLY = False + 157 """Determines whether or not `UNNEST` table aliases are treated as column aliases.""" + 158 + 159 ALIAS_POST_TABLESAMPLE = False + 160 """Determines whether or not the table alias comes after tablesample.""" + 161 + 162 TABLESAMPLE_SIZE_IS_PERCENT = False + 163 """Determines whether or not a size in the table sample clause represents percentage.""" + 164 + 165 NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE + 166 """Specifies the strategy according to which identifiers should be normalized.""" + 167 + 168 IDENTIFIERS_CAN_START_WITH_DIGIT = False + 169 """Determines whether or not an unquoted identifier can start with a digit.""" + 170 + 171 DPIPE_IS_STRING_CONCAT = True + 172 """Determines whether or not the DPIPE token (`||`) is a string concatenation operator.""" + 173 + 174 STRICT_STRING_CONCAT = False + 175 """Determines whether or not `CONCAT`'s arguments must be strings.""" + 176 + 177 SUPPORTS_USER_DEFINED_TYPES = True + 178 """Determines whether or not user-defined data types are supported.""" + 179 + 180 SUPPORTS_SEMI_ANTI_JOIN = True + 181 """Determines whether or not `SEMI` or `ANTI` joins are supported.""" + 182 + 183 NORMALIZE_FUNCTIONS: bool | str = "upper" + 184 """Determines how function names are going to be normalized.""" + 185 + 186 LOG_BASE_FIRST = True + 187 """Determines whether the base comes first in the `LOG` function.""" + 188 + 189 NULL_ORDERING = "nulls_are_small" + 190 """ + 191 Indicates the default `NULL` ordering method to use if not explicitly set. + 192 Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"` + 193 """ + 194 + 195 TYPED_DIVISION = False + 196 """ + 197 Whether the behavior of `a / b` depends on the types of `a` and `b`. + 198 False means `a / b` is always float division. + 199 True means `a / b` is integer division if both `a` and `b` are integers. + 200 """ + 201 + 202 SAFE_DIVISION = False + 203 """Determines whether division by zero throws an error (`False`) or returns NULL (`True`).""" + 204 + 205 CONCAT_COALESCE = False + 206 """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string.""" + 207 + 208 DATE_FORMAT = "'%Y-%m-%d'" + 209 DATEINT_FORMAT = "'%Y%m%d'" + 210 TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'" + 211 + 212 TIME_MAPPING: t.Dict[str, str] = {} + 213 """Associates this dialect's time formats with their equivalent Python `strftime` format.""" + 214 + 215 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time + 216 # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE + 217 FORMAT_MAPPING: t.Dict[str, str] = {} + 218 """ + 219 Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. + 220 If empty, the corresponding trie will be constructed off of `TIME_MAPPING`. + 221 """ + 222 + 223 ESCAPE_SEQUENCES: t.Dict[str, str] = {} + 224 """Mapping of an unescaped escape sequence to the corresponding character.""" + 225 + 226 PSEUDOCOLUMNS: t.Set[str] = set() + 227 """ + 228 Columns that are auto-generated by the engine corresponding to this dialect. + 229 For example, such columns may be excluded from `SELECT *` queries. 
+ 230 """ + 231 + 232 PREFER_CTE_ALIAS_COLUMN = False + 233 """ + 234 Some dialects, such as Snowflake, allow you to reference a CTE column alias in the + 235 HAVING clause of the CTE. This flag will cause the CTE alias columns to override + 236 any projection aliases in the subquery. + 237 + 238 For example, + 239 WITH y(c) AS ( + 240 SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 + 241 ) SELECT c FROM y; 242 - 243 WITH y(c) AS ( - 244 SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0 - 245 ) SELECT c FROM y; - 246 """ - 247 - 248 # --- Autofilled --- + 243 will be rewritten as + 244 + 245 WITH y(c) AS ( + 246 SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0 + 247 ) SELECT c FROM y; + 248 """ 249 - 250 tokenizer_class = Tokenizer - 251 parser_class = Parser - 252 generator_class = Generator - 253 - 254 # A trie of the time_mapping keys - 255 TIME_TRIE: t.Dict = {} - 256 FORMAT_TRIE: t.Dict = {} - 257 - 258 INVERSE_TIME_MAPPING: t.Dict[str, str] = {} - 259 INVERSE_TIME_TRIE: t.Dict = {} - 260 - 261 INVERSE_ESCAPE_SEQUENCES: t.Dict[str, str] = {} + 250 # --- Autofilled --- + 251 + 252 tokenizer_class = Tokenizer + 253 parser_class = Parser + 254 generator_class = Generator + 255 + 256 # A trie of the time_mapping keys + 257 TIME_TRIE: t.Dict = {} + 258 FORMAT_TRIE: t.Dict = {} + 259 + 260 INVERSE_TIME_MAPPING: t.Dict[str, str] = {} + 261 INVERSE_TIME_TRIE: t.Dict = {} 262 - 263 # Delimiters for string literals and identifiers - 264 QUOTE_START = "'" - 265 QUOTE_END = "'" - 266 IDENTIFIER_START = '"' - 267 IDENTIFIER_END = '"' - 268 - 269 # Delimiters for bit, hex, byte and unicode literals - 270 BIT_START: t.Optional[str] = None - 271 BIT_END: t.Optional[str] = None - 272 HEX_START: t.Optional[str] = None - 273 HEX_END: t.Optional[str] = None - 274 BYTE_START: t.Optional[str] = None - 275 BYTE_END: t.Optional[str] = None - 276 UNICODE_START: t.Optional[str] = None - 277 UNICODE_END: t.Optional[str] = None - 278 - 279 @classmethod - 280 def get_or_raise(cls, dialect: DialectType) -> Dialect: - 281 """ - 282 Look up a dialect in the global dialect registry and return it if it exists. - 283 - 284 Args: - 285 dialect: The target dialect. If this is a string, it can be optionally followed by - 286 additional key-value pairs that are separated by commas and are used to specify - 287 dialect settings, such as whether the dialect's identifiers are case-sensitive. - 288 - 289 Example: - 290 >>> dialect = dialect_class = get_or_raise("duckdb") - 291 >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive") - 292 - 293 Returns: - 294 The corresponding Dialect instance. - 295 """ - 296 - 297 if not dialect: - 298 return cls() - 299 if isinstance(dialect, _Dialect): - 300 return dialect() - 301 if isinstance(dialect, Dialect): - 302 return dialect - 303 if isinstance(dialect, str): - 304 try: - 305 dialect_name, *kv_pairs = dialect.split(",") - 306 kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)} - 307 except ValueError: - 308 raise ValueError( - 309 f"Invalid dialect format: '{dialect}'. " - 310 "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'." - 311 ) - 312 - 313 result = cls.get(dialect_name.strip()) - 314 if not result: - 315 from difflib import get_close_matches - 316 - 317 similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or "" - 318 if similar: - 319 similar = f" Did you mean {similar}?" 
- 320 - 321 raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}") + 263 INVERSE_ESCAPE_SEQUENCES: t.Dict[str, str] = {} + 264 + 265 # Delimiters for string literals and identifiers + 266 QUOTE_START = "'" + 267 QUOTE_END = "'" + 268 IDENTIFIER_START = '"' + 269 IDENTIFIER_END = '"' + 270 + 271 # Delimiters for bit, hex, byte and unicode literals + 272 BIT_START: t.Optional[str] = None + 273 BIT_END: t.Optional[str] = None + 274 HEX_START: t.Optional[str] = None + 275 HEX_END: t.Optional[str] = None + 276 BYTE_START: t.Optional[str] = None + 277 BYTE_END: t.Optional[str] = None + 278 UNICODE_START: t.Optional[str] = None + 279 UNICODE_END: t.Optional[str] = None + 280 + 281 @classmethod + 282 def get_or_raise(cls, dialect: DialectType) -> Dialect: + 283 """ + 284 Look up a dialect in the global dialect registry and return it if it exists. + 285 + 286 Args: + 287 dialect: The target dialect. If this is a string, it can be optionally followed by + 288 additional key-value pairs that are separated by commas and are used to specify + 289 dialect settings, such as whether the dialect's identifiers are case-sensitive. + 290 + 291 Example: + 292 >>> dialect = dialect_class = get_or_raise("duckdb") + 293 >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive") + 294 + 295 Returns: + 296 The corresponding Dialect instance. + 297 """ + 298 + 299 if not dialect: + 300 return cls() + 301 if isinstance(dialect, _Dialect): + 302 return dialect() + 303 if isinstance(dialect, Dialect): + 304 return dialect + 305 if isinstance(dialect, str): + 306 try: + 307 dialect_name, *kv_pairs = dialect.split(",") + 308 kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)} + 309 except ValueError: + 310 raise ValueError( + 311 f"Invalid dialect format: '{dialect}'. " + 312 "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'." + 313 ) + 314 + 315 result = cls.get(dialect_name.strip()) + 316 if not result: + 317 from difflib import get_close_matches + 318 + 319 similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or "" + 320 if similar: + 321 similar = f" Did you mean {similar}?" 
322 - 323 return result(**kwargs) + 323 raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}") 324 - 325 raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.") + 325 return result(**kwargs) 326 - 327 @classmethod - 328 def format_time( - 329 cls, expression: t.Optional[str | exp.Expression] - 330 ) -> t.Optional[exp.Expression]: - 331 """Converts a time format in this dialect to its equivalent Python `strftime` format.""" - 332 if isinstance(expression, str): - 333 return exp.Literal.string( - 334 # the time formats are quoted - 335 format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE) - 336 ) - 337 - 338 if expression and expression.is_string: - 339 return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE)) - 340 - 341 return expression + 327 raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.") + 328 + 329 @classmethod + 330 def format_time( + 331 cls, expression: t.Optional[str | exp.Expression] + 332 ) -> t.Optional[exp.Expression]: + 333 """Converts a time format in this dialect to its equivalent Python `strftime` format.""" + 334 if isinstance(expression, str): + 335 return exp.Literal.string( + 336 # the time formats are quoted + 337 format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE) + 338 ) + 339 + 340 if expression and expression.is_string: + 341 return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE)) 342 - 343 def __init__(self, **kwargs) -> None: - 344 normalization_strategy = kwargs.get("normalization_strategy") - 345 - 346 if normalization_strategy is None: - 347 self.normalization_strategy = self.NORMALIZATION_STRATEGY - 348 else: - 349 self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper()) - 350 - 351 def __eq__(self, other: t.Any) -> bool: - 352 # Does not currently take dialect state into account - 353 return type(self) == other - 354 - 355 def __hash__(self) -> int: - 356 # Does not currently take dialect state into account - 357 return hash(type(self)) - 358 - 359 def normalize_identifier(self, expression: E) -> E: - 360 """ - 361 Transforms an identifier in a way that resembles how it'd be resolved by this dialect. - 362 - 363 For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it - 364 lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so - 365 it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, - 366 and so any normalization would be prohibited in order to avoid "breaking" the identifier. - 367 - 368 There are also dialects like Spark, which are case-insensitive even when quotes are - 369 present, and dialects like MySQL, whose resolution rules match those employed by the - 370 underlying operating system, for example they may always be case-sensitive in Linux. - 371 - 372 Finally, the normalization behavior of some engines can even be controlled through flags, - 373 like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier. - 374 - 375 SQLGlot aims to understand and handle all of these different behaviors gracefully, so - 376 that it can analyze queries in the optimizer and successfully capture their semantics. 
- 377 """ - 378 if ( - 379 isinstance(expression, exp.Identifier) - 380 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE - 381 and ( - 382 not expression.quoted - 383 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE - 384 ) - 385 ): - 386 expression.set( - 387 "this", - 388 ( - 389 expression.this.upper() - 390 if self.normalization_strategy is NormalizationStrategy.UPPERCASE - 391 else expression.this.lower() - 392 ), - 393 ) - 394 - 395 return expression + 343 return expression + 344 + 345 def __init__(self, **kwargs) -> None: + 346 normalization_strategy = kwargs.get("normalization_strategy") + 347 + 348 if normalization_strategy is None: + 349 self.normalization_strategy = self.NORMALIZATION_STRATEGY + 350 else: + 351 self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper()) + 352 + 353 def __eq__(self, other: t.Any) -> bool: + 354 # Does not currently take dialect state into account + 355 return type(self) == other + 356 + 357 def __hash__(self) -> int: + 358 # Does not currently take dialect state into account + 359 return hash(type(self)) + 360 + 361 def normalize_identifier(self, expression: E) -> E: + 362 """ + 363 Transforms an identifier in a way that resembles how it'd be resolved by this dialect. + 364 + 365 For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it + 366 lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so + 367 it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, + 368 and so any normalization would be prohibited in order to avoid "breaking" the identifier. + 369 + 370 There are also dialects like Spark, which are case-insensitive even when quotes are + 371 present, and dialects like MySQL, whose resolution rules match those employed by the + 372 underlying operating system, for example they may always be case-sensitive in Linux. + 373 + 374 Finally, the normalization behavior of some engines can even be controlled through flags, + 375 like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier. + 376 + 377 SQLGlot aims to understand and handle all of these different behaviors gracefully, so + 378 that it can analyze queries in the optimizer and successfully capture their semantics. + 379 """ + 380 if ( + 381 isinstance(expression, exp.Identifier) + 382 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE + 383 and ( + 384 not expression.quoted + 385 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE + 386 ) + 387 ): + 388 expression.set( + 389 "this", + 390 ( + 391 expression.this.upper() + 392 if self.normalization_strategy is NormalizationStrategy.UPPERCASE + 393 else expression.this.lower() + 394 ), + 395 ) 396 - 397 def case_sensitive(self, text: str) -> bool: - 398 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" - 399 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: - 400 return False - 401 - 402 unsafe = ( - 403 str.islower - 404 if self.normalization_strategy is NormalizationStrategy.UPPERCASE - 405 else str.isupper - 406 ) - 407 return any(unsafe(char) for char in text) - 408 - 409 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: - 410 """Checks if text can be identified given an identify option. - 411 - 412 Args: - 413 text: The text to check. - 414 identify: - 415 `"always"` or `True`: Always returns `True`. 
- 416 `"safe"`: Only returns `True` if the identifier is case-insensitive. - 417 - 418 Returns: - 419 Whether or not the given text can be identified. - 420 """ - 421 if identify is True or identify == "always": - 422 return True - 423 - 424 if identify == "safe": - 425 return not self.case_sensitive(text) - 426 - 427 return False + 397 return expression + 398 + 399 def case_sensitive(self, text: str) -> bool: + 400 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" + 401 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: + 402 return False + 403 + 404 unsafe = ( + 405 str.islower + 406 if self.normalization_strategy is NormalizationStrategy.UPPERCASE + 407 else str.isupper + 408 ) + 409 return any(unsafe(char) for char in text) + 410 + 411 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: + 412 """Checks if text can be identified given an identify option. + 413 + 414 Args: + 415 text: The text to check. + 416 identify: + 417 `"always"` or `True`: Always returns `True`. + 418 `"safe"`: Only returns `True` if the identifier is case-insensitive. + 419 + 420 Returns: + 421 Whether or not the given text can be identified. + 422 """ + 423 if identify is True or identify == "always": + 424 return True + 425 + 426 if identify == "safe": + 427 return not self.case_sensitive(text) 428 - 429 def quote_identifier(self, expression: E, identify: bool = True) -> E: - 430 """ - 431 Adds quotes to a given identifier. - 432 - 433 Args: - 434 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. - 435 identify: If set to `False`, the quotes will only be added if the identifier is deemed - 436 "unsafe", with respect to its characters and this dialect's normalization strategy. - 437 """ - 438 if isinstance(expression, exp.Identifier): - 439 name = expression.this - 440 expression.set( - 441 "quoted", - 442 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), - 443 ) - 444 - 445 return expression + 429 return False + 430 + 431 def quote_identifier(self, expression: E, identify: bool = True) -> E: + 432 """ + 433 Adds quotes to a given identifier. + 434 + 435 Args: + 436 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. + 437 identify: If set to `False`, the quotes will only be added if the identifier is deemed + 438 "unsafe", with respect to its characters and this dialect's normalization strategy. + 439 """ + 440 if isinstance(expression, exp.Identifier): + 441 name = expression.this + 442 expression.set( + 443 "quoted", + 444 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), + 445 ) 446 - 447 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: - 448 if isinstance(path, exp.Literal): - 449 path_text = path.name - 450 if path.is_number: - 451 path_text = f"[{path_text}]" - 452 - 453 try: - 454 return parse_json_path(path_text) - 455 except ParseError as e: - 456 logger.warning(f"Invalid JSON path syntax. {str(e)}") - 457 - 458 return path + 447 return expression + 448 + 449 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: + 450 if isinstance(path, exp.Literal): + 451 path_text = path.name + 452 if path.is_number: + 453 path_text = f"[{path_text}]" + 454 + 455 try: + 456 return parse_json_path(path_text) + 457 except ParseError as e: + 458 logger.warning(f"Invalid JSON path syntax. 
{str(e)}") 459 - 460 def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: - 461 return self.parser(**opts).parse(self.tokenize(sql), sql) - 462 - 463 def parse_into( - 464 self, expression_type: exp.IntoType, sql: str, **opts - 465 ) -> t.List[t.Optional[exp.Expression]]: - 466 return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql) - 467 - 468 def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str: - 469 return self.generator(**opts).generate(expression, copy=copy) - 470 - 471 def transpile(self, sql: str, **opts) -> t.List[str]: - 472 return [ - 473 self.generate(expression, copy=False, **opts) if expression else "" - 474 for expression in self.parse(sql) - 475 ] - 476 - 477 def tokenize(self, sql: str) -> t.List[Token]: - 478 return self.tokenizer.tokenize(sql) - 479 - 480 @property - 481 def tokenizer(self) -> Tokenizer: - 482 if not hasattr(self, "_tokenizer"): - 483 self._tokenizer = self.tokenizer_class(dialect=self) - 484 return self._tokenizer - 485 - 486 def parser(self, **opts) -> Parser: - 487 return self.parser_class(dialect=self, **opts) - 488 - 489 def generator(self, **opts) -> Generator: - 490 return self.generator_class(dialect=self, **opts) - 491 - 492 - 493DialectType = t.Union[str, Dialect, t.Type[Dialect], None] + 460 return path + 461 + 462 def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: + 463 return self.parser(**opts).parse(self.tokenize(sql), sql) + 464 + 465 def parse_into( + 466 self, expression_type: exp.IntoType, sql: str, **opts + 467 ) -> t.List[t.Optional[exp.Expression]]: + 468 return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql) + 469 + 470 def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str: + 471 return self.generator(**opts).generate(expression, copy=copy) + 472 + 473 def transpile(self, sql: str, **opts) -> t.List[str]: + 474 return [ + 475 self.generate(expression, copy=False, **opts) if expression else "" + 476 for expression in self.parse(sql) + 477 ] + 478 + 479 def tokenize(self, sql: str) -> t.List[Token]: + 480 return self.tokenizer.tokenize(sql) + 481 + 482 @property + 483 def tokenizer(self) -> Tokenizer: + 484 if not hasattr(self, "_tokenizer"): + 485 self._tokenizer = self.tokenizer_class(dialect=self) + 486 return self._tokenizer + 487 + 488 def parser(self, **opts) -> Parser: + 489 return self.parser_class(dialect=self, **opts) + 490 + 491 def generator(self, **opts) -> Generator: + 492 return self.generator_class(dialect=self, **opts) + 493 494 - 495 - 496def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]: - 497 return lambda self, expression: self.func(name, *flatten(expression.args.values())) - 498 - 499 - 500def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str: - 501 if expression.args.get("accuracy"): - 502 self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy") - 503 return self.func("APPROX_COUNT_DISTINCT", expression.this) - 504 - 505 - 506def if_sql( - 507 name: str = "IF", false_value: t.Optional[exp.Expression | str] = None - 508) -> t.Callable[[Generator, exp.If], str]: - 509 def _if_sql(self: Generator, expression: exp.If) -> str: - 510 return self.func( - 511 name, - 512 expression.this, - 513 expression.args.get("true"), - 514 expression.args.get("false") or false_value, - 515 ) - 516 - 517 return _if_sql + 495DialectType = t.Union[str, Dialect, t.Type[Dialect], None] + 496 + 497 + 498def rename_func(name: 
str) -> t.Callable[[Generator, exp.Expression], str]: + 499 return lambda self, expression: self.func(name, *flatten(expression.args.values())) + 500 + 501 + 502def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str: + 503 if expression.args.get("accuracy"): + 504 self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy") + 505 return self.func("APPROX_COUNT_DISTINCT", expression.this) + 506 + 507 + 508def if_sql( + 509 name: str = "IF", false_value: t.Optional[exp.Expression | str] = None + 510) -> t.Callable[[Generator, exp.If], str]: + 511 def _if_sql(self: Generator, expression: exp.If) -> str: + 512 return self.func( + 513 name, + 514 expression.this, + 515 expression.args.get("true"), + 516 expression.args.get("false") or false_value, + 517 ) 518 - 519 - 520def arrow_json_extract_sql( - 521 self: Generator, expression: exp.JSONExtract | exp.JSONExtractScalar - 522) -> str: + 519 return _if_sql + 520 + 521 + 522def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: 523 this = expression.this 524 if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string: 525 this.replace(exp.cast(this, "json")) @@ -1528,45 +1531,60 @@ 1011 1012 1013def parse_json_extract_path( -1014 expr_type: t.Type[E], -1015 supports_null_if_invalid: bool = False, -1016) -> t.Callable[[t.List], E]: -1017 def _parse_json_extract_path(args: t.List) -> E: -1018 null_if_invalid = None -1019 -1020 segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()] -1021 for arg in args[1:]: -1022 if isinstance(arg, exp.Literal): -1023 text = arg.name -1024 if is_int(text): -1025 segments.append(exp.JSONPathSubscript(this=int(text))) -1026 else: -1027 segments.append(exp.JSONPathKey(this=text)) -1028 elif supports_null_if_invalid: -1029 null_if_invalid = arg -1030 -1031 this = seq_get(args, 0) -1032 jsonpath = exp.JSONPath(expressions=segments) -1033 -1034 # This is done to avoid failing in the expression validator due to the arg count -1035 del args[2:] -1036 -1037 if expr_type is exp.JSONExtractScalar: -1038 return expr_type(this=this, expression=jsonpath, null_if_invalid=null_if_invalid) -1039 -1040 return expr_type(this=this, expression=jsonpath) -1041 -1042 return _parse_json_extract_path -1043 -1044 -1045def json_path_segments(self: Generator, expression: exp.JSONPath) -> t.List[str]: -1046 segments = [] -1047 for segment in expression.expressions: -1048 path = self.sql(segment) -1049 if path: -1050 segments.append(f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}") -1051 -1052 return segments +1014 expr_type: t.Type[F], zero_based_indexing: bool = True +1015) -> t.Callable[[t.List], F]: +1016 def _parse_json_extract_path(args: t.List) -> F: +1017 segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()] +1018 for arg in args[1:]: +1019 if not isinstance(arg, exp.Literal): +1020 # We use the fallback parser because we can't really transpile non-literals safely +1021 return expr_type.from_arg_list(args) +1022 +1023 text = arg.name +1024 if is_int(text): +1025 index = int(text) +1026 segments.append( +1027 exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1) +1028 ) +1029 else: +1030 segments.append(exp.JSONPathKey(this=text)) +1031 +1032 # This is done to avoid failing in the expression validator due to the arg count +1033 del args[2:] +1034 return expr_type(this=seq_get(args, 0), expression=exp.JSONPath(expressions=segments)) +1035 +1036 return _parse_json_extract_path +1037 +1038 +1039def 
json_extract_segments( +1040 name: str, quoted_index: bool = True +1041) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]: +1042 def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: +1043 path = expression.expression +1044 if not isinstance(path, exp.JSONPath): +1045 return rename_func(name)(self, expression) +1046 +1047 segments = [] +1048 for segment in path.expressions: +1049 path = self.sql(segment) +1050 if path: +1051 if isinstance(segment, exp.JSONPathPart) and ( +1052 quoted_index or not isinstance(segment, exp.JSONPathSubscript) +1053 ): +1054 path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}" +1055 +1056 segments.append(path) +1057 +1058 return self.func(name, expression.this, *segments) +1059 +1060 return _json_extract_segments +1061 +1062 +1063def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str: +1064 if isinstance(expression.this, exp.JSONPathWildcard): +1065 self.unsupported("Unsupported wildcard in JSONPathKey expression") +1066 +1067 return expression.name @@ -1620,32 +1638,32 @@ -
    28class Dialects(str, Enum):
    -29    """Dialects supported by SQLGLot."""
    -30
    -31    DIALECT = ""
    +            
    30class Dialects(str, Enum):
    +31    """Dialects supported by SQLGLot."""
     32
    -33    BIGQUERY = "bigquery"
    -34    CLICKHOUSE = "clickhouse"
    -35    DATABRICKS = "databricks"
    -36    DORIS = "doris"
    -37    DRILL = "drill"
    -38    DUCKDB = "duckdb"
    -39    HIVE = "hive"
    -40    MYSQL = "mysql"
    -41    ORACLE = "oracle"
    -42    POSTGRES = "postgres"
    -43    PRESTO = "presto"
    -44    REDSHIFT = "redshift"
    -45    SNOWFLAKE = "snowflake"
    -46    SPARK = "spark"
    -47    SPARK2 = "spark2"
    -48    SQLITE = "sqlite"
    -49    STARROCKS = "starrocks"
    -50    TABLEAU = "tableau"
    -51    TERADATA = "teradata"
    -52    TRINO = "trino"
    -53    TSQL = "tsql"
    +33    DIALECT = ""
    +34
    +35    BIGQUERY = "bigquery"
    +36    CLICKHOUSE = "clickhouse"
    +37    DATABRICKS = "databricks"
    +38    DORIS = "doris"
    +39    DRILL = "drill"
    +40    DUCKDB = "duckdb"
    +41    HIVE = "hive"
    +42    MYSQL = "mysql"
    +43    ORACLE = "oracle"
    +44    POSTGRES = "postgres"
    +45    PRESTO = "presto"
    +46    REDSHIFT = "redshift"
    +47    SNOWFLAKE = "snowflake"
    +48    SPARK = "spark"
    +49    SPARK2 = "spark2"
    +50    SQLITE = "sqlite"
    +51    STARROCKS = "starrocks"
    +52    TABLEAU = "tableau"
    +53    TERADATA = "teradata"
    +54    TRINO = "trino"
    +55    TSQL = "tsql"
     
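To make the enum's role concrete, here is a minimal sketch (assuming the module layout above) of how each enum value doubles as a key in the registry that the `_Dialect` metaclass maintains:

    from sqlglot.dialects.dialect import Dialect, Dialects

    # Fetch a dialect class by name; Dialect[...] goes through the
    # metaclass __getitem__, Dialect.get through its get() classmethod.
    snowflake = Dialect["snowflake"]
    assert snowflake is Dialect.get(Dialects.SNOWFLAKE.value)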
    @@ -1989,20 +2007,20 @@
    -
    56class NormalizationStrategy(str, AutoName):
    -57    """Specifies the strategy according to which identifiers should be normalized."""
    -58
    -59    LOWERCASE = auto()
    -60    """Unquoted identifiers are lowercased."""
    -61
    -62    UPPERCASE = auto()
    -63    """Unquoted identifiers are uppercased."""
    -64
    -65    CASE_SENSITIVE = auto()
    -66    """Always case-sensitive, regardless of quotes."""
    -67
    -68    CASE_INSENSITIVE = auto()
    -69    """Always case-insensitive, regardless of quotes."""
    +            
    58class NormalizationStrategy(str, AutoName):
    +59    """Specifies the strategy according to which identifiers should be normalized."""
    +60
    +61    LOWERCASE = auto()
    +62    """Unquoted identifiers are lowercased."""
    +63
    +64    UPPERCASE = auto()
    +65    """Unquoted identifiers are uppercased."""
    +66
    +67    CASE_SENSITIVE = auto()
    +68    """Always case-sensitive, regardless of quotes."""
    +69
    +70    CASE_INSENSITIVE = auto()
    +71    """Always case-insensitive, regardless of quotes."""
     
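A rough illustration of these strategies (a sketch, assuming postgres keeps the default LOWERCASE strategy and snowflake overrides it to UPPERCASE at this version):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    ident = exp.to_identifier("FoO")  # unquoted identifier

    # Unquoted identifiers are folded per the dialect's strategy.
    print(Dialect.get_or_raise("postgres").normalize_identifier(ident.copy()).this)   # foo
    print(Dialect.get_or_raise("snowflake").normalize_identifier(ident.copy()).this)  # FOO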
    @@ -2138,350 +2156,350 @@
    -
    148class Dialect(metaclass=_Dialect):
    -149    INDEX_OFFSET = 0
    -150    """Determines the base index offset for arrays."""
    -151
    -152    WEEK_OFFSET = 0
    -153    """Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
    -154
    -155    UNNEST_COLUMN_ONLY = False
    -156    """Determines whether or not `UNNEST` table aliases are treated as column aliases."""
    -157
    -158    ALIAS_POST_TABLESAMPLE = False
    -159    """Determines whether or not the table alias comes after tablesample."""
    -160
    -161    TABLESAMPLE_SIZE_IS_PERCENT = False
    -162    """Determines whether or not a size in the table sample clause represents percentage."""
    -163
    -164    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
    -165    """Specifies the strategy according to which identifiers should be normalized."""
    -166
    -167    IDENTIFIERS_CAN_START_WITH_DIGIT = False
    -168    """Determines whether or not an unquoted identifier can start with a digit."""
    -169
    -170    DPIPE_IS_STRING_CONCAT = True
    -171    """Determines whether or not the DPIPE token (`||`) is a string concatenation operator."""
    -172
    -173    STRICT_STRING_CONCAT = False
    -174    """Determines whether or not `CONCAT`'s arguments must be strings."""
    -175
    -176    SUPPORTS_USER_DEFINED_TYPES = True
    -177    """Determines whether or not user-defined data types are supported."""
    -178
    -179    SUPPORTS_SEMI_ANTI_JOIN = True
    -180    """Determines whether or not `SEMI` or `ANTI` joins are supported."""
    -181
    -182    NORMALIZE_FUNCTIONS: bool | str = "upper"
    -183    """Determines how function names are going to be normalized."""
    -184
    -185    LOG_BASE_FIRST = True
    -186    """Determines whether the base comes first in the `LOG` function."""
    -187
    -188    NULL_ORDERING = "nulls_are_small"
    -189    """
    -190    Indicates the default `NULL` ordering method to use if not explicitly set.
    -191    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
    -192    """
    -193
    -194    TYPED_DIVISION = False
    -195    """
    -196    Whether the behavior of `a / b` depends on the types of `a` and `b`.
    -197    False means `a / b` is always float division.
    -198    True means `a / b` is integer division if both `a` and `b` are integers.
    -199    """
    -200
    -201    SAFE_DIVISION = False
    -202    """Determines whether division by zero throws an error (`False`) or returns NULL (`True`)."""
    -203
    -204    CONCAT_COALESCE = False
    -205    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
    -206
    -207    DATE_FORMAT = "'%Y-%m-%d'"
    -208    DATEINT_FORMAT = "'%Y%m%d'"
    -209    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
    -210
    -211    TIME_MAPPING: t.Dict[str, str] = {}
    -212    """Associates this dialect's time formats with their equivalent Python `strftime` format."""
    -213
    -214    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
    -215    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
    -216    FORMAT_MAPPING: t.Dict[str, str] = {}
    -217    """
    -218    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
    -219    If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
    -220    """
    -221
    -222    ESCAPE_SEQUENCES: t.Dict[str, str] = {}
    -223    """Mapping of an unescaped escape sequence to the corresponding character."""
    -224
    -225    PSEUDOCOLUMNS: t.Set[str] = set()
    -226    """
    -227    Columns that are auto-generated by the engine corresponding to this dialect.
    -228    For example, such columns may be excluded from `SELECT *` queries.
    -229    """
    -230
    -231    PREFER_CTE_ALIAS_COLUMN = False
    -232    """
    -233    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
    -234    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
    -235    any projection aliases in the subquery.
    -236
    -237    For example,
    -238        WITH y(c) AS (
    -239            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    -240        ) SELECT c FROM y;
    -241
    -242        will be rewritten as
    +            
    150class Dialect(metaclass=_Dialect):
    +151    INDEX_OFFSET = 0
    +152    """Determines the base index offset for arrays."""
    +153
    +154    WEEK_OFFSET = 0
    +155    """Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
    +156
    +157    UNNEST_COLUMN_ONLY = False
    +158    """Determines whether or not `UNNEST` table aliases are treated as column aliases."""
    +159
    +160    ALIAS_POST_TABLESAMPLE = False
    +161    """Determines whether or not the table alias comes after tablesample."""
    +162
    +163    TABLESAMPLE_SIZE_IS_PERCENT = False
    +164    """Determines whether or not a size in the table sample clause represents percentage."""
    +165
    +166    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
    +167    """Specifies the strategy according to which identifiers should be normalized."""
    +168
    +169    IDENTIFIERS_CAN_START_WITH_DIGIT = False
    +170    """Determines whether or not an unquoted identifier can start with a digit."""
    +171
    +172    DPIPE_IS_STRING_CONCAT = True
    +173    """Determines whether or not the DPIPE token (`||`) is a string concatenation operator."""
    +174
    +175    STRICT_STRING_CONCAT = False
    +176    """Determines whether or not `CONCAT`'s arguments must be strings."""
    +177
    +178    SUPPORTS_USER_DEFINED_TYPES = True
    +179    """Determines whether or not user-defined data types are supported."""
    +180
    +181    SUPPORTS_SEMI_ANTI_JOIN = True
    +182    """Determines whether or not `SEMI` or `ANTI` joins are supported."""
    +183
    +184    NORMALIZE_FUNCTIONS: bool | str = "upper"
    +185    """Determines how function names are going to be normalized."""
    +186
    +187    LOG_BASE_FIRST = True
    +188    """Determines whether the base comes first in the `LOG` function."""
    +189
    +190    NULL_ORDERING = "nulls_are_small"
    +191    """
    +192    Indicates the default `NULL` ordering method to use if not explicitly set.
    +193    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
    +194    """
    +195
    +196    TYPED_DIVISION = False
    +197    """
    +198    Whether the behavior of `a / b` depends on the types of `a` and `b`.
    +199    False means `a / b` is always float division.
    +200    True means `a / b` is integer division if both `a` and `b` are integers.
    +201    """
    +202
    +203    SAFE_DIVISION = False
    +204    """Determines whether division by zero throws an error (`False`) or returns NULL (`True`)."""
    +205
    +206    CONCAT_COALESCE = False
    +207    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
    +208
    +209    DATE_FORMAT = "'%Y-%m-%d'"
    +210    DATEINT_FORMAT = "'%Y%m%d'"
    +211    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
    +212
    +213    TIME_MAPPING: t.Dict[str, str] = {}
    +214    """Associates this dialect's time formats with their equivalent Python `strftime` format."""
    +215
    +216    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
    +217    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
    +218    FORMAT_MAPPING: t.Dict[str, str] = {}
    +219    """
    +220    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
    +221    If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
    +222    """
    +223
    +224    ESCAPE_SEQUENCES: t.Dict[str, str] = {}
    +225    """Mapping of an unescaped escape sequence to the corresponding character."""
    +226
    +227    PSEUDOCOLUMNS: t.Set[str] = set()
    +228    """
    +229    Columns that are auto-generated by the engine corresponding to this dialect.
    +230    For example, such columns may be excluded from `SELECT *` queries.
    +231    """
    +232
    +233    PREFER_CTE_ALIAS_COLUMN = False
    +234    """
    +235    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
    +236    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
    +237    any projection aliases in the subquery.
    +238
    +239    For example,
    +240        WITH y(c) AS (
    +241            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    +242        ) SELECT c FROM y;
     243
    -244        WITH y(c) AS (
    -245            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    -246        ) SELECT c FROM y;
    -247    """
    -248
    -249    # --- Autofilled ---
    +244        will be rewritten as
    +245
    +246        WITH y(c) AS (
    +247            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    +248        ) SELECT c FROM y;
    +249    """
     250
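These flags are plain class attributes, so a dialect customizes behavior by overriding them. A toy sketch (the `Toy` name is hypothetical) relying on the metaclass registration shown earlier:

    from sqlglot.dialects.dialect import Dialect, NormalizationStrategy

    class Toy(Dialect):
        # Override a couple of the class-level settings documented above.
        NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
        NULL_ORDERING = "nulls_are_large"

    # _Dialect.__new__ registered the subclass under its lowercased name.
    assert Dialect.get("toy") is Toy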
    -251    tokenizer_class = Tokenizer
    -252    parser_class = Parser
    -253    generator_class = Generator
    -254
    -255    # A trie of the time_mapping keys
    -256    TIME_TRIE: t.Dict = {}
    -257    FORMAT_TRIE: t.Dict = {}
    -258
    -259    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
    -260    INVERSE_TIME_TRIE: t.Dict = {}
    -261
    -262    INVERSE_ESCAPE_SEQUENCES: t.Dict[str, str] = {}
    +251    # --- Autofilled ---
    +252
    +253    tokenizer_class = Tokenizer
    +254    parser_class = Parser
    +255    generator_class = Generator
    +256
    +257    # A trie of the time_mapping keys
    +258    TIME_TRIE: t.Dict = {}
    +259    FORMAT_TRIE: t.Dict = {}
    +260
    +261    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
    +262    INVERSE_TIME_TRIE: t.Dict = {}
     263
    -264    # Delimiters for string literals and identifiers
    -265    QUOTE_START = "'"
    -266    QUOTE_END = "'"
    -267    IDENTIFIER_START = '"'
    -268    IDENTIFIER_END = '"'
    -269
    -270    # Delimiters for bit, hex, byte and unicode literals
    -271    BIT_START: t.Optional[str] = None
    -272    BIT_END: t.Optional[str] = None
    -273    HEX_START: t.Optional[str] = None
    -274    HEX_END: t.Optional[str] = None
    -275    BYTE_START: t.Optional[str] = None
    -276    BYTE_END: t.Optional[str] = None
    -277    UNICODE_START: t.Optional[str] = None
    -278    UNICODE_END: t.Optional[str] = None
    -279
    -280    @classmethod
    -281    def get_or_raise(cls, dialect: DialectType) -> Dialect:
    -282        """
    -283        Look up a dialect in the global dialect registry and return it if it exists.
    -284
    -285        Args:
    -286            dialect: The target dialect. If this is a string, it can be optionally followed by
    -287                additional key-value pairs that are separated by commas and are used to specify
    -288                dialect settings, such as whether the dialect's identifiers are case-sensitive.
    -289
    -290        Example:
    -291            >>> dialect = get_or_raise("duckdb")
    -292            >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
    -293
    -294        Returns:
    -295            The corresponding Dialect instance.
    -296        """
    -297
    -298        if not dialect:
    -299            return cls()
    -300        if isinstance(dialect, _Dialect):
    -301            return dialect()
    -302        if isinstance(dialect, Dialect):
    -303            return dialect
    -304        if isinstance(dialect, str):
    -305            try:
    -306                dialect_name, *kv_pairs = dialect.split(",")
    -307                kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)}
    -308            except ValueError:
    -309                raise ValueError(
    -310                    f"Invalid dialect format: '{dialect}'. "
    -311                    "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'."
    -312                )
    -313
    -314            result = cls.get(dialect_name.strip())
    -315            if not result:
    -316                from difflib import get_close_matches
    -317
    -318                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
    -319                if similar:
    -320                    similar = f" Did you mean {similar}?"
    -321
    -322                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
    +264    INVERSE_ESCAPE_SEQUENCES: t.Dict[str, str] = {}
    +265
    +266    # Delimiters for string literals and identifiers
    +267    QUOTE_START = "'"
    +268    QUOTE_END = "'"
    +269    IDENTIFIER_START = '"'
    +270    IDENTIFIER_END = '"'
    +271
    +272    # Delimiters for bit, hex, byte and unicode literals
    +273    BIT_START: t.Optional[str] = None
    +274    BIT_END: t.Optional[str] = None
    +275    HEX_START: t.Optional[str] = None
    +276    HEX_END: t.Optional[str] = None
    +277    BYTE_START: t.Optional[str] = None
    +278    BYTE_END: t.Optional[str] = None
    +279    UNICODE_START: t.Optional[str] = None
    +280    UNICODE_END: t.Optional[str] = None
    +281
    +282    @classmethod
    +283    def get_or_raise(cls, dialect: DialectType) -> Dialect:
    +284        """
    +285        Look up a dialect in the global dialect registry and return it if it exists.
    +286
    +287        Args:
    +288            dialect: The target dialect. If this is a string, it can be optionally followed by
    +289                additional key-value pairs that are separated by commas and are used to specify
    +290                dialect settings, such as whether the dialect's identifiers are case-sensitive.
    +291
    +292        Example:
    +293            >>> dialect = dialect_class = get_or_raise("duckdb")
    +294            >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
    +295
    +296        Returns:
    +297            The corresponding Dialect instance.
    +298        """
    +299
    +300        if not dialect:
    +301            return cls()
    +302        if isinstance(dialect, _Dialect):
    +303            return dialect()
    +304        if isinstance(dialect, Dialect):
    +305            return dialect
    +306        if isinstance(dialect, str):
    +307            try:
    +308                dialect_name, *kv_pairs = dialect.split(",")
    +309                kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)}
    +310            except ValueError:
    +311                raise ValueError(
    +312                    f"Invalid dialect format: '{dialect}'. "
    +313                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
    +314                )
    +315
    +316            result = cls.get(dialect_name.strip())
    +317            if not result:
    +318                from difflib import get_close_matches
    +319
    +320                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
    +321                if similar:
    +322                    similar = f" Did you mean {similar}?"
     323
    -324            return result(**kwargs)
    +324                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
     325
    -326        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
    +326            return result(**kwargs)
     327
    -328    @classmethod
    -329    def format_time(
    -330        cls, expression: t.Optional[str | exp.Expression]
    -331    ) -> t.Optional[exp.Expression]:
    -332        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
    -333        if isinstance(expression, str):
    -334            return exp.Literal.string(
    -335                # the time formats are quoted
    -336                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
    -337            )
    -338
    -339        if expression and expression.is_string:
    -340            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
    -341
    -342        return expression
    +328        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
    +329
    +330    @classmethod
    +331    def format_time(
    +332        cls, expression: t.Optional[str | exp.Expression]
    +333    ) -> t.Optional[exp.Expression]:
    +334        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
    +335        if isinstance(expression, str):
    +336            return exp.Literal.string(
    +337                # the time formats are quoted
    +338                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
    +339            )
    +340
    +341        if expression and expression.is_string:
    +342            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
     343
    -344    def __init__(self, **kwargs) -> None:
    -345        normalization_strategy = kwargs.get("normalization_strategy")
    -346
    -347        if normalization_strategy is None:
    -348            self.normalization_strategy = self.NORMALIZATION_STRATEGY
    -349        else:
    -350            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
    -351
    -352    def __eq__(self, other: t.Any) -> bool:
    -353        # Does not currently take dialect state into account
    -354        return type(self) == other
    -355
    -356    def __hash__(self) -> int:
    -357        # Does not currently take dialect state into account
    -358        return hash(type(self))
    -359
    -360    def normalize_identifier(self, expression: E) -> E:
    -361        """
    -362        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
    -363
    -364        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
    -365        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
    -366        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
    -367        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
    -368
    -369        There are also dialects like Spark, which are case-insensitive even when quotes are
    -370        present, and dialects like MySQL, whose resolution rules match those employed by the
    -371        underlying operating system; for example, they may always be case-sensitive on Linux.
    -372
    -373        Finally, the normalization behavior of some engines can even be controlled through flags,
    -374        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
    -375
    -376        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
    -377        that it can analyze queries in the optimizer and successfully capture their semantics.
    -378        """
    -379        if (
    -380            isinstance(expression, exp.Identifier)
    -381            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
    -382            and (
    -383                not expression.quoted
    -384                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
    -385            )
    -386        ):
    -387            expression.set(
    -388                "this",
    -389                (
    -390                    expression.this.upper()
    -391                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    -392                    else expression.this.lower()
    -393                ),
    -394            )
    -395
    -396        return expression
    +344        return expression
    +345
    +346    def __init__(self, **kwargs) -> None:
    +347        normalization_strategy = kwargs.get("normalization_strategy")
    +348
    +349        if normalization_strategy is None:
    +350            self.normalization_strategy = self.NORMALIZATION_STRATEGY
    +351        else:
    +352            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
    +353
    +354    def __eq__(self, other: t.Any) -> bool:
    +355        # Does not currently take dialect state into account
    +356        return type(self) == other
    +357
    +358    def __hash__(self) -> int:
    +359        # Does not currently take dialect state into account
    +360        return hash(type(self))
    +361
    +362    def normalize_identifier(self, expression: E) -> E:
    +363        """
    +364        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
    +365
    +366        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
    +367        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
    +368        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
    +369        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
    +370
    +371        There are also dialects like Spark, which are case-insensitive even when quotes are
    +372        present, and dialects like MySQL, whose resolution rules match those employed by the
    +373        underlying operating system; for example, they may always be case-sensitive on Linux.
    +374
    +375        Finally, the normalization behavior of some engines can even be controlled through flags,
    +376        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
    +377
    +378        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
    +379        that it can analyze queries in the optimizer and successfully capture their semantics.
    +380        """
    +381        if (
    +382            isinstance(expression, exp.Identifier)
    +383            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
    +384            and (
    +385                not expression.quoted
    +386                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
    +387            )
    +388        ):
    +389            expression.set(
    +390                "this",
    +391                (
    +392                    expression.this.upper()
    +393                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    +394                    else expression.this.lower()
    +395                ),
    +396            )
     397
    -398    def case_sensitive(self, text: str) -> bool:
    -399    """Checks if text contains any case-sensitive characters, based on the dialect's rules."""
    -400        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
    -401            return False
    -402
    -403        unsafe = (
    -404            str.islower
    -405            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    -406            else str.isupper
    -407        )
    -408        return any(unsafe(char) for char in text)
    -409
    -410    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
    -411        """Checks if text can be identified given an identify option.
    -412
    -413        Args:
    -414            text: The text to check.
    -415            identify:
    -416                `"always"` or `True`: Always returns `True`.
    -417                `"safe"`: Only returns `True` if the identifier is case-insensitive.
    -418
    -419        Returns:
    -420            Whether or not the given text can be identified.
    -421        """
    -422        if identify is True or identify == "always":
    -423            return True
    -424
    -425        if identify == "safe":
    -426            return not self.case_sensitive(text)
    -427
    -428        return False
    +398        return expression
    +399
    +400    def case_sensitive(self, text: str) -> bool:
    +401    """Checks if text contains any case-sensitive characters, based on the dialect's rules."""
    +402        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
    +403            return False
    +404
    +405        unsafe = (
    +406            str.islower
    +407            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    +408            else str.isupper
    +409        )
    +410        return any(unsafe(char) for char in text)
    +411
    +412    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
    +413        """Checks if text can be identified given an identify option.
    +414
    +415        Args:
    +416            text: The text to check.
    +417            identify:
    +418                `"always"` or `True`: Always returns `True`.
    +419                `"safe"`: Only returns `True` if the identifier is case-insensitive.
    +420
    +421        Returns:
    +422            Whether or not the given text can be identified.
    +423        """
    +424        if identify is True or identify == "always":
    +425            return True
    +426
    +427        if identify == "safe":
    +428            return not self.case_sensitive(text)
     429
    -430    def quote_identifier(self, expression: E, identify: bool = True) -> E:
    -431        """
    -432        Adds quotes to a given identifier.
    -433
    -434        Args:
    -435            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
    -436            identify: If set to `False`, the quotes will only be added if the identifier is deemed
    -437                "unsafe", with respect to its characters and this dialect's normalization strategy.
    -438        """
    -439        if isinstance(expression, exp.Identifier):
    -440            name = expression.this
    -441            expression.set(
    -442                "quoted",
    -443                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
    -444            )
    -445
    -446        return expression
    +430        return False
    +431
    +432    def quote_identifier(self, expression: E, identify: bool = True) -> E:
    +433        """
    +434        Adds quotes to a given identifier.
    +435
    +436        Args:
    +437            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
    +438            identify: If set to `False`, the quotes will only be added if the identifier is deemed
    +439                "unsafe", with respect to its characters and this dialect's normalization strategy.
    +440        """
    +441        if isinstance(expression, exp.Identifier):
    +442            name = expression.this
    +443            expression.set(
    +444                "quoted",
    +445                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
    +446            )
     447
    -448    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    -449        if isinstance(path, exp.Literal):
    -450            path_text = path.name
    -451            if path.is_number:
    -452                path_text = f"[{path_text}]"
    -453
    -454            try:
    -455                return parse_json_path(path_text)
    -456            except ParseError as e:
    -457                logger.warning(f"Invalid JSON path syntax. {str(e)}")
    -458
    -459        return path
    +448        return expression
    +449
    +450    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    +451        if isinstance(path, exp.Literal):
    +452            path_text = path.name
    +453            if path.is_number:
    +454                path_text = f"[{path_text}]"
    +455
    +456            try:
    +457                return parse_json_path(path_text)
    +458            except ParseError as e:
    +459                logger.warning(f"Invalid JSON path syntax. {str(e)}")
     460
    -461    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
    -462        return self.parser(**opts).parse(self.tokenize(sql), sql)
    -463
    -464    def parse_into(
    -465        self, expression_type: exp.IntoType, sql: str, **opts
    -466    ) -> t.List[t.Optional[exp.Expression]]:
    -467        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
    -468
    -469    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
    -470        return self.generator(**opts).generate(expression, copy=copy)
    -471
    -472    def transpile(self, sql: str, **opts) -> t.List[str]:
    -473        return [
    -474            self.generate(expression, copy=False, **opts) if expression else ""
    -475            for expression in self.parse(sql)
    -476        ]
    -477
    -478    def tokenize(self, sql: str) -> t.List[Token]:
    -479        return self.tokenizer.tokenize(sql)
    -480
    -481    @property
    -482    def tokenizer(self) -> Tokenizer:
    -483        if not hasattr(self, "_tokenizer"):
    -484            self._tokenizer = self.tokenizer_class(dialect=self)
    -485        return self._tokenizer
    -486
    -487    def parser(self, **opts) -> Parser:
    -488        return self.parser_class(dialect=self, **opts)
    -489
    -490    def generator(self, **opts) -> Generator:
    -491        return self.generator_class(dialect=self, **opts)
    +461        return path
    +462
    +463    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
    +464        return self.parser(**opts).parse(self.tokenize(sql), sql)
    +465
    +466    def parse_into(
    +467        self, expression_type: exp.IntoType, sql: str, **opts
    +468    ) -> t.List[t.Optional[exp.Expression]]:
    +469        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
    +470
    +471    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
    +472        return self.generator(**opts).generate(expression, copy=copy)
    +473
    +474    def transpile(self, sql: str, **opts) -> t.List[str]:
    +475        return [
    +476            self.generate(expression, copy=False, **opts) if expression else ""
    +477            for expression in self.parse(sql)
    +478        ]
    +479
    +480    def tokenize(self, sql: str) -> t.List[Token]:
    +481        return self.tokenizer.tokenize(sql)
    +482
    +483    @property
    +484    def tokenizer(self) -> Tokenizer:
    +485        if not hasattr(self, "_tokenizer"):
    +486            self._tokenizer = self.tokenizer_class(dialect=self)
    +487        return self._tokenizer
    +488
    +489    def parser(self, **opts) -> Parser:
    +490        return self.parser_class(dialect=self, **opts)
    +491
    +492    def generator(self, **opts) -> Generator:
    +493        return self.generator_class(dialect=self, **opts)
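
The hunk above shifts the Dialect entry-point block by two lines without changing behavior: tokenize feeds parse, and generate renders an AST back through the dialect-bound Generator. A minimal round trip, assuming only that sqlglot is importable:

    from sqlglot.dialects.dialect import Dialect

    dialect = Dialect.get_or_raise("duckdb")
    tokens = dialect.tokenize("SELECT a FROM t")    # List[Token] from the cached tokenizer
    statements = dialect.parse("SELECT a FROM t")   # List[Optional[exp.Expression]]
    print(dialect.generate(statements[0]))          # SELECT a FROM t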
     
    @@ -2497,13 +2515,13 @@
    -
    344    def __init__(self, **kwargs) -> None:
    -345        normalization_strategy = kwargs.get("normalization_strategy")
    -346
    -347        if normalization_strategy is None:
    -348            self.normalization_strategy = self.NORMALIZATION_STRATEGY
    -349        else:
    -350            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
    +            
    346    def __init__(self, **kwargs) -> None:
    +347        normalization_strategy = kwargs.get("normalization_strategy")
    +348
    +349        if normalization_strategy is None:
    +350            self.normalization_strategy = self.NORMALIZATION_STRATEGY
    +351        else:
    +352            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
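
As the listing shows, __init__ takes the strategy either from the class default or from a normalization_strategy kwarg that is upper-cased into a NormalizationStrategy member. A short sketch of both paths:

    from sqlglot.dialects.dialect import Dialect

    default = Dialect.get_or_raise("postgres")
    override = Dialect.get_or_raise("postgres, normalization_strategy = case_sensitive")
    print(default.normalization_strategy)   # NormalizationStrategy.LOWERCASE
    print(override.normalization_strategy)  # NormalizationStrategy.CASE_SENSITIVE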
     
    @@ -3125,53 +3143,53 @@ WITH y(c) AS (
    -
    280    @classmethod
    -281    def get_or_raise(cls, dialect: DialectType) -> Dialect:
    -282        """
    -283        Look up a dialect in the global dialect registry and return it if it exists.
    -284
    -285        Args:
    -286            dialect: The target dialect. If this is a string, it can be optionally followed by
    -287                additional key-value pairs that are separated by commas and are used to specify
    -288                dialect settings, such as whether the dialect's identifiers are case-sensitive.
    -289
    -290        Example:
    -291            >>> dialect = dialect_class = get_or_raise("duckdb")
    -292            >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
    -293
    -294        Returns:
    -295            The corresponding Dialect instance.
    -296        """
    -297
    -298        if not dialect:
    -299            return cls()
    -300        if isinstance(dialect, _Dialect):
    -301            return dialect()
    -302        if isinstance(dialect, Dialect):
    -303            return dialect
    -304        if isinstance(dialect, str):
    -305            try:
    -306                dialect_name, *kv_pairs = dialect.split(",")
    -307                kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)}
    -308            except ValueError:
    -309                raise ValueError(
    -310                    f"Invalid dialect format: '{dialect}'. "
    -311                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
    -312                )
    -313
    -314            result = cls.get(dialect_name.strip())
    -315            if not result:
    -316                from difflib import get_close_matches
    -317
    -318                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
    -319                if similar:
    -320                    similar = f" Did you mean {similar}?"
    -321
    -322                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
    +            
    282    @classmethod
    +283    def get_or_raise(cls, dialect: DialectType) -> Dialect:
    +284        """
    +285        Look up a dialect in the global dialect registry and return it if it exists.
    +286
    +287        Args:
    +288            dialect: The target dialect. If this is a string, it can be optionally followed by
    +289                additional key-value pairs that are separated by commas and are used to specify
    +290                dialect settings, such as whether the dialect's identifiers are case-sensitive.
    +291
    +292        Example:
    +293            >>> dialect = dialect_class = get_or_raise("duckdb")
    +294            >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
    +295
    +296        Returns:
    +297            The corresponding Dialect instance.
    +298        """
    +299
    +300        if not dialect:
    +301            return cls()
    +302        if isinstance(dialect, _Dialect):
    +303            return dialect()
    +304        if isinstance(dialect, Dialect):
    +305            return dialect
    +306        if isinstance(dialect, str):
    +307            try:
    +308                dialect_name, *kv_pairs = dialect.split(",")
    +309                kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)}
    +310            except ValueError:
    +311                raise ValueError(
    +312                    f"Invalid dialect format: '{dialect}'. "
    +313                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
    +314                )
    +315
    +316            result = cls.get(dialect_name.strip())
    +317            if not result:
    +318                from difflib import get_close_matches
    +319
    +320                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
    +321                if similar:
    +322                    similar = f" Did you mean {similar}?"
     323
    -324            return result(**kwargs)
    +324                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
     325
    -326        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
    +326            return result(**kwargs)
    +327
    +328        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
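
Each branch of get_or_raise is reachable from user code: falsy input yields the default dialect, a registered class is instantiated, an instance passes through, and a string is split into a name plus settings. A sketch of the lookup paths (the suggestion text is illustrative):

    from sqlglot.dialects.dialect import Dialect

    Dialect.get_or_raise(None)      # default Dialect instance
    Dialect.get_or_raise("duckdb")  # registry lookup by name
    try:
        Dialect.get_or_raise("duckbd")
    except ValueError as e:
        print(e)                    # Unknown dialect 'duckbd'. Did you mean duckdb?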
     
    @@ -3216,21 +3234,21 @@ dialect settings, such as whether the dialect's identifiers are case-sensitive.<
    -
    328    @classmethod
    -329    def format_time(
    -330        cls, expression: t.Optional[str | exp.Expression]
    -331    ) -> t.Optional[exp.Expression]:
    -332        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
    -333        if isinstance(expression, str):
    -334            return exp.Literal.string(
    -335                # the time formats are quoted
    -336                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
    -337            )
    -338
    -339        if expression and expression.is_string:
    -340            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
    -341
    -342        return expression
    +            
    330    @classmethod
    +331    def format_time(
    +332        cls, expression: t.Optional[str | exp.Expression]
    +333    ) -> t.Optional[exp.Expression]:
    +334        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
    +335        if isinstance(expression, str):
    +336            return exp.Literal.string(
    +337                # the time formats are quoted
    +338                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
    +339            )
    +340
    +341        if expression and expression.is_string:
    +342            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
    +343
    +344        return expression
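
format_time leans on the dialect's TIME_MAPPING/TIME_TRIE pair; MySQL, for instance, maps %i and %s to Python's %M and %S. Note that a plain-string argument is assumed to still carry its surrounding quotes:

    from sqlglot.dialects.mysql import MySQL

    # The quotes are stripped before the trie-based rewrite.
    print(MySQL.format_time("'%Y-%m-%d %H:%i:%s'"))  # '%Y-%m-%d %H:%M:%S'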
     
    @@ -3250,43 +3268,43 @@ dialect settings, such as whether the dialect's identifiers are case-sensitive.<
    -
    360    def normalize_identifier(self, expression: E) -> E:
    -361        """
    -362        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
    -363
    -364        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
    -365        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
    -366        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
    -367        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
    -368
    -369        There are also dialects like Spark, which are case-insensitive even when quotes are
    -370        present, and dialects like MySQL, whose resolution rules match those employed by the
    -371        underlying operating system; for example, they may always be case-sensitive on Linux.
    -372
    -373        Finally, the normalization behavior of some engines can even be controlled through flags,
    -374        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
    -375
    -376        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
    -377        that it can analyze queries in the optimizer and successfully capture their semantics.
    -378        """
    -379        if (
    -380            isinstance(expression, exp.Identifier)
    -381            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
    -382            and (
    -383                not expression.quoted
    -384                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
    -385            )
    -386        ):
    -387            expression.set(
    -388                "this",
    -389                (
    -390                    expression.this.upper()
    -391                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    -392                    else expression.this.lower()
    -393                ),
    -394            )
    -395
    -396        return expression
    +            
    362    def normalize_identifier(self, expression: E) -> E:
    +363        """
    +364        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
    +365
    +366        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
    +367        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
    +368        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
    +369        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
    +370
    +371        There are also dialects like Spark, which are case-insensitive even when quotes are
    +372        present, and dialects like MySQL, whose resolution rules match those employed by the
    +373        underlying operating system; for example, they may always be case-sensitive on Linux.
    +374
    +375        Finally, the normalization behavior of some engines can even be controlled through flags,
    +376        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
    +377
    +378        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
    +379        that it can analyze queries in the optimizer and successfully capture their semantics.
    +380        """
    +381        if (
    +382            isinstance(expression, exp.Identifier)
    +383            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
    +384            and (
    +385                not expression.quoted
    +386                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
    +387            )
    +388        ):
    +389            expression.set(
    +390                "this",
    +391                (
    +392                    expression.this.upper()
    +393                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    +394                    else expression.this.lower()
    +395                ),
    +396            )
    +397
    +398        return expression
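
The strategy checks above play out differently per dialect. A quick comparison on an unquoted identifier (quoting blocks normalization outside CASE_INSENSITIVE dialects):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    print(Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")))   # foo
    print(Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")))  # FOO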
     
    @@ -3321,17 +3339,17 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    398    def case_sensitive(self, text: str) -> bool:
    -399    """Checks if text contains any case-sensitive characters, based on the dialect's rules."""
    -400        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
    -401            return False
    -402
    -403        unsafe = (
    -404            str.islower
    -405            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    -406            else str.isupper
    -407        )
    -408        return any(unsafe(char) for char in text)
    +            
    400    def case_sensitive(self, text: str) -> bool:
    +401    """Checks if text contains any case-sensitive characters, based on the dialect's rules."""
    +402        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
    +403            return False
    +404
    +405        unsafe = (
    +406            str.islower
    +407            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
    +408            else str.isupper
    +409        )
    +410        return any(unsafe(char) for char in text)
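
"Unsafe" here is relative to the folding direction: a LOWERCASE dialect treats any uppercase character as case-sensitive, and an UPPERCASE dialect the reverse. For example:

    from sqlglot.dialects.dialect import Dialect

    pg = Dialect.get_or_raise("postgres")  # LOWERCASE strategy
    print(pg.case_sensitive("foo"))        # False: nothing would be folded
    print(pg.case_sensitive("Foo"))        # True: the F would be lowercased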
     
    @@ -3351,25 +3369,25 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    410    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
    -411        """Checks if text can be identified given an identify option.
    -412
    -413        Args:
    -414            text: The text to check.
    -415            identify:
    -416                `"always"` or `True`: Always returns `True`.
    -417                `"safe"`: Only returns `True` if the identifier is case-insensitive.
    -418
    -419        Returns:
    -420            Whether or not the given text can be identified.
    -421        """
    -422        if identify is True or identify == "always":
    -423            return True
    -424
    -425        if identify == "safe":
    -426            return not self.case_sensitive(text)
    -427
    -428        return False
    +            
    412    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
    +413        """Checks if text can be identified given an identify option.
    +414
    +415        Args:
    +416            text: The text to check.
    +417            identify:
    +418                `"always"` or `True`: Always returns `True`.
    +419                `"safe"`: Only returns `True` if the identifier is case-insensitive.
    +420
    +421        Returns:
    +422            Whether or not the given text can be identified.
    +423        """
    +424        if identify is True or identify == "always":
    +425            return True
    +426
    +427        if identify == "safe":
    +428            return not self.case_sensitive(text)
    +429
    +430        return False
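
A sketch of the three outcomes the docstring describes:

    from sqlglot.dialects.dialect import Dialect

    pg = Dialect.get_or_raise("postgres")
    print(pg.can_identify("Foo", "always"))  # True, unconditional
    print(pg.can_identify("foo", "safe"))    # True, quoting can't change resolution
    print(pg.can_identify("Foo", "safe"))    # False, "Foo" is case-sensitive here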
     
    @@ -3403,23 +3421,23 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    430    def quote_identifier(self, expression: E, identify: bool = True) -> E:
    -431        """
    -432        Adds quotes to a given identifier.
    -433
    -434        Args:
    -435            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
    -436            identify: If set to `False`, the quotes will only be added if the identifier is deemed
    -437                "unsafe", with respect to its characters and this dialect's normalization strategy.
    -438        """
    -439        if isinstance(expression, exp.Identifier):
    -440            name = expression.this
    -441            expression.set(
    -442                "quoted",
    -443                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
    -444            )
    -445
    -446        return expression
    +            
    432    def quote_identifier(self, expression: E, identify: bool = True) -> E:
    +433        """
    +434        Adds quotes to a given identifier.
    +435
    +436        Args:
    +437            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
    +438            identify: If set to `False`, the quotes will only be added if the identifier is deemed
    +439                "unsafe", with respect to its characters and this dialect's normalization strategy.
    +440        """
    +441        if isinstance(expression, exp.Identifier):
    +442            name = expression.this
    +443            expression.set(
    +444                "quoted",
    +445                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
    +446            )
    +447
    +448        return expression
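
With identify=False the quotes are only applied when needed, i.e. when the name is case-sensitive for this dialect or fails SAFE_IDENTIFIER_RE. A small sketch:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    d = Dialect.get_or_raise("duckdb")
    print(d.quote_identifier(exp.to_identifier("foo", quoted=False), identify=False))
    # foo
    print(d.quote_identifier(exp.to_identifier("order date", quoted=False), identify=False))
    # "order date"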
     
    @@ -3447,18 +3465,18 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    448    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    -449        if isinstance(path, exp.Literal):
    -450            path_text = path.name
    -451            if path.is_number:
    -452                path_text = f"[{path_text}]"
    -453
    -454            try:
    -455                return parse_json_path(path_text)
    -456            except ParseError as e:
    -457                logger.warning(f"Invalid JSON path syntax. {str(e)}")
    -458
    -459        return path
    +            
    450    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    +451        if isinstance(path, exp.Literal):
    +452            path_text = path.name
    +453            if path.is_number:
    +454                path_text = f"[{path_text}]"
    +455
    +456            try:
    +457                return parse_json_path(path_text)
    +458            except ParseError as e:
    +459                logger.warning(f"Invalid JSON path syntax. {str(e)}")
    +460
    +461        return path
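
Only literals are rewritten here: a numeric literal is first wrapped as a [N] subscript, and a ParseError is downgraded to a warning so the original path survives untouched. For instance:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    d = Dialect.get_or_raise("duckdb")
    path = d.to_json_path(exp.Literal.string("$.a[0].b"))
    print(type(path).__name__)  # JSONPath, parsed from the literal's text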
     
    @@ -3476,8 +3494,8 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    461    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
    -462        return self.parser(**opts).parse(self.tokenize(sql), sql)
    +            
    463    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
    +464        return self.parser(**opts).parse(self.tokenize(sql), sql)
     
    @@ -3495,10 +3513,10 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    464    def parse_into(
    -465        self, expression_type: exp.IntoType, sql: str, **opts
    -466    ) -> t.List[t.Optional[exp.Expression]]:
    -467        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
    +            
    466    def parse_into(
    +467        self, expression_type: exp.IntoType, sql: str, **opts
    +468    ) -> t.List[t.Optional[exp.Expression]]:
    +469        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
     
    @@ -3516,8 +3534,8 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    469    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
    -470        return self.generator(**opts).generate(expression, copy=copy)
    +            
    471    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
    +472        return self.generator(**opts).generate(expression, copy=copy)
     
    @@ -3535,11 +3553,11 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    472    def transpile(self, sql: str, **opts) -> t.List[str]:
    -473        return [
    -474            self.generate(expression, copy=False, **opts) if expression else ""
    -475            for expression in self.parse(sql)
    -476        ]
    +            
    474    def transpile(self, sql: str, **opts) -> t.List[str]:
    +475        return [
    +476            self.generate(expression, copy=False, **opts) if expression else ""
    +477            for expression in self.parse(sql)
    +478        ]
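
Dialect.transpile round-trips within a single dialect (it parses and regenerates with the same one); translating between dialects goes through sqlglot.transpile with read/write instead. For example:

    from sqlglot.dialects.dialect import Dialect

    print(Dialect.get_or_raise("duckdb").transpile("select 1 as x")[0])  # SELECT 1 AS x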
     
    @@ -3557,8 +3575,8 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    478    def tokenize(self, sql: str) -> t.List[Token]:
    -479        return self.tokenizer.tokenize(sql)
    +            
    480    def tokenize(self, sql: str) -> t.List[Token]:
    +481        return self.tokenizer.tokenize(sql)
     
    @@ -3574,11 +3592,11 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    481    @property
    -482    def tokenizer(self) -> Tokenizer:
    -483        if not hasattr(self, "_tokenizer"):
    -484            self._tokenizer = self.tokenizer_class(dialect=self)
    -485        return self._tokenizer
    +            
    483    @property
    +484    def tokenizer(self) -> Tokenizer:
    +485        if not hasattr(self, "_tokenizer"):
    +486            self._tokenizer = self.tokenizer_class(dialect=self)
    +487        return self._tokenizer
     
    @@ -3596,8 +3614,8 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    487    def parser(self, **opts) -> Parser:
    -488        return self.parser_class(dialect=self, **opts)
    +            
    489    def parser(self, **opts) -> Parser:
    +490        return self.parser_class(dialect=self, **opts)
     
    @@ -3615,8 +3633,8 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    490    def generator(self, **opts) -> Generator:
    -491        return self.generator_class(dialect=self, **opts)
    +            
    492    def generator(self, **opts) -> Generator:
    +493        return self.generator_class(dialect=self, **opts)
     
    @@ -3648,8 +3666,8 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    497def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
    -498    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
    +            
    499def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
    +500    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
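
rename_func is the usual TRANSFORMS building block for functions that differ only by name across dialects. A hypothetical subclass sketch (MyDuckDB is invented for illustration, not a real dialect):

    from sqlglot import exp, parse_one
    from sqlglot.dialects.dialect import rename_func
    from sqlglot.dialects.duckdb import DuckDB

    class MyDuckDB(DuckDB):  # hypothetical dialect
        class Generator(DuckDB.Generator):
            TRANSFORMS = {
                **DuckDB.Generator.TRANSFORMS,
                exp.ApproxDistinct: rename_func("APPROX_DISTINCT"),
            }

    print(parse_one("SELECT APPROX_COUNT_DISTINCT(a) FROM t").sql(dialect=MyDuckDB))
    # SELECT APPROX_DISTINCT(a) FROM t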
     
    @@ -3667,10 +3685,10 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    501def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
    -502    if expression.args.get("accuracy"):
    -503        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
    -504    return self.func("APPROX_COUNT_DISTINCT", expression.this)
    +            
    503def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
    +504    if expression.args.get("accuracy"):
    +505        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
    +506    return self.func("APPROX_COUNT_DISTINCT", expression.this)
     
    @@ -3688,18 +3706,18 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -
    507def if_sql(
    -508    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
    -509) -> t.Callable[[Generator, exp.If], str]:
    -510    def _if_sql(self: Generator, expression: exp.If) -> str:
    -511        return self.func(
    -512            name,
    -513            expression.this,
    -514            expression.args.get("true"),
    -515            expression.args.get("false") or false_value,
    -516        )
    -517
    -518    return _if_sql
    +            
    509def if_sql(
    +510    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
    +511) -> t.Callable[[Generator, exp.If], str]:
    +512    def _if_sql(self: Generator, expression: exp.If) -> str:
    +513        return self.func(
    +514            name,
    +515            expression.this,
    +516            expression.args.get("true"),
    +517            expression.args.get("false") or false_value,
    +518        )
    +519
    +520    return _if_sql
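
if_sql builds a generator callback parameterized by the function name and an optional fallback for a missing false branch; dialects register the result under exp.If in their TRANSFORMS. A minimal sketch, using Hive's generator as the host:

    from sqlglot import exp
    from sqlglot.dialects.dialect import if_sql
    from sqlglot.dialects.hive import Hive

    gen = Hive().generator()
    node = exp.If(this=exp.column("c"), true=exp.Literal.number(1))
    print(if_sql()(gen, node))                    # IF(c, 1)
    print(if_sql(false_value="NULL")(gen, node))  # IF(c, 1, NULL)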
     
    @@ -3711,15 +3729,13 @@ that it can analyze queries in the optimizer and successfully capture their sema
    -def arrow_json_extract_sql(self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONExtract | sqlglot.expressions.JSONExtractScalar) -> str:
    +def arrow_json_extract_sql(self: sqlglot.generator.Generator, expression: Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]) -> str:
    -
    521def arrow_json_extract_sql(
    -522    self: Generator, expression: exp.JSONExtract | exp.JSONExtractScalar
    -523) -> str:
    +            
    523def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
     524    this = expression.this
     525    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
     526        this.replace(exp.cast(this, "json"))
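
Dialects with arrow-style extraction (DuckDB and SQLite, among others) point exp.JSONExtract at this helper, so function-style extraction converts to the -> operator. An illustrative hop, with output that may vary by version:

    import sqlglot

    print(sqlglot.transpile("SELECT JSON_EXTRACT(x, '$.a')", read="mysql", write="duckdb")[0])
    # e.g. SELECT x -> '$.a'
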
    @@ -5004,67 +5020,97 @@ columns are removed from the create statement.

    -def parse_json_extract_path(expr_type: Type[~E], supports_null_if_invalid: bool = False) -> Callable[[List], ~E]:
    +def parse_json_extract_path(expr_type: Type[~F], zero_based_indexing: bool = True) -> Callable[[List], ~F]:
    1014def parse_json_extract_path(
    -1015    expr_type: t.Type[E],
    -1016    supports_null_if_invalid: bool = False,
    -1017) -> t.Callable[[t.List], E]:
    -1018    def _parse_json_extract_path(args: t.List) -> E:
    -1019        null_if_invalid = None
    -1020
    -1021        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
    -1022        for arg in args[1:]:
    -1023            if isinstance(arg, exp.Literal):
    -1024                text = arg.name
    -1025                if is_int(text):
    -1026                    segments.append(exp.JSONPathSubscript(this=int(text)))
    -1027                else:
    -1028                    segments.append(exp.JSONPathKey(this=text))
    -1029            elif supports_null_if_invalid:
    -1030                null_if_invalid = arg
    -1031
    -1032        this = seq_get(args, 0)
    -1033        jsonpath = exp.JSONPath(expressions=segments)
    -1034
    -1035        # This is done to avoid failing in the expression validator due to the arg count
    -1036        del args[2:]
    -1037
    -1038        if expr_type is exp.JSONExtractScalar:
    -1039            return expr_type(this=this, expression=jsonpath, null_if_invalid=null_if_invalid)
    -1040
    -1041        return expr_type(this=this, expression=jsonpath)
    -1042
    -1043    return _parse_json_extract_path
    +1015    expr_type: t.Type[F], zero_based_indexing: bool = True
    +1016) -> t.Callable[[t.List], F]:
    +1017    def _parse_json_extract_path(args: t.List) -> F:
    +1018        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
    +1019        for arg in args[1:]:
    +1020            if not isinstance(arg, exp.Literal):
    +1021                # We use the fallback parser because we can't really transpile non-literals safely
    +1022                return expr_type.from_arg_list(args)
    +1023
    +1024            text = arg.name
    +1025            if is_int(text):
    +1026                index = int(text)
    +1027                segments.append(
    +1028                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
    +1029                )
    +1030            else:
    +1031                segments.append(exp.JSONPathKey(this=text))
    +1032
    +1033        # This is done to avoid failing in the expression validator due to the arg count
    +1034        del args[2:]
    +1035        return expr_type(this=seq_get(args, 0), expression=exp.JSONPath(expressions=segments))
    +1036
    +1037    return _parse_json_extract_path
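
Dialect modules plug this factory into their FUNCTIONS map for Postgres-style JSON_EXTRACT_PATH parsing. Two behavior changes are visible in the new version: non-literal segments now fall back to from_arg_list so they are not mistranspiled, and zero_based_indexing=False shifts one-based literal indexes down by one. A round-trip sketch, assuming the Postgres wiring from this release:

    import sqlglot

    ast = sqlglot.parse_one("SELECT JSON_EXTRACT_PATH(x, 'a', '0')", read="postgres")
    print(ast.sql(dialect="postgres"))  # e.g. SELECT JSON_EXTRACT_PATH(x, 'a', '0')
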
    +
    +def json_extract_segments(name: str, quoted_index: bool = True) -> Callable[[sqlglot.generator.Generator, Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]], str]:
    +
    1040def json_extract_segments(
    +1041    name: str, quoted_index: bool = True
    +1042) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
    +1043    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
    +1044        path = expression.expression
    +1045        if not isinstance(path, exp.JSONPath):
    +1046            return rename_func(name)(self, expression)
    +1047
    +1048        segments = []
    +1049        for segment in path.expressions:
    +1050            path = self.sql(segment)
    +1051            if path:
    +1052                if isinstance(segment, exp.JSONPathPart) and (
    +1053                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
    +1054                ):
    +1055                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"
    +1056
    +1057                segments.append(path)
    +1058
    +1059        return self.func(name, expression.this, *segments)
    +1060
    +1061    return _json_extract_segments
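
This is the generator-side inverse of parse_json_extract_path: it unpacks a parsed exp.JSONPath into one argument per segment, wrapping keys (and, with quoted_index=True, subscripts) in the dialect's string quotes, and falls back to rename_func when the path is not an exp.JSONPath. An illustrative cross-dialect hop, output subject to version drift:

    import sqlglot

    print(sqlglot.transpile("SELECT JSON_EXTRACT(x, '$.a.b')", read="mysql", write="postgres")[0])
    # e.g. SELECT JSON_EXTRACT_PATH(x, 'a', 'b')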
     
    -def json_path_segments(self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONPath) -> List[str]:
    +def json_path_key_only_name(self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONPathKey) -> str:
    -
    1046def json_path_segments(self: Generator, expression: exp.JSONPath) -> t.List[str]:
    -1047    segments = []
    -1048    for segment in expression.expressions:
    -1049        path = self.sql(segment)
    -1050        if path:
    -1051            segments.append(f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}")
    -1052
    -1053    return segments
    +    
    +            
    1064def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
    +1065    if isinstance(expression.this, exp.JSONPathWildcard):
    +1066        self.unsupported("Unsupported wildcard in JSONPathKey expression")
    +1067
    +1068    return expression.name
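
The new helper renders only the key's text, which suits segment-per-argument functions where a wildcard has no spelling, hence the unsupported() warning. A tiny sketch:

    from sqlglot import exp
    from sqlglot.dialects.dialect import json_path_key_only_name
    from sqlglot.dialects.postgres import Postgres

    gen = Postgres().generator()
    print(json_path_key_only_name(gen, exp.JSONPathKey(this="a")))  # a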
     
    -- cgit v1.2.3