sqlglot.dialects.redshift
from __future__ import annotations

import typing as t

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    NormalizationStrategy,
    concat_to_dpipe_sql,
    concat_ws_to_dpipe_sql,
    date_delta_sql,
    generatedasidentitycolumnconstraint_sql,
    json_extract_segments,
    no_tablesample_sql,
    rename_func,
)
from sqlglot.dialects.postgres import Postgres
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


def _parse_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Build an arg-list parser for Redshift's DATEADD/DATEDIFF-style functions.

    Redshift passes the arguments as (unit, expression, this), so they are
    remapped onto the expression's keyword slots accordingly.
    """

    def _builder(args: t.List) -> E:
        delta = expr_type(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        )
        if expr_type is exp.TsOrDsAdd:
            # DATEADD in Redshift always produces a TIMESTAMP.
            delta.set("return_type", exp.DataType.build("TIMESTAMP"))

        return delta

    return _builder


class Redshift(Postgres):
    # Identifiers are case-insensitive, per
    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "DATEADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
            "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True),
        }

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            # Redshift can UNPIVOT SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            is_unpivot = self._match(TokenType.UNPIVOT)

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if is_unpivot:
                return self.expression(exp.Pivot, this=table, unpivot=True)
            return table

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            # VARCHAR(MAX): the MAX token parses as a column, so rewrite it into a var
            if (
                isinstance(this, exp.DataType)
                and this.is_type("varchar")
                and this.expressions
                and this.expressions[0].this == exp.column("MAX")
            ):
                this.set("expressions", [exp.var("MAX")])

            return this

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            # CONVERT(type, expr) -- the target type comes first in Redshift
            to = self._parse_types()
            self._match(TokenType.COMMA)
            value = self._parse_bitwise()
            return self.expression(exp.TryCast, this=value, to=to, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            # Only APPROXIMATE COUNT(DISTINCT ...) is folded into ApproxDistinct;
            # anything else backtracks so APPROXIMATE can be parsed another way.
            start_index = self._index - 1
            func = self._parse_function()

            if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
                return self.expression(
                    exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)
                )

            self._retreat(start_index)
            return None

        def _parse_query_modifiers(
            self, this: t.Optional[exp.Expression]
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_query_modifiers(this)

            if this:
                # A comma-less "join" whose first name part matches a previously
                # seen table alias is actually a column reference (SUPER traversal),
                # so rewrite the table node into a column.
                refs = set()

                for i, join in enumerate(this.args.get("joins", [])):
                    prior = this.args["from"] if i == 0 else this.args["joins"][i - 1]
                    refs.add(prior.this.alias.lower())

                    joined = join.this
                    if isinstance(joined, exp.Table) and not join.args.get("on"):
                        if joined.parts[0].name.lower() in refs:
                            joined.replace(joined.to_column())

            return this

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "SUPER": TokenType.SUPER,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }
        KEYWORDS.pop("VALUES")

        # `#` may prefix a (temporary) table identifier in Redshift
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            exp.CurrentTimestamp: lambda self, e: (
                "SYSDATE" if e.args.get("transaction") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})",
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            exp.SortKeyProperty: lambda self,
            e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Redshift uses POW | POWER (expr1, expr2) instead of Postgres' expr1 ^ expr2
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports ANY_VALUE(..)
        TRANSFORMS.pop(exp.AnyValue)

        # Redshift supports LAST_DAY(..)
        TRANSFORMS.pop(exp.LastDay)

        RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

        def with_properties(self, properties: exp.Properties) -> str:
            """Redshift doesn't have `WITH` as part of its with_properties, so we remove it."""
            return self.properties(properties, prefix=" ", suffix="")

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            # Redshift has no JSON type, so a cast to JSON is a noop
            if expression.is_type(exp.DataType.Type.JSON):
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more
            generally mean VARCHAR of max length, which is `VARCHAR(max)` in Redshift. Therefore
            if we get a `TEXT` data type without precision we convert it to `VARCHAR(max)`, and
            if it does have precision then we just convert `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)

                if not expression.args.get("expressions"):
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)
36class Redshift(Postgres): 37 # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html 38 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 39 40 SUPPORTS_USER_DEFINED_TYPES = False 41 INDEX_OFFSET = 0 42 43 TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'" 44 TIME_MAPPING = { 45 **Postgres.TIME_MAPPING, 46 "MON": "%b", 47 "HH": "%H", 48 } 49 50 class Parser(Postgres.Parser): 51 FUNCTIONS = { 52 **Postgres.Parser.FUNCTIONS, 53 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 54 this=seq_get(args, 0), 55 expression=seq_get(args, 1), 56 unit=exp.var("month"), 57 return_type=exp.DataType.build("TIMESTAMP"), 58 ), 59 "DATEADD": _parse_date_delta(exp.TsOrDsAdd), 60 "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd), 61 "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff), 62 "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff), 63 "GETDATE": exp.CurrentTimestamp.from_arg_list, 64 "LISTAGG": exp.GroupConcat.from_arg_list, 65 "STRTOL": exp.FromBase.from_arg_list, 66 } 67 68 NO_PAREN_FUNCTION_PARSERS = { 69 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 70 "APPROXIMATE": lambda self: self._parse_approximate_count(), 71 "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True), 72 } 73 74 def _parse_table( 75 self, 76 schema: bool = False, 77 joins: bool = False, 78 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 79 parse_bracket: bool = False, 80 is_db_reference: bool = False, 81 ) -> t.Optional[exp.Expression]: 82 # Redshift supports UNPIVOTing SUPER objects, e.g. 
`UNPIVOT foo.obj[0] AS val AT attr` 83 unpivot = self._match(TokenType.UNPIVOT) 84 table = super()._parse_table( 85 schema=schema, 86 joins=joins, 87 alias_tokens=alias_tokens, 88 parse_bracket=parse_bracket, 89 is_db_reference=is_db_reference, 90 ) 91 92 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 93 94 def _parse_types( 95 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 96 ) -> t.Optional[exp.Expression]: 97 this = super()._parse_types( 98 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 99 ) 100 101 if ( 102 isinstance(this, exp.DataType) 103 and this.is_type("varchar") 104 and this.expressions 105 and this.expressions[0].this == exp.column("MAX") 106 ): 107 this.set("expressions", [exp.var("MAX")]) 108 109 return this 110 111 def _parse_convert( 112 self, strict: bool, safe: t.Optional[bool] = None 113 ) -> t.Optional[exp.Expression]: 114 to = self._parse_types() 115 self._match(TokenType.COMMA) 116 this = self._parse_bitwise() 117 return self.expression(exp.TryCast, this=this, to=to, safe=safe) 118 119 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 120 index = self._index - 1 121 func = self._parse_function() 122 123 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 124 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 125 self._retreat(index) 126 return None 127 128 def _parse_query_modifiers( 129 self, this: t.Optional[exp.Expression] 130 ) -> t.Optional[exp.Expression]: 131 this = super()._parse_query_modifiers(this) 132 133 if this: 134 refs = set() 135 136 for i, join in enumerate(this.args.get("joins", [])): 137 refs.add( 138 ( 139 this.args["from"] if i == 0 else this.args["joins"][i - 1] 140 ).this.alias.lower() 141 ) 142 143 table = join.this 144 if isinstance(table, exp.Table) and not join.args.get("on"): 145 if table.parts[0].name.lower() in refs: 146 
table.replace(table.to_column()) 147 return this 148 149 class Tokenizer(Postgres.Tokenizer): 150 BIT_STRINGS = [] 151 HEX_STRINGS = [] 152 STRING_ESCAPES = ["\\", "'"] 153 154 KEYWORDS = { 155 **Postgres.Tokenizer.KEYWORDS, 156 "HLLSKETCH": TokenType.HLLSKETCH, 157 "SUPER": TokenType.SUPER, 158 "TOP": TokenType.TOP, 159 "UNLOAD": TokenType.COMMAND, 160 "VARBYTE": TokenType.VARBINARY, 161 } 162 KEYWORDS.pop("VALUES") 163 164 # Redshift allows # to appear as a table identifier prefix 165 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 166 SINGLE_TOKENS.pop("#") 167 168 class Generator(Postgres.Generator): 169 LOCKING_READS_SUPPORTED = False 170 QUERY_HINTS = False 171 VALUES_AS_TABLE = False 172 TZ_TO_WITH_TIME_ZONE = True 173 NVL2_SUPPORTED = True 174 LAST_DAY_SUPPORTS_DATE_PART = False 175 176 TYPE_MAPPING = { 177 **Postgres.Generator.TYPE_MAPPING, 178 exp.DataType.Type.BINARY: "VARBYTE", 179 exp.DataType.Type.INT: "INTEGER", 180 exp.DataType.Type.TIMETZ: "TIME", 181 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 182 exp.DataType.Type.VARBINARY: "VARBYTE", 183 } 184 185 TRANSFORMS = { 186 **Postgres.Generator.TRANSFORMS, 187 exp.Concat: concat_to_dpipe_sql, 188 exp.ConcatWs: concat_ws_to_dpipe_sql, 189 exp.ApproxDistinct: lambda self, 190 e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", 191 exp.CurrentTimestamp: lambda self, e: ( 192 "SYSDATE" if e.args.get("transaction") else "GETDATE()" 193 ), 194 exp.DateAdd: date_delta_sql("DATEADD"), 195 exp.DateDiff: date_delta_sql("DATEDIFF"), 196 exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})", 197 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 198 exp.FromBase: rename_func("STRTOL"), 199 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 200 exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 201 exp.GroupConcat: rename_func("LISTAGG"), 202 exp.ParseJSON: rename_func("JSON_PARSE"), 203 exp.Select: transforms.preprocess( 204 
[transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins] 205 ), 206 exp.SortKeyProperty: lambda self, 207 e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 208 exp.TableSample: no_tablesample_sql, 209 exp.TsOrDsAdd: date_delta_sql("DATEADD"), 210 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 211 } 212 213 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 214 TRANSFORMS.pop(exp.Pivot) 215 216 # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres) 217 TRANSFORMS.pop(exp.Pow) 218 219 # Redshift supports ANY_VALUE(..) 220 TRANSFORMS.pop(exp.AnyValue) 221 222 # Redshift supports LAST_DAY(..) 223 TRANSFORMS.pop(exp.LastDay) 224 225 RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"} 226 227 def with_properties(self, properties: exp.Properties) -> str: 228 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 229 return self.properties(properties, prefix=" ", suffix="") 230 231 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 232 if expression.is_type(exp.DataType.Type.JSON): 233 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 234 return self.sql(expression, "this") 235 236 return super().cast_sql(expression, safe_prefix=safe_prefix) 237 238 def datatype_sql(self, expression: exp.DataType) -> str: 239 """ 240 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 241 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 242 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 243 `TEXT` to `VARCHAR`. 
244 """ 245 if expression.is_type("text"): 246 expression.set("this", exp.DataType.Type.VARCHAR) 247 precision = expression.args.get("expressions") 248 249 if not precision: 250 expression.append("expressions", exp.var("MAX")) 251 252 return super().datatype_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
Determines whether or not user-defined data types are supported.
Associates this dialect's time formats with their equivalent Python strftime
format.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- SAFE_DIVISION
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
50 class Parser(Postgres.Parser): 51 FUNCTIONS = { 52 **Postgres.Parser.FUNCTIONS, 53 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 54 this=seq_get(args, 0), 55 expression=seq_get(args, 1), 56 unit=exp.var("month"), 57 return_type=exp.DataType.build("TIMESTAMP"), 58 ), 59 "DATEADD": _parse_date_delta(exp.TsOrDsAdd), 60 "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd), 61 "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff), 62 "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff), 63 "GETDATE": exp.CurrentTimestamp.from_arg_list, 64 "LISTAGG": exp.GroupConcat.from_arg_list, 65 "STRTOL": exp.FromBase.from_arg_list, 66 } 67 68 NO_PAREN_FUNCTION_PARSERS = { 69 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 70 "APPROXIMATE": lambda self: self._parse_approximate_count(), 71 "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True), 72 } 73 74 def _parse_table( 75 self, 76 schema: bool = False, 77 joins: bool = False, 78 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 79 parse_bracket: bool = False, 80 is_db_reference: bool = False, 81 ) -> t.Optional[exp.Expression]: 82 # Redshift supports UNPIVOTing SUPER objects, e.g. 
`UNPIVOT foo.obj[0] AS val AT attr` 83 unpivot = self._match(TokenType.UNPIVOT) 84 table = super()._parse_table( 85 schema=schema, 86 joins=joins, 87 alias_tokens=alias_tokens, 88 parse_bracket=parse_bracket, 89 is_db_reference=is_db_reference, 90 ) 91 92 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 93 94 def _parse_types( 95 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 96 ) -> t.Optional[exp.Expression]: 97 this = super()._parse_types( 98 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 99 ) 100 101 if ( 102 isinstance(this, exp.DataType) 103 and this.is_type("varchar") 104 and this.expressions 105 and this.expressions[0].this == exp.column("MAX") 106 ): 107 this.set("expressions", [exp.var("MAX")]) 108 109 return this 110 111 def _parse_convert( 112 self, strict: bool, safe: t.Optional[bool] = None 113 ) -> t.Optional[exp.Expression]: 114 to = self._parse_types() 115 self._match(TokenType.COMMA) 116 this = self._parse_bitwise() 117 return self.expression(exp.TryCast, this=this, to=to, safe=safe) 118 119 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 120 index = self._index - 1 121 func = self._parse_function() 122 123 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 124 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 125 self._retreat(index) 126 return None 127 128 def _parse_query_modifiers( 129 self, this: t.Optional[exp.Expression] 130 ) -> t.Optional[exp.Expression]: 131 this = super()._parse_query_modifiers(this) 132 133 if this: 134 refs = set() 135 136 for i, join in enumerate(this.args.get("joins", [])): 137 refs.add( 138 ( 139 this.args["from"] if i == 0 else this.args["joins"][i - 1] 140 ).this.alias.lower() 141 ) 142 143 table = join.this 144 if isinstance(table, exp.Table) and not join.args.get("on"): 145 if table.parts[0].name.lower() in refs: 146 
table.replace(table.to_column()) 147 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
149 class Tokenizer(Postgres.Tokenizer): 150 BIT_STRINGS = [] 151 HEX_STRINGS = [] 152 STRING_ESCAPES = ["\\", "'"] 153 154 KEYWORDS = { 155 **Postgres.Tokenizer.KEYWORDS, 156 "HLLSKETCH": TokenType.HLLSKETCH, 157 "SUPER": TokenType.SUPER, 158 "TOP": TokenType.TOP, 159 "UNLOAD": TokenType.COMMAND, 160 "VARBYTE": TokenType.VARBINARY, 161 } 162 KEYWORDS.pop("VALUES") 163 164 # Redshift allows # to appear as a table identifier prefix 165 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 166 SINGLE_TOKENS.pop("#")
Inherited Members
168 class Generator(Postgres.Generator): 169 LOCKING_READS_SUPPORTED = False 170 QUERY_HINTS = False 171 VALUES_AS_TABLE = False 172 TZ_TO_WITH_TIME_ZONE = True 173 NVL2_SUPPORTED = True 174 LAST_DAY_SUPPORTS_DATE_PART = False 175 176 TYPE_MAPPING = { 177 **Postgres.Generator.TYPE_MAPPING, 178 exp.DataType.Type.BINARY: "VARBYTE", 179 exp.DataType.Type.INT: "INTEGER", 180 exp.DataType.Type.TIMETZ: "TIME", 181 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 182 exp.DataType.Type.VARBINARY: "VARBYTE", 183 } 184 185 TRANSFORMS = { 186 **Postgres.Generator.TRANSFORMS, 187 exp.Concat: concat_to_dpipe_sql, 188 exp.ConcatWs: concat_ws_to_dpipe_sql, 189 exp.ApproxDistinct: lambda self, 190 e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", 191 exp.CurrentTimestamp: lambda self, e: ( 192 "SYSDATE" if e.args.get("transaction") else "GETDATE()" 193 ), 194 exp.DateAdd: date_delta_sql("DATEADD"), 195 exp.DateDiff: date_delta_sql("DATEDIFF"), 196 exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})", 197 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 198 exp.FromBase: rename_func("STRTOL"), 199 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 200 exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 201 exp.GroupConcat: rename_func("LISTAGG"), 202 exp.ParseJSON: rename_func("JSON_PARSE"), 203 exp.Select: transforms.preprocess( 204 [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins] 205 ), 206 exp.SortKeyProperty: lambda self, 207 e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 208 exp.TableSample: no_tablesample_sql, 209 exp.TsOrDsAdd: date_delta_sql("DATEADD"), 210 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 211 } 212 213 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 214 TRANSFORMS.pop(exp.Pivot) 215 216 # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres) 217 TRANSFORMS.pop(exp.Pow) 218 
219 # Redshift supports ANY_VALUE(..) 220 TRANSFORMS.pop(exp.AnyValue) 221 222 # Redshift supports LAST_DAY(..) 223 TRANSFORMS.pop(exp.LastDay) 224 225 RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"} 226 227 def with_properties(self, properties: exp.Properties) -> str: 228 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 229 return self.properties(properties, prefix=" ", suffix="") 230 231 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 232 if expression.is_type(exp.DataType.Type.JSON): 233 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 234 return self.sql(expression, "this") 235 236 return super().cast_sql(expression, safe_prefix=safe_prefix) 237 238 def datatype_sql(self, expression: exp.DataType) -> str: 239 """ 240 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 241 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 242 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 243 `TEXT` to `VARCHAR`. 244 """ 245 if expression.is_type("text"): 246 expression.set("this", exp.DataType.Type.VARCHAR) 247 precision = expression.args.get("expressions") 248 249 if not precision: 250 expression.append("expressions", exp.var("MAX")) 251 252 return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
227 def with_properties(self, properties: exp.Properties) -> str: 228 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 229 return self.properties(properties, prefix=" ", suffix="")
Redshift doesn't have `WITH` as part of its with_properties, so we remove it.
231 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 232 if expression.is_type(exp.DataType.Type.JSON): 233 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 234 return self.sql(expression, "this") 235 236 return super().cast_sql(expression, safe_prefix=safe_prefix)
238 def datatype_sql(self, expression: exp.DataType) -> str: 239 """ 240 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 241 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 242 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 243 `TEXT` to `VARCHAR`. 244 """ 245 if expression.is_type("text"): 246 expression.set("this", exp.DataType.Type.VARCHAR) 247 precision = expression.args.get("expressions") 248 249 if not precision: 250 expression.append("expressions", exp.var("MAX")) 251 252 return super().datatype_sql(expression)
Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean VARCHAR of max length, which is `VARCHAR(max)` in Redshift. Therefore, if we get a `TEXT` data type without precision, we convert it to `VARCHAR(max)`; if it does have precision, we just convert `TEXT` to `VARCHAR`.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- sqlglot.dialects.postgres.Postgres.Generator
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- JOIN_HINTS
- TABLE_HINTS
- PARAMETER_TOKEN
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_SEED_KEYWORD
- SUPPORTS_SELECT_INTO
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- SUPPORTED_JSON_PATH_PARTS
- PROPERTIES_LOCATION
- bracket_sql
- matchagainst_sql