sqlglot.dialects.postgres
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 arrow_json_extract_scalar_sql, 9 arrow_json_extract_sql, 10 datestrtodate_sql, 11 format_time_lambda, 12 max_or_greatest, 13 min_or_least, 14 no_paren_current_date_sql, 15 no_pivot_sql, 16 no_tablesample_sql, 17 no_trycast_sql, 18 rename_func, 19 str_position_sql, 20 timestamptrunc_sql, 21 trim_sql, 22) 23from sqlglot.helper import seq_get 24from sqlglot.parser import binary_range_parser 25from sqlglot.tokens import TokenType 26 27DATE_DIFF_FACTOR = { 28 "MICROSECOND": " * 1000000", 29 "MILLISECOND": " * 1000", 30 "SECOND": "", 31 "MINUTE": " / 60", 32 "HOUR": " / 3600", 33 "DAY": " / 86400", 34} 35 36 37def _date_add_sql(kind: str) -> t.Callable[[generator.Generator, exp.DateAdd | exp.DateSub], str]: 38 def func(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str: 39 from sqlglot.optimizer.simplify import simplify 40 41 this = self.sql(expression, "this") 42 unit = expression.args.get("unit") 43 expression = simplify(expression.args["expression"]) 44 45 if not isinstance(expression, exp.Literal): 46 self.unsupported("Cannot add non literal") 47 48 expression = expression.copy() 49 expression.args["is_string"] = True 50 return f"{this} {kind} {self.sql(exp.Interval(this=expression, unit=unit))}" 51 52 return func 53 54 55def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str: 56 unit = expression.text("unit").upper() 57 factor = DATE_DIFF_FACTOR.get(unit) 58 59 end = f"CAST({expression.this} AS TIMESTAMP)" 60 start = f"CAST({expression.expression} AS TIMESTAMP)" 61 62 if factor is not None: 63 return f"CAST(EXTRACT(epoch FROM {end} - {start}){factor} AS BIGINT)" 64 65 age = f"AGE({end}, {start})" 66 67 if unit == "WEEK": 68 unit = f"EXTRACT(year FROM {age}) * 48 + EXTRACT(month FROM {age}) * 4 + EXTRACT(day FROM {age}) / 7" 69 elif unit == 
"MONTH": 70 unit = f"EXTRACT(year FROM {age}) * 12 + EXTRACT(month FROM {age})" 71 elif unit == "QUARTER": 72 unit = f"EXTRACT(year FROM {age}) * 4 + EXTRACT(month FROM {age}) / 3" 73 elif unit == "YEAR": 74 unit = f"EXTRACT(year FROM {age})" 75 else: 76 unit = age 77 78 return f"CAST({unit} AS BIGINT)" 79 80 81def _substring_sql(self: generator.Generator, expression: exp.Substring) -> str: 82 this = self.sql(expression, "this") 83 start = self.sql(expression, "start") 84 length = self.sql(expression, "length") 85 86 from_part = f" FROM {start}" if start else "" 87 for_part = f" FOR {length}" if length else "" 88 89 return f"SUBSTRING({this}{from_part}{for_part})" 90 91 92def _string_agg_sql(self: generator.Generator, expression: exp.GroupConcat) -> str: 93 expression = expression.copy() 94 separator = expression.args.get("separator") or exp.Literal.string(",") 95 96 order = "" 97 this = expression.this 98 if isinstance(this, exp.Order): 99 if this.this: 100 this = this.this.pop() 101 order = self.sql(expression.this) # Order has a leading space 102 103 return f"STRING_AGG({self.format_args(this, separator)}{order})" 104 105 106def _datatype_sql(self: generator.Generator, expression: exp.DataType) -> str: 107 if expression.this == exp.DataType.Type.ARRAY: 108 return f"{self.expressions(expression, flat=True)}[]" 109 return self.datatype_sql(expression) 110 111 112def _auto_increment_to_serial(expression: exp.Expression) -> exp.Expression: 113 auto = expression.find(exp.AutoIncrementColumnConstraint) 114 115 if auto: 116 expression = expression.copy() 117 expression.args["constraints"].remove(auto.parent) 118 kind = expression.args["kind"] 119 120 if kind.this == exp.DataType.Type.INT: 121 kind.replace(exp.DataType(this=exp.DataType.Type.SERIAL)) 122 elif kind.this == exp.DataType.Type.SMALLINT: 123 kind.replace(exp.DataType(this=exp.DataType.Type.SMALLSERIAL)) 124 elif kind.this == exp.DataType.Type.BIGINT: 125 
kind.replace(exp.DataType(this=exp.DataType.Type.BIGSERIAL)) 126 127 return expression 128 129 130def _serial_to_generated(expression: exp.Expression) -> exp.Expression: 131 kind = expression.args["kind"] 132 133 if kind.this == exp.DataType.Type.SERIAL: 134 data_type = exp.DataType(this=exp.DataType.Type.INT) 135 elif kind.this == exp.DataType.Type.SMALLSERIAL: 136 data_type = exp.DataType(this=exp.DataType.Type.SMALLINT) 137 elif kind.this == exp.DataType.Type.BIGSERIAL: 138 data_type = exp.DataType(this=exp.DataType.Type.BIGINT) 139 else: 140 data_type = None 141 142 if data_type: 143 expression = expression.copy() 144 expression.args["kind"].replace(data_type) 145 constraints = expression.args["constraints"] 146 generated = exp.ColumnConstraint(kind=exp.GeneratedAsIdentityColumnConstraint(this=False)) 147 notnull = exp.ColumnConstraint(kind=exp.NotNullColumnConstraint()) 148 149 if notnull not in constraints: 150 constraints.insert(0, notnull) 151 if generated not in constraints: 152 constraints.insert(0, generated) 153 154 return expression 155 156 157def _generate_series(args: t.List) -> exp.Expression: 158 # The goal is to convert step values like '1 day' or INTERVAL '1 day' into INTERVAL '1' day 159 step = seq_get(args, 2) 160 161 if step is None: 162 # Postgres allows calls with just two arguments -- the "step" argument defaults to 1 163 return exp.GenerateSeries.from_arg_list(args) 164 165 if step.is_string: 166 args[2] = exp.to_interval(step.this) 167 elif isinstance(step, exp.Interval) and not step.args.get("unit"): 168 args[2] = exp.to_interval(step.this.this) 169 170 return exp.GenerateSeries.from_arg_list(args) 171 172 173def _to_timestamp(args: t.List) -> exp.Expression: 174 # TO_TIMESTAMP accepts either a single double argument or (text, text) 175 if len(args) == 1: 176 # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE 177 return exp.UnixToTime.from_arg_list(args) 178 179 # 
https://www.postgresql.org/docs/current/functions-formatting.html 180 return format_time_lambda(exp.StrToTime, "postgres")(args) 181 182 183class Postgres(Dialect): 184 null_ordering = "nulls_are_large" 185 time_format = "'YYYY-MM-DD HH24:MI:SS'" 186 time_mapping = { 187 "AM": "%p", 188 "PM": "%p", 189 "D": "%u", # 1-based day of week 190 "DD": "%d", # day of month 191 "DDD": "%j", # zero padded day of year 192 "FMDD": "%-d", # - is no leading zero for Python; same for FM in postgres 193 "FMDDD": "%-j", # day of year 194 "FMHH12": "%-I", # 9 195 "FMHH24": "%-H", # 9 196 "FMMI": "%-M", # Minute 197 "FMMM": "%-m", # 1 198 "FMSS": "%-S", # Second 199 "HH12": "%I", # 09 200 "HH24": "%H", # 09 201 "MI": "%M", # zero padded minute 202 "MM": "%m", # 01 203 "OF": "%z", # utc offset 204 "SS": "%S", # zero padded second 205 "TMDay": "%A", # TM is locale dependent 206 "TMDy": "%a", 207 "TMMon": "%b", # Sep 208 "TMMonth": "%B", # September 209 "TZ": "%Z", # uppercase timezone name 210 "US": "%f", # zero padded microsecond 211 "WW": "%U", # 1-based week of year 212 "YY": "%y", # 15 213 "YYYY": "%Y", # 2015 214 } 215 216 class Tokenizer(tokens.Tokenizer): 217 QUOTES = ["'", "$$"] 218 219 BIT_STRINGS = [("b'", "'"), ("B'", "'")] 220 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 221 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 222 223 KEYWORDS = { 224 **tokens.Tokenizer.KEYWORDS, 225 "~~": TokenType.LIKE, 226 "~~*": TokenType.ILIKE, 227 "~*": TokenType.IRLIKE, 228 "~": TokenType.RLIKE, 229 "@>": TokenType.AT_GT, 230 "<@": TokenType.LT_AT, 231 "BEGIN": TokenType.COMMAND, 232 "BEGIN TRANSACTION": TokenType.BEGIN, 233 "BIGSERIAL": TokenType.BIGSERIAL, 234 "CHARACTER VARYING": TokenType.VARCHAR, 235 "DECLARE": TokenType.COMMAND, 236 "DO": TokenType.COMMAND, 237 "HSTORE": TokenType.HSTORE, 238 "JSONB": TokenType.JSONB, 239 "REFRESH": TokenType.COMMAND, 240 "REINDEX": TokenType.COMMAND, 241 "RESET": TokenType.COMMAND, 242 "RETURNING": TokenType.RETURNING, 243 "REVOKE": TokenType.COMMAND, 244 
"SERIAL": TokenType.SERIAL, 245 "SMALLSERIAL": TokenType.SMALLSERIAL, 246 "TEMP": TokenType.TEMPORARY, 247 "CSTRING": TokenType.PSEUDO_TYPE, 248 } 249 250 SINGLE_TOKENS = { 251 **tokens.Tokenizer.SINGLE_TOKENS, 252 "$": TokenType.PARAMETER, 253 } 254 255 VAR_SINGLE_TOKENS = {"$"} 256 257 class Parser(parser.Parser): 258 STRICT_CAST = False 259 260 FUNCTIONS = { 261 **parser.Parser.FUNCTIONS, 262 "DATE_TRUNC": lambda args: exp.TimestampTrunc( 263 this=seq_get(args, 1), unit=seq_get(args, 0) 264 ), 265 "GENERATE_SERIES": _generate_series, 266 "NOW": exp.CurrentTimestamp.from_arg_list, 267 "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"), 268 "TO_TIMESTAMP": _to_timestamp, 269 } 270 271 FUNCTION_PARSERS = { 272 **parser.Parser.FUNCTION_PARSERS, 273 "DATE_PART": lambda self: self._parse_date_part(), 274 } 275 276 BITWISE = { 277 **parser.Parser.BITWISE, 278 TokenType.HASH: exp.BitwiseXor, 279 } 280 281 EXPONENT = { 282 TokenType.CARET: exp.Pow, 283 } 284 285 RANGE_PARSERS = { 286 **parser.Parser.RANGE_PARSERS, 287 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 288 TokenType.AT_GT: binary_range_parser(exp.ArrayContains), 289 TokenType.LT_AT: binary_range_parser(exp.ArrayContained), 290 } 291 292 def _parse_factor(self) -> t.Optional[exp.Expression]: 293 return self._parse_tokens(self._parse_exponent, self.FACTOR) 294 295 def _parse_exponent(self) -> t.Optional[exp.Expression]: 296 return self._parse_tokens(self._parse_unary, self.EXPONENT) 297 298 def _parse_date_part(self) -> exp.Expression: 299 part = self._parse_type() 300 self._match(TokenType.COMMA) 301 value = self._parse_bitwise() 302 303 if part and part.is_string: 304 part = exp.Var(this=part.name) 305 306 return self.expression(exp.Extract, this=part, expression=value) 307 308 class Generator(generator.Generator): 309 SINGLE_STRING_INTERVAL = True 310 LOCKING_READS_SUPPORTED = True 311 JOIN_HINTS = False 312 TABLE_HINTS = False 313 PARAMETER_TOKEN = "$" 314 315 TYPE_MAPPING = { 316 
**generator.Generator.TYPE_MAPPING, 317 exp.DataType.Type.TINYINT: "SMALLINT", 318 exp.DataType.Type.FLOAT: "REAL", 319 exp.DataType.Type.DOUBLE: "DOUBLE PRECISION", 320 exp.DataType.Type.BINARY: "BYTEA", 321 exp.DataType.Type.VARBINARY: "BYTEA", 322 exp.DataType.Type.DATETIME: "TIMESTAMP", 323 } 324 325 TRANSFORMS = { 326 **generator.Generator.TRANSFORMS, 327 exp.BitwiseXor: lambda self, e: self.binary(e, "#"), 328 exp.ColumnDef: transforms.preprocess([_auto_increment_to_serial, _serial_to_generated]), 329 exp.JSONExtract: arrow_json_extract_sql, 330 exp.JSONExtractScalar: arrow_json_extract_scalar_sql, 331 exp.JSONBExtract: lambda self, e: self.binary(e, "#>"), 332 exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"), 333 exp.JSONBContains: lambda self, e: self.binary(e, "?"), 334 exp.Pow: lambda self, e: self.binary(e, "^"), 335 exp.CurrentDate: no_paren_current_date_sql, 336 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 337 exp.DateAdd: _date_add_sql("+"), 338 exp.DateStrToDate: datestrtodate_sql, 339 exp.DateSub: _date_add_sql("-"), 340 exp.DateDiff: _date_diff_sql, 341 exp.LogicalOr: rename_func("BOOL_OR"), 342 exp.LogicalAnd: rename_func("BOOL_AND"), 343 exp.Max: max_or_greatest, 344 exp.Min: min_or_least, 345 exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"), 346 exp.ArrayContains: lambda self, e: self.binary(e, "@>"), 347 exp.ArrayContained: lambda self, e: self.binary(e, "<@"), 348 exp.Merge: transforms.preprocess([transforms.remove_target_from_merge]), 349 exp.Pivot: no_pivot_sql, 350 exp.RegexpLike: lambda self, e: self.binary(e, "~"), 351 exp.RegexpILike: lambda self, e: self.binary(e, "~*"), 352 exp.StrPosition: str_position_sql, 353 exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", 354 exp.Substring: _substring_sql, 355 exp.TimestampTrunc: timestamptrunc_sql, 356 exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)", 357 exp.TimeToStr: lambda self, e: 
f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", 358 exp.TableSample: no_tablesample_sql, 359 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 360 exp.Trim: trim_sql, 361 exp.TryCast: no_trycast_sql, 362 exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})", 363 exp.DataType: _datatype_sql, 364 exp.GroupConcat: _string_agg_sql, 365 exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})" 366 if isinstance(seq_get(e.expressions, 0), exp.Select) 367 else f"{self.normalize_func('ARRAY')}[{self.expressions(e, flat=True)}]", 368 } 369 370 PROPERTIES_LOCATION = { 371 **generator.Generator.PROPERTIES_LOCATION, 372 exp.TransientProperty: exp.Properties.Location.UNSUPPORTED, 373 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 374 }
class Postgres(Dialect):
    # Dialect-level settings consumed by the base Dialect machinery.
    null_ordering = "nulls_are_large"
    time_format = "'YYYY-MM-DD HH24:MI:SS'"
    # Postgres format template pattern -> Python strftime directive.
    time_mapping = {
        "AM": "%p",
        "PM": "%p",
        "D": "%u",  # 1-based day of week
        "DD": "%d",  # day of month
        "DDD": "%j",  # zero padded day of year
        "FMDD": "%-d",  # - is no leading zero for Python; same for FM in postgres
        "FMDDD": "%-j",  # day of year
        "FMHH12": "%-I",  # 9
        "FMHH24": "%-H",  # 9
        "FMMI": "%-M",  # Minute
        "FMMM": "%-m",  # 1
        "FMSS": "%-S",  # Second
        "HH12": "%I",  # 09
        "HH24": "%H",  # 09
        "MI": "%M",  # zero padded minute
        "MM": "%m",  # 01
        "OF": "%z",  # utc offset
        "SS": "%S",  # zero padded second
        "TMDay": "%A",  # TM is locale dependent
        "TMDy": "%a",
        "TMMon": "%b",  # Sep
        "TMMonth": "%B",  # September
        "TZ": "%Z",  # uppercase timezone name
        "US": "%f",  # zero padded microsecond
        "WW": "%U",  # 1-based week of year
        "YY": "%y",  # 15
        "YYYY": "%Y",  # 2015
    }

    class Tokenizer(tokens.Tokenizer):
        # String delimiters: single quotes and $$ pairs.
        QUOTES = ["'", "$$"]

        # (start, end) delimiter pairs for prefixed literals.
        BIT_STRINGS = [("b'", "'"), ("B'", "'")]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]

        # Base keywords first, so the entries below add to or override them.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "~~": TokenType.LIKE,
            "~~*": TokenType.ILIKE,
            "~*": TokenType.IRLIKE,
            "~": TokenType.RLIKE,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BIGSERIAL": TokenType.BIGSERIAL,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "DECLARE": TokenType.COMMAND,
            "DO": TokenType.COMMAND,
            "HSTORE": TokenType.HSTORE,
            "JSONB": TokenType.JSONB,
            "REFRESH": TokenType.COMMAND,
            "REINDEX": TokenType.COMMAND,
            "RESET": TokenType.COMMAND,
            "RETURNING": TokenType.RETURNING,
            "REVOKE": TokenType.COMMAND,
            "SERIAL": TokenType.SERIAL,
            "SMALLSERIAL": TokenType.SMALLSERIAL,
            "TEMP": TokenType.TEMPORARY,
            "CSTRING": TokenType.PSEUDO_TYPE,
        }

        # "$" is tokenized as a parameter marker (see Generator.PARAMETER_TOKEN).
        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Parser(parser.Parser):
        STRICT_CAST = False

        # Function name -> builder taking the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            # DATE_TRUNC(unit, value): the unit comes first, the value second.
            "DATE_TRUNC": lambda args: exp.TimestampTrunc(
                this=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "GENERATE_SERIES": _generate_series,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
            "TO_TIMESTAMP": _to_timestamp,
        }

        # Functions that need custom parsing of their argument list.
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
        }

        # "#" is parsed as bitwise XOR.
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.HASH: exp.BitwiseXor,
        }

        # "^" is parsed as exponentiation.
        EXPONENT = {
            TokenType.CARET: exp.Pow,
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.AT_GT: binary_range_parser(exp.ArrayContains),
            TokenType.LT_AT: binary_range_parser(exp.ArrayContained),
        }

        def _parse_factor(self) -> t.Optional[exp.Expression]:
            # Operands of FACTOR operators are exponent expressions, so the
            # EXPONENT operators are applied before the FACTOR ones.
            return self._parse_tokens(self._parse_exponent, self.FACTOR)

        def _parse_exponent(self) -> t.Optional[exp.Expression]:
            # Operands of EXPONENT operators are unary expressions.
            return self._parse_tokens(self._parse_unary, self.EXPONENT)

        def _parse_date_part(self) -> exp.Expression:
            # Parses `<part>, <value>` and represents it as an Extract node.
            part = self._parse_type()
            self._match(TokenType.COMMA)
            value = self._parse_bitwise()

            if part and part.is_string:
                # A quoted part name is turned into a bare Var.
                part = exp.Var(this=part.name)

            return self.expression(exp.Extract, this=part, expression=value)

    class Generator(generator.Generator):
        SINGLE_STRING_INTERVAL = True
        LOCKING_READS_SUPPORTED = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        PARAMETER_TOKEN = "$"

        # Data type -> type name emitted for this dialect.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "SMALLINT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
            exp.DataType.Type.BINARY: "BYTEA",
            exp.DataType.Type.VARBINARY: "BYTEA",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
        }

        # Expression type -> function producing its SQL; entries below add to
        # or override the base generator's transforms.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
            # Preprocessors are listed in order: auto-increment -> SERIAL first,
            # then SERIAL -> identity column.
            exp.ColumnDef: transforms.preprocess([_auto_increment_to_serial, _serial_to_generated]),
            exp.JSONExtract: arrow_json_extract_sql,
            exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
            exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
            exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
            exp.JSONBContains: lambda self, e: self.binary(e, "?"),
            exp.Pow: lambda self, e: self.binary(e, "^"),
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_add_sql("+"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _date_add_sql("-"),
            exp.DateDiff: _date_diff_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"),
            exp.ArrayContains: lambda self, e: self.binary(e, "@>"),
            exp.ArrayContained: lambda self, e: self.binary(e, "<@"),
            exp.Merge: transforms.preprocess([transforms.remove_target_from_merge]),
            exp.Pivot: no_pivot_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "~"),
            exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
            exp.StrPosition: str_position_sql,
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Substring: _substring_sql,
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)",
            exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TableSample: no_tablesample_sql,
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: trim_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
            exp.DataType: _datatype_sql,
            exp.GroupConcat: _string_agg_sql,
            # ARRAY(<select>) when the first element is a Select, ARRAY[...] otherwise.
            exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})"
            if isinstance(seq_get(e.expressions, 0), exp.Select)
            else f"{self.normalize_func('ARRAY')}[{self.expressions(e, flat=True)}]",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }
class Tokenizer(tokens.Tokenizer):
    # String delimiters: single quotes and $$ pairs.
    QUOTES = ["'", "$$"]

    # (start, end) delimiter pairs for prefixed literals.
    BIT_STRINGS = [("b'", "'"), ("B'", "'")]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    BYTE_STRINGS = [("e'", "'"), ("E'", "'")]

    # Base keywords first, so the entries below add to or override them.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "~~": TokenType.LIKE,
        "~~*": TokenType.ILIKE,
        "~*": TokenType.IRLIKE,
        "~": TokenType.RLIKE,
        "@>": TokenType.AT_GT,
        "<@": TokenType.LT_AT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BIGSERIAL": TokenType.BIGSERIAL,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "DECLARE": TokenType.COMMAND,
        "DO": TokenType.COMMAND,
        "HSTORE": TokenType.HSTORE,
        "JSONB": TokenType.JSONB,
        "REFRESH": TokenType.COMMAND,
        "REINDEX": TokenType.COMMAND,
        "RESET": TokenType.COMMAND,
        "RETURNING": TokenType.RETURNING,
        "REVOKE": TokenType.COMMAND,
        "SERIAL": TokenType.SERIAL,
        "SMALLSERIAL": TokenType.SMALLSERIAL,
        "TEMP": TokenType.TEMPORARY,
        "CSTRING": TokenType.PSEUDO_TYPE,
    }

    # "$" is tokenized as a parameter marker.
    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}
Inherited Members
class Parser(parser.Parser):
    STRICT_CAST = False

    # Function name -> builder taking the parsed argument list.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        # DATE_TRUNC(unit, value): the unit comes first, the value second.
        "DATE_TRUNC": lambda args: exp.TimestampTrunc(
            this=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "GENERATE_SERIES": _generate_series,
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
        "TO_TIMESTAMP": _to_timestamp,
    }

    # Functions that need custom parsing of their argument list.
    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
    }

    # "#" is parsed as bitwise XOR.
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.HASH: exp.BitwiseXor,
    }

    # "^" is parsed as exponentiation.
    EXPONENT = {
        TokenType.CARET: exp.Pow,
    }

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
        TokenType.AT_GT: binary_range_parser(exp.ArrayContains),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContained),
    }

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Operands of FACTOR operators are exponent expressions, so the
        # EXPONENT operators are applied before the FACTOR ones.
        return self._parse_tokens(self._parse_exponent, self.FACTOR)

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        # Operands of EXPONENT operators are unary expressions.
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_date_part(self) -> exp.Expression:
        # Parses `<part>, <value>` and represents it as an Extract node.
        part = self._parse_type()
        self._match(TokenType.COMMA)
        value = self._parse_bitwise()

        if part and part.is_string:
            # A quoted part name is turned into a bare Var.
            part = exp.Var(this=part.name)

        return self.expression(exp.Extract, this=part, expression=value)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. whether ARRAY[0] or ARRAY[1] is the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    SINGLE_STRING_INTERVAL = True
    LOCKING_READS_SUPPORTED = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    PARAMETER_TOKEN = "$"

    # Data type -> type name emitted for this dialect.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TINYINT: "SMALLINT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
        exp.DataType.Type.BINARY: "BYTEA",
        exp.DataType.Type.VARBINARY: "BYTEA",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
    }

    # Expression type -> function producing its SQL; entries below add to
    # or override the base generator's transforms.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
        # Preprocessors are listed in order: auto-increment -> SERIAL first,
        # then SERIAL -> identity column.
        exp.ColumnDef: transforms.preprocess([_auto_increment_to_serial, _serial_to_generated]),
        exp.JSONExtract: arrow_json_extract_sql,
        exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
        exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
        exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
        exp.JSONBContains: lambda self, e: self.binary(e, "?"),
        exp.Pow: lambda self, e: self.binary(e, "^"),
        exp.CurrentDate: no_paren_current_date_sql,
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_add_sql("+"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _date_add_sql("-"),
        exp.DateDiff: _date_diff_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"),
        exp.ArrayContains: lambda self, e: self.binary(e, "@>"),
        exp.ArrayContained: lambda self, e: self.binary(e, "<@"),
        exp.Merge: transforms.preprocess([transforms.remove_target_from_merge]),
        exp.Pivot: no_pivot_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "~"),
        exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
        exp.StrPosition: str_position_sql,
        exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
        exp.Substring: _substring_sql,
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)",
        exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
        exp.TableSample: no_tablesample_sql,
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: trim_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
        exp.DataType: _datatype_sql,
        exp.GroupConcat: _string_agg_sql,
        # ARRAY(<select>) when the first element is a Select, ARRAY[...] otherwise.
        exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})"
        if isinstance(seq_get(e.expressions, 0), exp.Select)
        else f"{self.normalize_func('ARRAY')}[{self.expressions(e, flat=True)}]",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the value the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- bit_start (str): specifies which starting character to use to delimit bit literals. Default: None.
- bit_end (str): specifies which ending character to use to delimit bit literals. Default: None.
- hex_start (str): specifies which starting character to use to delimit hex literals. Default: None.
- hex_end (str): specifies which ending character to use to delimit hex literals. Default: None.
- byte_start (str): specifies which starting character to use to delimit byte literals. Default: None.
- byte_end (str): specifies which ending character to use to delimit byte literals. Default: None.
- raw_start (str): specifies which starting character to use to delimit raw literals. Default: None.
- raw_end (str): specifies which ending character to use to delimit raw literals. Default: None.
- identify (bool | str): 'always': always quote, 'safe': quote identifiers if they don't contain an uppercase character, True defaults to always.
- normalize (bool): if set to True all identifiers will be lowercased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names, "upper", "lower", or None. Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql