sqlglot.dialects.tsql
1from __future__ import annotations 2 3import re 4import typing as t 5 6from sqlglot import exp, generator, parser, tokens 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 min_or_least, 10 parse_date_delta, 11 rename_func, 12) 13from sqlglot.expressions import DataType 14from sqlglot.helper import seq_get 15from sqlglot.time import format_time 16from sqlglot.tokens import TokenType 17 18FULL_FORMAT_TIME_MAPPING = { 19 "weekday": "%A", 20 "dw": "%A", 21 "w": "%A", 22 "month": "%B", 23 "mm": "%B", 24 "m": "%B", 25} 26 27DATE_DELTA_INTERVAL = { 28 "year": "year", 29 "yyyy": "year", 30 "yy": "year", 31 "quarter": "quarter", 32 "qq": "quarter", 33 "q": "quarter", 34 "month": "month", 35 "mm": "month", 36 "m": "month", 37 "week": "week", 38 "ww": "week", 39 "wk": "week", 40 "day": "day", 41 "dd": "day", 42 "d": "day", 43} 44 45 46DATE_FMT_RE = re.compile("([dD]{1,2})|([mM]{1,2})|([yY]{1,4})|([hH]{1,2})|([sS]{1,2})") 47 48# N = Numeric, C=Currency 49TRANSPILE_SAFE_NUMBER_FMT = {"N", "C"} 50 51 52def _format_time_lambda(exp_class, full_format_mapping=None, default=None): 53 def _format_time(args): 54 return exp_class( 55 this=seq_get(args, 1), 56 format=exp.Literal.string( 57 format_time( 58 seq_get(args, 0).name or (TSQL.time_format if default is True else default), 59 {**TSQL.time_mapping, **FULL_FORMAT_TIME_MAPPING} 60 if full_format_mapping 61 else TSQL.time_mapping, 62 ) 63 ), 64 ) 65 66 return _format_time 67 68 69def _parse_format(args): 70 fmt = seq_get(args, 1) 71 number_fmt = fmt.name in TRANSPILE_SAFE_NUMBER_FMT or not DATE_FMT_RE.search(fmt.this) 72 if number_fmt: 73 return exp.NumberToStr(this=seq_get(args, 0), format=fmt) 74 return exp.TimeToStr( 75 this=seq_get(args, 0), 76 format=exp.Literal.string( 77 format_time(fmt.name, TSQL.format_time_mapping) 78 if len(fmt.name) == 1 79 else format_time(fmt.name, TSQL.time_mapping) 80 ), 81 ) 82 83 84def _parse_eomonth(args): 85 date = seq_get(args, 0) 86 month_lag = seq_get(args, 1) 87 unit = 
DATE_DELTA_INTERVAL.get("month") 88 89 if month_lag is None: 90 return exp.LastDateOfMonth(this=date) 91 92 # Remove month lag argument in parser as its compared with the number of arguments of the resulting class 93 args.remove(month_lag) 94 95 return exp.LastDateOfMonth(this=exp.DateAdd(this=date, expression=month_lag, unit=unit)) 96 97 98def generate_date_delta_with_unit_sql(self, e): 99 func = "DATEADD" if isinstance(e, exp.DateAdd) else "DATEDIFF" 100 return self.func(func, e.text("unit"), e.expression, e.this) 101 102 103def _format_sql(self, e): 104 fmt = ( 105 e.args["format"] 106 if isinstance(e, exp.NumberToStr) 107 else exp.Literal.string(format_time(e.text("format"), TSQL.inverse_time_mapping)) 108 ) 109 return self.func("FORMAT", e.this, fmt) 110 111 112def _string_agg_sql(self, e): 113 e = e.copy() 114 115 this = e.this 116 distinct = e.find(exp.Distinct) 117 if distinct: 118 # exp.Distinct can appear below an exp.Order or an exp.GroupConcat expression 119 self.unsupported("T-SQL STRING_AGG doesn't support DISTINCT.") 120 this = distinct.expressions[0] 121 distinct.pop() 122 123 order = "" 124 if isinstance(e.this, exp.Order): 125 if e.this.this: 126 this = e.this.this 127 e.this.this.pop() 128 order = f" WITHIN GROUP ({self.sql(e.this)[1:]})" # Order has a leading space 129 130 separator = e.args.get("separator") or exp.Literal.string(",") 131 return f"STRING_AGG({self.format_args(this, separator)}){order}" 132 133 134class TSQL(Dialect): 135 null_ordering = "nulls_are_small" 136 time_format = "'yyyy-mm-dd hh:mm:ss'" 137 138 time_mapping = { 139 "year": "%Y", 140 "qq": "%q", 141 "q": "%q", 142 "quarter": "%q", 143 "dayofyear": "%j", 144 "day": "%d", 145 "dy": "%d", 146 "y": "%Y", 147 "week": "%W", 148 "ww": "%W", 149 "wk": "%W", 150 "hour": "%h", 151 "hh": "%I", 152 "minute": "%M", 153 "mi": "%M", 154 "n": "%M", 155 "second": "%S", 156 "ss": "%S", 157 "s": "%-S", 158 "millisecond": "%f", 159 "ms": "%f", 160 "weekday": "%W", 161 "dw": "%W", 162 
"month": "%m", 163 "mm": "%M", 164 "m": "%-M", 165 "Y": "%Y", 166 "YYYY": "%Y", 167 "YY": "%y", 168 "MMMM": "%B", 169 "MMM": "%b", 170 "MM": "%m", 171 "M": "%-m", 172 "dd": "%d", 173 "d": "%-d", 174 "HH": "%H", 175 "H": "%-H", 176 "h": "%-I", 177 "S": "%f", 178 "yyyy": "%Y", 179 "yy": "%y", 180 } 181 182 convert_format_mapping = { 183 "0": "%b %d %Y %-I:%M%p", 184 "1": "%m/%d/%y", 185 "2": "%y.%m.%d", 186 "3": "%d/%m/%y", 187 "4": "%d.%m.%y", 188 "5": "%d-%m-%y", 189 "6": "%d %b %y", 190 "7": "%b %d, %y", 191 "8": "%H:%M:%S", 192 "9": "%b %d %Y %-I:%M:%S:%f%p", 193 "10": "mm-dd-yy", 194 "11": "yy/mm/dd", 195 "12": "yymmdd", 196 "13": "%d %b %Y %H:%M:ss:%f", 197 "14": "%H:%M:%S:%f", 198 "20": "%Y-%m-%d %H:%M:%S", 199 "21": "%Y-%m-%d %H:%M:%S.%f", 200 "22": "%m/%d/%y %-I:%M:%S %p", 201 "23": "%Y-%m-%d", 202 "24": "%H:%M:%S", 203 "25": "%Y-%m-%d %H:%M:%S.%f", 204 "100": "%b %d %Y %-I:%M%p", 205 "101": "%m/%d/%Y", 206 "102": "%Y.%m.%d", 207 "103": "%d/%m/%Y", 208 "104": "%d.%m.%Y", 209 "105": "%d-%m-%Y", 210 "106": "%d %b %Y", 211 "107": "%b %d, %Y", 212 "108": "%H:%M:%S", 213 "109": "%b %d %Y %-I:%M:%S:%f%p", 214 "110": "%m-%d-%Y", 215 "111": "%Y/%m/%d", 216 "112": "%Y%m%d", 217 "113": "%d %b %Y %H:%M:%S:%f", 218 "114": "%H:%M:%S:%f", 219 "120": "%Y-%m-%d %H:%M:%S", 220 "121": "%Y-%m-%d %H:%M:%S.%f", 221 } 222 # not sure if complete 223 format_time_mapping = { 224 "y": "%B %Y", 225 "d": "%m/%d/%Y", 226 "H": "%-H", 227 "h": "%-I", 228 "s": "%Y-%m-%d %H:%M:%S", 229 "D": "%A,%B,%Y", 230 "f": "%A,%B,%Y %-I:%M %p", 231 "F": "%A,%B,%Y %-I:%M:%S %p", 232 "g": "%m/%d/%Y %-I:%M %p", 233 "G": "%m/%d/%Y %-I:%M:%S %p", 234 "M": "%B %-d", 235 "m": "%B %-d", 236 "O": "%Y-%m-%dT%H:%M:%S", 237 "u": "%Y-%M-%D %H:%M:%S%z", 238 "U": "%A, %B %D, %Y %H:%M:%S%z", 239 "T": "%-I:%M:%S %p", 240 "t": "%-I:%M", 241 "Y": "%a %Y", 242 } 243 244 class Tokenizer(tokens.Tokenizer): 245 IDENTIFIERS = ['"', ("[", "]")] 246 247 QUOTES = ["'", '"'] 248 249 KEYWORDS = { 250 **tokens.Tokenizer.KEYWORDS, 
251 "DATETIME2": TokenType.DATETIME, 252 "DATETIMEOFFSET": TokenType.TIMESTAMPTZ, 253 "DECLARE": TokenType.COMMAND, 254 "IMAGE": TokenType.IMAGE, 255 "MONEY": TokenType.MONEY, 256 "NTEXT": TokenType.TEXT, 257 "NVARCHAR(MAX)": TokenType.TEXT, 258 "PRINT": TokenType.COMMAND, 259 "PROC": TokenType.PROCEDURE, 260 "REAL": TokenType.FLOAT, 261 "ROWVERSION": TokenType.ROWVERSION, 262 "SMALLDATETIME": TokenType.DATETIME, 263 "SMALLMONEY": TokenType.SMALLMONEY, 264 "SQL_VARIANT": TokenType.VARIANT, 265 "TIME": TokenType.TIMESTAMP, 266 "TOP": TokenType.TOP, 267 "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER, 268 "VARCHAR(MAX)": TokenType.TEXT, 269 "XML": TokenType.XML, 270 } 271 272 # TSQL allows @, # to appear as a variable/identifier prefix 273 SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy() 274 SINGLE_TOKENS.pop("@") 275 SINGLE_TOKENS.pop("#") 276 277 class Parser(parser.Parser): 278 FUNCTIONS = { 279 **parser.Parser.FUNCTIONS, # type: ignore 280 "CHARINDEX": lambda args: exp.StrPosition( 281 this=seq_get(args, 1), 282 substr=seq_get(args, 0), 283 position=seq_get(args, 2), 284 ), 285 "ISNULL": exp.Coalesce.from_arg_list, 286 "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL), 287 "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL), 288 "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True), 289 "DATEPART": _format_time_lambda(exp.TimeToStr), 290 "GETDATE": exp.CurrentTimestamp.from_arg_list, 291 "SYSDATETIME": exp.CurrentTimestamp.from_arg_list, 292 "IIF": exp.If.from_arg_list, 293 "LEN": exp.Length.from_arg_list, 294 "REPLICATE": exp.Repeat.from_arg_list, 295 "JSON_VALUE": exp.JSONExtractScalar.from_arg_list, 296 "FORMAT": _parse_format, 297 "EOMONTH": _parse_eomonth, 298 } 299 300 VAR_LENGTH_DATATYPES = { 301 DataType.Type.NVARCHAR, 302 DataType.Type.VARCHAR, 303 DataType.Type.CHAR, 304 DataType.Type.NCHAR, 305 } 306 307 RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - { # type: ignore 308 
TokenType.TABLE, 309 *parser.Parser.TYPE_TOKENS, # type: ignore 310 } 311 312 STATEMENT_PARSERS = { 313 **parser.Parser.STATEMENT_PARSERS, # type: ignore 314 TokenType.END: lambda self: self._parse_command(), 315 } 316 317 def _parse_system_time(self) -> t.Optional[exp.Expression]: 318 if not self._match_text_seq("FOR", "SYSTEM_TIME"): 319 return None 320 321 if self._match_text_seq("AS", "OF"): 322 system_time = self.expression( 323 exp.SystemTime, this=self._parse_bitwise(), kind="AS OF" 324 ) 325 elif self._match_set((TokenType.FROM, TokenType.BETWEEN)): 326 kind = self._prev.text 327 this = self._parse_bitwise() 328 self._match_texts(("TO", "AND")) 329 expression = self._parse_bitwise() 330 system_time = self.expression( 331 exp.SystemTime, this=this, expression=expression, kind=kind 332 ) 333 elif self._match_text_seq("CONTAINED", "IN"): 334 args = self._parse_wrapped_csv(self._parse_bitwise) 335 system_time = self.expression( 336 exp.SystemTime, 337 this=seq_get(args, 0), 338 expression=seq_get(args, 1), 339 kind="CONTAINED IN", 340 ) 341 elif self._match(TokenType.ALL): 342 system_time = self.expression(exp.SystemTime, kind="ALL") 343 else: 344 system_time = None 345 self.raise_error("Unable to parse FOR SYSTEM_TIME clause") 346 347 return system_time 348 349 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 350 table = super()._parse_table_parts(schema=schema) 351 table.set("system_time", self._parse_system_time()) 352 return table 353 354 def _parse_returns(self) -> exp.Expression: 355 table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS) 356 returns = super()._parse_returns() 357 returns.set("table", table) 358 return returns 359 360 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 361 to = self._parse_types() 362 self._match(TokenType.COMMA) 363 this = self._parse_conjunction() 364 365 if not to or not this: 366 return None 367 368 # Retrieve length of datatype and override to default if 
not specified 369 if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES: 370 to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False) 371 372 # Check whether a conversion with format is applicable 373 if self._match(TokenType.COMMA): 374 format_val = self._parse_number() 375 format_val_name = format_val.name if format_val else "" 376 377 if format_val_name not in TSQL.convert_format_mapping: 378 raise ValueError( 379 f"CONVERT function at T-SQL does not support format style {format_val_name}" 380 ) 381 382 format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name]) 383 384 # Check whether the convert entails a string to date format 385 if to.this == DataType.Type.DATE: 386 return self.expression(exp.StrToDate, this=this, format=format_norm) 387 # Check whether the convert entails a string to datetime format 388 elif to.this == DataType.Type.DATETIME: 389 return self.expression(exp.StrToTime, this=this, format=format_norm) 390 # Check whether the convert entails a date to string format 391 elif to.this in self.VAR_LENGTH_DATATYPES: 392 return self.expression( 393 exp.Cast if strict else exp.TryCast, 394 to=to, 395 this=self.expression(exp.TimeToStr, this=this, format=format_norm), 396 ) 397 elif to.this == DataType.Type.TEXT: 398 return self.expression(exp.TimeToStr, this=this, format=format_norm) 399 400 # Entails a simple cast without any format requirement 401 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 402 403 def _parse_user_defined_function( 404 self, kind: t.Optional[TokenType] = None 405 ) -> t.Optional[exp.Expression]: 406 this = super()._parse_user_defined_function(kind=kind) 407 408 if ( 409 kind == TokenType.FUNCTION 410 or isinstance(this, exp.UserDefinedFunction) 411 or self._match(TokenType.ALIAS, advance=False) 412 ): 413 return this 414 415 expressions = self._parse_csv(self._parse_function_parameter) 416 return 
self.expression(exp.UserDefinedFunction, this=this, expressions=expressions) 417 418 class Generator(generator.Generator): 419 LOCKING_READS_SUPPORTED = True 420 421 TYPE_MAPPING = { 422 **generator.Generator.TYPE_MAPPING, # type: ignore 423 exp.DataType.Type.INT: "INTEGER", 424 exp.DataType.Type.DECIMAL: "NUMERIC", 425 exp.DataType.Type.DATETIME: "DATETIME2", 426 exp.DataType.Type.VARIANT: "SQL_VARIANT", 427 } 428 429 TRANSFORMS = { 430 **generator.Generator.TRANSFORMS, # type: ignore 431 exp.DateAdd: generate_date_delta_with_unit_sql, 432 exp.DateDiff: generate_date_delta_with_unit_sql, 433 exp.CurrentDate: rename_func("GETDATE"), 434 exp.CurrentTimestamp: rename_func("GETDATE"), 435 exp.If: rename_func("IIF"), 436 exp.NumberToStr: _format_sql, 437 exp.TimeToStr: _format_sql, 438 exp.GroupConcat: _string_agg_sql, 439 exp.Min: min_or_least, 440 } 441 442 TRANSFORMS.pop(exp.ReturnsProperty) 443 444 def systemtime_sql(self, expression: exp.SystemTime) -> str: 445 kind = expression.args["kind"] 446 if kind == "ALL": 447 return "FOR SYSTEM_TIME ALL" 448 449 start = self.sql(expression, "this") 450 if kind == "AS OF": 451 return f"FOR SYSTEM_TIME AS OF {start}" 452 453 end = self.sql(expression, "expression") 454 if kind == "FROM": 455 return f"FOR SYSTEM_TIME FROM {start} TO {end}" 456 if kind == "BETWEEN": 457 return f"FOR SYSTEM_TIME BETWEEN {start} AND {end}" 458 459 return f"FOR SYSTEM_TIME CONTAINED IN ({start}, {end})" 460 461 def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str: 462 table = expression.args.get("table") 463 table = f"{table} " if table else "" 464 return f"RETURNS {table}{self.sql(expression, 'this')}"
def
generate_date_delta_with_unit_sql(self, e):
class TSQL(Dialect):
    """T-SQL dialect definition: format mappings plus tokenizer, parser and generator."""

    null_ordering = "nulls_are_small"
    time_format = "'yyyy-mm-dd hh:mm:ss'"

    # T-SQL date-part names and FORMAT tokens mapped to strftime-style directives.
    time_mapping = {
        "year": "%Y",
        "qq": "%q",
        "q": "%q",
        "quarter": "%q",
        "dayofyear": "%j",
        "day": "%d",
        "dy": "%d",
        "y": "%Y",
        "week": "%W",
        "ww": "%W",
        "wk": "%W",
        "hour": "%h",
        "hh": "%I",
        "minute": "%M",
        "mi": "%M",
        "n": "%M",
        "second": "%S",
        "ss": "%S",
        "s": "%-S",
        "millisecond": "%f",
        "ms": "%f",
        "weekday": "%W",
        "dw": "%W",
        "month": "%m",
        "mm": "%M",
        "m": "%-M",
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "h": "%-I",
        "S": "%f",
        "yyyy": "%Y",
        "yy": "%y",
    }

    # CONVERT style number -> strftime-style format. Styles 10-13 mirror 110-113 with a
    # two-digit year (13 keeps the four-digit year, like 113).
    convert_format_mapping = {
        "0": "%b %d %Y %-I:%M%p",
        "1": "%m/%d/%y",
        "2": "%y.%m.%d",
        "3": "%d/%m/%y",
        "4": "%d.%m.%y",
        "5": "%d-%m-%y",
        "6": "%d %b %y",
        "7": "%b %d, %y",
        "8": "%H:%M:%S",
        "9": "%b %d %Y %-I:%M:%S:%f%p",
        "10": "%m-%d-%y",
        "11": "%y/%m/%d",
        "12": "%y%m%d",
        "13": "%d %b %Y %H:%M:%S:%f",
        "14": "%H:%M:%S:%f",
        "20": "%Y-%m-%d %H:%M:%S",
        "21": "%Y-%m-%d %H:%M:%S.%f",
        "22": "%m/%d/%y %-I:%M:%S %p",
        "23": "%Y-%m-%d",
        "24": "%H:%M:%S",
        "25": "%Y-%m-%d %H:%M:%S.%f",
        "100": "%b %d %Y %-I:%M%p",
        "101": "%m/%d/%Y",
        "102": "%Y.%m.%d",
        "103": "%d/%m/%Y",
        "104": "%d.%m.%Y",
        "105": "%d-%m-%Y",
        "106": "%d %b %Y",
        "107": "%b %d, %Y",
        "108": "%H:%M:%S",
        "109": "%b %d %Y %-I:%M:%S:%f%p",
        "110": "%m-%d-%Y",
        "111": "%Y/%m/%d",
        "112": "%Y%m%d",
        "113": "%d %b %Y %H:%M:%S:%f",
        "114": "%H:%M:%S:%f",
        "120": "%Y-%m-%d %H:%M:%S",
        "121": "%Y-%m-%d %H:%M:%S.%f",
    }
    # Single-character FORMAT specifiers -> strftime-style format.
    # not sure if complete
    format_time_mapping = {
        "y": "%B %Y",
        "d": "%m/%d/%Y",
        "H": "%-H",
        "h": "%-I",
        "s": "%Y-%m-%d %H:%M:%S",
        "D": "%A,%B,%Y",
        "f": "%A,%B,%Y %-I:%M %p",
        "F": "%A,%B,%Y %-I:%M:%S %p",
        "g": "%m/%d/%Y %-I:%M %p",
        "G": "%m/%d/%Y %-I:%M:%S %p",
        "M": "%B %-d",
        "m": "%B %-d",
        "O": "%Y-%m-%dT%H:%M:%S",
        "u": "%Y-%m-%d %H:%M:%S%z",
        "U": "%A, %B %d, %Y %H:%M:%S%z",
        "T": "%-I:%M:%S %p",
        "t": "%-I:%M",
        "Y": "%a %Y",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer with T-SQL identifier delimiters, quotes and keywords."""

        IDENTIFIERS = ['"', ("[", "]")]

        QUOTES = ["'", '"']

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DATETIME2": TokenType.DATETIME,
            "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
            "DECLARE": TokenType.COMMAND,
            "IMAGE": TokenType.IMAGE,
            "MONEY": TokenType.MONEY,
            "NTEXT": TokenType.TEXT,
            "NVARCHAR(MAX)": TokenType.TEXT,
            "PRINT": TokenType.COMMAND,
            "PROC": TokenType.PROCEDURE,
            "REAL": TokenType.FLOAT,
            "ROWVERSION": TokenType.ROWVERSION,
            "SMALLDATETIME": TokenType.DATETIME,
            "SMALLMONEY": TokenType.SMALLMONEY,
            "SQL_VARIANT": TokenType.VARIANT,
            "TIME": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
            "VARCHAR(MAX)": TokenType.TEXT,
            "XML": TokenType.XML,
        }

        # TSQL allows @, # to appear as a variable/identifier prefix
        SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("@")
        SINGLE_TOKENS.pop("#")

    class Parser(parser.Parser):
        """Parser with T-SQL function mappings and CONVERT / FOR SYSTEM_TIME / RETURNS handling."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # CHARINDEX(substr, string [, start]) has the search string first.
            "CHARINDEX": lambda args: exp.StrPosition(
                this=seq_get(args, 1),
                substr=seq_get(args, 0),
                position=seq_get(args, 2),
            ),
            "ISNULL": exp.Coalesce.from_arg_list,
            "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
            "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
            "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
            "DATEPART": _format_time_lambda(exp.TimeToStr),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "LEN": exp.Length.from_arg_list,
            "REPLICATE": exp.Repeat.from_arg_list,
            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
            "FORMAT": _parse_format,
            "EOMONTH": _parse_eomonth,
        }

        # Types that get a default length of 30 in CONVERT when none is specified.
        VAR_LENGTH_DATATYPES = {
            DataType.Type.NVARCHAR,
            DataType.Type.VARCHAR,
            DataType.Type.CHAR,
            DataType.Type.NCHAR,
        }

        # Tokens accepted as the optional name that precedes the type in a RETURNS clause.
        RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
            TokenType.TABLE,
            *parser.Parser.TYPE_TOKENS,  # type: ignore
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,  # type: ignore
            TokenType.END: lambda self: self._parse_command(),
        }

        def _parse_system_time(self) -> t.Optional[exp.Expression]:
            """Parse an optional ``FOR SYSTEM_TIME ...`` clause.

            Returns:
                An ``exp.SystemTime`` whose ``kind`` is one of ``AS OF``, ``FROM``, ``BETWEEN``,
                ``CONTAINED IN`` or ``ALL``, or ``None`` when the clause is absent. An
                unrecognized clause body is reported through ``self.raise_error``.
            """
            if not self._match_text_seq("FOR", "SYSTEM_TIME"):
                return None

            if self._match_text_seq("AS", "OF"):
                system_time = self.expression(
                    exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
                )
            elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
                # The token text may keep the casing of the input SQL; normalize it because
                # the generator compares ``kind`` against upper-case literals.
                kind = self._prev.text.upper()
                this = self._parse_bitwise()
                self._match_texts(("TO", "AND"))
                expression = self._parse_bitwise()
                system_time = self.expression(
                    exp.SystemTime, this=this, expression=expression, kind=kind
                )
            elif self._match_text_seq("CONTAINED", "IN"):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                system_time = self.expression(
                    exp.SystemTime,
                    this=seq_get(args, 0),
                    expression=seq_get(args, 1),
                    kind="CONTAINED IN",
                )
            elif self._match(TokenType.ALL):
                system_time = self.expression(exp.SystemTime, kind="ALL")
            else:
                system_time = None
                self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

            return system_time

        def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
            """Parse a table reference and attach any trailing ``FOR SYSTEM_TIME`` clause."""
            table = super()._parse_table_parts(schema=schema)
            table.set("system_time", self._parse_system_time())
            return table

        def _parse_returns(self) -> exp.Expression:
            """Parse a RETURNS clause, keeping the optional identifier that precedes the type."""
            table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
            returns = super()._parse_returns()
            returns.set("table", table)
            return returns

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse the arguments of ``CONVERT(type, value [, style])``.

            Args:
                strict: build ``exp.Cast`` when true, ``exp.TryCast`` otherwise.

            Returns:
                ``None`` if the type or the value is missing. With a style number, a
                ``StrToDate`` / ``StrToTime`` / ``TimeToStr`` based expression depending on the
                target type; otherwise a plain cast.

            Raises:
                ValueError: if the style number is not in ``TSQL.convert_format_mapping``.
            """
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_conjunction()

            if not to or not this:
                return None

            # Retrieve length of datatype and override to default if not specified
            if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
                to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

            # Check whether a conversion with format is applicable
            if self._match(TokenType.COMMA):
                format_val = self._parse_number()
                format_val_name = format_val.name if format_val else ""

                if format_val_name not in TSQL.convert_format_mapping:
                    raise ValueError(
                        f"CONVERT function at T-SQL does not support format style {format_val_name}"
                    )

                format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

                # Check whether the convert entails a string to date format
                if to.this == DataType.Type.DATE:
                    return self.expression(exp.StrToDate, this=this, format=format_norm)
                # Check whether the convert entails a string to datetime format
                elif to.this == DataType.Type.DATETIME:
                    return self.expression(exp.StrToTime, this=this, format=format_norm)
                # Check whether the convert entails a date to string format
                elif to.this in self.VAR_LENGTH_DATATYPES:
                    return self.expression(
                        exp.Cast if strict else exp.TryCast,
                        to=to,
                        this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                    )
                elif to.this == DataType.Type.TEXT:
                    return self.expression(exp.TimeToStr, this=this, format=format_norm)

            # Entails a simple cast without any format requirement
            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

        def _parse_user_defined_function(
            self, kind: t.Optional[TokenType] = None
        ) -> t.Optional[exp.Expression]:
            """Parse a user-defined function/procedure header.

            Falls back to an unparenthesized, comma-separated parameter list when the base
            parser did not produce an ``exp.UserDefinedFunction``, the kind is not FUNCTION and
            the next token is not an alias token.
            """
            this = super()._parse_user_defined_function(kind=kind)

            if (
                kind == TokenType.FUNCTION
                or isinstance(this, exp.UserDefinedFunction)
                or self._match(TokenType.ALIAS, advance=False)
            ):
                return this

            expressions = self._parse_csv(self._parse_function_parameter)
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
        """Generator with T-SQL type names and function renderings."""

        LOCKING_READS_SUPPORTED = True

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DATETIME: "DATETIME2",
            exp.DataType.Type.VARIANT: "SQL_VARIANT",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            exp.CurrentDate: rename_func("GETDATE"),
            exp.CurrentTimestamp: rename_func("GETDATE"),
            exp.If: rename_func("IIF"),
            exp.NumberToStr: _format_sql,
            exp.TimeToStr: _format_sql,
            exp.GroupConcat: _string_agg_sql,
            exp.Min: min_or_least,
        }

        # Drop the inherited transform so that returnsproperty_sql below is used instead.
        TRANSFORMS.pop(exp.ReturnsProperty)

        def systemtime_sql(self, expression: exp.SystemTime) -> str:
            """Render an ``exp.SystemTime`` node as a ``FOR SYSTEM_TIME ...`` clause.

            Any ``kind`` other than ``ALL``, ``AS OF``, ``FROM`` or ``BETWEEN`` is rendered as
            ``CONTAINED IN``.
            """
            kind = expression.args["kind"]
            if kind == "ALL":
                return "FOR SYSTEM_TIME ALL"

            start = self.sql(expression, "this")
            if kind == "AS OF":
                return f"FOR SYSTEM_TIME AS OF {start}"

            end = self.sql(expression, "expression")
            if kind == "FROM":
                return f"FOR SYSTEM_TIME FROM {start} TO {end}"
            if kind == "BETWEEN":
                return f"FOR SYSTEM_TIME BETWEEN {start} AND {end}"

            return f"FOR SYSTEM_TIME CONTAINED IN ({start}, {end})"

        def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
            """Render ``RETURNS [table] <type>``, emitting the optional ``table`` arg first."""
            table = expression.args.get("table")
            table = f"{table} " if table else ""
            return f"RETURNS {table}{self.sql(expression, 'this')}"
Inherited Members
class Tokenizer(tokens.Tokenizer):
    """Tokenizer configured with T-SQL identifier delimiters, quotes and keywords."""

    # Identifiers may be wrapped in double quotes or in square brackets.
    IDENTIFIERS = ['"', ("[", "]")]

    QUOTES = ["'", '"']

    # Base keywords extended with T-SQL type names and commands.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "DATETIME2": TokenType.DATETIME,
        "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
        "DECLARE": TokenType.COMMAND,
        "IMAGE": TokenType.IMAGE,
        "MONEY": TokenType.MONEY,
        "NTEXT": TokenType.TEXT,
        "NVARCHAR(MAX)": TokenType.TEXT,
        "PRINT": TokenType.COMMAND,
        "PROC": TokenType.PROCEDURE,
        "REAL": TokenType.FLOAT,
        "ROWVERSION": TokenType.ROWVERSION,
        "SMALLDATETIME": TokenType.DATETIME,
        "SMALLMONEY": TokenType.SMALLMONEY,
        "SQL_VARIANT": TokenType.VARIANT,
        "TIME": TokenType.TIMESTAMP,
        "TOP": TokenType.TOP,
        "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
        "VARCHAR(MAX)": TokenType.TEXT,
        "XML": TokenType.XML,
    }

    # "@" and "#" may prefix variables/identifiers in TSQL, so they must not be
    # tokenized as stand-alone single-character tokens.
    SINGLE_TOKENS = dict(tokens.Tokenizer.SINGLE_TOKENS)
    del SINGLE_TOKENS["@"]
    del SINGLE_TOKENS["#"]
Inherited Members
class Parser(parser.Parser):
    """Parser with T-SQL function mappings and CONVERT / FOR SYSTEM_TIME / RETURNS handling."""

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,  # type: ignore
        # CHARINDEX(substr, string [, start]) has the search string first.
        "CHARINDEX": lambda args: exp.StrPosition(
            this=seq_get(args, 1),
            substr=seq_get(args, 0),
            position=seq_get(args, 2),
        ),
        "ISNULL": exp.Coalesce.from_arg_list,
        "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
        "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
        "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
        "DATEPART": _format_time_lambda(exp.TimeToStr),
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
        "IIF": exp.If.from_arg_list,
        "LEN": exp.Length.from_arg_list,
        "REPLICATE": exp.Repeat.from_arg_list,
        "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
        "FORMAT": _parse_format,
        "EOMONTH": _parse_eomonth,
    }

    # Types that get a default length of 30 in CONVERT when none is specified.
    VAR_LENGTH_DATATYPES = {
        DataType.Type.NVARCHAR,
        DataType.Type.VARCHAR,
        DataType.Type.CHAR,
        DataType.Type.NCHAR,
    }

    # Tokens accepted as the optional name that precedes the type in a RETURNS clause.
    RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
        TokenType.TABLE,
        *parser.Parser.TYPE_TOKENS,  # type: ignore
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,  # type: ignore
        TokenType.END: lambda self: self._parse_command(),
    }

    def _parse_system_time(self) -> t.Optional[exp.Expression]:
        """Parse an optional ``FOR SYSTEM_TIME ...`` clause.

        Returns:
            An ``exp.SystemTime`` whose ``kind`` is one of ``AS OF``, ``FROM``, ``BETWEEN``,
            ``CONTAINED IN`` or ``ALL``, or ``None`` when the clause is absent. An
            unrecognized clause body is reported through ``self.raise_error``.
        """
        if not self._match_text_seq("FOR", "SYSTEM_TIME"):
            return None

        if self._match_text_seq("AS", "OF"):
            system_time = self.expression(
                exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
            )
        elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            # The token text may keep the casing of the input SQL; normalize it because
            # the generator compares ``kind`` against upper-case literals.
            kind = self._prev.text.upper()
            this = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            expression = self._parse_bitwise()
            system_time = self.expression(
                exp.SystemTime, this=this, expression=expression, kind=kind
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            system_time = self.expression(
                exp.SystemTime,
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                kind="CONTAINED IN",
            )
        elif self._match(TokenType.ALL):
            system_time = self.expression(exp.SystemTime, kind="ALL")
        else:
            system_time = None
            self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

        return system_time

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a table reference and attach any trailing ``FOR SYSTEM_TIME`` clause."""
        table = super()._parse_table_parts(schema=schema)
        table.set("system_time", self._parse_system_time())
        return table

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause, keeping the optional identifier that precedes the type."""
        table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
        returns = super()._parse_returns()
        returns.set("table", table)
        return returns

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse the arguments of ``CONVERT(type, value [, style])``.

        Args:
            strict: build ``exp.Cast`` when true, ``exp.TryCast`` otherwise.

        Returns:
            ``None`` if the type or the value is missing. With a style number, a
            ``StrToDate`` / ``StrToTime`` / ``TimeToStr`` based expression depending on the
            target type; otherwise a plain cast.

        Raises:
            ValueError: if the style number is not in ``TSQL.convert_format_mapping``.
        """
        to = self._parse_types()
        self._match(TokenType.COMMA)
        this = self._parse_conjunction()

        if not to or not this:
            return None

        # Retrieve length of datatype and override to default if not specified
        if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
            to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

        # Check whether a conversion with format is applicable
        if self._match(TokenType.COMMA):
            format_val = self._parse_number()
            format_val_name = format_val.name if format_val else ""

            if format_val_name not in TSQL.convert_format_mapping:
                raise ValueError(
                    f"CONVERT function at T-SQL does not support format style {format_val_name}"
                )

            format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

            # Check whether the convert entails a string to date format
            if to.this == DataType.Type.DATE:
                return self.expression(exp.StrToDate, this=this, format=format_norm)
            # Check whether the convert entails a string to datetime format
            elif to.this == DataType.Type.DATETIME:
                return self.expression(exp.StrToTime, this=this, format=format_norm)
            # Check whether the convert entails a date to string format
            elif to.this in self.VAR_LENGTH_DATATYPES:
                return self.expression(
                    exp.Cast if strict else exp.TryCast,
                    to=to,
                    this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                )
            elif to.this == DataType.Type.TEXT:
                return self.expression(exp.TimeToStr, this=this, format=format_norm)

        # Entails a simple cast without any format requirement
        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a user-defined function/procedure header.

        Falls back to an unparenthesized, comma-separated parameter list when the base
        parser did not produce an ``exp.UserDefinedFunction``, the kind is not FUNCTION and
        the next token is not an alias token.
        """
        this = super()._parse_user_defined_function(kind=kind)

        if (
            kind == TokenType.FUNCTION
            or isinstance(this, exp.UserDefinedFunction)
            or self._match(TokenType.ALIAS, advance=False)
        ):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. whether ARRAY[0] or ARRAY[1] is the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    """SQL generator emitting T-SQL type names and function spellings."""

    LOCKING_READS_SUPPORTED = True

    # Base type names overridden with their T-SQL spellings.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,  # type: ignore
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DATETIME: "DATETIME2",
        exp.DataType.Type.VARIANT: "SQL_VARIANT",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,  # type: ignore
        exp.DateAdd: generate_date_delta_with_unit_sql,
        exp.DateDiff: generate_date_delta_with_unit_sql,
        exp.CurrentDate: rename_func("GETDATE"),
        exp.CurrentTimestamp: rename_func("GETDATE"),
        exp.If: rename_func("IIF"),
        exp.NumberToStr: _format_sql,
        exp.TimeToStr: _format_sql,
        exp.GroupConcat: _string_agg_sql,
        exp.Min: min_or_least,
    }

    # Remove the inherited entry so the returnsproperty_sql method below is used.
    del TRANSFORMS[exp.ReturnsProperty]

    def systemtime_sql(self, expression: exp.SystemTime) -> str:
        """Build the ``FOR SYSTEM_TIME ...`` clause for an ``exp.SystemTime`` node.

        Kinds other than ``ALL``, ``AS OF``, ``FROM`` and ``BETWEEN`` are emitted as
        ``CONTAINED IN``.
        """
        kind = expression.args["kind"]
        if kind == "ALL":
            return "FOR SYSTEM_TIME ALL"

        begin = self.sql(expression, "this")
        if kind == "AS OF":
            return f"FOR SYSTEM_TIME AS OF {begin}"

        finish = self.sql(expression, "expression")
        if kind == "FROM":
            clause = f"FROM {begin} TO {finish}"
        elif kind == "BETWEEN":
            clause = f"BETWEEN {begin} AND {finish}"
        else:
            clause = f"CONTAINED IN ({begin}, {finish})"
        return f"FOR SYSTEM_TIME {clause}"

    def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
        """Build ``RETURNS [table] <type>``; the ``table`` arg is emitted only when set."""
        table_arg = expression.args.get("table")
        prefix = f"{table_arg} " if table_arg else ""
        returned = self.sql(expression, "this")
        return f"RETURNS {prefix}{returned}"
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the value represents the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- identify (bool): if set to True all identifiers will be delimited by the corresponding character.
- normalize (bool): if set to True all identifiers will be lower cased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names, "upper", "lower", or None Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): if the comma is leading or trailing in select statements Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def systemtime_sql(self, expression: exp.SystemTime) -> str:
    """Render a ``FOR SYSTEM_TIME`` clause.

    The clause shape is selected by ``expression.args["kind"]``:

    - ``"ALL"``      -> ``FOR SYSTEM_TIME ALL``
    - ``"AS OF"``    -> ``FOR SYSTEM_TIME AS OF <this>``
    - ``"FROM"``     -> ``FOR SYSTEM_TIME FROM <this> TO <expression>``
    - ``"BETWEEN"``  -> ``FOR SYSTEM_TIME BETWEEN <this> AND <expression>``
    - anything else  -> ``FOR SYSTEM_TIME CONTAINED IN (<this>, <expression>)``

    ``<this>`` and ``<expression>`` are produced by ``self.sql`` from the
    correspondingly named args of ``expression``.
    """
    clause_kind = expression.args["kind"]
    if clause_kind == "ALL":
        return "FOR SYSTEM_TIME ALL"

    # Only generate the bounds a given clause shape actually needs.
    lower = self.sql(expression, "this")
    if clause_kind == "AS OF":
        return f"FOR SYSTEM_TIME AS OF {lower}"

    upper = self.sql(expression, "expression")
    if clause_kind == "FROM":
        rendered = f"FOR SYSTEM_TIME FROM {lower} TO {upper}"
    elif clause_kind == "BETWEEN":
        rendered = f"FOR SYSTEM_TIME BETWEEN {lower} AND {upper}"
    else:
        rendered = f"FOR SYSTEM_TIME CONTAINED IN ({lower}, {upper})"
    return rendered
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- afterjournalproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- select_sql
- schema_sql
- star_sql
- structkwarg_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- window_spec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- floatdiv_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- is_sql
- like_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql