sqlglot.dialects.tsql
from __future__ import annotations

import re
import typing as t

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    min_or_least,
    parse_date_delta,
    rename_func,
)
from sqlglot.expressions import DataType
from sqlglot.helper import seq_get
from sqlglot.time import format_time
from sqlglot.tokens import TokenType

# Overrides layered on top of TSQL.time_mapping when a builder is created with
# `full_format_mapping` set (DATENAME below): these date parts map to the full
# weekday / month name directives.
FULL_FORMAT_TIME_MAPPING = {
    "weekday": "%A",
    "dw": "%A",
    "w": "%A",
    "month": "%B",
    "mm": "%B",
    "m": "%B",
}

# Normalizes the date-part spellings accepted by DATEADD / DATEDIFF to a single
# canonical unit name.
DATE_DELTA_INTERVAL = {
    "year": "year",
    "yyyy": "year",
    "yy": "year",
    "quarter": "quarter",
    "qq": "quarter",
    "q": "quarter",
    "month": "month",
    "mm": "month",
    "m": "month",
    "week": "week",
    "ww": "week",
    "wk": "week",
    "day": "day",
    "dd": "day",
    "d": "day",
}


# A FORMAT() pattern containing any of these letter runs is treated as a date
# format by _parse_format; anything else is treated as a number format.
DATE_FMT_RE = re.compile(r"([dD]{1,2})|([mM]{1,2})|([yY]{1,4})|([hH]{1,2})|([sS]{1,2})")

# N = Numeric, C=Currency
TRANSPILE_SAFE_NUMBER_FMT = {"N", "C"}


def _format_time_lambda(exp_class, full_format_mapping=None, default=None):
    """Create a function builder for `FUNC(<date part>, <value>)` style calls.

    Args:
        exp_class: expression class to instantiate.
        full_format_mapping: when truthy, FULL_FORMAT_TIME_MAPPING is merged over
            TSQL.time_mapping before the date part is translated.
        default: format used when the first argument has an empty name; the
            value True selects TSQL.time_format.

    Returns:
        A callable taking the parsed argument list and returning `exp_class`
        with `this` set to the second argument and `format` set to a string
        literal holding the translated first argument.
    """

    def _format_time(args):
        return exp_class(
            this=seq_get(args, 1),
            format=exp.Literal.string(
                format_time(
                    seq_get(args, 0).name or (TSQL.time_format if default is True else default),
                    {**TSQL.time_mapping, **FULL_FORMAT_TIME_MAPPING}
                    if full_format_mapping
                    else TSQL.time_mapping,
                )
            ),
        )

    return _format_time


def _parse_format(args):
    """Build the expression for `FORMAT(value, fmt)`.

    Produces exp.NumberToStr when `fmt` is one of TRANSPILE_SAFE_NUMBER_FMT or
    contains no date-looking letters (DATE_FMT_RE); otherwise produces
    exp.TimeToStr with the pattern translated through
    TSQL.format_time_mapping (single-character patterns) or TSQL.time_mapping.
    """
    # NOTE(review): `fmt` is dereferenced unconditionally, so this assumes the
    # call always carries a second argument - confirm against callers.
    fmt = seq_get(args, 1)
    number_fmt = fmt.name in TRANSPILE_SAFE_NUMBER_FMT or not DATE_FMT_RE.search(fmt.this)
    if number_fmt:
        return exp.NumberToStr(this=seq_get(args, 0), format=fmt)
    return exp.TimeToStr(
        this=seq_get(args, 0),
        format=exp.Literal.string(
            format_time(fmt.name, TSQL.format_time_mapping)
            if len(fmt.name) == 1
            else format_time(fmt.name, TSQL.time_mapping)
        ),
    )


def _parse_eomonth(args):
    """Build the expression for `EOMONTH(date [, month_lag])`.

    Without a lag this is exp.LastDateOfMonth over the date; with a lag the date
    is first wrapped in an exp.DateAdd of `month_lag` months.
    """
    date = seq_get(args, 0)
    month_lag = seq_get(args, 1)
    unit = DATE_DELTA_INTERVAL.get("month")

    if month_lag is None:
        return exp.LastDateOfMonth(this=date)

    # Remove month lag argument in parser as its compared with the number of arguments of the resulting class
    args.remove(month_lag)

    return exp.LastDateOfMonth(this=exp.DateAdd(this=date, expression=month_lag, unit=unit))


def generate_date_delta_with_unit_sql(self, e):
    """Render exp.DateAdd as DATEADD and anything else as DATEDIFF.

    Arguments are emitted in the order unit, `e.expression`, `e.this`.
    """
    func = "DATEADD" if isinstance(e, exp.DateAdd) else "DATEDIFF"
    return self.func(func, e.text("unit"), e.expression, e.this)


def _format_sql(self, e):
    """Render exp.NumberToStr / exp.TimeToStr as a FORMAT(...) call.

    A NumberToStr format is passed through unchanged; any other format is
    translated with TSQL.inverse_time_mapping first.
    """
    fmt = (
        e.args["format"]
        if isinstance(e, exp.NumberToStr)
        else exp.Literal.string(format_time(e.text("format"), TSQL.inverse_time_mapping))
    )
    return self.func("FORMAT", e.this, fmt)


def _string_agg_sql(self, e):
    """Render exp.GroupConcat as `STRING_AGG(value, separator)`.

    DISTINCT is reported as unsupported and stripped; an ORDER BY wrapped around
    the value becomes a trailing `WITHIN GROUP (...)` clause. The separator
    defaults to ",".
    """
    # Work on a copy because the tree is mutated below (pop()).
    e = e.copy()

    this = e.this
    distinct = e.find(exp.Distinct)
    if distinct:
        # exp.Distinct can appear below an exp.Order or an exp.GroupConcat expression
        self.unsupported("T-SQL STRING_AGG doesn't support DISTINCT.")
        this = distinct.expressions[0]
        distinct.pop()

    order = ""
    if isinstance(e.this, exp.Order):
        if e.this.this:
            this = e.this.this
            e.this.this.pop()
        order = f" WITHIN GROUP ({self.sql(e.this)[1:]})"  # Order has a leading space

    separator = e.args.get("separator") or exp.Literal.string(",")
    return f"STRING_AGG({self.format_args(this, separator)}){order}"


class TSQL(Dialect):
    """T-SQL dialect: format mappings plus its Tokenizer, Parser and Generator."""

    null_ordering = "nulls_are_small"
    time_format = "'yyyy-mm-dd hh:mm:ss'"

    # T-SQL date-part / format tokens -> strftime-style directives.
    time_mapping = {
        "year": "%Y",
        "qq": "%q",
        "q": "%q",
        "quarter": "%q",
        "dayofyear": "%j",
        "day": "%d",
        "dy": "%d",
        "y": "%Y",
        "week": "%W",
        "ww": "%W",
        "wk": "%W",
        "hour": "%h",
        "hh": "%I",
        "minute": "%M",
        "mi": "%M",
        "n": "%M",
        "second": "%S",
        "ss": "%S",
        "s": "%-S",
        "millisecond": "%f",
        "ms": "%f",
        "weekday": "%W",
        "dw": "%W",
        "month": "%m",
        "mm": "%M",
        "m": "%-M",
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "h": "%-I",
        "S": "%f",
        "yyyy": "%Y",
        "yy": "%y",
    }

    # CONVERT style code (as a string) -> strftime-style format, looked up by
    # Parser._parse_convert. Styles 10-12 are written with the same directives as
    # their counterparts 110-112 but with a two-digit year (%y), and style 13
    # mirrors 113; previously they held raw T-SQL patterns ("mm-dd-yy", "ss").
    convert_format_mapping = {
        "0": "%b %d %Y %-I:%M%p",
        "1": "%m/%d/%y",
        "2": "%y.%m.%d",
        "3": "%d/%m/%y",
        "4": "%d.%m.%y",
        "5": "%d-%m-%y",
        "6": "%d %b %y",
        "7": "%b %d, %y",
        "8": "%H:%M:%S",
        "9": "%b %d %Y %-I:%M:%S:%f%p",
        "10": "%m-%d-%y",
        "11": "%y/%m/%d",
        "12": "%y%m%d",
        "13": "%d %b %Y %H:%M:%S:%f",
        "14": "%H:%M:%S:%f",
        "20": "%Y-%m-%d %H:%M:%S",
        "21": "%Y-%m-%d %H:%M:%S.%f",
        "22": "%m/%d/%y %-I:%M:%S %p",
        "23": "%Y-%m-%d",
        "24": "%H:%M:%S",
        "25": "%Y-%m-%d %H:%M:%S.%f",
        "100": "%b %d %Y %-I:%M%p",
        "101": "%m/%d/%Y",
        "102": "%Y.%m.%d",
        "103": "%d/%m/%Y",
        "104": "%d.%m.%Y",
        "105": "%d-%m-%Y",
        "106": "%d %b %Y",
        "107": "%b %d, %Y",
        "108": "%H:%M:%S",
        "109": "%b %d %Y %-I:%M:%S:%f%p",
        "110": "%m-%d-%Y",
        "111": "%Y/%m/%d",
        "112": "%Y%m%d",
        "113": "%d %b %Y %H:%M:%S:%f",
        "114": "%H:%M:%S:%f",
        "120": "%Y-%m-%d %H:%M:%S",
        "121": "%Y-%m-%d %H:%M:%S.%f",
    }
    # Single-character FORMAT() patterns -> expanded formats (used by _parse_format).
    # not sure if complete
    format_time_mapping = {
        "y": "%B %Y",
        "d": "%m/%d/%Y",
        "H": "%-H",
        "h": "%-I",
        "s": "%Y-%m-%d %H:%M:%S",
        "D": "%A,%B,%Y",
        "f": "%A,%B,%Y %-I:%M %p",
        "F": "%A,%B,%Y %-I:%M:%S %p",
        "g": "%m/%d/%Y %-I:%M %p",
        "G": "%m/%d/%Y %-I:%M:%S %p",
        "M": "%B %-d",
        "m": "%B %-d",
        "O": "%Y-%m-%dT%H:%M:%S",
        "u": "%Y-%M-%D %H:%M:%S%z",
        "U": "%A, %B %D, %Y %H:%M:%S%z",
        "T": "%-I:%M:%S %p",
        "t": "%-I:%M",
        "Y": "%a %Y",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for T-SQL: bracket identifiers and extra keywords."""

        IDENTIFIERS = ['"', ("[", "]")]

        QUOTES = ["'", '"']

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BIT": TokenType.BOOLEAN,
            "DATETIME2": TokenType.DATETIME,
            "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
            "DECLARE": TokenType.COMMAND,
            "IMAGE": TokenType.IMAGE,
            "MONEY": TokenType.MONEY,
            "NTEXT": TokenType.TEXT,
            "NVARCHAR(MAX)": TokenType.TEXT,
            "PRINT": TokenType.COMMAND,
            "PROC": TokenType.PROCEDURE,
            "REAL": TokenType.FLOAT,
            "ROWVERSION": TokenType.ROWVERSION,
            "SMALLDATETIME": TokenType.DATETIME,
            "SMALLMONEY": TokenType.SMALLMONEY,
            "SQL_VARIANT": TokenType.VARIANT,
            "TIME": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
            "VARCHAR(MAX)": TokenType.TEXT,
            "XML": TokenType.XML,
        }

        # TSQL allows @, # to appear as a variable/identifier prefix
        SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("@")
        SINGLE_TOKENS.pop("#")

    class Parser(parser.Parser):
        """Parser extensions for T-SQL functions, CONVERT and FOR SYSTEM_TIME."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # CHARINDEX takes (substring, string, start), hence the swapped indices.
            "CHARINDEX": lambda args: exp.StrPosition(
                this=seq_get(args, 1),
                substr=seq_get(args, 0),
                position=seq_get(args, 2),
            ),
            "ISNULL": exp.Coalesce.from_arg_list,
            "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
            "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
            "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
            "DATEPART": _format_time_lambda(exp.TimeToStr),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "LEN": exp.Length.from_arg_list,
            "REPLICATE": exp.Repeat.from_arg_list,
            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
            "FORMAT": _parse_format,
            "EOMONTH": _parse_eomonth,
        }

        # Types that _parse_convert gives a default length of 30 when none is written.
        VAR_LENGTH_DATATYPES = {
            DataType.Type.NVARCHAR,
            DataType.Type.VARCHAR,
            DataType.Type.CHAR,
            DataType.Type.NCHAR,
        }

        # Tokens accepted as the table variable name in a RETURNS clause.
        RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
            TokenType.TABLE,
            *parser.Parser.TYPE_TOKENS,  # type: ignore
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,  # type: ignore
            TokenType.END: lambda self: self._parse_command(),
        }

        def _parse_system_time(self) -> t.Optional[exp.Expression]:
            """Parse an optional `FOR SYSTEM_TIME ...` clause.

            Returns:
                None when the upcoming tokens are not `FOR SYSTEM_TIME`; otherwise
                an exp.SystemTime whose `kind` is "AS OF", the text of the matched
                FROM / BETWEEN token, "CONTAINED IN" or "ALL". Any other
                continuation is reported through `self.raise_error`.
            """
            if not self._match_text_seq("FOR", "SYSTEM_TIME"):
                return None

            if self._match_text_seq("AS", "OF"):
                system_time = self.expression(
                    exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
                )
            elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
                kind = self._prev.text
                this = self._parse_bitwise()
                self._match_texts(("TO", "AND"))
                expression = self._parse_bitwise()
                system_time = self.expression(
                    exp.SystemTime, this=this, expression=expression, kind=kind
                )
            elif self._match_text_seq("CONTAINED", "IN"):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                system_time = self.expression(
                    exp.SystemTime,
                    this=seq_get(args, 0),
                    expression=seq_get(args, 1),
                    kind="CONTAINED IN",
                )
            elif self._match(TokenType.ALL):
                system_time = self.expression(exp.SystemTime, kind="ALL")
            else:
                system_time = None
                self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

            return system_time

        def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
            """Parse a table reference and attach any trailing FOR SYSTEM_TIME clause."""
            table = super()._parse_table_parts(schema=schema)
            table.set("system_time", self._parse_system_time())
            return table

        def _parse_returns(self) -> exp.Expression:
            """Parse RETURNS, storing an optional leading table variable name under "table"."""
            table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
            returns = super()._parse_returns()
            returns.set("table", table)
            return returns

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse the arguments of `CONVERT(type, value [, style])`.

            Args:
                strict: selects exp.Cast (True) or exp.TryCast (False) for casts.

            Returns:
                None if the type or the value is missing; a StrToDate / StrToTime /
                TimeToStr based expression when a style is given and the target
                type is handled below; otherwise a plain cast.

            Raises:
                ValueError: the style is not a key of TSQL.convert_format_mapping.
            """
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_conjunction()

            if not to or not this:
                return None

            # Retrieve length of datatype and override to default if not specified
            if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
                to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

            # Check whether a conversion with format is applicable
            if self._match(TokenType.COMMA):
                format_val = self._parse_number()
                format_val_name = format_val.name if format_val else ""

                if format_val_name not in TSQL.convert_format_mapping:
                    raise ValueError(
                        f"CONVERT function at T-SQL does not support format style {format_val_name}"
                    )

                format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

                # Check whether the convert entails a string to date format
                if to.this == DataType.Type.DATE:
                    return self.expression(exp.StrToDate, this=this, format=format_norm)
                # Check whether the convert entails a string to datetime format
                elif to.this == DataType.Type.DATETIME:
                    return self.expression(exp.StrToTime, this=this, format=format_norm)
                # Check whether the convert entails a date to string format
                elif to.this in self.VAR_LENGTH_DATATYPES:
                    return self.expression(
                        exp.Cast if strict else exp.TryCast,
                        to=to,
                        this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                    )
                elif to.this == DataType.Type.TEXT:
                    return self.expression(exp.TimeToStr, this=this, format=format_norm)

            # Entails a simple cast without any format requirement
            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

        def _parse_user_defined_function(
            self, kind: t.Optional[TokenType] = None
        ) -> t.Optional[exp.Expression]:
            """Parse a function / procedure header, allowing an unparenthesized parameter list.

            The base result is returned as-is for FUNCTION kinds, when it already
            is an exp.UserDefinedFunction, or when the next token is ALIAS;
            otherwise a comma-separated parameter list is parsed and wrapped.
            """
            this = super()._parse_user_defined_function(kind=kind)

            if (
                kind == TokenType.FUNCTION
                or isinstance(this, exp.UserDefinedFunction)
                or self._match(TokenType.ALIAS, advance=False)
            ):
                return this

            expressions = self._parse_csv(self._parse_function_parameter)
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
        """SQL generator overrides for T-SQL type names and function spellings."""

        LOCKING_READS_SUPPORTED = True

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.BOOLEAN: "BIT",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DATETIME: "DATETIME2",
            exp.DataType.Type.VARIANT: "SQL_VARIANT",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            exp.CurrentDate: rename_func("GETDATE"),
            exp.CurrentTimestamp: rename_func("GETDATE"),
            exp.If: rename_func("IIF"),
            exp.NumberToStr: _format_sql,
            exp.TimeToStr: _format_sql,
            exp.GroupConcat: _string_agg_sql,
            exp.Min: min_or_least,
        }

        # Drop the inherited transform; returnsproperty_sql below is defined for this dialect.
        TRANSFORMS.pop(exp.ReturnsProperty)

        def systemtime_sql(self, expression: exp.SystemTime) -> str:
            """Render exp.SystemTime as a `FOR SYSTEM_TIME ...` clause.

            Kinds "ALL", "AS OF", "FROM" and "BETWEEN" are handled explicitly; any
            other kind is rendered as `CONTAINED IN (start, end)`.
            """
            kind = expression.args["kind"]
            if kind == "ALL":
                return "FOR SYSTEM_TIME ALL"

            start = self.sql(expression, "this")
            if kind == "AS OF":
                return f"FOR SYSTEM_TIME AS OF {start}"

            end = self.sql(expression, "expression")
            if kind == "FROM":
                return f"FOR SYSTEM_TIME FROM {start} TO {end}"
            if kind == "BETWEEN":
                return f"FOR SYSTEM_TIME BETWEEN {start} AND {end}"

            return f"FOR SYSTEM_TIME CONTAINED IN ({start}, {end})"

        def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
            """Render `RETURNS [<table> ]<type>`; the table part is emitted only when set."""
            table = expression.args.get("table")
            table = f"{table} " if table else ""
            return f"RETURNS {table}{self.sql(expression, 'this')}"
def generate_date_delta_with_unit_sql(self, e):
class TSQL(Dialect):
    """T-SQL dialect: format mappings plus its Tokenizer, Parser and Generator."""

    null_ordering = "nulls_are_small"
    time_format = "'yyyy-mm-dd hh:mm:ss'"

    # T-SQL date-part / format tokens -> strftime-style directives.
    time_mapping = {
        "year": "%Y",
        "qq": "%q",
        "q": "%q",
        "quarter": "%q",
        "dayofyear": "%j",
        "day": "%d",
        "dy": "%d",
        "y": "%Y",
        "week": "%W",
        "ww": "%W",
        "wk": "%W",
        "hour": "%h",
        "hh": "%I",
        "minute": "%M",
        "mi": "%M",
        "n": "%M",
        "second": "%S",
        "ss": "%S",
        "s": "%-S",
        "millisecond": "%f",
        "ms": "%f",
        "weekday": "%W",
        "dw": "%W",
        "month": "%m",
        "mm": "%M",
        "m": "%-M",
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "h": "%-I",
        "S": "%f",
        "yyyy": "%Y",
        "yy": "%y",
    }

    # CONVERT style code (as a string) -> strftime-style format, looked up by
    # Parser._parse_convert. Styles 10-12 are written with the same directives as
    # their counterparts 110-112 but with a two-digit year (%y), and style 13
    # mirrors 113; previously they held raw T-SQL patterns ("mm-dd-yy", "ss").
    convert_format_mapping = {
        "0": "%b %d %Y %-I:%M%p",
        "1": "%m/%d/%y",
        "2": "%y.%m.%d",
        "3": "%d/%m/%y",
        "4": "%d.%m.%y",
        "5": "%d-%m-%y",
        "6": "%d %b %y",
        "7": "%b %d, %y",
        "8": "%H:%M:%S",
        "9": "%b %d %Y %-I:%M:%S:%f%p",
        "10": "%m-%d-%y",
        "11": "%y/%m/%d",
        "12": "%y%m%d",
        "13": "%d %b %Y %H:%M:%S:%f",
        "14": "%H:%M:%S:%f",
        "20": "%Y-%m-%d %H:%M:%S",
        "21": "%Y-%m-%d %H:%M:%S.%f",
        "22": "%m/%d/%y %-I:%M:%S %p",
        "23": "%Y-%m-%d",
        "24": "%H:%M:%S",
        "25": "%Y-%m-%d %H:%M:%S.%f",
        "100": "%b %d %Y %-I:%M%p",
        "101": "%m/%d/%Y",
        "102": "%Y.%m.%d",
        "103": "%d/%m/%Y",
        "104": "%d.%m.%Y",
        "105": "%d-%m-%Y",
        "106": "%d %b %Y",
        "107": "%b %d, %Y",
        "108": "%H:%M:%S",
        "109": "%b %d %Y %-I:%M:%S:%f%p",
        "110": "%m-%d-%Y",
        "111": "%Y/%m/%d",
        "112": "%Y%m%d",
        "113": "%d %b %Y %H:%M:%S:%f",
        "114": "%H:%M:%S:%f",
        "120": "%Y-%m-%d %H:%M:%S",
        "121": "%Y-%m-%d %H:%M:%S.%f",
    }
    # Single-character FORMAT() patterns -> expanded formats.
    # not sure if complete
    format_time_mapping = {
        "y": "%B %Y",
        "d": "%m/%d/%Y",
        "H": "%-H",
        "h": "%-I",
        "s": "%Y-%m-%d %H:%M:%S",
        "D": "%A,%B,%Y",
        "f": "%A,%B,%Y %-I:%M %p",
        "F": "%A,%B,%Y %-I:%M:%S %p",
        "g": "%m/%d/%Y %-I:%M %p",
        "G": "%m/%d/%Y %-I:%M:%S %p",
        "M": "%B %-d",
        "m": "%B %-d",
        "O": "%Y-%m-%dT%H:%M:%S",
        "u": "%Y-%M-%D %H:%M:%S%z",
        "U": "%A, %B %D, %Y %H:%M:%S%z",
        "T": "%-I:%M:%S %p",
        "t": "%-I:%M",
        "Y": "%a %Y",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for T-SQL: bracket identifiers and extra keywords."""

        IDENTIFIERS = ['"', ("[", "]")]

        QUOTES = ["'", '"']

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BIT": TokenType.BOOLEAN,
            "DATETIME2": TokenType.DATETIME,
            "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
            "DECLARE": TokenType.COMMAND,
            "IMAGE": TokenType.IMAGE,
            "MONEY": TokenType.MONEY,
            "NTEXT": TokenType.TEXT,
            "NVARCHAR(MAX)": TokenType.TEXT,
            "PRINT": TokenType.COMMAND,
            "PROC": TokenType.PROCEDURE,
            "REAL": TokenType.FLOAT,
            "ROWVERSION": TokenType.ROWVERSION,
            "SMALLDATETIME": TokenType.DATETIME,
            "SMALLMONEY": TokenType.SMALLMONEY,
            "SQL_VARIANT": TokenType.VARIANT,
            "TIME": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
            "VARCHAR(MAX)": TokenType.TEXT,
            "XML": TokenType.XML,
        }

        # TSQL allows @, # to appear as a variable/identifier prefix
        SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("@")
        SINGLE_TOKENS.pop("#")

    class Parser(parser.Parser):
        """Parser extensions for T-SQL functions, CONVERT and FOR SYSTEM_TIME."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # CHARINDEX takes (substring, string, start), hence the swapped indices.
            "CHARINDEX": lambda args: exp.StrPosition(
                this=seq_get(args, 1),
                substr=seq_get(args, 0),
                position=seq_get(args, 2),
            ),
            "ISNULL": exp.Coalesce.from_arg_list,
            "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
            "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
            "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
            "DATEPART": _format_time_lambda(exp.TimeToStr),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "LEN": exp.Length.from_arg_list,
            "REPLICATE": exp.Repeat.from_arg_list,
            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
            "FORMAT": _parse_format,
            "EOMONTH": _parse_eomonth,
        }

        # Types that _parse_convert gives a default length of 30 when none is written.
        VAR_LENGTH_DATATYPES = {
            DataType.Type.NVARCHAR,
            DataType.Type.VARCHAR,
            DataType.Type.CHAR,
            DataType.Type.NCHAR,
        }

        # Tokens accepted as the table variable name in a RETURNS clause.
        RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
            TokenType.TABLE,
            *parser.Parser.TYPE_TOKENS,  # type: ignore
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,  # type: ignore
            TokenType.END: lambda self: self._parse_command(),
        }

        def _parse_system_time(self) -> t.Optional[exp.Expression]:
            """Parse an optional `FOR SYSTEM_TIME ...` clause.

            Returns:
                None when the upcoming tokens are not `FOR SYSTEM_TIME`; otherwise
                an exp.SystemTime whose `kind` is "AS OF", the text of the matched
                FROM / BETWEEN token, "CONTAINED IN" or "ALL". Any other
                continuation is reported through `self.raise_error`.
            """
            if not self._match_text_seq("FOR", "SYSTEM_TIME"):
                return None

            if self._match_text_seq("AS", "OF"):
                system_time = self.expression(
                    exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
                )
            elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
                kind = self._prev.text
                this = self._parse_bitwise()
                self._match_texts(("TO", "AND"))
                expression = self._parse_bitwise()
                system_time = self.expression(
                    exp.SystemTime, this=this, expression=expression, kind=kind
                )
            elif self._match_text_seq("CONTAINED", "IN"):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                system_time = self.expression(
                    exp.SystemTime,
                    this=seq_get(args, 0),
                    expression=seq_get(args, 1),
                    kind="CONTAINED IN",
                )
            elif self._match(TokenType.ALL):
                system_time = self.expression(exp.SystemTime, kind="ALL")
            else:
                system_time = None
                self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

            return system_time

        def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
            """Parse a table reference and attach any trailing FOR SYSTEM_TIME clause."""
            table = super()._parse_table_parts(schema=schema)
            table.set("system_time", self._parse_system_time())
            return table

        def _parse_returns(self) -> exp.Expression:
            """Parse RETURNS, storing an optional leading table variable name under "table"."""
            table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
            returns = super()._parse_returns()
            returns.set("table", table)
            return returns

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse the arguments of `CONVERT(type, value [, style])`.

            Args:
                strict: selects exp.Cast (True) or exp.TryCast (False) for casts.

            Returns:
                None if the type or the value is missing; a StrToDate / StrToTime /
                TimeToStr based expression when a style is given and the target
                type is handled below; otherwise a plain cast.

            Raises:
                ValueError: the style is not a key of TSQL.convert_format_mapping.
            """
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_conjunction()

            if not to or not this:
                return None

            # Retrieve length of datatype and override to default if not specified
            if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
                to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

            # Check whether a conversion with format is applicable
            if self._match(TokenType.COMMA):
                format_val = self._parse_number()
                format_val_name = format_val.name if format_val else ""

                if format_val_name not in TSQL.convert_format_mapping:
                    raise ValueError(
                        f"CONVERT function at T-SQL does not support format style {format_val_name}"
                    )

                format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

                # Check whether the convert entails a string to date format
                if to.this == DataType.Type.DATE:
                    return self.expression(exp.StrToDate, this=this, format=format_norm)
                # Check whether the convert entails a string to datetime format
                elif to.this == DataType.Type.DATETIME:
                    return self.expression(exp.StrToTime, this=this, format=format_norm)
                # Check whether the convert entails a date to string format
                elif to.this in self.VAR_LENGTH_DATATYPES:
                    return self.expression(
                        exp.Cast if strict else exp.TryCast,
                        to=to,
                        this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                    )
                elif to.this == DataType.Type.TEXT:
                    return self.expression(exp.TimeToStr, this=this, format=format_norm)

            # Entails a simple cast without any format requirement
            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

        def _parse_user_defined_function(
            self, kind: t.Optional[TokenType] = None
        ) -> t.Optional[exp.Expression]:
            """Parse a function / procedure header, allowing an unparenthesized parameter list.

            The base result is returned as-is for FUNCTION kinds, when it already
            is an exp.UserDefinedFunction, or when the next token is ALIAS;
            otherwise a comma-separated parameter list is parsed and wrapped.
            """
            this = super()._parse_user_defined_function(kind=kind)

            if (
                kind == TokenType.FUNCTION
                or isinstance(this, exp.UserDefinedFunction)
                or self._match(TokenType.ALIAS, advance=False)
            ):
                return this

            expressions = self._parse_csv(self._parse_function_parameter)
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
        """SQL generator overrides for T-SQL type names and function spellings."""

        LOCKING_READS_SUPPORTED = True

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.BOOLEAN: "BIT",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DATETIME: "DATETIME2",
            exp.DataType.Type.VARIANT: "SQL_VARIANT",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            exp.CurrentDate: rename_func("GETDATE"),
            exp.CurrentTimestamp: rename_func("GETDATE"),
            exp.If: rename_func("IIF"),
            exp.NumberToStr: _format_sql,
            exp.TimeToStr: _format_sql,
            exp.GroupConcat: _string_agg_sql,
            exp.Min: min_or_least,
        }

        # Drop the inherited transform; returnsproperty_sql below is defined for this dialect.
        TRANSFORMS.pop(exp.ReturnsProperty)

        def systemtime_sql(self, expression: exp.SystemTime) -> str:
            """Render exp.SystemTime as a `FOR SYSTEM_TIME ...` clause.

            Kinds "ALL", "AS OF", "FROM" and "BETWEEN" are handled explicitly; any
            other kind is rendered as `CONTAINED IN (start, end)`.
            """
            kind = expression.args["kind"]
            if kind == "ALL":
                return "FOR SYSTEM_TIME ALL"

            start = self.sql(expression, "this")
            if kind == "AS OF":
                return f"FOR SYSTEM_TIME AS OF {start}"

            end = self.sql(expression, "expression")
            if kind == "FROM":
                return f"FOR SYSTEM_TIME FROM {start} TO {end}"
            if kind == "BETWEEN":
                return f"FOR SYSTEM_TIME BETWEEN {start} AND {end}"

            return f"FOR SYSTEM_TIME CONTAINED IN ({start}, {end})"

        def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
            """Render `RETURNS [<table> ]<type>`; the table part is emitted only when set."""
            table = expression.args.get("table")
            table = f"{table} " if table else ""
            return f"RETURNS {table}{self.sql(expression, 'this')}"
Inherited Members
class Tokenizer(tokens.Tokenizer):
    """Tokenizer configuration for the T-SQL dialect.

    Declares the identifier and string delimiters, extends the inherited
    keyword table, and removes "@" and "#" from the single-character tokens.
    """

    # Identifiers may be wrapped in double quotes or in square brackets.
    IDENTIFIERS = ['"', ("[", "]")]

    QUOTES = ["'", '"']

    # Inherited keywords plus the additional spellings listed here.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BIT": TokenType.BOOLEAN,
        "DATETIME2": TokenType.DATETIME,
        "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
        "DECLARE": TokenType.COMMAND,
        "IMAGE": TokenType.IMAGE,
        "MONEY": TokenType.MONEY,
        "NTEXT": TokenType.TEXT,
        "NVARCHAR(MAX)": TokenType.TEXT,
        "PRINT": TokenType.COMMAND,
        "PROC": TokenType.PROCEDURE,
        "REAL": TokenType.FLOAT,
        "ROWVERSION": TokenType.ROWVERSION,
        "SMALLDATETIME": TokenType.DATETIME,
        "SMALLMONEY": TokenType.SMALLMONEY,
        "SQL_VARIANT": TokenType.VARIANT,
        "TIME": TokenType.TIMESTAMP,
        "TOP": TokenType.TOP,
        "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
        "VARCHAR(MAX)": TokenType.TEXT,
        "XML": TokenType.XML,
    }

    # TSQL allows @, # to appear as a variable/identifier prefix, so neither
    # character may be tokenized on its own. A private copy is edited so the
    # base class table stays untouched.
    SINGLE_TOKENS = {**tokens.Tokenizer.SINGLE_TOKENS}
    del SINGLE_TOKENS["@"]
    del SINGLE_TOKENS["#"]
Inherited Members
class Parser(parser.Parser):
    """Parser for the T-SQL dialect.

    Adds T-SQL function builders, the `FOR SYSTEM_TIME` table clause, the
    `CONVERT(type, value [, style])` form and `RETURNS <table variable>`.
    """

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,  # type: ignore
        # Argument order is (substring, string, start position).
        "CHARINDEX": lambda args: exp.StrPosition(
            this=seq_get(args, 1),
            substr=seq_get(args, 0),
            position=seq_get(args, 2),
        ),
        "ISNULL": exp.Coalesce.from_arg_list,
        "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
        "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
        "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
        "DATEPART": _format_time_lambda(exp.TimeToStr),
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
        "IIF": exp.If.from_arg_list,
        "LEN": exp.Length.from_arg_list,
        "REPLICATE": exp.Repeat.from_arg_list,
        "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
        "FORMAT": _parse_format,
        "EOMONTH": _parse_eomonth,
    }

    # Character types that receive a default length of 30 in _parse_convert.
    VAR_LENGTH_DATATYPES = {
        DataType.Type.NVARCHAR,
        DataType.Type.VARCHAR,
        DataType.Type.CHAR,
        DataType.Type.NCHAR,
    }

    # Identifier tokens usable as the table variable of a RETURNS clause.
    RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
        TokenType.TABLE,
        *parser.Parser.TYPE_TOKENS,  # type: ignore
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,  # type: ignore
        TokenType.END: lambda self: self._parse_command(),
    }

    def _parse_system_time(self) -> t.Optional[exp.Expression]:
        """Parse `FOR SYSTEM_TIME {AS OF x | FROM a TO b | BETWEEN a AND b | CONTAINED IN (a, b) | ALL}`.

        Returns None when the clause is absent. An unrecognized continuation is
        reported through `self.raise_error` and also yields None.
        """
        if not self._match_text_seq("FOR", "SYSTEM_TIME"):
            return None

        if self._match_text_seq("AS", "OF"):
            return self.expression(exp.SystemTime, this=self._parse_bitwise(), kind="AS OF")

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            # The matched keyword's text doubles as the clause kind.
            range_kind = self._prev.text
            lower = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            upper = self._parse_bitwise()
            return self.expression(
                exp.SystemTime, this=lower, expression=upper, kind=range_kind
            )

        if self._match_text_seq("CONTAINED", "IN"):
            bounds = self._parse_wrapped_csv(self._parse_bitwise)
            return self.expression(
                exp.SystemTime,
                this=seq_get(bounds, 0),
                expression=seq_get(bounds, 1),
                kind="CONTAINED IN",
            )

        if self._match(TokenType.ALL):
            return self.expression(exp.SystemTime, kind="ALL")

        self.raise_error("Unable to parse FOR SYSTEM_TIME clause")
        return None

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a table name, then record an optional FOR SYSTEM_TIME clause on it."""
        parts = super()._parse_table_parts(schema=schema)
        clause = self._parse_system_time()
        parts.set("system_time", clause)
        return parts

    def _parse_returns(self) -> exp.Expression:
        """Parse RETURNS, keeping an optional leading table variable under "table"."""
        table_var = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
        node = super()._parse_returns()
        node.set("table", table_var)
        return node

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse the body of `CONVERT(type, value [, style])`.

        Args:
            strict: use exp.Cast when True, exp.TryCast otherwise.

        Returns:
            None if either the type or the value could not be parsed; otherwise
            the format-aware expression for the target type when a style is
            present, or a plain cast.

        Raises:
            ValueError: the style is missing from TSQL.convert_format_mapping.
        """
        target = self._parse_types()
        self._match(TokenType.COMMA)
        value = self._parse_conjunction()

        if not (target and value):
            return None

        cast_class = exp.Cast if strict else exp.TryCast

        # Variable-length types written without a length get a length of 30.
        if seq_get(target.expressions, 0) is None and target.this in self.VAR_LENGTH_DATATYPES:
            target = exp.DataType.build(
                target.this, expressions=[exp.Literal.number(30)], nested=False
            )

        # A third argument selects a date/time style.
        if self._match(TokenType.COMMA):
            style = self._parse_number()
            style_name = style.name if style else ""

            if style_name not in TSQL.convert_format_mapping:
                raise ValueError(
                    f"CONVERT function at T-SQL does not support format style {style_name}"
                )

            style_format = exp.Literal.string(TSQL.convert_format_mapping[style_name])

            if target.this == DataType.Type.DATE:
                # String -> date.
                return self.expression(exp.StrToDate, this=value, format=style_format)
            if target.this == DataType.Type.DATETIME:
                # String -> datetime.
                return self.expression(exp.StrToTime, this=value, format=style_format)
            if target.this in self.VAR_LENGTH_DATATYPES:
                # Date -> sized string: format first, then cast to the target.
                return self.expression(
                    cast_class,
                    to=target,
                    this=self.expression(exp.TimeToStr, this=value, format=style_format),
                )
            if target.this == DataType.Type.TEXT:
                return self.expression(exp.TimeToStr, this=value, format=style_format)

        # No style, or a target type the style does not apply to: plain cast.
        return self.expression(cast_class, this=value, to=target)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a routine header whose parameters may be listed without parentheses.

        The inherited result is returned unchanged for FUNCTION kinds, when it
        already is an exp.UserDefinedFunction, or when ALIAS is the next token.
        """
        header = super()._parse_user_defined_function(kind=kind)

        header_is_complete = (
            kind == TokenType.FUNCTION
            or isinstance(header, exp.UserDefinedFunction)
            or self._match(TokenType.ALIAS, advance=False)
        )
        if header_is_complete:
            return header

        params = self._parse_csv(self._parse_function_parameter)
        return self.expression(exp.UserDefinedFunction, this=header, expressions=params)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs. ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    """SQL generator for the T-SQL dialect.

    Overrides type names and function renderings, and adds rendering for the
    `FOR SYSTEM_TIME` and `RETURNS` clauses.
    """

    LOCKING_READS_SUPPORTED = True

    # Inherited type names, with these types renamed for output.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,  # type: ignore
        exp.DataType.Type.BOOLEAN: "BIT",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DATETIME: "DATETIME2",
        exp.DataType.Type.VARIANT: "SQL_VARIANT",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,  # type: ignore
        exp.DateAdd: generate_date_delta_with_unit_sql,
        exp.DateDiff: generate_date_delta_with_unit_sql,
        exp.CurrentDate: rename_func("GETDATE"),
        exp.CurrentTimestamp: rename_func("GETDATE"),
        exp.If: rename_func("IIF"),
        exp.NumberToStr: _format_sql,
        exp.TimeToStr: _format_sql,
        exp.GroupConcat: _string_agg_sql,
        exp.Min: min_or_least,
    }

    # The inherited ReturnsProperty transform is removed; this class defines
    # returnsproperty_sql itself.
    TRANSFORMS.pop(exp.ReturnsProperty)

    def systemtime_sql(self, expression: exp.SystemTime) -> str:
        """Return the `FOR SYSTEM_TIME ...` clause for `expression`.

        The "kind" argument selects the form: "ALL", "AS OF", "FROM" or
        "BETWEEN"; every other kind is written as `CONTAINED IN (...)`.
        """
        clause_kind = expression.args["kind"]
        if clause_kind == "ALL":
            return "FOR SYSTEM_TIME ALL"

        lower = self.sql(expression, "this")
        if clause_kind == "AS OF":
            return f"FOR SYSTEM_TIME AS OF {lower}"

        # The remaining forms all carry a second bound.
        upper = self.sql(expression, "expression")
        if clause_kind == "FROM":
            rendered = f"FOR SYSTEM_TIME FROM {lower} TO {upper}"
        elif clause_kind == "BETWEEN":
            rendered = f"FOR SYSTEM_TIME BETWEEN {lower} AND {upper}"
        else:
            rendered = f"FOR SYSTEM_TIME CONTAINED IN ({lower}, {upper})"
        return rendered

    def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
        """Return `RETURNS [<table> ]<type>`, writing the table part only when set."""
        table_var = expression.args.get("table")
        table_prefix = f"{table_var} " if table_var else ""
        return f"RETURNS {table_prefix}{self.sql(expression, 'this')}"
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): a dictionary of custom time mappings in which each key is a Python time format and each value is the corresponding target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- identify (bool): if set to True all identifiers will be delimited by the corresponding character.
- normalize (bool): if set to True all identifiers will be lowercased.
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): how to normalize function names: "upper", "lower", or None. Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def systemtime_sql(self, expression: exp.SystemTime) -> str:
    """Render a ``FOR SYSTEM_TIME`` clause.

    The ``kind`` arg selects the form: ``ALL``, ``AS OF``, ``FROM``,
    ``BETWEEN``; any other value is rendered as ``CONTAINED IN``.
    """
    clause = expression.args["kind"]
    if clause == "ALL":
        return "FOR SYSTEM_TIME ALL"

    # The bounds are rendered lazily, only once a form actually needs them.
    lower_bound = self.sql(expression, "this")
    if clause == "AS OF":
        return f"FOR SYSTEM_TIME AS OF {lower_bound}"

    upper_bound = self.sql(expression, "expression")
    if clause == "FROM":
        rendered = f"FOR SYSTEM_TIME FROM {lower_bound} TO {upper_bound}"
    elif clause == "BETWEEN":
        rendered = f"FOR SYSTEM_TIME BETWEEN {lower_bound} AND {upper_bound}"
    else:
        rendered = f"FOR SYSTEM_TIME CONTAINED IN ({lower_bound}, {upper_bound})"
    return rendered
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- afterjournalproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- select_sql
- schema_sql
- star_sql
- structkwarg_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- window_spec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- floatdiv_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- is_sql
- like_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql