sqlglot.dialects.tsql
from __future__ import annotations

import re
import typing as t

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import Dialect, parse_date_delta, rename_func
from sqlglot.expressions import DataType
from sqlglot.helper import seq_get
from sqlglot.time import format_time
from sqlglot.tokens import TokenType

# Date-part names that DATENAME renders as full names (weekday / month name).
FULL_FORMAT_TIME_MAPPING = {
    "weekday": "%A",
    "dw": "%A",
    "w": "%A",
    "month": "%B",
    "mm": "%B",
    "m": "%B",
}

# Date-part spellings accepted by DATEADD / DATEDIFF, mapped to a canonical unit.
DATE_DELTA_INTERVAL = {
    "year": "year",
    "yyyy": "year",
    "yy": "year",
    "quarter": "quarter",
    "qq": "quarter",
    "q": "quarter",
    "month": "month",
    "mm": "month",
    "m": "month",
    "week": "week",
    "ww": "week",
    "wk": "week",
    "day": "day",
    "dd": "day",
    "d": "day",
}


# Matches date/time placeholders; used to tell date formats from number formats.
DATE_FMT_RE = re.compile(r"([dD]{1,2})|([mM]{1,2})|([yY]{1,4})|([hH]{1,2})|([sS]{1,2})")

# N = Numeric, C=Currency
TRANSPILE_SAFE_NUMBER_FMT = {"N", "C"}


def _format_time_lambda(exp_class, full_format_mapping=None, default=None):
    """Return a function-argument parser that builds ``exp_class`` with a time format.

    The returned callable reads the format from argument 0 and the value from
    argument 1. When the format name is empty it falls back to ``default``
    (``TSQL.time_format`` if ``default is True``). If ``full_format_mapping``
    is truthy, ``FULL_FORMAT_TIME_MAPPING`` overrides ``TSQL.time_mapping``.
    """

    def _format_time(args):
        return exp_class(
            this=seq_get(args, 1),
            format=exp.Literal.string(
                format_time(
                    seq_get(args, 0).name or (TSQL.time_format if default is True else default),
                    {**TSQL.time_mapping, **FULL_FORMAT_TIME_MAPPING}
                    if full_format_mapping
                    else TSQL.time_mapping,
                )
            ),
        )

    return _format_time


def _parse_format(args):
    """Parse FORMAT(value, fmt) into ``NumberToStr`` or ``TimeToStr``.

    The format is treated as numeric when its name is in
    ``TRANSPILE_SAFE_NUMBER_FMT`` or when ``DATE_FMT_RE`` finds no date
    placeholder in it; otherwise it is converted as a time format.
    """
    fmt = seq_get(args, 1)
    number_fmt = fmt.name in TRANSPILE_SAFE_NUMBER_FMT or not DATE_FMT_RE.search(fmt.this)
    if number_fmt:
        return exp.NumberToStr(this=seq_get(args, 0), format=fmt)
    return exp.TimeToStr(
        this=seq_get(args, 0),
        format=exp.Literal.string(
            # Single-character formats are looked up in format_time_mapping,
            # longer ones are translated with time_mapping.
            format_time(fmt.name, TSQL.format_time_mapping)
            if len(fmt.name) == 1
            else format_time(fmt.name, TSQL.time_mapping)
        ),
    )


def _parse_eomonth(args):
    """Parse EOMONTH(date[, month_lag]) into ``LastDateOfMonth``.

    With a month lag, the date is first wrapped in a ``DateAdd`` of that many
    months. Note that ``args`` is mutated: the lag argument is removed.
    """
    date = seq_get(args, 0)
    month_lag = seq_get(args, 1)
    unit = DATE_DELTA_INTERVAL.get("month")

    if month_lag is None:
        return exp.LastDateOfMonth(this=date)

    # Remove month lag argument in parser as it is compared with the number of
    # arguments of the resulting class
    args.remove(month_lag)

    return exp.LastDateOfMonth(this=exp.DateAdd(this=date, expression=month_lag, unit=unit))


def generate_date_delta_with_unit_sql(self, e):
    """Generate DATEADD / DATEDIFF as ``FUNC(unit, expression, this)``."""
    func = "DATEADD" if isinstance(e, exp.DateAdd) else "DATEDIFF"
    return self.func(func, e.text("unit"), e.expression, e.this)


def _format_sql(self, e):
    """Generate FORMAT(value, fmt) for ``NumberToStr`` and ``TimeToStr``.

    Number formats are emitted unchanged; time formats are translated with
    ``TSQL.inverse_time_mapping``.
    """
    fmt = (
        e.args["format"]
        if isinstance(e, exp.NumberToStr)
        else exp.Literal.string(format_time(e.text("format"), TSQL.inverse_time_mapping))
    )
    return self.func("FORMAT", e.this, fmt)


def _string_agg_sql(self, e):
    """Generate STRING_AGG(...) [WITHIN GROUP (ORDER BY ...)] for ``GroupConcat``.

    Works on a copy of ``e``. A DISTINCT is reported as unsupported and
    dropped. The separator defaults to ``','``.
    """
    e = e.copy()

    this = e.this
    distinct = e.find(exp.Distinct)
    if distinct:
        # exp.Distinct can appear below an exp.Order or an exp.GroupConcat expression
        self.unsupported("T-SQL STRING_AGG doesn't support DISTINCT.")
        this = distinct.expressions[0]
        distinct.pop()

    order = ""
    if isinstance(e.this, exp.Order):
        if e.this.this:
            # Detach the ordered expression so only the ORDER BY part is rendered below
            this = e.this.this
            e.this.this.pop()
        order = f" WITHIN GROUP ({self.sql(e.this)[1:]})"  # Order has a leading space

    separator = e.args.get("separator") or exp.Literal.string(",")
    return f"STRING_AGG({self.format_args(this, separator)}){order}"


class TSQL(Dialect):
    """T-SQL dialect: time-format tables plus tokenizer, parser and generator overrides."""

    null_ordering = "nulls_are_small"
    time_format = "'yyyy-mm-dd hh:mm:ss'"

    # T-SQL date-part / format tokens -> strftime-style directives.
    time_mapping = {
        "year": "%Y",
        "qq": "%q",
        "q": "%q",
        "quarter": "%q",
        "dayofyear": "%j",
        "day": "%d",
        "dy": "%d",
        "y": "%Y",
        "week": "%W",
        "ww": "%W",
        "wk": "%W",
        "hour": "%h",
        "hh": "%I",
        "minute": "%M",
        "mi": "%M",
        "n": "%M",
        "second": "%S",
        "ss": "%S",
        "s": "%-S",
        "millisecond": "%f",
        "ms": "%f",
        "weekday": "%W",
        "dw": "%W",
        "month": "%m",
        "mm": "%M",
        "m": "%-M",
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "h": "%-I",
        "S": "%f",
        "yyyy": "%Y",
        "yy": "%y",
    }

    # CONVERT style number -> strftime-style format.
    convert_format_mapping = {
        "0": "%b %d %Y %-I:%M%p",
        "1": "%m/%d/%y",
        "2": "%y.%m.%d",
        "3": "%d/%m/%y",
        "4": "%d.%m.%y",
        "5": "%d-%m-%y",
        "6": "%d %b %y",
        "7": "%b %d, %y",
        "8": "%H:%M:%S",
        "9": "%b %d %Y %-I:%M:%S:%f%p",
        # Styles 10-13 use strftime directives like every other entry; raw
        # T-SQL patterns here would be emitted verbatim as the format.
        "10": "%m-%d-%y",
        "11": "%y/%m/%d",
        "12": "%y%m%d",
        "13": "%d %b %Y %H:%M:%S:%f",
        "14": "%H:%M:%S:%f",
        "20": "%Y-%m-%d %H:%M:%S",
        "21": "%Y-%m-%d %H:%M:%S.%f",
        "22": "%m/%d/%y %-I:%M:%S %p",
        "23": "%Y-%m-%d",
        "24": "%H:%M:%S",
        "25": "%Y-%m-%d %H:%M:%S.%f",
        "100": "%b %d %Y %-I:%M%p",
        "101": "%m/%d/%Y",
        "102": "%Y.%m.%d",
        "103": "%d/%m/%Y",
        "104": "%d.%m.%Y",
        "105": "%d-%m-%Y",
        "106": "%d %b %Y",
        "107": "%b %d, %Y",
        "108": "%H:%M:%S",
        "109": "%b %d %Y %-I:%M:%S:%f%p",
        "110": "%m-%d-%Y",
        "111": "%Y/%m/%d",
        "112": "%Y%m%d",
        "113": "%d %b %Y %H:%M:%S:%f",
        "114": "%H:%M:%S:%f",
        "120": "%Y-%m-%d %H:%M:%S",
        "121": "%Y-%m-%d %H:%M:%S.%f",
    }
    # Single-character FORMAT specifiers -> strftime-style format.
    # not sure if complete
    format_time_mapping = {
        "y": "%B %Y",
        "d": "%m/%d/%Y",
        "H": "%-H",
        "h": "%-I",
        "s": "%Y-%m-%d %H:%M:%S",
        "D": "%A,%B,%Y",
        "f": "%A,%B,%Y %-I:%M %p",
        "F": "%A,%B,%Y %-I:%M:%S %p",
        "g": "%m/%d/%Y %-I:%M %p",
        "G": "%m/%d/%Y %-I:%M:%S %p",
        "M": "%B %-d",
        "m": "%B %-d",
        "O": "%Y-%m-%dT%H:%M:%S",
        "u": "%Y-%M-%D %H:%M:%S%z",
        "U": "%A, %B %D, %Y %H:%M:%S%z",
        "T": "%-I:%M:%S %p",
        "t": "%-I:%M",
        "Y": "%a %Y",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for T-SQL."""

        IDENTIFIERS = ['"', ("[", "]")]

        QUOTES = ["'", '"']

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BIT": TokenType.BOOLEAN,
            "DATETIME2": TokenType.DATETIME,
            "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
            "DECLARE": TokenType.COMMAND,
            "IMAGE": TokenType.IMAGE,
            "MONEY": TokenType.MONEY,
            "NTEXT": TokenType.TEXT,
            "NVARCHAR(MAX)": TokenType.TEXT,
            "PRINT": TokenType.COMMAND,
            "PROC": TokenType.PROCEDURE,
            "REAL": TokenType.FLOAT,
            "ROWVERSION": TokenType.ROWVERSION,
            "SMALLDATETIME": TokenType.DATETIME,
            "SMALLMONEY": TokenType.SMALLMONEY,
            "SQL_VARIANT": TokenType.VARIANT,
            "TIME": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
            "VARCHAR(MAX)": TokenType.TEXT,
            "XML": TokenType.XML,
        }

        # TSQL allows @, # to appear as a variable/identifier prefix
        SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("@")
        SINGLE_TOKENS.pop("#")

    class Parser(parser.Parser):
        """Parser overrides for T-SQL functions, CONVERT, RETURNS and FOR SYSTEM_TIME."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # Argument 0 becomes `substr` and argument 1 becomes `this`.
            "CHARINDEX": lambda args: exp.StrPosition(
                this=seq_get(args, 1),
                substr=seq_get(args, 0),
                position=seq_get(args, 2),
            ),
            "ISNULL": exp.Coalesce.from_arg_list,
            "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
            "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
            "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
            "DATEPART": _format_time_lambda(exp.TimeToStr),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "LEN": exp.Length.from_arg_list,
            "REPLICATE": exp.Repeat.from_arg_list,
            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
            "FORMAT": _parse_format,
            "EOMONTH": _parse_eomonth,
        }

        # Types that get a default length of 30 in CONVERT when none is given.
        VAR_LENGTH_DATATYPES = {
            DataType.Type.NVARCHAR,
            DataType.Type.VARCHAR,
            DataType.Type.CHAR,
            DataType.Type.NCHAR,
        }

        # Tokens accepted as the table variable name in a RETURNS clause.
        RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
            TokenType.TABLE,
            *parser.Parser.TYPE_TOKENS,  # type: ignore
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,  # type: ignore
            TokenType.END: lambda self: self._parse_command(),
        }

        def _parse_system_time(self) -> t.Optional[exp.Expression]:
            """Parse an optional FOR SYSTEM_TIME clause into ``exp.SystemTime``.

            Returns None when the clause is absent. An unrecognized clause body
            is reported through ``raise_error``.
            """
            if not self._match_text_seq("FOR", "SYSTEM_TIME"):
                return None

            if self._match_text_seq("AS", "OF"):
                system_time = self.expression(
                    exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
                )
            elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
                # Upper-case the keyword: the generator compares `kind` against
                # the literals "FROM" / "BETWEEN".
                kind = self._prev.text.upper()
                this = self._parse_bitwise()
                self._match_texts(("TO", "AND"))
                expression = self._parse_bitwise()
                system_time = self.expression(
                    exp.SystemTime, this=this, expression=expression, kind=kind
                )
            elif self._match_text_seq("CONTAINED", "IN"):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                system_time = self.expression(
                    exp.SystemTime,
                    this=seq_get(args, 0),
                    expression=seq_get(args, 1),
                    kind="CONTAINED IN",
                )
            elif self._match(TokenType.ALL):
                system_time = self.expression(exp.SystemTime, kind="ALL")
            else:
                system_time = None
                self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

            return system_time

        def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
            """Parse table parts, then attach any FOR SYSTEM_TIME clause as ``system_time``."""
            table = super()._parse_table_parts(schema=schema)
            table.set("system_time", self._parse_system_time())
            return table

        def _parse_returns(self) -> exp.Expression:
            """Parse RETURNS, capturing an optional leading table variable as ``table``."""
            table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
            returns = super()._parse_returns()
            returns.set("table", table)
            return returns

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse CONVERT(type, value[, style]).

            Returns None if the type or the value is missing. With a style
            number the result depends on the target type (StrToDate,
            StrToTime, or TimeToStr, optionally wrapped in a cast); otherwise
            a plain Cast / TryCast is produced.

            Raises:
                ValueError: if the style is not in ``TSQL.convert_format_mapping``.
            """
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_conjunction()

            if not to or not this:
                return None

            # Retrieve length of datatype and override to default if not specified
            if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
                to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

            # Check whether a conversion with format is applicable
            if self._match(TokenType.COMMA):
                format_val = self._parse_number()
                format_val_name = format_val.name if format_val else ""

                if format_val_name not in TSQL.convert_format_mapping:
                    raise ValueError(
                        f"CONVERT function at T-SQL does not support format style {format_val_name}"
                    )

                format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

                # Check whether the convert entails a string to date format
                if to.this == DataType.Type.DATE:
                    return self.expression(exp.StrToDate, this=this, format=format_norm)
                # Check whether the convert entails a string to datetime format
                elif to.this == DataType.Type.DATETIME:
                    return self.expression(exp.StrToTime, this=this, format=format_norm)
                # Check whether the convert entails a date to string format
                elif to.this in self.VAR_LENGTH_DATATYPES:
                    return self.expression(
                        exp.Cast if strict else exp.TryCast,
                        to=to,
                        this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                    )
                elif to.this == DataType.Type.TEXT:
                    return self.expression(exp.TimeToStr, this=this, format=format_norm)

            # Entails a simple cast without any format requirement
            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

        def _parse_user_defined_function(
            self, kind: t.Optional[TokenType] = None
        ) -> t.Optional[exp.Expression]:
            """Parse a user-defined function, allowing unparenthesized parameters.

            The base result is returned as-is for functions, for results that
            are already a ``UserDefinedFunction``, or when the next token is an
            alias; otherwise a comma-separated parameter list is parsed.
            """
            this = super()._parse_user_defined_function(kind=kind)

            if (
                kind == TokenType.FUNCTION
                or isinstance(this, exp.UserDefinedFunction)
                or self._match(TokenType.ALIAS, advance=False)
            ):
                return this

            expressions = self._parse_csv(self._parse_function_parameter)
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
        """SQL generator overrides for T-SQL."""

        LOCKING_READS_SUPPORTED = True

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.BOOLEAN: "BIT",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DATETIME: "DATETIME2",
            exp.DataType.Type.VARIANT: "SQL_VARIANT",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            exp.CurrentDate: rename_func("GETDATE"),
            exp.CurrentTimestamp: rename_func("GETDATE"),
            exp.If: rename_func("IIF"),
            exp.NumberToStr: _format_sql,
            exp.TimeToStr: _format_sql,
            exp.GroupConcat: _string_agg_sql,
        }

        # Drop the inherited transform; returnsproperty_sql below handles ReturnsProperty.
        TRANSFORMS.pop(exp.ReturnsProperty)

        def systemtime_sql(self, expression: exp.SystemTime) -> str:
            """Render a FOR SYSTEM_TIME clause according to ``kind``.

            Any kind other than ALL, AS OF, FROM or BETWEEN is rendered as
            CONTAINED IN.
            """
            kind = expression.args["kind"]
            if kind == "ALL":
                return "FOR SYSTEM_TIME ALL"

            start = self.sql(expression, "this")
            if kind == "AS OF":
                return f"FOR SYSTEM_TIME AS OF {start}"

            end = self.sql(expression, "expression")
            if kind == "FROM":
                return f"FOR SYSTEM_TIME FROM {start} TO {end}"
            if kind == "BETWEEN":
                return f"FOR SYSTEM_TIME BETWEEN {start} AND {end}"

            return f"FOR SYSTEM_TIME CONTAINED IN ({start}, {end})"

        def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
            """Render RETURNS, prefixing the optional ``table`` argument when present."""
            table = expression.args.get("table")
            table = f"{table} " if table else ""
            return f"RETURNS {table}{self.sql(expression, 'this')}"
def
generate_date_delta_with_unit_sql(self, e):
class TSQL(Dialect):
    """T-SQL dialect: time-format tables plus tokenizer, parser and generator overrides."""

    null_ordering = "nulls_are_small"
    time_format = "'yyyy-mm-dd hh:mm:ss'"

    # T-SQL date-part / format tokens -> strftime-style directives.
    time_mapping = {
        "year": "%Y",
        "qq": "%q",
        "q": "%q",
        "quarter": "%q",
        "dayofyear": "%j",
        "day": "%d",
        "dy": "%d",
        "y": "%Y",
        "week": "%W",
        "ww": "%W",
        "wk": "%W",
        "hour": "%h",
        "hh": "%I",
        "minute": "%M",
        "mi": "%M",
        "n": "%M",
        "second": "%S",
        "ss": "%S",
        "s": "%-S",
        "millisecond": "%f",
        "ms": "%f",
        "weekday": "%W",
        "dw": "%W",
        "month": "%m",
        "mm": "%M",
        "m": "%-M",
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "h": "%-I",
        "S": "%f",
        "yyyy": "%Y",
        "yy": "%y",
    }

    # CONVERT style number -> strftime-style format.
    convert_format_mapping = {
        "0": "%b %d %Y %-I:%M%p",
        "1": "%m/%d/%y",
        "2": "%y.%m.%d",
        "3": "%d/%m/%y",
        "4": "%d.%m.%y",
        "5": "%d-%m-%y",
        "6": "%d %b %y",
        "7": "%b %d, %y",
        "8": "%H:%M:%S",
        "9": "%b %d %Y %-I:%M:%S:%f%p",
        # Styles 10-13 use strftime directives like every other entry; raw
        # T-SQL patterns here would be emitted verbatim as the format.
        "10": "%m-%d-%y",
        "11": "%y/%m/%d",
        "12": "%y%m%d",
        "13": "%d %b %Y %H:%M:%S:%f",
        "14": "%H:%M:%S:%f",
        "20": "%Y-%m-%d %H:%M:%S",
        "21": "%Y-%m-%d %H:%M:%S.%f",
        "22": "%m/%d/%y %-I:%M:%S %p",
        "23": "%Y-%m-%d",
        "24": "%H:%M:%S",
        "25": "%Y-%m-%d %H:%M:%S.%f",
        "100": "%b %d %Y %-I:%M%p",
        "101": "%m/%d/%Y",
        "102": "%Y.%m.%d",
        "103": "%d/%m/%Y",
        "104": "%d.%m.%Y",
        "105": "%d-%m-%Y",
        "106": "%d %b %Y",
        "107": "%b %d, %Y",
        "108": "%H:%M:%S",
        "109": "%b %d %Y %-I:%M:%S:%f%p",
        "110": "%m-%d-%Y",
        "111": "%Y/%m/%d",
        "112": "%Y%m%d",
        "113": "%d %b %Y %H:%M:%S:%f",
        "114": "%H:%M:%S:%f",
        "120": "%Y-%m-%d %H:%M:%S",
        "121": "%Y-%m-%d %H:%M:%S.%f",
    }
    # Single-character FORMAT specifiers -> strftime-style format.
    # not sure if complete
    format_time_mapping = {
        "y": "%B %Y",
        "d": "%m/%d/%Y",
        "H": "%-H",
        "h": "%-I",
        "s": "%Y-%m-%d %H:%M:%S",
        "D": "%A,%B,%Y",
        "f": "%A,%B,%Y %-I:%M %p",
        "F": "%A,%B,%Y %-I:%M:%S %p",
        "g": "%m/%d/%Y %-I:%M %p",
        "G": "%m/%d/%Y %-I:%M:%S %p",
        "M": "%B %-d",
        "m": "%B %-d",
        "O": "%Y-%m-%dT%H:%M:%S",
        "u": "%Y-%M-%D %H:%M:%S%z",
        "U": "%A, %B %D, %Y %H:%M:%S%z",
        "T": "%-I:%M:%S %p",
        "t": "%-I:%M",
        "Y": "%a %Y",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for T-SQL."""

        IDENTIFIERS = ['"', ("[", "]")]

        QUOTES = ["'", '"']

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BIT": TokenType.BOOLEAN,
            "DATETIME2": TokenType.DATETIME,
            "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
            "DECLARE": TokenType.COMMAND,
            "IMAGE": TokenType.IMAGE,
            "MONEY": TokenType.MONEY,
            "NTEXT": TokenType.TEXT,
            "NVARCHAR(MAX)": TokenType.TEXT,
            "PRINT": TokenType.COMMAND,
            "PROC": TokenType.PROCEDURE,
            "REAL": TokenType.FLOAT,
            "ROWVERSION": TokenType.ROWVERSION,
            "SMALLDATETIME": TokenType.DATETIME,
            "SMALLMONEY": TokenType.SMALLMONEY,
            "SQL_VARIANT": TokenType.VARIANT,
            "TIME": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
            "VARCHAR(MAX)": TokenType.TEXT,
            "XML": TokenType.XML,
        }

        # TSQL allows @, # to appear as a variable/identifier prefix
        SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("@")
        SINGLE_TOKENS.pop("#")

    class Parser(parser.Parser):
        """Parser overrides for T-SQL functions, CONVERT, RETURNS and FOR SYSTEM_TIME."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # Argument 0 becomes `substr` and argument 1 becomes `this`.
            "CHARINDEX": lambda args: exp.StrPosition(
                this=seq_get(args, 1),
                substr=seq_get(args, 0),
                position=seq_get(args, 2),
            ),
            "ISNULL": exp.Coalesce.from_arg_list,
            "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
            "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
            "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
            "DATEPART": _format_time_lambda(exp.TimeToStr),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "LEN": exp.Length.from_arg_list,
            "REPLICATE": exp.Repeat.from_arg_list,
            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
            "FORMAT": _parse_format,
            "EOMONTH": _parse_eomonth,
        }

        # Types that get a default length of 30 in CONVERT when none is given.
        VAR_LENGTH_DATATYPES = {
            DataType.Type.NVARCHAR,
            DataType.Type.VARCHAR,
            DataType.Type.CHAR,
            DataType.Type.NCHAR,
        }

        # Tokens accepted as the table variable name in a RETURNS clause.
        RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
            TokenType.TABLE,
            *parser.Parser.TYPE_TOKENS,  # type: ignore
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,  # type: ignore
            TokenType.END: lambda self: self._parse_command(),
        }

        def _parse_system_time(self) -> t.Optional[exp.Expression]:
            """Parse an optional FOR SYSTEM_TIME clause into ``exp.SystemTime``.

            Returns None when the clause is absent. An unrecognized clause body
            is reported through ``raise_error``.
            """
            if not self._match_text_seq("FOR", "SYSTEM_TIME"):
                return None

            if self._match_text_seq("AS", "OF"):
                system_time = self.expression(
                    exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
                )
            elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
                # Upper-case the keyword: the generator compares `kind` against
                # the literals "FROM" / "BETWEEN".
                kind = self._prev.text.upper()
                this = self._parse_bitwise()
                self._match_texts(("TO", "AND"))
                expression = self._parse_bitwise()
                system_time = self.expression(
                    exp.SystemTime, this=this, expression=expression, kind=kind
                )
            elif self._match_text_seq("CONTAINED", "IN"):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                system_time = self.expression(
                    exp.SystemTime,
                    this=seq_get(args, 0),
                    expression=seq_get(args, 1),
                    kind="CONTAINED IN",
                )
            elif self._match(TokenType.ALL):
                system_time = self.expression(exp.SystemTime, kind="ALL")
            else:
                system_time = None
                self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

            return system_time

        def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
            """Parse table parts, then attach any FOR SYSTEM_TIME clause as ``system_time``."""
            table = super()._parse_table_parts(schema=schema)
            table.set("system_time", self._parse_system_time())
            return table

        def _parse_returns(self) -> exp.Expression:
            """Parse RETURNS, capturing an optional leading table variable as ``table``."""
            table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
            returns = super()._parse_returns()
            returns.set("table", table)
            return returns

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse CONVERT(type, value[, style]).

            Returns None if the type or the value is missing. With a style
            number the result depends on the target type (StrToDate,
            StrToTime, or TimeToStr, optionally wrapped in a cast); otherwise
            a plain Cast / TryCast is produced.

            Raises:
                ValueError: if the style is not in ``TSQL.convert_format_mapping``.
            """
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_conjunction()

            if not to or not this:
                return None

            # Retrieve length of datatype and override to default if not specified
            if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
                to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

            # Check whether a conversion with format is applicable
            if self._match(TokenType.COMMA):
                format_val = self._parse_number()
                format_val_name = format_val.name if format_val else ""

                if format_val_name not in TSQL.convert_format_mapping:
                    raise ValueError(
                        f"CONVERT function at T-SQL does not support format style {format_val_name}"
                    )

                format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

                # Check whether the convert entails a string to date format
                if to.this == DataType.Type.DATE:
                    return self.expression(exp.StrToDate, this=this, format=format_norm)
                # Check whether the convert entails a string to datetime format
                elif to.this == DataType.Type.DATETIME:
                    return self.expression(exp.StrToTime, this=this, format=format_norm)
                # Check whether the convert entails a date to string format
                elif to.this in self.VAR_LENGTH_DATATYPES:
                    return self.expression(
                        exp.Cast if strict else exp.TryCast,
                        to=to,
                        this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                    )
                elif to.this == DataType.Type.TEXT:
                    return self.expression(exp.TimeToStr, this=this, format=format_norm)

            # Entails a simple cast without any format requirement
            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

        def _parse_user_defined_function(
            self, kind: t.Optional[TokenType] = None
        ) -> t.Optional[exp.Expression]:
            """Parse a user-defined function, allowing unparenthesized parameters.

            The base result is returned as-is for functions, for results that
            are already a ``UserDefinedFunction``, or when the next token is an
            alias; otherwise a comma-separated parameter list is parsed.
            """
            this = super()._parse_user_defined_function(kind=kind)

            if (
                kind == TokenType.FUNCTION
                or isinstance(this, exp.UserDefinedFunction)
                or self._match(TokenType.ALIAS, advance=False)
            ):
                return this

            expressions = self._parse_csv(self._parse_function_parameter)
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
        """SQL generator overrides for T-SQL."""

        LOCKING_READS_SUPPORTED = True

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.BOOLEAN: "BIT",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DATETIME: "DATETIME2",
            exp.DataType.Type.VARIANT: "SQL_VARIANT",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            exp.CurrentDate: rename_func("GETDATE"),
            exp.CurrentTimestamp: rename_func("GETDATE"),
            exp.If: rename_func("IIF"),
            exp.NumberToStr: _format_sql,
            exp.TimeToStr: _format_sql,
            exp.GroupConcat: _string_agg_sql,
        }

        # Drop the inherited transform; returnsproperty_sql below handles ReturnsProperty.
        TRANSFORMS.pop(exp.ReturnsProperty)

        def systemtime_sql(self, expression: exp.SystemTime) -> str:
            """Render a FOR SYSTEM_TIME clause according to ``kind``.

            Any kind other than ALL, AS OF, FROM or BETWEEN is rendered as
            CONTAINED IN.
            """
            kind = expression.args["kind"]
            if kind == "ALL":
                return "FOR SYSTEM_TIME ALL"

            start = self.sql(expression, "this")
            if kind == "AS OF":
                return f"FOR SYSTEM_TIME AS OF {start}"

            end = self.sql(expression, "expression")
            if kind == "FROM":
                return f"FOR SYSTEM_TIME FROM {start} TO {end}"
            if kind == "BETWEEN":
                return f"FOR SYSTEM_TIME BETWEEN {start} AND {end}"

            return f"FOR SYSTEM_TIME CONTAINED IN ({start}, {end})"

        def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
            """Render RETURNS, prefixing the optional ``table`` argument when present."""
            table = expression.args.get("table")
            table = f"{table} " if table else ""
            return f"RETURNS {table}{self.sql(expression, 'this')}"
Inherited Members
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for T-SQL: quoting rules, extra keywords, and prefix characters."""

    # Strings may be delimited by single or double quotes.
    QUOTES = ["'", '"']

    # Identifiers may be delimited by double quotes or by [brackets].
    IDENTIFIERS = ['"', ("[", "]")]

    # Base keywords extended with T-SQL type names and commands.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BIT": TokenType.BOOLEAN,
        "DATETIME2": TokenType.DATETIME,
        "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
        "DECLARE": TokenType.COMMAND,
        "IMAGE": TokenType.IMAGE,
        "MONEY": TokenType.MONEY,
        "NTEXT": TokenType.TEXT,
        "NVARCHAR(MAX)": TokenType.TEXT,
        "PRINT": TokenType.COMMAND,
        "PROC": TokenType.PROCEDURE,
        "REAL": TokenType.FLOAT,
        "ROWVERSION": TokenType.ROWVERSION,
        "SMALLDATETIME": TokenType.DATETIME,
        "SMALLMONEY": TokenType.SMALLMONEY,
        "SQL_VARIANT": TokenType.VARIANT,
        "TIME": TokenType.TIMESTAMP,
        "TOP": TokenType.TOP,
        "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
        "VARCHAR(MAX)": TokenType.TEXT,
        "XML": TokenType.XML,
    }

    # "@" and "#" may prefix variables/identifiers in T-SQL, so they must not
    # be tokenized on their own; work on a private copy of the base table.
    SINGLE_TOKENS = dict(tokens.Tokenizer.SINGLE_TOKENS)
    del SINGLE_TOKENS["@"]
    del SINGLE_TOKENS["#"]
Inherited Members
class Parser(parser.Parser):
    """Parser overrides for T-SQL functions, CONVERT, RETURNS and FOR SYSTEM_TIME."""

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,  # type: ignore
        # Argument 0 becomes `substr` and argument 1 becomes `this`.
        "CHARINDEX": lambda args: exp.StrPosition(
            this=seq_get(args, 1),
            substr=seq_get(args, 0),
            position=seq_get(args, 2),
        ),
        "ISNULL": exp.Coalesce.from_arg_list,
        "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
        "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
        "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
        "DATEPART": _format_time_lambda(exp.TimeToStr),
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
        "IIF": exp.If.from_arg_list,
        "LEN": exp.Length.from_arg_list,
        "REPLICATE": exp.Repeat.from_arg_list,
        "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
        "FORMAT": _parse_format,
        "EOMONTH": _parse_eomonth,
    }

    # Types that get a default length of 30 in CONVERT when none is given.
    VAR_LENGTH_DATATYPES = {
        DataType.Type.NVARCHAR,
        DataType.Type.VARCHAR,
        DataType.Type.CHAR,
        DataType.Type.NCHAR,
    }

    # Tokens accepted as the table variable name in a RETURNS clause.
    RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
        TokenType.TABLE,
        *parser.Parser.TYPE_TOKENS,  # type: ignore
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,  # type: ignore
        TokenType.END: lambda self: self._parse_command(),
    }

    def _parse_system_time(self) -> t.Optional[exp.Expression]:
        """Parse an optional FOR SYSTEM_TIME clause into ``exp.SystemTime``.

        Returns None when the clause is absent. An unrecognized clause body
        is reported through ``raise_error``.
        """
        if not self._match_text_seq("FOR", "SYSTEM_TIME"):
            return None

        if self._match_text_seq("AS", "OF"):
            system_time = self.expression(
                exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
            )
        elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            # Upper-case the keyword: the generator's systemtime_sql compares
            # `kind` against the literals "FROM" / "BETWEEN".
            kind = self._prev.text.upper()
            this = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            expression = self._parse_bitwise()
            system_time = self.expression(
                exp.SystemTime, this=this, expression=expression, kind=kind
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            system_time = self.expression(
                exp.SystemTime,
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                kind="CONTAINED IN",
            )
        elif self._match(TokenType.ALL):
            system_time = self.expression(exp.SystemTime, kind="ALL")
        else:
            system_time = None
            self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

        return system_time

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse table parts, then attach any FOR SYSTEM_TIME clause as ``system_time``."""
        table = super()._parse_table_parts(schema=schema)
        table.set("system_time", self._parse_system_time())
        return table

    def _parse_returns(self) -> exp.Expression:
        """Parse RETURNS, capturing an optional leading table variable as ``table``."""
        table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
        returns = super()._parse_returns()
        returns.set("table", table)
        return returns

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(type, value[, style]).

        Returns None if the type or the value is missing. With a style number
        the result depends on the target type (StrToDate, StrToTime, or
        TimeToStr, optionally wrapped in a cast); otherwise a plain
        Cast / TryCast is produced.

        Raises:
            ValueError: if the style is not in ``TSQL.convert_format_mapping``.
        """
        to = self._parse_types()
        self._match(TokenType.COMMA)
        this = self._parse_conjunction()

        if not to or not this:
            return None

        # Retrieve length of datatype and override to default if not specified
        if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
            to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

        # Check whether a conversion with format is applicable
        if self._match(TokenType.COMMA):
            format_val = self._parse_number()
            format_val_name = format_val.name if format_val else ""

            if format_val_name not in TSQL.convert_format_mapping:
                raise ValueError(
                    f"CONVERT function at T-SQL does not support format style {format_val_name}"
                )

            format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

            # Check whether the convert entails a string to date format
            if to.this == DataType.Type.DATE:
                return self.expression(exp.StrToDate, this=this, format=format_norm)
            # Check whether the convert entails a string to datetime format
            elif to.this == DataType.Type.DATETIME:
                return self.expression(exp.StrToTime, this=this, format=format_norm)
            # Check whether the convert entails a date to string format
            elif to.this in self.VAR_LENGTH_DATATYPES:
                return self.expression(
                    exp.Cast if strict else exp.TryCast,
                    to=to,
                    this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                )
            elif to.this == DataType.Type.TEXT:
                return self.expression(exp.TimeToStr, this=this, format=format_norm)

        # Entails a simple cast without any format requirement
        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a user-defined function, allowing unparenthesized parameters.

        The base result is returned as-is for functions, for results that are
        already a ``UserDefinedFunction``, or when the next token is an alias;
        otherwise a comma-separated parameter list is parsed.
        """
        this = super()._parse_user_defined_function(kind=kind)

        if (
            kind == TokenType.FUNCTION
            or isinstance(this, exp.UserDefinedFunction)
            or self._match(TokenType.ALIAS, advance=False)
        ):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    """SQL generator overrides for T-SQL: type names, function renames and custom clauses."""

    LOCKING_READS_SUPPORTED = True

    # Generic data types -> T-SQL type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,  # type: ignore
        exp.DataType.Type.BOOLEAN: "BIT",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DATETIME: "DATETIME2",
        exp.DataType.Type.VARIANT: "SQL_VARIANT",
    }

    # Expression type -> SQL rendering function.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,  # type: ignore
        exp.DateAdd: generate_date_delta_with_unit_sql,
        exp.DateDiff: generate_date_delta_with_unit_sql,
        exp.CurrentDate: rename_func("GETDATE"),
        exp.CurrentTimestamp: rename_func("GETDATE"),
        exp.If: rename_func("IIF"),
        exp.NumberToStr: _format_sql,
        exp.TimeToStr: _format_sql,
        exp.GroupConcat: _string_agg_sql,
    }

    # Remove the inherited entry; returnsproperty_sql below renders ReturnsProperty.
    TRANSFORMS.pop(exp.ReturnsProperty)

    def systemtime_sql(self, expression: exp.SystemTime) -> str:
        """Render a FOR SYSTEM_TIME clause.

        ``kind`` selects the form: ALL, AS OF, FROM ... TO, or
        BETWEEN ... AND; any other kind is rendered as CONTAINED IN.
        """
        kind = expression.args["kind"]

        # Forms are handled from fewest operands to most, so each operand is
        # only rendered once it is actually needed.
        if kind == "ALL":
            return "FOR SYSTEM_TIME ALL"

        start = self.sql(expression, "this")
        if kind == "AS OF":
            return f"FOR SYSTEM_TIME AS OF {start}"

        end = self.sql(expression, "expression")
        ranged_forms = {
            "FROM": f"FOR SYSTEM_TIME FROM {start} TO {end}",
            "BETWEEN": f"FOR SYSTEM_TIME BETWEEN {start} AND {end}",
        }
        return ranged_forms.get(kind, f"FOR SYSTEM_TIME CONTAINED IN ({start}, {end})")

    def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
        """Render RETURNS, placing the optional ``table`` argument before the return type."""
        table_var = expression.args.get("table")
        prefix = f"{table_var} " if table_var else ""
        returned = self.sql(expression, "this")
        return f"RETURNS {prefix}{returned}"
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the value represents the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- identify (bool): if set to True all identifiers will be delimited by the corresponding character.
- normalize (bool): if set to True all identifiers will be lower cased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names, "upper", "lower", or None Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): if the comma is leading or trailing in select statements Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def systemtime_sql(self, expression: exp.SystemTime) -> str:
    """Render a ``FOR SYSTEM_TIME`` clause.

    The ``kind`` argument selects the form: ``ALL``, ``AS OF``,
    ``FROM ... TO ...``, ``BETWEEN ... AND ...``; any other kind is
    rendered as ``CONTAINED IN (start, end)``.
    """
    clause_kind = expression.args["kind"]
    if clause_kind == "ALL":
        return "FOR SYSTEM_TIME ALL"

    # Operands are rendered only once a form that needs them is reached.
    lower = self.sql(expression, "this")
    if clause_kind == "AS OF":
        return f"FOR SYSTEM_TIME AS OF {lower}"

    upper = self.sql(expression, "expression")
    if clause_kind == "FROM":
        rendered = f"FOR SYSTEM_TIME FROM {lower} TO {upper}"
    elif clause_kind == "BETWEEN":
        rendered = f"FOR SYSTEM_TIME BETWEEN {lower} AND {upper}"
    else:
        rendered = f"FOR SYSTEM_TIME CONTAINED IN ({lower}, {upper})"
    return rendered
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- afterjournalproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- select_sql
- schema_sql
- star_sql
- structkwarg_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- window_spec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- is_sql
- like_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql