import re from sqlglot import exp from sqlglot.dialects.dialect import Dialect, parse_date_delta, rename_func from sqlglot.expressions import DataType from sqlglot.generator import Generator from sqlglot.helper import list_get from sqlglot.parser import Parser from sqlglot.time import format_time from sqlglot.tokens import Tokenizer, TokenType FULL_FORMAT_TIME_MAPPING = {"weekday": "%A", "dw": "%A", "w": "%A", "month": "%B", "mm": "%B", "m": "%B"} DATE_DELTA_INTERVAL = { "year": "year", "yyyy": "year", "yy": "year", "quarter": "quarter", "qq": "quarter", "q": "quarter", "month": "month", "mm": "month", "m": "month", "week": "week", "ww": "week", "wk": "week", "day": "day", "dd": "day", "d": "day", } DATE_FMT_RE = re.compile("([dD]{1,2})|([mM]{1,2})|([yY]{1,4})|([hH]{1,2})|([sS]{1,2})") # N = Numeric, C=Currency TRANSPILE_SAFE_NUMBER_FMT = {"N", "C"} def tsql_format_time_lambda(exp_class, full_format_mapping=None, default=None): def _format_time(args): return exp_class( this=list_get(args, 1), format=exp.Literal.string( format_time( list_get(args, 0).name or (TSQL.time_format if default is True else default), {**TSQL.time_mapping, **FULL_FORMAT_TIME_MAPPING} if full_format_mapping else TSQL.time_mapping, ) ), ) return _format_time def parse_format(args): fmt = list_get(args, 1) number_fmt = fmt.name in TRANSPILE_SAFE_NUMBER_FMT or not DATE_FMT_RE.search(fmt.this) if number_fmt: return exp.NumberToStr(this=list_get(args, 0), format=fmt) return exp.TimeToStr( this=list_get(args, 0), format=exp.Literal.string( format_time(fmt.name, TSQL.format_time_mapping) if len(fmt.name) == 1 else format_time(fmt.name, TSQL.time_mapping) ), ) def generate_date_delta_with_unit_sql(self, e): func = "DATEADD" if isinstance(e, exp.DateAdd) else "DATEDIFF" return f"{func}({self.format_args(e.text('unit'), e.expression, e.this)})" def generate_format_sql(self, e): fmt = ( e.args["format"] if isinstance(e, exp.NumberToStr) else exp.Literal.string(format_time(e.text("format"), TSQL.inverse_time_mapping)) ) return f"FORMAT({self.format_args(e.this, fmt)})" class TSQL(Dialect): null_ordering = "nulls_are_small" time_format = "'yyyy-mm-dd hh:mm:ss'" time_mapping = { "year": "%Y", "qq": "%q", "q": "%q", "quarter": "%q", "dayofyear": "%j", "day": "%d", "dy": "%d", "y": "%Y", "week": "%W", "ww": "%W", "wk": "%W", "hour": "%h", "hh": "%I", "minute": "%M", "mi": "%M", "n": "%M", "second": "%S", "ss": "%S", "s": "%-S", "millisecond": "%f", "ms": "%f", "weekday": "%W", "dw": "%W", "month": "%m", "mm": "%M", "m": "%-M", "Y": "%Y", "YYYY": "%Y", "YY": "%y", "MMMM": "%B", "MMM": "%b", "MM": "%m", "M": "%-m", "dd": "%d", "d": "%-d", "HH": "%H", "H": "%-H", "h": "%-I", "S": "%f", "yyyy": "%Y", "yy": "%y", } convert_format_mapping = { "0": "%b %d %Y %-I:%M%p", "1": "%m/%d/%y", "2": "%y.%m.%d", "3": "%d/%m/%y", "4": "%d.%m.%y", "5": "%d-%m-%y", "6": "%d %b %y", "7": "%b %d, %y", "8": "%H:%M:%S", "9": "%b %d %Y %-I:%M:%S:%f%p", "10": "mm-dd-yy", "11": "yy/mm/dd", "12": "yymmdd", "13": "%d %b %Y %H:%M:ss:%f", "14": "%H:%M:%S:%f", "20": "%Y-%m-%d %H:%M:%S", "21": "%Y-%m-%d %H:%M:%S.%f", "22": "%m/%d/%y %-I:%M:%S %p", "23": "%Y-%m-%d", "24": "%H:%M:%S", "25": "%Y-%m-%d %H:%M:%S.%f", "100": "%b %d %Y %-I:%M%p", "101": "%m/%d/%Y", "102": "%Y.%m.%d", "103": "%d/%m/%Y", "104": "%d.%m.%Y", "105": "%d-%m-%Y", "106": "%d %b %Y", "107": "%b %d, %Y", "108": "%H:%M:%S", "109": "%b %d %Y %-I:%M:%S:%f%p", "110": "%m-%d-%Y", "111": "%Y/%m/%d", "112": "%Y%m%d", "113": "%d %b %Y %H:%M:%S:%f", "114": "%H:%M:%S:%f", "120": "%Y-%m-%d %H:%M:%S", "121": "%Y-%m-%d %H:%M:%S.%f", } # not sure if complete format_time_mapping = { "y": "%B %Y", "d": "%m/%d/%Y", "H": "%-H", "h": "%-I", "s": "%Y-%m-%d %H:%M:%S", "D": "%A,%B,%Y", "f": "%A,%B,%Y %-I:%M %p", "F": "%A,%B,%Y %-I:%M:%S %p", "g": "%m/%d/%Y %-I:%M %p", "G": "%m/%d/%Y %-I:%M:%S %p", "M": "%B %-d", "m": "%B %-d", "O": "%Y-%m-%dT%H:%M:%S", "u": "%Y-%M-%D %H:%M:%S%z", "U": "%A, %B %D, %Y %H:%M:%S%z", "T": "%-I:%M:%S %p", "t": "%-I:%M", "Y": "%a %Y", } class Tokenizer(Tokenizer): IDENTIFIERS = ['"', ("[", "]")] KEYWORDS = { **Tokenizer.KEYWORDS, "BIT": TokenType.BOOLEAN, "REAL": TokenType.FLOAT, "NTEXT": TokenType.TEXT, "SMALLDATETIME": TokenType.DATETIME, "DATETIME2": TokenType.DATETIME, "DATETIMEOFFSET": TokenType.TIMESTAMPTZ, "TIME": TokenType.TIMESTAMP, "VARBINARY": TokenType.BINARY, "IMAGE": TokenType.IMAGE, "MONEY": TokenType.MONEY, "SMALLMONEY": TokenType.SMALLMONEY, "ROWVERSION": TokenType.ROWVERSION, "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER, "XML": TokenType.XML, "SQL_VARIANT": TokenType.VARIANT, "NVARCHAR(MAX)": TokenType.TEXT, "VARCHAR(MAX)": TokenType.TEXT, "TOP": TokenType.TOP, } class Parser(Parser): FUNCTIONS = { **Parser.FUNCTIONS, "CHARINDEX": exp.StrPosition.from_arg_list, "ISNULL": exp.Coalesce.from_arg_list, "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL), "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL), "DATENAME": tsql_format_time_lambda(exp.TimeToStr, full_format_mapping=True), "DATEPART": tsql_format_time_lambda(exp.TimeToStr), "GETDATE": exp.CurrentDate.from_arg_list, "IIF": exp.If.from_arg_list, "LEN": exp.Length.from_arg_list, "REPLICATE": exp.Repeat.from_arg_list, "JSON_VALUE": exp.JSONExtractScalar.from_arg_list, "FORMAT": parse_format, } VAR_LENGTH_DATATYPES = { DataType.Type.NVARCHAR, DataType.Type.VARCHAR, DataType.Type.CHAR, DataType.Type.NCHAR, } def _parse_convert(self, strict): to = self._parse_types() self._match(TokenType.COMMA) this = self._parse_column() # Retrieve length of datatype and override to default if not specified if list_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES: to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False) # Check whether a conversion with format is applicable if self._match(TokenType.COMMA): format_val = self._parse_number().name if format_val not in TSQL.convert_format_mapping: raise ValueError(f"CONVERT function at T-SQL does not support format style {format_val}") format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val]) # Check whether the convert entails a string to date format if to.this == DataType.Type.DATE: return self.expression(exp.StrToDate, this=this, format=format_norm) # Check whether the convert entails a string to datetime format elif to.this == DataType.Type.DATETIME: return self.expression(exp.StrToTime, this=this, format=format_norm) # Check whether the convert entails a date to string format elif to.this in self.VAR_LENGTH_DATATYPES: return self.expression( exp.Cast if strict else exp.TryCast, to=to, this=self.expression(exp.TimeToStr, this=this, format=format_norm), ) elif to.this == DataType.Type.TEXT: return self.expression(exp.TimeToStr, this=this, format=format_norm) # Entails a simple cast without any format requirement return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) class Generator(Generator): TYPE_MAPPING = { **Generator.TYPE_MAPPING, exp.DataType.Type.BOOLEAN: "BIT", exp.DataType.Type.INT: "INTEGER", exp.DataType.Type.DECIMAL: "NUMERIC", exp.DataType.Type.DATETIME: "DATETIME2", exp.DataType.Type.VARIANT: "SQL_VARIANT", } TRANSFORMS = { **Generator.TRANSFORMS, exp.DateAdd: generate_date_delta_with_unit_sql, exp.DateDiff: generate_date_delta_with_unit_sql, exp.CurrentDate: rename_func("GETDATE"), exp.If: rename_func("IIF"), exp.NumberToStr: generate_format_sql, exp.TimeToStr: generate_format_sql, }