diff options
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r-- | sqlglot/tokens.py | 27 |
1 file changed, 24 insertions, 3 deletions
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py index b25ef8d..0efa7d0 100644 --- a/sqlglot/tokens.py +++ b/sqlglot/tokens.py @@ -48,6 +48,7 @@ class TokenType(AutoName): DOLLAR = auto() PARAMETER = auto() SESSION_PARAMETER = auto() + NATIONAL = auto() BLOCK_START = auto() BLOCK_END = auto() @@ -111,6 +112,7 @@ class TokenType(AutoName): # keywords ALIAS = auto() + ALTER = auto() ALWAYS = auto() ALL = auto() ANTI = auto() @@ -196,6 +198,7 @@ class TokenType(AutoName): INTERVAL = auto() INTO = auto() INTRODUCER = auto() + IRLIKE = auto() IS = auto() ISNULL = auto() JOIN = auto() @@ -241,6 +244,7 @@ class TokenType(AutoName): PRIMARY_KEY = auto() PROCEDURE = auto() PROPERTIES = auto() + PSEUDO_TYPE = auto() QUALIFY = auto() QUOTE = auto() RANGE = auto() @@ -346,7 +350,11 @@ class _Tokenizer(type): def __new__(cls, clsname, bases, attrs): # type: ignore klass = super().__new__(cls, clsname, bases, attrs) - klass._QUOTES = cls._delimeter_list_to_dict(klass.QUOTES) + klass._QUOTES = { + f"{prefix}{s}": e + for s, e in cls._delimeter_list_to_dict(klass.QUOTES).items() + for prefix in (("",) if s[0].isalpha() else ("", "n", "N")) + } klass._BIT_STRINGS = cls._delimeter_list_to_dict(klass.BIT_STRINGS) klass._HEX_STRINGS = cls._delimeter_list_to_dict(klass.HEX_STRINGS) klass._BYTE_STRINGS = cls._delimeter_list_to_dict(klass.BYTE_STRINGS) @@ -470,6 +478,7 @@ class Tokenizer(metaclass=_Tokenizer): "CHECK": TokenType.CHECK, "CLUSTER BY": TokenType.CLUSTER_BY, "COLLATE": TokenType.COLLATE, + "COLUMN": TokenType.COLUMN, "COMMENT": TokenType.SCHEMA_COMMENT, "COMMIT": TokenType.COMMIT, "COMPOUND": TokenType.COMPOUND, @@ -587,6 +596,7 @@ class Tokenizer(metaclass=_Tokenizer): "SEMI": TokenType.SEMI, "SET": TokenType.SET, "SHOW": TokenType.SHOW, + "SIMILAR TO": TokenType.SIMILAR_TO, "SOME": TokenType.SOME, "SORTKEY": TokenType.SORTKEY, "SORT BY": TokenType.SORT_BY, @@ -614,6 +624,7 @@ class Tokenizer(metaclass=_Tokenizer): "VOLATILE": TokenType.VOLATILE, "WHEN": TokenType.WHEN, 
"WHERE": TokenType.WHERE, + "WINDOW": TokenType.WINDOW, "WITH": TokenType.WITH, "WITH TIME ZONE": TokenType.WITH_TIME_ZONE, "WITH LOCAL TIME ZONE": TokenType.WITH_LOCAL_TIME_ZONE, @@ -652,6 +663,7 @@ class Tokenizer(metaclass=_Tokenizer): "VARCHAR2": TokenType.VARCHAR, "NVARCHAR": TokenType.NVARCHAR, "NVARCHAR2": TokenType.NVARCHAR, + "STR": TokenType.TEXT, "STRING": TokenType.TEXT, "TEXT": TokenType.TEXT, "CLOB": TokenType.TEXT, @@ -667,7 +679,16 @@ class Tokenizer(metaclass=_Tokenizer): "UNIQUE": TokenType.UNIQUE, "STRUCT": TokenType.STRUCT, "VARIANT": TokenType.VARIANT, - "ALTER": TokenType.COMMAND, + "ALTER": TokenType.ALTER, + "ALTER AGGREGATE": TokenType.COMMAND, + "ALTER DEFAULT": TokenType.COMMAND, + "ALTER DOMAIN": TokenType.COMMAND, + "ALTER ROLE": TokenType.COMMAND, + "ALTER RULE": TokenType.COMMAND, + "ALTER SEQUENCE": TokenType.COMMAND, + "ALTER TYPE": TokenType.COMMAND, + "ALTER USER": TokenType.COMMAND, + "ALTER VIEW": TokenType.COMMAND, "ANALYZE": TokenType.COMMAND, "CALL": TokenType.COMMAND, "EXPLAIN": TokenType.COMMAND, @@ -967,7 +988,7 @@ class Tokenizer(metaclass=_Tokenizer): text = self._extract_string(quote_end) text = text.encode(self.ENCODE).decode(self.ENCODE) if self.ENCODE else text # type: ignore text = text.replace("\\\\", "\\") if self._replace_backslash else text - self._add(TokenType.STRING, text) + self._add(TokenType.NATIONAL if quote[0].upper() == "N" else TokenType.STRING, text) return True # X'1234, b'0110', E'\\\\\' etc. |