diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-10-21 09:29:26 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-10-21 09:29:26 +0000 |
commit | 8b4272814fb4585be120f183eb7c26bb8acde974 (patch) | |
tree | 85d56a8f5ac4ac94ab924d5bbc578586eeb2a998 /sqlglot/tokens.py | |
parent | Releasing debian version 7.1.3-1. (diff) | |
download | sqlglot-8b4272814fb4585be120f183eb7c26bb8acde974.tar.xz sqlglot-8b4272814fb4585be120f183eb7c26bb8acde974.zip |
Merging upstream version 9.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r-- | sqlglot/tokens.py | 26 |
1 file changed, 20 insertions, 6 deletions
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py index fc8e6e7..1a9d72e 100644 --- a/sqlglot/tokens.py +++ b/sqlglot/tokens.py @@ -56,6 +56,7 @@ class TokenType(AutoName): VAR = auto() BIT_STRING = auto() HEX_STRING = auto() + BYTE_STRING = auto() # types BOOLEAN = auto() @@ -320,6 +321,7 @@ class _Tokenizer(type): klass._QUOTES = cls._delimeter_list_to_dict(klass.QUOTES) klass._BIT_STRINGS = cls._delimeter_list_to_dict(klass.BIT_STRINGS) klass._HEX_STRINGS = cls._delimeter_list_to_dict(klass.HEX_STRINGS) + klass._BYTE_STRINGS = cls._delimeter_list_to_dict(klass.BYTE_STRINGS) klass._IDENTIFIERS = cls._delimeter_list_to_dict(klass.IDENTIFIERS) klass._COMMENTS = dict( (comment, None) if isinstance(comment, str) else (comment[0], comment[1]) for comment in klass.COMMENTS @@ -333,6 +335,7 @@ class _Tokenizer(type): **{quote: TokenType.QUOTE for quote in klass._QUOTES}, **{bit_string: TokenType.BIT_STRING for bit_string in klass._BIT_STRINGS}, **{hex_string: TokenType.HEX_STRING for hex_string in klass._HEX_STRINGS}, + **{byte_string: TokenType.BYTE_STRING for byte_string in klass._BYTE_STRINGS}, }.items() if " " in key or any(single in key for single in klass.SINGLE_TOKENS) ) @@ -385,6 +388,8 @@ class Tokenizer(metaclass=_Tokenizer): HEX_STRINGS = [] + BYTE_STRINGS = [] + IDENTIFIERS = ['"'] ESCAPE = "'" @@ -799,7 +804,7 @@ class Tokenizer(metaclass=_Tokenizer): if self._scan_string(word): return - if self._scan_numeric_string(word): + if self._scan_formatted_string(word): return if self._scan_comment(word): return @@ -906,7 +911,8 @@ class Tokenizer(metaclass=_Tokenizer): self._add(TokenType.STRING, text) return True - def _scan_numeric_string(self, string_start): + # X'1234, b'0110', E'\\\\\' etc. 
+ def _scan_formatted_string(self, string_start): if string_start in self._HEX_STRINGS: delimiters = self._HEX_STRINGS token_type = TokenType.HEX_STRING @@ -915,6 +921,10 @@ class Tokenizer(metaclass=_Tokenizer): delimiters = self._BIT_STRINGS token_type = TokenType.BIT_STRING base = 2 + elif string_start in self._BYTE_STRINGS: + delimiters = self._BYTE_STRINGS + token_type = TokenType.BYTE_STRING + base = None else: return False @@ -922,10 +932,14 @@ class Tokenizer(metaclass=_Tokenizer): string_end = delimiters.get(string_start) text = self._extract_string(string_end) - try: - self._add(token_type, f"{int(text, base)}") - except ValueError: - raise RuntimeError(f"Numeric string contains invalid characters from {self._line}:{self._start}") + if base is None: + self._add(token_type, text) + else: + try: + self._add(token_type, f"{int(text, base)}") + except: + raise RuntimeError(f"Numeric string contains invalid characters from {self._line}:{self._start}") + return True def _scan_identifier(self, identifier_end): |