diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-10-04 12:14:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-10-04 12:14:45 +0000 |
commit | a34653eb21369376f0e054dd989311afcb167f5b (patch) | |
tree | 5a0280adce195af0be654f79fd99395fd2932c19 /sqlglot/tokens.py | |
parent | Releasing debian version 18.7.0-1. (diff) | |
download | sqlglot-a34653eb21369376f0e054dd989311afcb167f5b.tar.xz sqlglot-a34653eb21369376f0e054dd989311afcb167f5b.zip |
Merging upstream version 18.11.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r-- | sqlglot/tokens.py | 18 |
1 file changed, 18 insertions, 0 deletions
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 4d5f198..080a86b 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -77,6 +77,7 @@ class TokenType(AutoName):
     BYTE_STRING = auto()
     NATIONAL_STRING = auto()
     RAW_STRING = auto()
+    HEREDOC_STRING = auto()

     # types
     BIT = auto()
@@ -98,6 +99,7 @@ class TokenType(AutoName):
     FLOAT = auto()
     DOUBLE = auto()
     DECIMAL = auto()
+    UDECIMAL = auto()
     BIGDECIMAL = auto()
     CHAR = auto()
     NCHAR = auto()
@@ -418,6 +420,7 @@ class _Tokenizer(type):
             **_quotes_to_format(TokenType.BYTE_STRING, klass.BYTE_STRINGS),
             **_quotes_to_format(TokenType.HEX_STRING, klass.HEX_STRINGS),
             **_quotes_to_format(TokenType.RAW_STRING, klass.RAW_STRINGS),
+            **_quotes_to_format(TokenType.HEREDOC_STRING, klass.HEREDOC_STRINGS),
         }

         klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
@@ -484,11 +487,13 @@ class Tokenizer(metaclass=_Tokenizer):
     BYTE_STRINGS: t.List[str | t.Tuple[str, str]] = []
     HEX_STRINGS: t.List[str | t.Tuple[str, str]] = []
     RAW_STRINGS: t.List[str | t.Tuple[str, str]] = []
+    HEREDOC_STRINGS: t.List[str | t.Tuple[str, str]] = []
     IDENTIFIERS: t.List[str | t.Tuple[str, str]] = ['"']
     IDENTIFIER_ESCAPES = ['"']
     QUOTES: t.List[t.Tuple[str, str] | str] = ["'"]
     STRING_ESCAPES = ["'"]
     VAR_SINGLE_TOKENS: t.Set[str] = set()
+    ESCAPE_SEQUENCES: t.Dict[str, str] = {}

     # Autofilled
     IDENTIFIERS_CAN_START_WITH_DIGIT: bool = False
@@ -997,9 +1002,11 @@ class Tokenizer(metaclass=_Tokenizer):
                 word = word.upper()
             self._add(self.KEYWORDS[word], text=word)
             return
+
         if self._char in self.SINGLE_TOKENS:
             self._add(self.SINGLE_TOKENS[self._char], text=self._char)
             return
+
         self._scan_var()

     def _scan_comment(self, comment_start: str) -> bool:
@@ -1126,6 +1133,10 @@ class Tokenizer(metaclass=_Tokenizer):
             base = 16
         elif token_type == TokenType.BIT_STRING:
             base = 2
+        elif token_type == TokenType.HEREDOC_STRING:
+            self._advance()
+            tag = "" if self._char == end else self._extract_string(end)
+            end = f"{start}{tag}{end}"
         else:
             return False

@@ -1193,6 +1204,13 @@ class Tokenizer(metaclass=_Tokenizer):
             if self._end:
                 raise TokenError(f"Missing {delimiter} from {self._line}:{self._start}")

+            if self.ESCAPE_SEQUENCES and self._peek and self._char in self.STRING_ESCAPES:
+                escaped_sequence = self.ESCAPE_SEQUENCES.get(self._char + self._peek)
+                if escaped_sequence:
+                    self._advance(2)
+                    text += escaped_sequence
+                    continue
+
             current = self._current - 1
             self._advance(alnum=True)
             text += self.sql[current : self._current - 1]