summaryrefslogtreecommitdiffstats
path: root/sqlglot/tokens.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2023-10-04 12:14:45 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2023-10-04 12:14:45 +0000
commita34653eb21369376f0e054dd989311afcb167f5b (patch)
tree5a0280adce195af0be654f79fd99395fd2932c19 /sqlglot/tokens.py
parentReleasing debian version 18.7.0-1. (diff)
downloadsqlglot-a34653eb21369376f0e054dd989311afcb167f5b.tar.xz
sqlglot-a34653eb21369376f0e054dd989311afcb167f5b.zip
Merging upstream version 18.11.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/tokens.py')
-rw-r--r--  sqlglot/tokens.py  18
1 file changed, 18 insertions(+), 0 deletions(-)
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py
index 4d5f198..080a86b 100644
--- a/sqlglot/tokens.py
+++ b/sqlglot/tokens.py
@@ -77,6 +77,7 @@ class TokenType(AutoName):
BYTE_STRING = auto()
NATIONAL_STRING = auto()
RAW_STRING = auto()
+ HEREDOC_STRING = auto()
# types
BIT = auto()
@@ -98,6 +99,7 @@ class TokenType(AutoName):
FLOAT = auto()
DOUBLE = auto()
DECIMAL = auto()
+ UDECIMAL = auto()
BIGDECIMAL = auto()
CHAR = auto()
NCHAR = auto()
@@ -418,6 +420,7 @@ class _Tokenizer(type):
**_quotes_to_format(TokenType.BYTE_STRING, klass.BYTE_STRINGS),
**_quotes_to_format(TokenType.HEX_STRING, klass.HEX_STRINGS),
**_quotes_to_format(TokenType.RAW_STRING, klass.RAW_STRINGS),
+ **_quotes_to_format(TokenType.HEREDOC_STRING, klass.HEREDOC_STRINGS),
}
klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
@@ -484,11 +487,13 @@ class Tokenizer(metaclass=_Tokenizer):
BYTE_STRINGS: t.List[str | t.Tuple[str, str]] = []
HEX_STRINGS: t.List[str | t.Tuple[str, str]] = []
RAW_STRINGS: t.List[str | t.Tuple[str, str]] = []
+ HEREDOC_STRINGS: t.List[str | t.Tuple[str, str]] = []
IDENTIFIERS: t.List[str | t.Tuple[str, str]] = ['"']
IDENTIFIER_ESCAPES = ['"']
QUOTES: t.List[t.Tuple[str, str] | str] = ["'"]
STRING_ESCAPES = ["'"]
VAR_SINGLE_TOKENS: t.Set[str] = set()
+ ESCAPE_SEQUENCES: t.Dict[str, str] = {}
# Autofilled
IDENTIFIERS_CAN_START_WITH_DIGIT: bool = False
@@ -997,9 +1002,11 @@ class Tokenizer(metaclass=_Tokenizer):
word = word.upper()
self._add(self.KEYWORDS[word], text=word)
return
+
if self._char in self.SINGLE_TOKENS:
self._add(self.SINGLE_TOKENS[self._char], text=self._char)
return
+
self._scan_var()
def _scan_comment(self, comment_start: str) -> bool:
@@ -1126,6 +1133,10 @@ class Tokenizer(metaclass=_Tokenizer):
base = 16
elif token_type == TokenType.BIT_STRING:
base = 2
+ elif token_type == TokenType.HEREDOC_STRING:
+ self._advance()
+ tag = "" if self._char == end else self._extract_string(end)
+ end = f"{start}{tag}{end}"
else:
return False
@@ -1193,6 +1204,13 @@ class Tokenizer(metaclass=_Tokenizer):
if self._end:
raise TokenError(f"Missing {delimiter} from {self._line}:{self._start}")
+ if self.ESCAPE_SEQUENCES and self._peek and self._char in self.STRING_ESCAPES:
+ escaped_sequence = self.ESCAPE_SEQUENCES.get(self._char + self._peek)
+ if escaped_sequence:
+ self._advance(2)
+ text += escaped_sequence
+ continue
+
current = self._current - 1
self._advance(alnum=True)
text += self.sql[current : self._current - 1]