diff options
Diffstat (limited to '')
-rw-r--r-- | sqlglot/tokens.py | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/sqlglot/tokens.py b/sqlglot/tokens.py index 87a4924..b064957 100644 --- a/sqlglot/tokens.py +++ b/sqlglot/tokens.py @@ -504,6 +504,7 @@ class _Tokenizer(type): command_prefix_tokens={ _TOKEN_TYPE_TO_INDEX[v] for v in klass.COMMAND_PREFIX_TOKENS }, + heredoc_tag_is_identifier=klass.HEREDOC_TAG_IS_IDENTIFIER, ) token_types = RsTokenTypeSettings( bit_string=_TOKEN_TYPE_TO_INDEX[TokenType.BIT_STRING], @@ -517,6 +518,7 @@ class _Tokenizer(type): semicolon=_TOKEN_TYPE_TO_INDEX[TokenType.SEMICOLON], string=_TOKEN_TYPE_TO_INDEX[TokenType.STRING], var=_TOKEN_TYPE_TO_INDEX[TokenType.VAR], + heredoc_string_alternative=_TOKEN_TYPE_TO_INDEX[klass.HEREDOC_STRING_ALTERNATIVE], ) klass._RS_TOKENIZER = RsTokenizer(settings, token_types) else: @@ -573,6 +575,12 @@ class Tokenizer(metaclass=_Tokenizer): STRING_ESCAPES = ["'"] VAR_SINGLE_TOKENS: t.Set[str] = set() + # Whether or not the heredoc tags follow the same lexical rules as unquoted identifiers + HEREDOC_TAG_IS_IDENTIFIER = False + + # Token that we'll generate as a fallback if the heredoc prefix doesn't correspond to a heredoc + HEREDOC_STRING_ALTERNATIVE = TokenType.VAR + # Autofilled _COMMENTS: t.Dict[str, str] = {} _FORMAT_STRINGS: t.Dict[str, t.Tuple[str, TokenType]] = {} @@ -1249,6 +1257,18 @@ class Tokenizer(metaclass=_Tokenizer): elif token_type == TokenType.BIT_STRING: base = 2 elif token_type == TokenType.HEREDOC_STRING: + if ( + self.HEREDOC_TAG_IS_IDENTIFIER + and not self._peek.isidentifier() + and not self._peek == end + ): + if self.HEREDOC_STRING_ALTERNATIVE != token_type.VAR: + self._add(self.HEREDOC_STRING_ALTERNATIVE) + else: + self._scan_var() + + return True + self._advance() tag = "" if self._char == end else self._extract_string(end) end = f"{start}{tag}{end}" |