summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/__init__.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-08 08:11:53 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-08 08:12:02 +0000
commit: 8d36f5966675e23bee7026ba37ae0647fbf47300 (patch)
tree: df4227bbb3b07cb70df87237bcff03c8efd7822d /sqlglot/__init__.py
parent: Releasing debian version 22.2.0-1. (diff)
download: sqlglot-8d36f5966675e23bee7026ba37ae0647fbf47300.tar.xz
download: sqlglot-8d36f5966675e23bee7026ba37ae0647fbf47300.zip
Merging upstream version 23.7.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/__init__.py')
-rw-r--r-- sqlglot/__init__.py 17
1 files changed, 16 insertions, 1 deletions
diff --git a/sqlglot/__init__.py b/sqlglot/__init__.py
index e30232c..756532f 100644
--- a/sqlglot/__init__.py
+++ b/sqlglot/__init__.py
@@ -45,7 +45,7 @@ from sqlglot.expressions import (
from sqlglot.generator import Generator as Generator
from sqlglot.parser import Parser as Parser
from sqlglot.schema import MappingSchema as MappingSchema, Schema as Schema
-from sqlglot.tokens import Tokenizer as Tokenizer, TokenType as TokenType
+from sqlglot.tokens import Token as Token, Tokenizer as Tokenizer, TokenType as TokenType
if t.TYPE_CHECKING:
from sqlglot._typing import E
@@ -69,6 +69,21 @@ schema = MappingSchema()
"""The default schema used by SQLGlot (e.g. in the optimizer)."""
+def tokenize(sql: str, read: DialectType = None, dialect: DialectType = None) -> t.List[Token]:
+ """
+ Tokenizes the given SQL string using the dialect selected by `read` or `dialect`.
+
+ Args:
+     sql: the SQL code string to tokenize.
+     read: the SQL dialect to apply during tokenizing (e.g. "spark", "hive", "presto", "mysql").
+     dialect: the SQL dialect (alias for read); only consulted when `read` is falsy.
+
+ Returns:
+     The list of tokens produced by the resolved dialect's `tokenize` method.
+ """
+ return Dialect.get_or_raise(read or dialect).tokenize(sql)
+
+
def parse(
sql: str, read: DialectType = None, dialect: DialectType = None, **opts
) -> t.List[t.Optional[Expression]]: