From 8d36f5966675e23bee7026ba37ae0647fbf47300 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 8 Apr 2024 10:11:53 +0200
Subject: Merging upstream version 23.7.0.

Signed-off-by: Daniel Baumann
---
 sqlglot/__init__.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'sqlglot/__init__.py')

diff --git a/sqlglot/__init__.py b/sqlglot/__init__.py
index e30232c..756532f 100644
--- a/sqlglot/__init__.py
+++ b/sqlglot/__init__.py
@@ -45,7 +45,7 @@ from sqlglot.expressions import (
 from sqlglot.generator import Generator as Generator
 from sqlglot.parser import Parser as Parser
 from sqlglot.schema import MappingSchema as MappingSchema, Schema as Schema
-from sqlglot.tokens import Tokenizer as Tokenizer, TokenType as TokenType
+from sqlglot.tokens import Token as Token, Tokenizer as Tokenizer, TokenType as TokenType
 
 if t.TYPE_CHECKING:
     from sqlglot._typing import E
@@ -69,6 +69,21 @@ schema = MappingSchema()
 """The default schema used by SQLGlot (e.g. in the optimizer)."""
 
 
+def tokenize(sql: str, read: DialectType = None, dialect: DialectType = None) -> t.List[Token]:
+    """
+    Tokenizes the given SQL string.
+
+    Args:
+        sql: the SQL code string to tokenize.
+        read: the SQL dialect to apply during tokenizing (eg. "spark", "hive", "presto", "mysql").
+        dialect: the SQL dialect (alias for read).
+
+    Returns:
+        The resulting list of tokens.
+    """
+    return Dialect.get_or_raise(read or dialect).tokenize(sql)
+
+
 def parse(
     sql: str, read: DialectType = None, dialect: DialectType = None, **opts
 ) -> t.List[t.Optional[Expression]]:
-- 
cgit v1.2.3