diff options
Diffstat (limited to 'sqlglot/dialects')
-rw-r--r-- | sqlglot/dialects/__init__.py | 15 | ||||
-rw-r--r-- | sqlglot/dialects/bigquery.py | 7 | ||||
-rw-r--r-- | sqlglot/dialects/duckdb.py | 9 | ||||
-rw-r--r-- | sqlglot/dialects/hive.py | 2 | ||||
-rw-r--r-- | sqlglot/dialects/mysql.py | 2 | ||||
-rw-r--r-- | sqlglot/dialects/oracle.py | 10 | ||||
-rw-r--r-- | sqlglot/dialects/postgres.py | 14 | ||||
-rw-r--r-- | sqlglot/dialects/presto.py | 2 | ||||
-rw-r--r-- | sqlglot/dialects/redshift.py | 2 | ||||
-rw-r--r-- | sqlglot/dialects/snowflake.py | 6 | ||||
-rw-r--r-- | sqlglot/dialects/spark.py | 3 |
11 files changed, 45 insertions, 27 deletions
diff --git a/sqlglot/dialects/__init__.py b/sqlglot/dialects/__init__.py index 34cf613..191e703 100644 --- a/sqlglot/dialects/__init__.py +++ b/sqlglot/dialects/__init__.py @@ -1,17 +1,14 @@ """ ## Dialects -One of the core abstractions in SQLGlot is the concept of a "dialect". The `Dialect` class essentially implements a -"SQLGlot dialect", which aims to be as generic and ANSI-compliant as possible. It relies on the base `Tokenizer`, -`Parser` and `Generator` classes to achieve this goal, so these need to be very lenient when it comes to consuming -SQL code. +While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult +to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible +SQL transpilation framework. -However, there are cases where the syntax of different SQL dialects varies wildly, even for common tasks. One such -example is the date/time functions, which can be hard to deal with. For this reason, it's sometimes necessary to -override the base dialect in order to specialize its behavior. This can be easily done in SQLGlot: supporting new -dialects is as simple as subclassing from `Dialect` and overriding its various components (e.g. the `Parser` class), -in order to implement the target behavior. +The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible. +Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator` +classes as needed. ### Implementing a custom Dialect diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py index e7d30ec..27dca48 100644 --- a/sqlglot/dialects/bigquery.py +++ b/sqlglot/dialects/bigquery.py @@ -169,6 +169,13 @@ class BigQuery(Dialect): TokenType.VALUES, } + PROPERTY_PARSERS = { + **parser.Parser.PROPERTY_PARSERS, # type: ignore + "NOT DETERMINISTIC": lambda self: self.expression( + exp.VolatilityProperty, this=exp.Literal.string("VOLATILE") + ), + } + class Generator(generator.Generator): TRANSFORMS = { **generator.Generator.TRANSFORMS, # type: ignore diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py index 81941f7..4646eb4 100644 --- a/sqlglot/dialects/duckdb.py +++ b/sqlglot/dialects/duckdb.py @@ -66,12 +66,11 @@ def _sort_array_reverse(args): return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) -def _struct_pack_sql(self, expression): +def _struct_sql(self, expression): args = [ - self.binary(e, ":=") if isinstance(e, exp.EQ) else self.sql(e) - for e in expression.expressions + f"'{e.name or e.this.name}': {self.sql(e, 'expression')}" for e in expression.expressions ] - return f"STRUCT_PACK({', '.join(args)})" + return f"{{{', '.join(args)}}}" def _datatype_sql(self, expression): @@ -153,7 +152,7 @@ class DuckDB(Dialect): exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", exp.StrToTime: _str_to_time_sql, exp.StrToUnix: lambda self, e: f"EPOCH(STRPTIME({self.sql(e, 'this')}, {self.format_time(e)}))", - exp.Struct: _struct_pack_sql, + exp.Struct: _struct_sql, exp.TableSample: no_tablesample_sql, exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToTime: timestrtotime_sql, diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index ddfd1e8..4bbec70 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -251,7 +251,7 @@ class Hive(Dialect): PROPERTY_PARSERS = { **parser.Parser.PROPERTY_PARSERS, # type: ignore - TokenType.SERDE_PROPERTIES: lambda self: exp.SerdeProperties( + "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties( expressions=self._parse_wrapped_csv(self._parse_property) ), } diff --git a/sqlglot/dialects/mysql.py b/sqlglot/dialects/mysql.py index 2a0a917..cd8c30c 100644 --- a/sqlglot/dialects/mysql.py +++ b/sqlglot/dialects/mysql.py @@ -202,7 +202,7 @@ class MySQL(Dialect): PROPERTY_PARSERS = { **parser.Parser.PROPERTY_PARSERS, # type: ignore - TokenType.ENGINE: lambda self: self._parse_property_assignment(exp.EngineProperty), + "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), } STATEMENT_PARSERS = { diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py index 86caa6b..67d791d 100644 --- a/sqlglot/dialects/oracle.py +++ b/sqlglot/dialects/oracle.py @@ -74,13 +74,16 @@ class Oracle(Dialect): def query_modifiers(self, expression, *sqls): return csv( *sqls, - *[self.sql(sql) for sql in expression.args.get("laterals", [])], - *[self.sql(sql) for sql in expression.args.get("joins", [])], + *[self.sql(sql) for sql in expression.args.get("joins") or []], + self.sql(expression, "match"), + *[self.sql(sql) for sql in expression.args.get("laterals") or []], self.sql(expression, "where"), self.sql(expression, "group"), self.sql(expression, "having"), self.sql(expression, "qualify"), - self.sql(expression, "window"), + self.seg("WINDOW ") + self.expressions(expression, "windows", flat=True) + if expression.args.get("windows") + else "", self.sql(expression, "distribute"), self.sql(expression, "sort"), self.sql(expression, "cluster"), @@ -99,6 +102,7 @@ class Oracle(Dialect): class Tokenizer(tokens.Tokenizer): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, + "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, "MINUS": TokenType.EXCEPT, "START": TokenType.BEGIN, "TOP": TokenType.TOP, diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py index 6f597f1..0d74b3a 100644 --- a/sqlglot/dialects/postgres.py +++ b/sqlglot/dialects/postgres.py @@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import ( no_paren_current_date_sql, no_tablesample_sql, no_trycast_sql, + rename_func, str_position_sql, trim_sql, ) @@ -260,6 +261,16 @@ class Postgres(Dialect): "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"), } + BITWISE = { + **parser.Parser.BITWISE, # type: ignore + TokenType.HASH: exp.BitwiseXor, + } + + FACTOR = { + **parser.Parser.FACTOR, # type: ignore + TokenType.CARET: exp.Pow, + } + class Generator(generator.Generator): TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, # type: ignore @@ -273,6 +284,7 @@ class Postgres(Dialect): TRANSFORMS = { **generator.Generator.TRANSFORMS, # type: ignore + exp.BitwiseXor: lambda self, e: self.binary(e, "#"), exp.ColumnDef: preprocess( [ _auto_increment_to_serial, @@ -285,11 +297,13 @@ class Postgres(Dialect): exp.JSONBExtract: lambda self, e: self.binary(e, "#>"), exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"), exp.JSONBContains: lambda self, e: self.binary(e, "?"), + exp.Pow: lambda self, e: self.binary(e, "^"), exp.CurrentDate: no_paren_current_date_sql, exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.DateAdd: _date_add_sql("+"), exp.DateSub: _date_add_sql("-"), exp.DateDiff: _date_diff_sql, + exp.LogicalOr: rename_func("BOOL_OR"), exp.RegexpLike: lambda self, e: self.binary(e, "~"), exp.RegexpILike: lambda self, e: self.binary(e, "~*"), exp.StrPosition: str_position_sql, diff --git a/sqlglot/dialects/presto.py b/sqlglot/dialects/presto.py index a79a9f9..8175d6f 100644 --- a/sqlglot/dialects/presto.py +++ b/sqlglot/dialects/presto.py @@ -174,6 +174,7 @@ class Presto(Dialect): "DATE_FORMAT": format_time_lambda(exp.TimeToStr, "presto"), "DATE_PARSE": format_time_lambda(exp.StrToTime, "presto"), "FROM_UNIXTIME": _from_unixtime, + "NOW": exp.CurrentTimestamp.from_arg_list, "STRPOS": lambda args: exp.StrPosition( this=seq_get(args, 0), substr=seq_get(args, 1), @@ -194,7 +195,6 @@ class Presto(Dialect): FUNCTION_PARSERS.pop("TRIM") class Generator(generator.Generator): - STRUCT_DELIMITER = ("(", ")") ROOT_PROPERTIES = {exp.SchemaCommentProperty} diff --git a/sqlglot/dialects/redshift.py b/sqlglot/dialects/redshift.py index afd7913..7da881f 100644 --- a/sqlglot/dialects/redshift.py +++ b/sqlglot/dialects/redshift.py @@ -93,7 +93,7 @@ class Redshift(Postgres): rows = [tuple_exp.expressions for tuple_exp in expression.expressions] selects = [] for i, row in enumerate(rows): - if i == 0: + if i == 0 and expression.alias: row = [ exp.alias_(value, column_name) for value, column_name in zip(row, expression.args["alias"].args["columns"]) diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py index 6225a53..db72a34 100644 --- a/sqlglot/dialects/snowflake.py +++ b/sqlglot/dialects/snowflake.py @@ -178,11 +178,6 @@ class Snowflake(Dialect): ), } - PROPERTY_PARSERS = { - **parser.Parser.PROPERTY_PARSERS, - TokenType.PARTITION_BY: lambda self: self._parse_partitioned_by(), - } - class Tokenizer(tokens.Tokenizer): QUOTES = ["'", "$$"] ESCAPES = ["\\", "'"] @@ -195,6 +190,7 @@ class Snowflake(Dialect): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "EXCLUDE": TokenType.EXCEPT, + "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, "RENAME": TokenType.REPLACE, "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, "TIMESTAMP_NTZ": TokenType.TIMESTAMP, diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py index 42d34c2..fc711ab 100644 --- a/sqlglot/dialects/spark.py +++ b/sqlglot/dialects/spark.py @@ -1,7 +1,7 @@ from __future__ import annotations from sqlglot import exp, parser -from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func +from sqlglot.dialects.dialect import create_with_partitions_sql, rename_func, trim_sql from sqlglot.dialects.hive import Hive from sqlglot.helper import seq_get @@ -122,6 +122,7 @@ class Spark(Hive): exp.Reduce: rename_func("AGGREGATE"), exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}", exp.TimestampTrunc: lambda self, e: f"DATE_TRUNC({self.sql(e, 'unit')}, {self.sql(e, 'this')})", + exp.Trim: trim_sql, exp.VariancePop: rename_func("VAR_POP"), exp.DateFromParts: rename_func("MAKE_DATE"), exp.LogicalOr: rename_func("BOOL_OR"), |