Edit on GitHub

sqlglot.dialects.clickhouse

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens
  6from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql
  7from sqlglot.errors import ParseError
  8from sqlglot.parser import parse_var_map
  9from sqlglot.tokens import TokenType
 10
 11
 12def _lower_func(sql: str) -> str:
 13    index = sql.index("(")
 14    return sql[:index].lower() + sql[index:]
 15
 16
 17class ClickHouse(Dialect):
 18    normalize_functions = None
 19    null_ordering = "nulls_are_last"
 20
 21    class Tokenizer(tokens.Tokenizer):
 22        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
 23        IDENTIFIERS = ['"', "`"]
 24
 25        KEYWORDS = {
 26            **tokens.Tokenizer.KEYWORDS,
 27            "ASOF": TokenType.ASOF,
 28            "GLOBAL": TokenType.GLOBAL,
 29            "DATETIME64": TokenType.DATETIME,
 30            "FINAL": TokenType.FINAL,
 31            "FLOAT32": TokenType.FLOAT,
 32            "FLOAT64": TokenType.DOUBLE,
 33            "INT16": TokenType.SMALLINT,
 34            "INT32": TokenType.INT,
 35            "INT64": TokenType.BIGINT,
 36            "INT8": TokenType.TINYINT,
 37            "TUPLE": TokenType.STRUCT,
 38        }
 39
 40    class Parser(parser.Parser):
 41        FUNCTIONS = {
 42            **parser.Parser.FUNCTIONS,  # type: ignore
 43            "MAP": parse_var_map,
 44            "QUANTILE": lambda params, args: exp.Quantile(this=args, quantile=params),
 45            "QUANTILES": lambda params, args: exp.Quantiles(parameters=params, expressions=args),
 46            "QUANTILEIF": lambda params, args: exp.QuantileIf(parameters=params, expressions=args),
 47        }
 48
 49        RANGE_PARSERS = {
 50            **parser.Parser.RANGE_PARSERS,
 51            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
 52            and self._parse_in(this, is_global=True),
 53        }
 54
 55        JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF}  # type: ignore
 56
 57        TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY}  # type: ignore
 58
 59        INTEGER_DIVISION = False
 60
 61        def _parse_in(
 62            self, this: t.Optional[exp.Expression], is_global: bool = False
 63        ) -> exp.Expression:
 64            this = super()._parse_in(this)
 65            this.set("is_global", is_global)
 66            return this
 67
 68        def _parse_table(
 69            self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
 70        ) -> t.Optional[exp.Expression]:
 71            this = super()._parse_table(schema=schema, alias_tokens=alias_tokens)
 72
 73            if self._match(TokenType.FINAL):
 74                this = self.expression(exp.Final, this=this)
 75
 76            return this
 77
 78        def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
 79            return super()._parse_position(haystack_first=True)
 80
 81        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
 82        def _parse_cte(self) -> exp.Expression:
 83            index = self._index
 84            try:
 85                # WITH <identifier> AS <subquery expression>
 86                return super()._parse_cte()
 87            except ParseError:
 88                # WITH <expression> AS <identifier>
 89                self._retreat(index)
 90                statement = self._parse_statement()
 91
 92                if statement and isinstance(statement.this, exp.Alias):
 93                    self.raise_error("Expected CTE to have alias")
 94
 95                return self.expression(exp.CTE, this=statement, alias=statement and statement.this)
 96
 97    class Generator(generator.Generator):
 98        STRUCT_DELIMITER = ("(", ")")
 99        INTEGER_DIVISION = False
100
101        TYPE_MAPPING = {
102            **generator.Generator.TYPE_MAPPING,  # type: ignore
103            exp.DataType.Type.NULLABLE: "Nullable",
104            exp.DataType.Type.DATETIME: "DateTime64",
105            exp.DataType.Type.MAP: "Map",
106            exp.DataType.Type.ARRAY: "Array",
107            exp.DataType.Type.STRUCT: "Tuple",
108            exp.DataType.Type.TINYINT: "Int8",
109            exp.DataType.Type.SMALLINT: "Int16",
110            exp.DataType.Type.INT: "Int32",
111            exp.DataType.Type.BIGINT: "Int64",
112            exp.DataType.Type.FLOAT: "Float32",
113            exp.DataType.Type.DOUBLE: "Float64",
114        }
115
116        TRANSFORMS = {
117            **generator.Generator.TRANSFORMS,  # type: ignore
118            exp.Array: inline_array_sql,
119            exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
120            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
121            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
122            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
123            exp.Quantile: lambda self, e: f"quantile{self._param_args_sql(e, 'quantile', 'this')}",
124            exp.Quantiles: lambda self, e: f"quantiles{self._param_args_sql(e, 'parameters', 'expressions')}",
125            exp.QuantileIf: lambda self, e: f"quantileIf{self._param_args_sql(e, 'parameters', 'expressions')}",
126        }
127
128        EXPLICIT_UNION = True
129
130        def _param_args_sql(
131            self, expression: exp.Expression, params_name: str, args_name: str
132        ) -> str:
133            params = self.format_args(self.expressions(expression, params_name))
134            args = self.format_args(self.expressions(expression, args_name))
135            return f"({params})({args})"
136
137        def cte_sql(self, expression: exp.CTE) -> str:
138            if isinstance(expression.this, exp.Alias):
139                return self.sql(expression, "this")
140
141            return super().cte_sql(expression)
class ClickHouse(sqlglot.dialects.dialect.Dialect):
 18class ClickHouse(Dialect):
 19    normalize_functions = None
 20    null_ordering = "nulls_are_last"
 21
 22    class Tokenizer(tokens.Tokenizer):
 23        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
 24        IDENTIFIERS = ['"', "`"]
 25
 26        KEYWORDS = {
 27            **tokens.Tokenizer.KEYWORDS,
 28            "ASOF": TokenType.ASOF,
 29            "GLOBAL": TokenType.GLOBAL,
 30            "DATETIME64": TokenType.DATETIME,
 31            "FINAL": TokenType.FINAL,
 32            "FLOAT32": TokenType.FLOAT,
 33            "FLOAT64": TokenType.DOUBLE,
 34            "INT16": TokenType.SMALLINT,
 35            "INT32": TokenType.INT,
 36            "INT64": TokenType.BIGINT,
 37            "INT8": TokenType.TINYINT,
 38            "TUPLE": TokenType.STRUCT,
 39        }
 40
 41    class Parser(parser.Parser):
 42        FUNCTIONS = {
 43            **parser.Parser.FUNCTIONS,  # type: ignore
 44            "MAP": parse_var_map,
 45            "QUANTILE": lambda params, args: exp.Quantile(this=args, quantile=params),
 46            "QUANTILES": lambda params, args: exp.Quantiles(parameters=params, expressions=args),
 47            "QUANTILEIF": lambda params, args: exp.QuantileIf(parameters=params, expressions=args),
 48        }
 49
 50        RANGE_PARSERS = {
 51            **parser.Parser.RANGE_PARSERS,
 52            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
 53            and self._parse_in(this, is_global=True),
 54        }
 55
 56        JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF}  # type: ignore
 57
 58        TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY}  # type: ignore
 59
 60        INTEGER_DIVISION = False
 61
 62        def _parse_in(
 63            self, this: t.Optional[exp.Expression], is_global: bool = False
 64        ) -> exp.Expression:
 65            this = super()._parse_in(this)
 66            this.set("is_global", is_global)
 67            return this
 68
 69        def _parse_table(
 70            self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
 71        ) -> t.Optional[exp.Expression]:
 72            this = super()._parse_table(schema=schema, alias_tokens=alias_tokens)
 73
 74            if self._match(TokenType.FINAL):
 75                this = self.expression(exp.Final, this=this)
 76
 77            return this
 78
 79        def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
 80            return super()._parse_position(haystack_first=True)
 81
 82        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
 83        def _parse_cte(self) -> exp.Expression:
 84            index = self._index
 85            try:
 86                # WITH <identifier> AS <subquery expression>
 87                return super()._parse_cte()
 88            except ParseError:
 89                # WITH <expression> AS <identifier>
 90                self._retreat(index)
 91                statement = self._parse_statement()
 92
 93                if statement and isinstance(statement.this, exp.Alias):
 94                    self.raise_error("Expected CTE to have alias")
 95
 96                return self.expression(exp.CTE, this=statement, alias=statement and statement.this)
 97
 98    class Generator(generator.Generator):
 99        STRUCT_DELIMITER = ("(", ")")
100        INTEGER_DIVISION = False
101
102        TYPE_MAPPING = {
103            **generator.Generator.TYPE_MAPPING,  # type: ignore
104            exp.DataType.Type.NULLABLE: "Nullable",
105            exp.DataType.Type.DATETIME: "DateTime64",
106            exp.DataType.Type.MAP: "Map",
107            exp.DataType.Type.ARRAY: "Array",
108            exp.DataType.Type.STRUCT: "Tuple",
109            exp.DataType.Type.TINYINT: "Int8",
110            exp.DataType.Type.SMALLINT: "Int16",
111            exp.DataType.Type.INT: "Int32",
112            exp.DataType.Type.BIGINT: "Int64",
113            exp.DataType.Type.FLOAT: "Float32",
114            exp.DataType.Type.DOUBLE: "Float64",
115        }
116
117        TRANSFORMS = {
118            **generator.Generator.TRANSFORMS,  # type: ignore
119            exp.Array: inline_array_sql,
120            exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
121            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
122            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
123            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
124            exp.Quantile: lambda self, e: f"quantile{self._param_args_sql(e, 'quantile', 'this')}",
125            exp.Quantiles: lambda self, e: f"quantiles{self._param_args_sql(e, 'parameters', 'expressions')}",
126            exp.QuantileIf: lambda self, e: f"quantileIf{self._param_args_sql(e, 'parameters', 'expressions')}",
127        }
128
129        EXPLICIT_UNION = True
130
131        def _param_args_sql(
132            self, expression: exp.Expression, params_name: str, args_name: str
133        ) -> str:
134            params = self.format_args(self.expressions(expression, params_name))
135            args = self.format_args(self.expressions(expression, args_name))
136            return f"({params})({args})"
137
138        def cte_sql(self, expression: exp.CTE) -> str:
139            if isinstance(expression.this, exp.Alias):
140                return self.sql(expression, "this")
141
142            return super().cte_sql(expression)
class ClickHouse.Tokenizer(sqlglot.tokens.Tokenizer):
22    class Tokenizer(tokens.Tokenizer):
23        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
24        IDENTIFIERS = ['"', "`"]
25
26        KEYWORDS = {
27            **tokens.Tokenizer.KEYWORDS,
28            "ASOF": TokenType.ASOF,
29            "GLOBAL": TokenType.GLOBAL,
30            "DATETIME64": TokenType.DATETIME,
31            "FINAL": TokenType.FINAL,
32            "FLOAT32": TokenType.FLOAT,
33            "FLOAT64": TokenType.DOUBLE,
34            "INT16": TokenType.SMALLINT,
35            "INT32": TokenType.INT,
36            "INT64": TokenType.BIGINT,
37            "INT8": TokenType.TINYINT,
38            "TUPLE": TokenType.STRUCT,
39        }
class ClickHouse.Parser(sqlglot.parser.Parser):
41    class Parser(parser.Parser):
42        FUNCTIONS = {
43            **parser.Parser.FUNCTIONS,  # type: ignore
44            "MAP": parse_var_map,
45            "QUANTILE": lambda params, args: exp.Quantile(this=args, quantile=params),
46            "QUANTILES": lambda params, args: exp.Quantiles(parameters=params, expressions=args),
47            "QUANTILEIF": lambda params, args: exp.QuantileIf(parameters=params, expressions=args),
48        }
49
50        RANGE_PARSERS = {
51            **parser.Parser.RANGE_PARSERS,
52            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
53            and self._parse_in(this, is_global=True),
54        }
55
56        JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF}  # type: ignore
57
58        TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY}  # type: ignore
59
60        INTEGER_DIVISION = False
61
62        def _parse_in(
63            self, this: t.Optional[exp.Expression], is_global: bool = False
64        ) -> exp.Expression:
65            this = super()._parse_in(this)
66            this.set("is_global", is_global)
67            return this
68
69        def _parse_table(
70            self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
71        ) -> t.Optional[exp.Expression]:
72            this = super()._parse_table(schema=schema, alias_tokens=alias_tokens)
73
74            if self._match(TokenType.FINAL):
75                this = self.expression(exp.Final, this=this)
76
77            return this
78
79        def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
80            return super()._parse_position(haystack_first=True)
81
82        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
83        def _parse_cte(self) -> exp.Expression:
84            index = self._index
85            try:
86                # WITH <identifier> AS <subquery expression>
87                return super()._parse_cte()
88            except ParseError:
89                # WITH <expression> AS <identifier>
90                self._retreat(index)
91                statement = self._parse_statement()
92
93                if statement and isinstance(statement.this, exp.Alias):
94                    self.raise_error("Expected CTE to have alias")
95
96                return self.expression(exp.CTE, this=statement, alias=statement and statement.this)

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
class ClickHouse.Generator(sqlglot.generator.Generator):
 98    class Generator(generator.Generator):
 99        STRUCT_DELIMITER = ("(", ")")
100        INTEGER_DIVISION = False
101
102        TYPE_MAPPING = {
103            **generator.Generator.TYPE_MAPPING,  # type: ignore
104            exp.DataType.Type.NULLABLE: "Nullable",
105            exp.DataType.Type.DATETIME: "DateTime64",
106            exp.DataType.Type.MAP: "Map",
107            exp.DataType.Type.ARRAY: "Array",
108            exp.DataType.Type.STRUCT: "Tuple",
109            exp.DataType.Type.TINYINT: "Int8",
110            exp.DataType.Type.SMALLINT: "Int16",
111            exp.DataType.Type.INT: "Int32",
112            exp.DataType.Type.BIGINT: "Int64",
113            exp.DataType.Type.FLOAT: "Float32",
114            exp.DataType.Type.DOUBLE: "Float64",
115        }
116
117        TRANSFORMS = {
118            **generator.Generator.TRANSFORMS,  # type: ignore
119            exp.Array: inline_array_sql,
120            exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
121            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
122            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
123            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
124            exp.Quantile: lambda self, e: f"quantile{self._param_args_sql(e, 'quantile', 'this')}",
125            exp.Quantiles: lambda self, e: f"quantiles{self._param_args_sql(e, 'parameters', 'expressions')}",
126            exp.QuantileIf: lambda self, e: f"quantileIf{self._param_args_sql(e, 'parameters', 'expressions')}",
127        }
128
129        EXPLICIT_UNION = True
130
131        def _param_args_sql(
132            self, expression: exp.Expression, params_name: str, args_name: str
133        ) -> str:
134            params = self.format_args(self.expressions(expression, params_name))
135            args = self.format_args(self.expressions(expression, args_name))
136            return f"({params})({args})"
137
138        def cte_sql(self, expression: exp.CTE) -> str:
139            if isinstance(expression.this, exp.Alias):
140                return self.sql(expression, "this")
141
142            return super().cte_sql(expression)

Generator interprets the given syntax tree and produces a SQL string as an output.

Arguments:
  • time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the output the target time format
  • time_trie (trie): a trie of the time_mapping keys
  • pretty (bool): if set to True the returned string will be formatted. Default: False.
  • quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
  • quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
  • identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
  • identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
  • identify (bool): if set to True all identifiers will be delimited by the corresponding character.
  • normalize (bool): if set to True all identifiers will be lowercased
  • string_escape (str): specifies a string escape character. Default: '.
  • identifier_escape (str): specifies an identifier escape character. Default: ".
  • pad (int): determines padding in a formatted string. Default: 2.
  • indent (int): determines the size of indentation in a formatted string. Default: 4.
  • unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
  • normalize_functions (str): normalize function names: "upper", "lower", or None. Default: "upper"
  • alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False
  • unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
  • max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
def cte_sql(self, expression: sqlglot.expressions.CTE) -> str:
138        def cte_sql(self, expression: exp.CTE) -> str:
139            if isinstance(expression.this, exp.Alias):
140                return self.sql(expression, "this")
141
142            return super().cte_sql(expression)
Inherited Members
sqlglot.generator.Generator
Generator
generate
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columndef_sql
columnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
create_sql
describe_sql
prepend_ctes
with_sql
tablealias_sql
bitstring_sql
hexstring_sql
datatype_sql
directory_sql
delete_sql
drop_sql
except_sql
except_op
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
afterjournalproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
lockingproperty_sql
withdataproperty_sql
insert_sql
intersect_sql
intersect_op
introducer_sql
pseudotype_sql
returning_sql
rowformatdelimitedproperty_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
group_sql
having_sql
join_sql
lambda_sql
lateral_sql
limit_sql
offset_sql
lock_sql
literal_sql
loaddata_sql
null_sql
boolean_sql
order_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
select_sql
schema_sql
star_sql
structkwarg_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
window_spec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
extract_sql
trim_sql
concat_sql
check_sql
foreignkey_sql
primarykey_sql
unique_sql
if_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
aliases_sql
attimezone_sql
add_sql
and_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
collate_sql
command_sql
comment_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
altertable_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
intdiv_sql
dpipe_sql
div_sql
floatdiv_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
is_sql
like_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql