Edit on GitHub

sqlglot.dialects.clickhouse

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens
  6from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql
  7from sqlglot.errors import ParseError
  8from sqlglot.parser import parse_var_map
  9from sqlglot.tokens import TokenType
 10
 11
 12def _lower_func(sql: str) -> str:
 13    index = sql.index("(")
 14    return sql[:index].lower() + sql[index:]
 15
 16
 17class ClickHouse(Dialect):
 18    normalize_functions = None
 19    null_ordering = "nulls_are_last"
 20
 21    class Tokenizer(tokens.Tokenizer):
 22        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
 23        IDENTIFIERS = ['"', "`"]
 24
 25        KEYWORDS = {
 26            **tokens.Tokenizer.KEYWORDS,
 27            "ASOF": TokenType.ASOF,
 28            "GLOBAL": TokenType.GLOBAL,
 29            "DATETIME64": TokenType.DATETIME,
 30            "FINAL": TokenType.FINAL,
 31            "FLOAT32": TokenType.FLOAT,
 32            "FLOAT64": TokenType.DOUBLE,
 33            "INT16": TokenType.SMALLINT,
 34            "INT32": TokenType.INT,
 35            "INT64": TokenType.BIGINT,
 36            "INT8": TokenType.TINYINT,
 37            "TUPLE": TokenType.STRUCT,
 38        }
 39
 40    class Parser(parser.Parser):
 41        FUNCTIONS = {
 42            **parser.Parser.FUNCTIONS,  # type: ignore
 43            "MAP": parse_var_map,
 44            "QUANTILE": lambda params, args: exp.Quantile(this=args, quantile=params),
 45            "QUANTILES": lambda params, args: exp.Quantiles(parameters=params, expressions=args),
 46            "QUANTILEIF": lambda params, args: exp.QuantileIf(parameters=params, expressions=args),
 47        }
 48
 49        RANGE_PARSERS = {
 50            **parser.Parser.RANGE_PARSERS,
 51            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
 52            and self._parse_in(this, is_global=True),
 53        }
 54
 55        JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF}  # type: ignore
 56
 57        TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY}  # type: ignore
 58
 59        def _parse_in(
 60            self, this: t.Optional[exp.Expression], is_global: bool = False
 61        ) -> exp.Expression:
 62            this = super()._parse_in(this)
 63            this.set("is_global", is_global)
 64            return this
 65
 66        def _parse_table(
 67            self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
 68        ) -> t.Optional[exp.Expression]:
 69            this = super()._parse_table(schema=schema, alias_tokens=alias_tokens)
 70
 71            if self._match(TokenType.FINAL):
 72                this = self.expression(exp.Final, this=this)
 73
 74            return this
 75
 76        def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
 77            return super()._parse_position(haystack_first=True)
 78
 79        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
 80        def _parse_cte(self) -> exp.Expression:
 81            index = self._index
 82            try:
 83                # WITH <identifier> AS <subquery expression>
 84                return super()._parse_cte()
 85            except ParseError:
 86                # WITH <expression> AS <identifier>
 87                self._retreat(index)
 88                statement = self._parse_statement()
 89
 90                if statement and isinstance(statement.this, exp.Alias):
 91                    self.raise_error("Expected CTE to have alias")
 92
 93                return self.expression(exp.CTE, this=statement, alias=statement and statement.this)
 94
 95    class Generator(generator.Generator):
 96        STRUCT_DELIMITER = ("(", ")")
 97
 98        TYPE_MAPPING = {
 99            **generator.Generator.TYPE_MAPPING,  # type: ignore
100            exp.DataType.Type.NULLABLE: "Nullable",
101            exp.DataType.Type.DATETIME: "DateTime64",
102            exp.DataType.Type.MAP: "Map",
103            exp.DataType.Type.ARRAY: "Array",
104            exp.DataType.Type.STRUCT: "Tuple",
105            exp.DataType.Type.TINYINT: "Int8",
106            exp.DataType.Type.SMALLINT: "Int16",
107            exp.DataType.Type.INT: "Int32",
108            exp.DataType.Type.BIGINT: "Int64",
109            exp.DataType.Type.FLOAT: "Float32",
110            exp.DataType.Type.DOUBLE: "Float64",
111        }
112
113        TRANSFORMS = {
114            **generator.Generator.TRANSFORMS,  # type: ignore
115            exp.Array: inline_array_sql,
116            exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
117            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
118            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
119            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
120            exp.Quantile: lambda self, e: f"quantile{self._param_args_sql(e, 'quantile', 'this')}",
121            exp.Quantiles: lambda self, e: f"quantiles{self._param_args_sql(e, 'parameters', 'expressions')}",
122            exp.QuantileIf: lambda self, e: f"quantileIf{self._param_args_sql(e, 'parameters', 'expressions')}",
123        }
124
125        EXPLICIT_UNION = True
126
127        def _param_args_sql(
128            self, expression: exp.Expression, params_name: str, args_name: str
129        ) -> str:
130            params = self.format_args(self.expressions(expression, params_name))
131            args = self.format_args(self.expressions(expression, args_name))
132            return f"({params})({args})"
133
134        def cte_sql(self, expression: exp.CTE) -> str:
135            if isinstance(expression.this, exp.Alias):
136                return self.sql(expression, "this")
137
138            return super().cte_sql(expression)
class ClickHouse(sqlglot.dialects.dialect.Dialect):
 18class ClickHouse(Dialect):
 19    normalize_functions = None
 20    null_ordering = "nulls_are_last"
 21
 22    class Tokenizer(tokens.Tokenizer):
 23        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
 24        IDENTIFIERS = ['"', "`"]
 25
 26        KEYWORDS = {
 27            **tokens.Tokenizer.KEYWORDS,
 28            "ASOF": TokenType.ASOF,
 29            "GLOBAL": TokenType.GLOBAL,
 30            "DATETIME64": TokenType.DATETIME,
 31            "FINAL": TokenType.FINAL,
 32            "FLOAT32": TokenType.FLOAT,
 33            "FLOAT64": TokenType.DOUBLE,
 34            "INT16": TokenType.SMALLINT,
 35            "INT32": TokenType.INT,
 36            "INT64": TokenType.BIGINT,
 37            "INT8": TokenType.TINYINT,
 38            "TUPLE": TokenType.STRUCT,
 39        }
 40
 41    class Parser(parser.Parser):
 42        FUNCTIONS = {
 43            **parser.Parser.FUNCTIONS,  # type: ignore
 44            "MAP": parse_var_map,
 45            "QUANTILE": lambda params, args: exp.Quantile(this=args, quantile=params),
 46            "QUANTILES": lambda params, args: exp.Quantiles(parameters=params, expressions=args),
 47            "QUANTILEIF": lambda params, args: exp.QuantileIf(parameters=params, expressions=args),
 48        }
 49
 50        RANGE_PARSERS = {
 51            **parser.Parser.RANGE_PARSERS,
 52            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
 53            and self._parse_in(this, is_global=True),
 54        }
 55
 56        JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF}  # type: ignore
 57
 58        TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY}  # type: ignore
 59
 60        def _parse_in(
 61            self, this: t.Optional[exp.Expression], is_global: bool = False
 62        ) -> exp.Expression:
 63            this = super()._parse_in(this)
 64            this.set("is_global", is_global)
 65            return this
 66
 67        def _parse_table(
 68            self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
 69        ) -> t.Optional[exp.Expression]:
 70            this = super()._parse_table(schema=schema, alias_tokens=alias_tokens)
 71
 72            if self._match(TokenType.FINAL):
 73                this = self.expression(exp.Final, this=this)
 74
 75            return this
 76
 77        def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
 78            return super()._parse_position(haystack_first=True)
 79
 80        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
 81        def _parse_cte(self) -> exp.Expression:
 82            index = self._index
 83            try:
 84                # WITH <identifier> AS <subquery expression>
 85                return super()._parse_cte()
 86            except ParseError:
 87                # WITH <expression> AS <identifier>
 88                self._retreat(index)
 89                statement = self._parse_statement()
 90
 91                if statement and isinstance(statement.this, exp.Alias):
 92                    self.raise_error("Expected CTE to have alias")
 93
 94                return self.expression(exp.CTE, this=statement, alias=statement and statement.this)
 95
 96    class Generator(generator.Generator):
 97        STRUCT_DELIMITER = ("(", ")")
 98
 99        TYPE_MAPPING = {
100            **generator.Generator.TYPE_MAPPING,  # type: ignore
101            exp.DataType.Type.NULLABLE: "Nullable",
102            exp.DataType.Type.DATETIME: "DateTime64",
103            exp.DataType.Type.MAP: "Map",
104            exp.DataType.Type.ARRAY: "Array",
105            exp.DataType.Type.STRUCT: "Tuple",
106            exp.DataType.Type.TINYINT: "Int8",
107            exp.DataType.Type.SMALLINT: "Int16",
108            exp.DataType.Type.INT: "Int32",
109            exp.DataType.Type.BIGINT: "Int64",
110            exp.DataType.Type.FLOAT: "Float32",
111            exp.DataType.Type.DOUBLE: "Float64",
112        }
113
114        TRANSFORMS = {
115            **generator.Generator.TRANSFORMS,  # type: ignore
116            exp.Array: inline_array_sql,
117            exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
118            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
119            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
120            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
121            exp.Quantile: lambda self, e: f"quantile{self._param_args_sql(e, 'quantile', 'this')}",
122            exp.Quantiles: lambda self, e: f"quantiles{self._param_args_sql(e, 'parameters', 'expressions')}",
123            exp.QuantileIf: lambda self, e: f"quantileIf{self._param_args_sql(e, 'parameters', 'expressions')}",
124        }
125
126        EXPLICIT_UNION = True
127
128        def _param_args_sql(
129            self, expression: exp.Expression, params_name: str, args_name: str
130        ) -> str:
131            params = self.format_args(self.expressions(expression, params_name))
132            args = self.format_args(self.expressions(expression, args_name))
133            return f"({params})({args})"
134
135        def cte_sql(self, expression: exp.CTE) -> str:
136            if isinstance(expression.this, exp.Alias):
137                return self.sql(expression, "this")
138
139            return super().cte_sql(expression)
class ClickHouse.Tokenizer(sqlglot.tokens.Tokenizer):
22    class Tokenizer(tokens.Tokenizer):
23        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
24        IDENTIFIERS = ['"', "`"]
25
26        KEYWORDS = {
27            **tokens.Tokenizer.KEYWORDS,
28            "ASOF": TokenType.ASOF,
29            "GLOBAL": TokenType.GLOBAL,
30            "DATETIME64": TokenType.DATETIME,
31            "FINAL": TokenType.FINAL,
32            "FLOAT32": TokenType.FLOAT,
33            "FLOAT64": TokenType.DOUBLE,
34            "INT16": TokenType.SMALLINT,
35            "INT32": TokenType.INT,
36            "INT64": TokenType.BIGINT,
37            "INT8": TokenType.TINYINT,
38            "TUPLE": TokenType.STRUCT,
39        }
class ClickHouse.Parser(sqlglot.parser.Parser):
41    class Parser(parser.Parser):
42        FUNCTIONS = {
43            **parser.Parser.FUNCTIONS,  # type: ignore
44            "MAP": parse_var_map,
45            "QUANTILE": lambda params, args: exp.Quantile(this=args, quantile=params),
46            "QUANTILES": lambda params, args: exp.Quantiles(parameters=params, expressions=args),
47            "QUANTILEIF": lambda params, args: exp.QuantileIf(parameters=params, expressions=args),
48        }
49
50        RANGE_PARSERS = {
51            **parser.Parser.RANGE_PARSERS,
52            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
53            and self._parse_in(this, is_global=True),
54        }
55
56        JOIN_KINDS = {*parser.Parser.JOIN_KINDS, TokenType.ANY, TokenType.ASOF}  # type: ignore
57
58        TABLE_ALIAS_TOKENS = {*parser.Parser.TABLE_ALIAS_TOKENS} - {TokenType.ANY}  # type: ignore
59
60        def _parse_in(
61            self, this: t.Optional[exp.Expression], is_global: bool = False
62        ) -> exp.Expression:
63            this = super()._parse_in(this)
64            this.set("is_global", is_global)
65            return this
66
67        def _parse_table(
68            self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
69        ) -> t.Optional[exp.Expression]:
70            this = super()._parse_table(schema=schema, alias_tokens=alias_tokens)
71
72            if self._match(TokenType.FINAL):
73                this = self.expression(exp.Final, this=this)
74
75            return this
76
77        def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
78            return super()._parse_position(haystack_first=True)
79
80        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
81        def _parse_cte(self) -> exp.Expression:
82            index = self._index
83            try:
84                # WITH <identifier> AS <subquery expression>
85                return super()._parse_cte()
86            except ParseError:
87                # WITH <expression> AS <identifier>
88                self._retreat(index)
89                statement = self._parse_statement()
90
91                if statement and isinstance(statement.this, exp.Alias):
92                    self.raise_error("Expected CTE to have alias")
93
94                return self.expression(exp.CTE, this=statement, alias=statement and statement.this)

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. whether ARRAY[0] or ARRAY[1] is the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
class ClickHouse.Generator(sqlglot.generator.Generator):
 96    class Generator(generator.Generator):
 97        STRUCT_DELIMITER = ("(", ")")
 98
 99        TYPE_MAPPING = {
100            **generator.Generator.TYPE_MAPPING,  # type: ignore
101            exp.DataType.Type.NULLABLE: "Nullable",
102            exp.DataType.Type.DATETIME: "DateTime64",
103            exp.DataType.Type.MAP: "Map",
104            exp.DataType.Type.ARRAY: "Array",
105            exp.DataType.Type.STRUCT: "Tuple",
106            exp.DataType.Type.TINYINT: "Int8",
107            exp.DataType.Type.SMALLINT: "Int16",
108            exp.DataType.Type.INT: "Int32",
109            exp.DataType.Type.BIGINT: "Int64",
110            exp.DataType.Type.FLOAT: "Float32",
111            exp.DataType.Type.DOUBLE: "Float64",
112        }
113
114        TRANSFORMS = {
115            **generator.Generator.TRANSFORMS,  # type: ignore
116            exp.Array: inline_array_sql,
117            exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
118            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
119            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
120            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
121            exp.Quantile: lambda self, e: f"quantile{self._param_args_sql(e, 'quantile', 'this')}",
122            exp.Quantiles: lambda self, e: f"quantiles{self._param_args_sql(e, 'parameters', 'expressions')}",
123            exp.QuantileIf: lambda self, e: f"quantileIf{self._param_args_sql(e, 'parameters', 'expressions')}",
124        }
125
126        EXPLICIT_UNION = True
127
128        def _param_args_sql(
129            self, expression: exp.Expression, params_name: str, args_name: str
130        ) -> str:
131            params = self.format_args(self.expressions(expression, params_name))
132            args = self.format_args(self.expressions(expression, args_name))
133            return f"({params})({args})"
134
135        def cte_sql(self, expression: exp.CTE) -> str:
136            if isinstance(expression.this, exp.Alias):
137                return self.sql(expression, "this")
138
139            return super().cte_sql(expression)

Generator interprets the given syntax tree and produces a SQL string as an output.

Arguments:
  • time_mapping (dict): the dictionary of custom time mappings in which the key represents a Python time format and the value represents the target time format
  • time_trie (trie): a trie of the time_mapping keys
  • pretty (bool): if set to True the returned string will be formatted. Default: False.
  • quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
  • quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
  • identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
  • identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
  • identify (bool): if set to True all identifiers will be delimited by the corresponding character.
  • normalize (bool): if set to True all identifiers will be lowercased
  • string_escape (str): specifies a string escape character. Default: '.
  • identifier_escape (str): specifies an identifier escape character. Default: ".
  • pad (int): determines padding in a formatted string. Default: 2.
  • indent (int): determines the size of indentation in a formatted string. Default: 4.
  • unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
  • normalize_functions (str): normalize function names, "upper", "lower", or None Default: "upper"
  • alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
  • unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
  • max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma (bool): whether the comma is leading or trailing in select statements. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
def cte_sql(self, expression: sqlglot.expressions.CTE) -> str:
135        def cte_sql(self, expression: exp.CTE) -> str:
136            if isinstance(expression.this, exp.Alias):
137                return self.sql(expression, "this")
138
139            return super().cte_sql(expression)
Inherited Members
sqlglot.generator.Generator
Generator
generate
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columndef_sql
columnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
notnullcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
create_sql
describe_sql
prepend_ctes
with_sql
tablealias_sql
bitstring_sql
hexstring_sql
datatype_sql
directory_sql
delete_sql
drop_sql
except_sql
except_op
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
afterjournalproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
lockingproperty_sql
withdataproperty_sql
insert_sql
intersect_sql
intersect_op
introducer_sql
pseudotype_sql
rowformatdelimitedproperty_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
group_sql
having_sql
join_sql
lambda_sql
lateral_sql
limit_sql
offset_sql
lock_sql
literal_sql
loaddata_sql
null_sql
boolean_sql
order_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
select_sql
schema_sql
star_sql
structkwarg_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
window_spec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
extract_sql
trim_sql
concat_sql
check_sql
foreignkey_sql
primarykey_sql
unique_sql
if_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
aliases_sql
attimezone_sql
add_sql
and_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
collate_sql
command_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
altertable_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
intdiv_sql
dpipe_sql
div_sql
distance_sql
dot_sql
eq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
is_sql
like_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql