summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/dialects/clickhouse.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-01-23 05:06:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-01-23 05:06:14 +0000
commit: 38e6461a8afbd7cb83709ddb998f03d40ba87755 (patch)
tree: 64b68a893a3b946111b9cab69503f83ca233c335 /sqlglot/dialects/clickhouse.py
parent: Releasing debian version 20.4.0-1. (diff)
download: sqlglot-38e6461a8afbd7cb83709ddb998f03d40ba87755.tar.xz
sqlglot-38e6461a8afbd7cb83709ddb998f03d40ba87755.zip
Merging upstream version 20.9.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dialects/clickhouse.py')
-rw-r--r-- sqlglot/dialects/clickhouse.py 201
1 files changed, 192 insertions, 9 deletions
diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py
index 870f402..f2e4fe1 100644
--- a/sqlglot/dialects/clickhouse.py
+++ b/sqlglot/dialects/clickhouse.py
@@ -6,6 +6,7 @@ from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
Dialect,
arg_max_or_min_no_count,
+ date_delta_sql,
inline_array_sql,
no_pivot_sql,
rename_func,
@@ -22,16 +23,25 @@ def _lower_func(sql: str) -> str:
return sql[:index].lower() + sql[index:]
-def _quantile_sql(self, e):
+def _quantile_sql(self: ClickHouse.Generator, e: exp.Quantile) -> str:
quantile = e.args["quantile"]
args = f"({self.sql(e, 'this')})"
+
if isinstance(quantile, exp.Array):
func = self.func("quantiles", *quantile)
else:
func = self.func("quantile", quantile)
+
return func + args
+def _parse_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
+ if len(args) == 1:
+ return exp.CountIf(this=seq_get(args, 0))
+
+ return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))
+
+
class ClickHouse(Dialect):
NORMALIZE_FUNCTIONS: bool | str = False
NULL_ORDERING = "nulls_are_last"
@@ -53,6 +63,7 @@ class ClickHouse(Dialect):
KEYWORDS = {
**tokens.Tokenizer.KEYWORDS,
"ATTACH": TokenType.COMMAND,
+ "DATE32": TokenType.DATE32,
"DATETIME64": TokenType.DATETIME64,
"DICTIONARY": TokenType.DICTIONARY,
"ENUM": TokenType.ENUM,
@@ -75,6 +86,8 @@ class ClickHouse(Dialect):
"UINT32": TokenType.UINT,
"UINT64": TokenType.UBIGINT,
"UINT8": TokenType.UTINYINT,
+ "IPV4": TokenType.IPV4,
+ "IPV6": TokenType.IPV6,
}
SINGLE_TOKENS = {
@@ -91,6 +104,8 @@ class ClickHouse(Dialect):
FUNCTIONS = {
**parser.Parser.FUNCTIONS,
"ANY": exp.AnyValue.from_arg_list,
+ "ARRAYSUM": exp.ArraySum.from_arg_list,
+ "COUNTIF": _parse_count_if,
"DATE_ADD": lambda args: exp.DateAdd(
this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
),
@@ -110,6 +125,138 @@ class ClickHouse(Dialect):
"XOR": lambda args: exp.Xor(expressions=args),
}
+ AGG_FUNCTIONS = {
+ "count",
+ "min",
+ "max",
+ "sum",
+ "avg",
+ "any",
+ "stddevPop",
+ "stddevSamp",
+ "varPop",
+ "varSamp",
+ "corr",
+ "covarPop",
+ "covarSamp",
+ "entropy",
+ "exponentialMovingAverage",
+ "intervalLengthSum",
+ "kolmogorovSmirnovTest",
+ "mannWhitneyUTest",
+ "median",
+ "rankCorr",
+ "sumKahan",
+ "studentTTest",
+ "welchTTest",
+ "anyHeavy",
+ "anyLast",
+ "boundingRatio",
+ "first_value",
+ "last_value",
+ "argMin",
+ "argMax",
+ "avgWeighted",
+ "topK",
+ "topKWeighted",
+ "deltaSum",
+ "deltaSumTimestamp",
+ "groupArray",
+ "groupArrayLast",
+ "groupUniqArray",
+ "groupArrayInsertAt",
+ "groupArrayMovingAvg",
+ "groupArrayMovingSum",
+ "groupArraySample",
+ "groupBitAnd",
+ "groupBitOr",
+ "groupBitXor",
+ "groupBitmap",
+ "groupBitmapAnd",
+ "groupBitmapOr",
+ "groupBitmapXor",
+ "sumWithOverflow",
+ "sumMap",
+ "minMap",
+ "maxMap",
+ "skewSamp",
+ "skewPop",
+ "kurtSamp",
+ "kurtPop",
+ "uniq",
+ "uniqExact",
+ "uniqCombined",
+ "uniqCombined64",
+ "uniqHLL12",
+ "uniqTheta",
+ "quantile",
+ "quantiles",
+ "quantileExact",
+ "quantilesExact",
+ "quantileExactLow",
+ "quantilesExactLow",
+ "quantileExactHigh",
+ "quantilesExactHigh",
+ "quantileExactWeighted",
+ "quantilesExactWeighted",
+ "quantileTiming",
+ "quantilesTiming",
+ "quantileTimingWeighted",
+ "quantilesTimingWeighted",
+ "quantileDeterministic",
+ "quantilesDeterministic",
+ "quantileTDigest",
+ "quantilesTDigest",
+ "quantileTDigestWeighted",
+ "quantilesTDigestWeighted",
+ "quantileBFloat16",
+ "quantilesBFloat16",
+ "quantileBFloat16Weighted",
+ "quantilesBFloat16Weighted",
+ "simpleLinearRegression",
+ "stochasticLinearRegression",
+ "stochasticLogisticRegression",
+ "categoricalInformationValue",
+ "contingency",
+ "cramersV",
+ "cramersVBiasCorrected",
+ "theilsU",
+ "maxIntersections",
+ "maxIntersectionsPosition",
+ "meanZTest",
+ "quantileInterpolatedWeighted",
+ "quantilesInterpolatedWeighted",
+ "quantileGK",
+ "quantilesGK",
+ "sparkBar",
+ "sumCount",
+ "largestTriangleThreeBuckets",
+ }
+
+ AGG_FUNCTIONS_SUFFIXES = [
+ "If",
+ "Array",
+ "ArrayIf",
+ "Map",
+ "SimpleState",
+ "State",
+ "Merge",
+ "MergeState",
+ "ForEach",
+ "Distinct",
+ "OrDefault",
+ "OrNull",
+ "Resample",
+ "ArgMin",
+ "ArgMax",
+ ]
+
+ AGG_FUNC_MAPPING = (
+ lambda functions, suffixes: {
+ f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
+ }
+ )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
+
FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}
FUNCTION_PARSERS = {
@@ -272,9 +419,18 @@ class ClickHouse(Dialect):
)
if isinstance(func, exp.Anonymous):
+ parts = self.AGG_FUNC_MAPPING.get(func.this)
params = self._parse_func_params(func)
if params:
+ if parts and parts[1]:
+ return self.expression(
+ exp.CombinedParameterizedAgg,
+ this=func.this,
+ expressions=func.expressions,
+ params=params,
+ parts=parts,
+ )
return self.expression(
exp.ParameterizedAgg,
this=func.this,
@@ -282,6 +438,20 @@ class ClickHouse(Dialect):
params=params,
)
+ if parts:
+ if parts[1]:
+ return self.expression(
+ exp.CombinedAggFunc,
+ this=func.this,
+ expressions=func.expressions,
+ parts=parts,
+ )
+ return self.expression(
+ exp.AnonymousAggFunc,
+ this=func.this,
+ expressions=func.expressions,
+ )
+
return func
def _parse_func_params(
@@ -329,6 +499,9 @@ class ClickHouse(Dialect):
STRUCT_DELIMITER = ("(", ")")
NVL2_SUPPORTED = False
TABLESAMPLE_REQUIRES_PARENS = False
+ TABLESAMPLE_SIZE_IS_ROWS = False
+ TABLESAMPLE_KEYWORDS = "SAMPLE"
+ LAST_DAY_SUPPORTS_DATE_PART = False
STRING_TYPE_MAPPING = {
exp.DataType.Type.CHAR: "String",
@@ -348,6 +521,7 @@ class ClickHouse(Dialect):
**STRING_TYPE_MAPPING,
exp.DataType.Type.ARRAY: "Array",
exp.DataType.Type.BIGINT: "Int64",
+ exp.DataType.Type.DATE32: "Date32",
exp.DataType.Type.DATETIME64: "DateTime64",
exp.DataType.Type.DOUBLE: "Float64",
exp.DataType.Type.ENUM: "Enum",
@@ -372,24 +546,23 @@ class ClickHouse(Dialect):
exp.DataType.Type.UINT256: "UInt256",
exp.DataType.Type.USMALLINT: "UInt16",
exp.DataType.Type.UTINYINT: "UInt8",
+ exp.DataType.Type.IPV4: "IPv4",
+ exp.DataType.Type.IPV6: "IPv6",
}
TRANSFORMS = {
**generator.Generator.TRANSFORMS,
- exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
exp.AnyValue: rename_func("any"),
exp.ApproxDistinct: rename_func("uniq"),
+ exp.ArraySum: rename_func("arraySum"),
exp.ArgMax: arg_max_or_min_no_count("argMax"),
exp.ArgMin: arg_max_or_min_no_count("argMin"),
exp.Array: inline_array_sql,
exp.CastToStrType: rename_func("CAST"),
+ exp.CountIf: rename_func("countIf"),
exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
- exp.DateAdd: lambda self, e: self.func(
- "DATE_ADD", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this
- ),
- exp.DateDiff: lambda self, e: self.func(
- "DATE_DIFF", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this
- ),
+ exp.DateAdd: date_delta_sql("DATE_ADD"),
+ exp.DateDiff: date_delta_sql("DATE_DIFF"),
exp.Explode: rename_func("arrayJoin"),
exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
exp.IsNan: rename_func("isNaN"),
@@ -400,6 +573,7 @@ class ClickHouse(Dialect):
exp.Quantile: _quantile_sql,
exp.RegexpLike: lambda self, e: f"match({self.format_args(e.this, e.expression)})",
exp.Rand: rename_func("randCanonical"),
+ exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
exp.StartsWith: rename_func("startsWith"),
exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})",
exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
@@ -485,10 +659,19 @@ class ClickHouse(Dialect):
else "",
]
- def parameterizedagg_sql(self, expression: exp.Anonymous) -> str:
+ def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
params = self.expressions(expression, key="params", flat=True)
return self.func(expression.name, *expression.expressions) + f"({params})"
+ def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
+ return self.func(expression.name, *expression.expressions)
+
+ def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
+ return self.anonymousaggfunc_sql(expression)
+
+ def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
+ return self.parameterizedagg_sql(expression)
+
def placeholder_sql(self, expression: exp.Placeholder) -> str:
return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"