diff options
Diffstat (limited to 'sqlglot/dialects/clickhouse.py')
-rw-r--r-- | sqlglot/dialects/clickhouse.py | 201 |
1 files changed, 192 insertions, 9 deletions
diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py index 870f402..f2e4fe1 100644 --- a/sqlglot/dialects/clickhouse.py +++ b/sqlglot/dialects/clickhouse.py @@ -6,6 +6,7 @@ from sqlglot import exp, generator, parser, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, arg_max_or_min_no_count, + date_delta_sql, inline_array_sql, no_pivot_sql, rename_func, @@ -22,16 +23,25 @@ def _lower_func(sql: str) -> str: return sql[:index].lower() + sql[index:] -def _quantile_sql(self, e): +def _quantile_sql(self: ClickHouse.Generator, e: exp.Quantile) -> str: quantile = e.args["quantile"] args = f"({self.sql(e, 'this')})" + if isinstance(quantile, exp.Array): func = self.func("quantiles", *quantile) else: func = self.func("quantile", quantile) + return func + args +def _parse_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: + if len(args) == 1: + return exp.CountIf(this=seq_get(args, 0)) + + return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) + + class ClickHouse(Dialect): NORMALIZE_FUNCTIONS: bool | str = False NULL_ORDERING = "nulls_are_last" @@ -53,6 +63,7 @@ class ClickHouse(Dialect): KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "ATTACH": TokenType.COMMAND, + "DATE32": TokenType.DATE32, "DATETIME64": TokenType.DATETIME64, "DICTIONARY": TokenType.DICTIONARY, "ENUM": TokenType.ENUM, @@ -75,6 +86,8 @@ class ClickHouse(Dialect): "UINT32": TokenType.UINT, "UINT64": TokenType.UBIGINT, "UINT8": TokenType.UTINYINT, + "IPV4": TokenType.IPV4, + "IPV6": TokenType.IPV6, } SINGLE_TOKENS = { @@ -91,6 +104,8 @@ class ClickHouse(Dialect): FUNCTIONS = { **parser.Parser.FUNCTIONS, "ANY": exp.AnyValue.from_arg_list, + "ARRAYSUM": exp.ArraySum.from_arg_list, + "COUNTIF": _parse_count_if, "DATE_ADD": lambda args: exp.DateAdd( this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) ), @@ -110,6 +125,138 @@ class ClickHouse(Dialect): "XOR": lambda args: exp.Xor(expressions=args), } + AGG_FUNCTIONS = { + "count", + "min", + "max", + "sum", + "avg", + "any", + "stddevPop", + "stddevSamp", + "varPop", + "varSamp", + "corr", + "covarPop", + "covarSamp", + "entropy", + "exponentialMovingAverage", + "intervalLengthSum", + "kolmogorovSmirnovTest", + "mannWhitneyUTest", + "median", + "rankCorr", + "sumKahan", + "studentTTest", + "welchTTest", + "anyHeavy", + "anyLast", + "boundingRatio", + "first_value", + "last_value", + "argMin", + "argMax", + "avgWeighted", + "topK", + "topKWeighted", + "deltaSum", + "deltaSumTimestamp", + "groupArray", + "groupArrayLast", + "groupUniqArray", + "groupArrayInsertAt", + "groupArrayMovingAvg", + "groupArrayMovingSum", + "groupArraySample", + "groupBitAnd", + "groupBitOr", + "groupBitXor", + "groupBitmap", + "groupBitmapAnd", + "groupBitmapOr", + "groupBitmapXor", + "sumWithOverflow", + "sumMap", + "minMap", + "maxMap", + "skewSamp", + "skewPop", + "kurtSamp", + "kurtPop", + "uniq", + "uniqExact", + "uniqCombined", + "uniqCombined64", + "uniqHLL12", + "uniqTheta", + "quantile", + "quantiles", + "quantileExact", + "quantilesExact", + "quantileExactLow", + "quantilesExactLow", + "quantileExactHigh", + "quantilesExactHigh", + "quantileExactWeighted", + "quantilesExactWeighted", + "quantileTiming", + "quantilesTiming", + "quantileTimingWeighted", + "quantilesTimingWeighted", + "quantileDeterministic", + "quantilesDeterministic", + "quantileTDigest", + "quantilesTDigest", + "quantileTDigestWeighted", + "quantilesTDigestWeighted", + "quantileBFloat16", + "quantilesBFloat16", + "quantileBFloat16Weighted", + "quantilesBFloat16Weighted", + "simpleLinearRegression", + "stochasticLinearRegression", + "stochasticLogisticRegression", + "categoricalInformationValue", + "contingency", + "cramersV", + "cramersVBiasCorrected", + "theilsU", + "maxIntersections", + "maxIntersectionsPosition", + "meanZTest", + "quantileInterpolatedWeighted", + "quantilesInterpolatedWeighted", + "quantileGK", + "quantilesGK", + "sparkBar", + "sumCount", + "largestTriangleThreeBuckets", + } + + AGG_FUNCTIONS_SUFFIXES = [ + "If", + "Array", + "ArrayIf", + "Map", + "SimpleState", + "State", + "Merge", + "MergeState", + "ForEach", + "Distinct", + "OrDefault", + "OrNull", + "Resample", + "ArgMin", + "ArgMax", + ] + + AGG_FUNC_MAPPING = ( + lambda functions, suffixes: { + f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions + } + )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) + FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} FUNCTION_PARSERS = { @@ -272,9 +419,18 @@ class ClickHouse(Dialect): ) if isinstance(func, exp.Anonymous): + parts = self.AGG_FUNC_MAPPING.get(func.this) params = self._parse_func_params(func) if params: + if parts and parts[1]: + return self.expression( + exp.CombinedParameterizedAgg, + this=func.this, + expressions=func.expressions, + params=params, + parts=parts, + ) return self.expression( exp.ParameterizedAgg, this=func.this, @@ -282,6 +438,20 @@ class ClickHouse(Dialect): params=params, ) + if parts: + if parts[1]: + return self.expression( + exp.CombinedAggFunc, + this=func.this, + expressions=func.expressions, + parts=parts, + ) + return self.expression( + exp.AnonymousAggFunc, + this=func.this, + expressions=func.expressions, + ) + return func def _parse_func_params( @@ -329,6 +499,9 @@ class ClickHouse(Dialect): STRUCT_DELIMITER = ("(", ")") NVL2_SUPPORTED = False TABLESAMPLE_REQUIRES_PARENS = False + TABLESAMPLE_SIZE_IS_ROWS = False + TABLESAMPLE_KEYWORDS = "SAMPLE" + LAST_DAY_SUPPORTS_DATE_PART = False STRING_TYPE_MAPPING = { exp.DataType.Type.CHAR: "String", @@ -348,6 +521,7 @@ class ClickHouse(Dialect): **STRING_TYPE_MAPPING, exp.DataType.Type.ARRAY: "Array", exp.DataType.Type.BIGINT: "Int64", + exp.DataType.Type.DATE32: "Date32", exp.DataType.Type.DATETIME64: "DateTime64", exp.DataType.Type.DOUBLE: "Float64", exp.DataType.Type.ENUM: "Enum", @@ -372,24 +546,23 @@ class ClickHouse(Dialect): exp.DataType.Type.UINT256: "UInt256", exp.DataType.Type.USMALLINT: "UInt16", exp.DataType.Type.UTINYINT: "UInt8", + exp.DataType.Type.IPV4: "IPv4", + exp.DataType.Type.IPV6: "IPv6", } TRANSFORMS = { **generator.Generator.TRANSFORMS, - exp.Select: transforms.preprocess([transforms.eliminate_qualify]), exp.AnyValue: rename_func("any"), exp.ApproxDistinct: rename_func("uniq"), + exp.ArraySum: rename_func("arraySum"), exp.ArgMax: arg_max_or_min_no_count("argMax"), exp.ArgMin: arg_max_or_min_no_count("argMin"), exp.Array: inline_array_sql, exp.CastToStrType: rename_func("CAST"), + exp.CountIf: rename_func("countIf"), exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), - exp.DateAdd: lambda self, e: self.func( - "DATE_ADD", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this - ), - exp.DateDiff: lambda self, e: self.func( - "DATE_DIFF", exp.Literal.string(e.text("unit") or "day"), e.expression, e.this - ), + exp.DateAdd: date_delta_sql("DATE_ADD"), + exp.DateDiff: date_delta_sql("DATE_DIFF"), exp.Explode: rename_func("arrayJoin"), exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", exp.IsNan: rename_func("isNaN"), @@ -400,6 +573,7 @@ class ClickHouse(Dialect): exp.Quantile: _quantile_sql, exp.RegexpLike: lambda self, e: f"match({self.format_args(e.this, e.expression)})", exp.Rand: rename_func("randCanonical"), + exp.Select: transforms.preprocess([transforms.eliminate_qualify]), exp.StartsWith: rename_func("startsWith"), exp.StrPosition: lambda self, e: f"position({self.format_args(e.this, e.args.get('substr'), e.args.get('position'))})", exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), @@ -485,10 +659,19 @@ class ClickHouse(Dialect): else "", ] - def parameterizedagg_sql(self, expression: exp.Anonymous) -> str: + def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: params = self.expressions(expression, key="params", flat=True) return self.func(expression.name, *expression.expressions) + f"({params})" + def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: + return self.func(expression.name, *expression.expressions) + + def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: + return self.anonymousaggfunc_sql(expression) + + def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: + return self.parameterizedagg_sql(expression) + def placeholder_sql(self, expression: exp.Placeholder) -> str: return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" |