Edit on GitHub

sqlglot.transforms

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import expressions as exp
  6from sqlglot.helper import find_new_name
  7
  8if t.TYPE_CHECKING:
  9    from sqlglot.generator import Generator
 10
 11
 12def unalias_group(expression: exp.Expression) -> exp.Expression:
 13    """
 14    Replace references to select aliases in GROUP BY clauses.
 15
 16    Example:
 17        >>> import sqlglot
 18        >>> sqlglot.parse_one("SELECT a AS b FROM x GROUP BY b").transform(unalias_group).sql()
 19        'SELECT a AS b FROM x GROUP BY 1'
 20
 21    Args:
 22        expression: the expression that will be transformed.
 23
 24    Returns:
 25        The transformed expression.
 26    """
 27    if isinstance(expression, exp.Group) and isinstance(expression.parent, exp.Select):
 28        aliased_selects = {
 29            e.alias: i
 30            for i, e in enumerate(expression.parent.expressions, start=1)
 31            if isinstance(e, exp.Alias)
 32        }
 33
 34        for group_by in expression.expressions:
 35            if (
 36                isinstance(group_by, exp.Column)
 37                and not group_by.table
 38                and group_by.name in aliased_selects
 39            ):
 40                group_by.replace(exp.Literal.number(aliased_selects.get(group_by.name)))
 41
 42    return expression
 43
 44
 45def eliminate_distinct_on(expression: exp.Expression) -> exp.Expression:
 46    """
 47    Convert SELECT DISTINCT ON statements to a subquery with a window function.
 48
 49    This is useful for dialects that don't support SELECT DISTINCT ON but support window functions.
 50
 51    Args:
 52        expression: the expression that will be transformed.
 53
 54    Returns:
 55        The transformed expression.
 56    """
 57    if (
 58        isinstance(expression, exp.Select)
 59        and expression.args.get("distinct")
 60        and expression.args["distinct"].args.get("on")
 61        and isinstance(expression.args["distinct"].args["on"], exp.Tuple)
 62    ):
 63        distinct_cols = expression.args["distinct"].pop().args["on"].expressions
 64        outer_selects = expression.selects
 65        row_number = find_new_name(expression.named_selects, "_row_number")
 66        window = exp.Window(
 67            this=exp.RowNumber(),
 68            partition_by=distinct_cols,
 69        )
 70        order = expression.args.get("order")
 71        if order:
 72            window.set("order", order.pop().copy())
 73        window = exp.alias_(window, row_number)
 74        expression.select(window, copy=False)
 75        return exp.select(*outer_selects).from_(expression.subquery()).where(f'"{row_number}" = 1')
 76    return expression
 77
 78
 79def eliminate_qualify(expression: exp.Expression) -> exp.Expression:
 80    """
 81    Convert SELECT statements that contain the QUALIFY clause into subqueries, filtered equivalently.
 82
 83    The idea behind this transformation can be seen in Snowflake's documentation for QUALIFY:
 84    https://docs.snowflake.com/en/sql-reference/constructs/qualify
 85
 86    Some dialects don't support window functions in the WHERE clause, so we need to include them as
 87    projections in the subquery, in order to refer to them in the outer filter using aliases. Also,
 88    if a column is referenced in the QUALIFY clause but is not selected, we need to include it too,
 89    otherwise we won't be able to refer to it in the outer query's WHERE clause.
 90    """
 91    if isinstance(expression, exp.Select) and expression.args.get("qualify"):
 92        taken = set(expression.named_selects)
 93        for select in expression.selects:
 94            if not select.alias_or_name:
 95                alias = find_new_name(taken, "_c")
 96                select.replace(exp.alias_(select.copy(), alias))
 97                taken.add(alias)
 98
 99        outer_selects = exp.select(*[select.alias_or_name for select in expression.selects])
100        qualify_filters = expression.args["qualify"].pop().this
101
102        for expr in qualify_filters.find_all((exp.Window, exp.Column)):
103            if isinstance(expr, exp.Window):
104                alias = find_new_name(expression.named_selects, "_w")
105                expression.select(exp.alias_(expr.copy(), alias), copy=False)
106                expr.replace(exp.column(alias))
107            elif expr.name not in expression.named_selects:
108                expression.select(expr.copy(), copy=False)
109
110        return outer_selects.from_(expression.subquery(alias="_t")).where(qualify_filters)
111
112    return expression
113
114
115def remove_precision_parameterized_types(expression: exp.Expression) -> exp.Expression:
116    """
117    Some dialects only allow the precision for parameterized types to be defined in the DDL and not in other expressions.
118    This transforms removes the precision from parameterized types in expressions.
119    """
120    return expression.transform(
121        lambda node: exp.DataType(
122            **{
123                **node.args,
124                "expressions": [
125                    node_expression
126                    for node_expression in node.expressions
127                    if isinstance(node_expression, exp.DataType)
128                ],
129            }
130        )
131        if isinstance(node, exp.DataType)
132        else node,
133    )
134
135
136def preprocess(
137    transforms: t.List[t.Callable[[exp.Expression], exp.Expression]],
138    to_sql: t.Callable[[Generator, exp.Expression], str],
139) -> t.Callable[[Generator, exp.Expression], str]:
140    """
141    Creates a new transform by chaining a sequence of transformations and converts the resulting
142    expression to SQL, using an appropriate `Generator.TRANSFORMS` function.
143
144    Args:
145        transforms: sequence of transform functions. These will be called in order.
146        to_sql: final transform that converts the resulting expression to a SQL string.
147
148    Returns:
149        Function that can be used as a generator transform.
150    """
151
152    def _to_sql(self, expression):
153        expression = transforms[0](expression.copy())
154        for t in transforms[1:]:
155            expression = t(expression)
156        return to_sql(self, expression)
157
158    return _to_sql
159
160
161def delegate(attr: str) -> t.Callable:
162    """
163    Create a new method that delegates to `attr`. This is useful for creating `Generator.TRANSFORMS`
164    functions that delegate to existing generator methods.
165    """
166
167    def _transform(self, *args, **kwargs):
168        return getattr(self, attr)(*args, **kwargs)
169
170    return _transform
171
172
173UNALIAS_GROUP = {exp.Group: preprocess([unalias_group], delegate("group_sql"))}
174ELIMINATE_DISTINCT_ON = {exp.Select: preprocess([eliminate_distinct_on], delegate("select_sql"))}
175ELIMINATE_QUALIFY = {exp.Select: preprocess([eliminate_qualify], delegate("select_sql"))}
176REMOVE_PRECISION_PARAMETERIZED_TYPES = {
177    exp.Cast: preprocess([remove_precision_parameterized_types], delegate("cast_sql"))
178}
def unalias_group( expression: sqlglot.expressions.Expression) -> sqlglot.expressions.Expression:
13def unalias_group(expression: exp.Expression) -> exp.Expression:
14    """
15    Replace references to select aliases in GROUP BY clauses.
16
17    Example:
18        >>> import sqlglot
19        >>> sqlglot.parse_one("SELECT a AS b FROM x GROUP BY b").transform(unalias_group).sql()
20        'SELECT a AS b FROM x GROUP BY 1'
21
22    Args:
23        expression: the expression that will be transformed.
24
25    Returns:
26        The transformed expression.
27    """
28    if isinstance(expression, exp.Group) and isinstance(expression.parent, exp.Select):
29        aliased_selects = {
30            e.alias: i
31            for i, e in enumerate(expression.parent.expressions, start=1)
32            if isinstance(e, exp.Alias)
33        }
34
35        for group_by in expression.expressions:
36            if (
37                isinstance(group_by, exp.Column)
38                and not group_by.table
39                and group_by.name in aliased_selects
40            ):
41                group_by.replace(exp.Literal.number(aliased_selects.get(group_by.name)))
42
43    return expression

Replace references to select aliases in GROUP BY clauses.

Example:
>>> import sqlglot
>>> sqlglot.parse_one("SELECT a AS b FROM x GROUP BY b").transform(unalias_group).sql()
'SELECT a AS b FROM x GROUP BY 1'
Arguments:
  • expression: the expression that will be transformed.
Returns:

The transformed expression.

def eliminate_distinct_on( expression: sqlglot.expressions.Expression) -> sqlglot.expressions.Expression:
46def eliminate_distinct_on(expression: exp.Expression) -> exp.Expression:
47    """
48    Convert SELECT DISTINCT ON statements to a subquery with a window function.
49
50    This is useful for dialects that don't support SELECT DISTINCT ON but support window functions.
51
52    Args:
53        expression: the expression that will be transformed.
54
55    Returns:
56        The transformed expression.
57    """
58    if (
59        isinstance(expression, exp.Select)
60        and expression.args.get("distinct")
61        and expression.args["distinct"].args.get("on")
62        and isinstance(expression.args["distinct"].args["on"], exp.Tuple)
63    ):
64        distinct_cols = expression.args["distinct"].pop().args["on"].expressions
65        outer_selects = expression.selects
66        row_number = find_new_name(expression.named_selects, "_row_number")
67        window = exp.Window(
68            this=exp.RowNumber(),
69            partition_by=distinct_cols,
70        )
71        order = expression.args.get("order")
72        if order:
73            window.set("order", order.pop().copy())
74        window = exp.alias_(window, row_number)
75        expression.select(window, copy=False)
76        return exp.select(*outer_selects).from_(expression.subquery()).where(f'"{row_number}" = 1')
77    return expression

Convert SELECT DISTINCT ON statements to a subquery with a window function.

This is useful for dialects that don't support SELECT DISTINCT ON but support window functions.

Arguments:
  • expression: the expression that will be transformed.
Returns:

The transformed expression.

def eliminate_qualify( expression: sqlglot.expressions.Expression) -> sqlglot.expressions.Expression:
 80def eliminate_qualify(expression: exp.Expression) -> exp.Expression:
 81    """
 82    Convert SELECT statements that contain the QUALIFY clause into subqueries, filtered equivalently.
 83
 84    The idea behind this transformation can be seen in Snowflake's documentation for QUALIFY:
 85    https://docs.snowflake.com/en/sql-reference/constructs/qualify
 86
 87    Some dialects don't support window functions in the WHERE clause, so we need to include them as
 88    projections in the subquery, in order to refer to them in the outer filter using aliases. Also,
 89    if a column is referenced in the QUALIFY clause but is not selected, we need to include it too,
 90    otherwise we won't be able to refer to it in the outer query's WHERE clause.
 91    """
 92    if isinstance(expression, exp.Select) and expression.args.get("qualify"):
 93        taken = set(expression.named_selects)
 94        for select in expression.selects:
 95            if not select.alias_or_name:
 96                alias = find_new_name(taken, "_c")
 97                select.replace(exp.alias_(select.copy(), alias))
 98                taken.add(alias)
 99
100        outer_selects = exp.select(*[select.alias_or_name for select in expression.selects])
101        qualify_filters = expression.args["qualify"].pop().this
102
103        for expr in qualify_filters.find_all((exp.Window, exp.Column)):
104            if isinstance(expr, exp.Window):
105                alias = find_new_name(expression.named_selects, "_w")
106                expression.select(exp.alias_(expr.copy(), alias), copy=False)
107                expr.replace(exp.column(alias))
108            elif expr.name not in expression.named_selects:
109                expression.select(expr.copy(), copy=False)
110
111        return outer_selects.from_(expression.subquery(alias="_t")).where(qualify_filters)
112
113    return expression

Convert SELECT statements that contain the QUALIFY clause into subqueries, filtered equivalently.

The idea behind this transformation can be seen in Snowflake's documentation for QUALIFY: https://docs.snowflake.com/en/sql-reference/constructs/qualify

Some dialects don't support window functions in the WHERE clause, so we need to include them as projections in the subquery, in order to refer to them in the outer filter using aliases. Also, if a column is referenced in the QUALIFY clause but is not selected, we need to include it too, otherwise we won't be able to refer to it in the outer query's WHERE clause.

def remove_precision_parameterized_types( expression: sqlglot.expressions.Expression) -> sqlglot.expressions.Expression:
116def remove_precision_parameterized_types(expression: exp.Expression) -> exp.Expression:
117    """
118    Some dialects only allow the precision for parameterized types to be defined in the DDL and not in other expressions.
119    This transforms removes the precision from parameterized types in expressions.
120    """
121    return expression.transform(
122        lambda node: exp.DataType(
123            **{
124                **node.args,
125                "expressions": [
126                    node_expression
127                    for node_expression in node.expressions
128                    if isinstance(node_expression, exp.DataType)
129                ],
130            }
131        )
132        if isinstance(node, exp.DataType)
133        else node,
134    )

Some dialects only allow the precision for parameterized types to be defined in the DDL and not in other expressions. This transforms removes the precision from parameterized types in expressions.

def preprocess( transforms: List[Callable[[sqlglot.expressions.Expression], sqlglot.expressions.Expression]], to_sql: Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
137def preprocess(
138    transforms: t.List[t.Callable[[exp.Expression], exp.Expression]],
139    to_sql: t.Callable[[Generator, exp.Expression], str],
140) -> t.Callable[[Generator, exp.Expression], str]:
141    """
142    Creates a new transform by chaining a sequence of transformations and converts the resulting
143    expression to SQL, using an appropriate `Generator.TRANSFORMS` function.
144
145    Args:
146        transforms: sequence of transform functions. These will be called in order.
147        to_sql: final transform that converts the resulting expression to a SQL string.
148
149    Returns:
150        Function that can be used as a generator transform.
151    """
152
153    def _to_sql(self, expression):
154        expression = transforms[0](expression.copy())
155        for t in transforms[1:]:
156            expression = t(expression)
157        return to_sql(self, expression)
158
159    return _to_sql

Creates a new transform by chaining a sequence of transformations and converts the resulting expression to SQL, using an appropriate Generator.TRANSFORMS function.

Arguments:
  • transforms: sequence of transform functions. These will be called in order.
  • to_sql: final transform that converts the resulting expression to a SQL string.
Returns:

Function that can be used as a generator transform.

def delegate(attr: str) -> Callable:
162def delegate(attr: str) -> t.Callable:
163    """
164    Create a new method that delegates to `attr`. This is useful for creating `Generator.TRANSFORMS`
165    functions that delegate to existing generator methods.
166    """
167
168    def _transform(self, *args, **kwargs):
169        return getattr(self, attr)(*args, **kwargs)
170
171    return _transform

Create a new method that delegates to attr. This is useful for creating Generator.TRANSFORMS functions that delegate to existing generator methods.