sqlglot.transforms
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import expressions as exp 6from sqlglot.helper import find_new_name 7 8if t.TYPE_CHECKING: 9 from sqlglot.generator import Generator 10 11 12def unalias_group(expression: exp.Expression) -> exp.Expression: 13 """ 14 Replace references to select aliases in GROUP BY clauses. 15 16 Example: 17 >>> import sqlglot 18 >>> sqlglot.parse_one("SELECT a AS b FROM x GROUP BY b").transform(unalias_group).sql() 19 'SELECT a AS b FROM x GROUP BY 1' 20 21 Args: 22 expression: the expression that will be transformed. 23 24 Returns: 25 The transformed expression. 26 """ 27 if isinstance(expression, exp.Group) and isinstance(expression.parent, exp.Select): 28 aliased_selects = { 29 e.alias: i 30 for i, e in enumerate(expression.parent.expressions, start=1) 31 if isinstance(e, exp.Alias) 32 } 33 34 for group_by in expression.expressions: 35 if ( 36 isinstance(group_by, exp.Column) 37 and not group_by.table 38 and group_by.name in aliased_selects 39 ): 40 group_by.replace(exp.Literal.number(aliased_selects.get(group_by.name))) 41 42 return expression 43 44 45def eliminate_distinct_on(expression: exp.Expression) -> exp.Expression: 46 """ 47 Convert SELECT DISTINCT ON statements to a subquery with a window function. 48 49 This is useful for dialects that don't support SELECT DISTINCT ON but support window functions. 50 51 Args: 52 expression: the expression that will be transformed. 53 54 Returns: 55 The transformed expression. 56 """ 57 if ( 58 isinstance(expression, exp.Select) 59 and expression.args.get("distinct") 60 and expression.args["distinct"].args.get("on") 61 and isinstance(expression.args["distinct"].args["on"], exp.Tuple) 62 ): 63 distinct_cols = expression.args["distinct"].pop().args["on"].expressions 64 outer_selects = expression.selects 65 row_number = find_new_name(expression.named_selects, "_row_number") 66 window = exp.Window( 67 this=exp.RowNumber(), 68 partition_by=distinct_cols, 69 ) 70 order = expression.args.get("order") 71 if order: 72 window.set("order", order.pop().copy()) 73 window = exp.alias_(window, row_number) 74 expression.select(window, copy=False) 75 return exp.select(*outer_selects).from_(expression.subquery()).where(f'"{row_number}" = 1') 76 return expression 77 78 79def eliminate_qualify(expression: exp.Expression) -> exp.Expression: 80 """ 81 Convert SELECT statements that contain the QUALIFY clause into subqueries, filtered equivalently. 82 83 The idea behind this transformation can be seen in Snowflake's documentation for QUALIFY: 84 https://docs.snowflake.com/en/sql-reference/constructs/qualify 85 86 Some dialects don't support window functions in the WHERE clause, so we need to include them as 87 projections in the subquery, in order to refer to them in the outer filter using aliases. Also, 88 if a column is referenced in the QUALIFY clause but is not selected, we need to include it too, 89 otherwise we won't be able to refer to it in the outer query's WHERE clause. 90 """ 91 if isinstance(expression, exp.Select) and expression.args.get("qualify"): 92 taken = set(expression.named_selects) 93 for select in expression.selects: 94 if not select.alias_or_name: 95 alias = find_new_name(taken, "_c") 96 select.replace(exp.alias_(select.copy(), alias)) 97 taken.add(alias) 98 99 outer_selects = exp.select(*[select.alias_or_name for select in expression.selects]) 100 qualify_filters = expression.args["qualify"].pop().this 101 102 for expr in qualify_filters.find_all((exp.Window, exp.Column)): 103 if isinstance(expr, exp.Window): 104 alias = find_new_name(expression.named_selects, "_w") 105 expression.select(exp.alias_(expr.copy(), alias), copy=False) 106 expr.replace(exp.column(alias)) 107 elif expr.name not in expression.named_selects: 108 expression.select(expr.copy(), copy=False) 109 110 return outer_selects.from_(expression.subquery(alias="_t")).where(qualify_filters) 111 112 return expression 113 114 115def remove_precision_parameterized_types(expression: exp.Expression) -> exp.Expression: 116 """ 117 Some dialects only allow the precision for parameterized types to be defined in the DDL and not in other expressions. 118 This transforms removes the precision from parameterized types in expressions. 119 """ 120 return expression.transform( 121 lambda node: exp.DataType( 122 **{ 123 **node.args, 124 "expressions": [ 125 node_expression 126 for node_expression in node.expressions 127 if isinstance(node_expression, exp.DataType) 128 ], 129 } 130 ) 131 if isinstance(node, exp.DataType) 132 else node, 133 ) 134 135 136def preprocess( 137 transforms: t.List[t.Callable[[exp.Expression], exp.Expression]], 138 to_sql: t.Callable[[Generator, exp.Expression], str], 139) -> t.Callable[[Generator, exp.Expression], str]: 140 """ 141 Creates a new transform by chaining a sequence of transformations and converts the resulting 142 expression to SQL, using an appropriate `Generator.TRANSFORMS` function. 143 144 Args: 145 transforms: sequence of transform functions. These will be called in order. 146 to_sql: final transform that converts the resulting expression to a SQL string. 147 148 Returns: 149 Function that can be used as a generator transform. 150 """ 151 152 def _to_sql(self, expression): 153 expression = transforms[0](expression.copy()) 154 for t in transforms[1:]: 155 expression = t(expression) 156 return to_sql(self, expression) 157 158 return _to_sql 159 160 161def delegate(attr: str) -> t.Callable: 162 """ 163 Create a new method that delegates to `attr`. This is useful for creating `Generator.TRANSFORMS` 164 functions that delegate to existing generator methods. 165 """ 166 167 def _transform(self, *args, **kwargs): 168 return getattr(self, attr)(*args, **kwargs) 169 170 return _transform 171 172 173UNALIAS_GROUP = {exp.Group: preprocess([unalias_group], delegate("group_sql"))} 174ELIMINATE_DISTINCT_ON = {exp.Select: preprocess([eliminate_distinct_on], delegate("select_sql"))} 175ELIMINATE_QUALIFY = {exp.Select: preprocess([eliminate_qualify], delegate("select_sql"))} 176REMOVE_PRECISION_PARAMETERIZED_TYPES = { 177 exp.Cast: preprocess([remove_precision_parameterized_types], delegate("cast_sql")) 178}
13def unalias_group(expression: exp.Expression) -> exp.Expression: 14 """ 15 Replace references to select aliases in GROUP BY clauses. 16 17 Example: 18 >>> import sqlglot 19 >>> sqlglot.parse_one("SELECT a AS b FROM x GROUP BY b").transform(unalias_group).sql() 20 'SELECT a AS b FROM x GROUP BY 1' 21 22 Args: 23 expression: the expression that will be transformed. 24 25 Returns: 26 The transformed expression. 27 """ 28 if isinstance(expression, exp.Group) and isinstance(expression.parent, exp.Select): 29 aliased_selects = { 30 e.alias: i 31 for i, e in enumerate(expression.parent.expressions, start=1) 32 if isinstance(e, exp.Alias) 33 } 34 35 for group_by in expression.expressions: 36 if ( 37 isinstance(group_by, exp.Column) 38 and not group_by.table 39 and group_by.name in aliased_selects 40 ): 41 group_by.replace(exp.Literal.number(aliased_selects.get(group_by.name))) 42 43 return expression
Replace references to select aliases in GROUP BY clauses.
Example:
>>> import sqlglot >>> sqlglot.parse_one("SELECT a AS b FROM x GROUP BY b").transform(unalias_group).sql() 'SELECT a AS b FROM x GROUP BY 1'
Arguments:
- expression: the expression that will be transformed.
Returns:
The transformed expression.
46def eliminate_distinct_on(expression: exp.Expression) -> exp.Expression: 47 """ 48 Convert SELECT DISTINCT ON statements to a subquery with a window function. 49 50 This is useful for dialects that don't support SELECT DISTINCT ON but support window functions. 51 52 Args: 53 expression: the expression that will be transformed. 54 55 Returns: 56 The transformed expression. 57 """ 58 if ( 59 isinstance(expression, exp.Select) 60 and expression.args.get("distinct") 61 and expression.args["distinct"].args.get("on") 62 and isinstance(expression.args["distinct"].args["on"], exp.Tuple) 63 ): 64 distinct_cols = expression.args["distinct"].pop().args["on"].expressions 65 outer_selects = expression.selects 66 row_number = find_new_name(expression.named_selects, "_row_number") 67 window = exp.Window( 68 this=exp.RowNumber(), 69 partition_by=distinct_cols, 70 ) 71 order = expression.args.get("order") 72 if order: 73 window.set("order", order.pop().copy()) 74 window = exp.alias_(window, row_number) 75 expression.select(window, copy=False) 76 return exp.select(*outer_selects).from_(expression.subquery()).where(f'"{row_number}" = 1') 77 return expression
Convert SELECT DISTINCT ON statements to a subquery with a window function.
This is useful for dialects that don't support SELECT DISTINCT ON but support window functions.
Arguments:
- expression: the expression that will be transformed.
Returns:
The transformed expression.
80def eliminate_qualify(expression: exp.Expression) -> exp.Expression: 81 """ 82 Convert SELECT statements that contain the QUALIFY clause into subqueries, filtered equivalently. 83 84 The idea behind this transformation can be seen in Snowflake's documentation for QUALIFY: 85 https://docs.snowflake.com/en/sql-reference/constructs/qualify 86 87 Some dialects don't support window functions in the WHERE clause, so we need to include them as 88 projections in the subquery, in order to refer to them in the outer filter using aliases. Also, 89 if a column is referenced in the QUALIFY clause but is not selected, we need to include it too, 90 otherwise we won't be able to refer to it in the outer query's WHERE clause. 91 """ 92 if isinstance(expression, exp.Select) and expression.args.get("qualify"): 93 taken = set(expression.named_selects) 94 for select in expression.selects: 95 if not select.alias_or_name: 96 alias = find_new_name(taken, "_c") 97 select.replace(exp.alias_(select.copy(), alias)) 98 taken.add(alias) 99 100 outer_selects = exp.select(*[select.alias_or_name for select in expression.selects]) 101 qualify_filters = expression.args["qualify"].pop().this 102 103 for expr in qualify_filters.find_all((exp.Window, exp.Column)): 104 if isinstance(expr, exp.Window): 105 alias = find_new_name(expression.named_selects, "_w") 106 expression.select(exp.alias_(expr.copy(), alias), copy=False) 107 expr.replace(exp.column(alias)) 108 elif expr.name not in expression.named_selects: 109 expression.select(expr.copy(), copy=False) 110 111 return outer_selects.from_(expression.subquery(alias="_t")).where(qualify_filters) 112 113 return expression
Convert SELECT statements that contain the QUALIFY clause into subqueries, filtered equivalently.
The idea behind this transformation can be seen in Snowflake's documentation for QUALIFY: https://docs.snowflake.com/en/sql-reference/constructs/qualify
Some dialects don't support window functions in the WHERE clause, so we need to include them as projections in the subquery, in order to refer to them in the outer filter using aliases. Also, if a column is referenced in the QUALIFY clause but is not selected, we need to include it too, otherwise we won't be able to refer to it in the outer query's WHERE clause.
116def remove_precision_parameterized_types(expression: exp.Expression) -> exp.Expression: 117 """ 118 Some dialects only allow the precision for parameterized types to be defined in the DDL and not in other expressions. 119 This transforms removes the precision from parameterized types in expressions. 120 """ 121 return expression.transform( 122 lambda node: exp.DataType( 123 **{ 124 **node.args, 125 "expressions": [ 126 node_expression 127 for node_expression in node.expressions 128 if isinstance(node_expression, exp.DataType) 129 ], 130 } 131 ) 132 if isinstance(node, exp.DataType) 133 else node, 134 )
Some dialects only allow the precision for parameterized types to be defined in the DDL and not in other expressions. This transforms removes the precision from parameterized types in expressions.
137def preprocess( 138 transforms: t.List[t.Callable[[exp.Expression], exp.Expression]], 139 to_sql: t.Callable[[Generator, exp.Expression], str], 140) -> t.Callable[[Generator, exp.Expression], str]: 141 """ 142 Creates a new transform by chaining a sequence of transformations and converts the resulting 143 expression to SQL, using an appropriate `Generator.TRANSFORMS` function. 144 145 Args: 146 transforms: sequence of transform functions. These will be called in order. 147 to_sql: final transform that converts the resulting expression to a SQL string. 148 149 Returns: 150 Function that can be used as a generator transform. 151 """ 152 153 def _to_sql(self, expression): 154 expression = transforms[0](expression.copy()) 155 for t in transforms[1:]: 156 expression = t(expression) 157 return to_sql(self, expression) 158 159 return _to_sql
Creates a new transform by chaining a sequence of transformations and converts the resulting
expression to SQL, using an appropriate Generator.TRANSFORMS
function.
Arguments:
- transforms: sequence of transform functions. These will be called in order.
- to_sql: final transform that converts the resulting expression to a SQL string.
Returns:
Function that can be used as a generator transform.
162def delegate(attr: str) -> t.Callable: 163 """ 164 Create a new method that delegates to `attr`. This is useful for creating `Generator.TRANSFORMS` 165 functions that delegate to existing generator methods. 166 """ 167 168 def _transform(self, *args, **kwargs): 169 return getattr(self, attr)(*args, **kwargs) 170 171 return _transform
Create a new method that delegates to attr
. This is useful for creating Generator.TRANSFORMS
functions that delegate to existing generator methods.