sqlglot.optimizer.canonicalize
1from __future__ import annotations 2 3import itertools 4 5from sqlglot import exp 6from sqlglot.helper import should_identify 7 8 9def canonicalize(expression: exp.Expression, identify: str = "safe") -> exp.Expression: 10 """Converts a sql expression into a standard form. 11 12 This method relies on annotate_types because many of the 13 conversions rely on type inference. 14 15 Args: 16 expression: The expression to canonicalize. 17 identify: Whether or not to force identify identifier. 18 """ 19 exp.replace_children(expression, canonicalize, identify=identify) 20 21 expression = add_text_to_concat(expression) 22 expression = coerce_type(expression) 23 expression = remove_redundant_casts(expression) 24 expression = ensure_bool_predicates(expression) 25 26 if isinstance(expression, exp.Identifier): 27 if should_identify(expression.this, identify): 28 expression.set("quoted", True) 29 30 return expression 31 32 33def add_text_to_concat(node: exp.Expression) -> exp.Expression: 34 if isinstance(node, exp.Add) and node.type and node.type.this in exp.DataType.TEXT_TYPES: 35 node = exp.Concat(this=node.this, expression=node.expression) 36 return node 37 38 39def coerce_type(node: exp.Expression) -> exp.Expression: 40 if isinstance(node, exp.Binary): 41 _coerce_date(node.left, node.right) 42 elif isinstance(node, exp.Between): 43 _coerce_date(node.this, node.args["low"]) 44 elif isinstance(node, exp.Extract): 45 if node.expression.type.this not in exp.DataType.TEMPORAL_TYPES: 46 _replace_cast(node.expression, "datetime") 47 return node 48 49 50def remove_redundant_casts(expression: exp.Expression) -> exp.Expression: 51 if ( 52 isinstance(expression, exp.Cast) 53 and expression.to.type 54 and expression.this.type 55 and expression.to.type.this == expression.this.type.this 56 ): 57 return expression.this 58 return expression 59 60 61def ensure_bool_predicates(expression: exp.Expression) -> exp.Expression: 62 if isinstance(expression, exp.Connector): 63 
_replace_int_predicate(expression.left) 64 _replace_int_predicate(expression.right) 65 66 elif isinstance(expression, (exp.Where, exp.Having)): 67 _replace_int_predicate(expression.this) 68 69 return expression 70 71 72def _coerce_date(a: exp.Expression, b: exp.Expression) -> None: 73 for a, b in itertools.permutations([a, b]): 74 if ( 75 a.type 76 and a.type.this == exp.DataType.Type.DATE 77 and b.type 78 and b.type.this not in (exp.DataType.Type.DATE, exp.DataType.Type.INTERVAL) 79 ): 80 _replace_cast(b, "date") 81 82 83def _replace_cast(node: exp.Expression, to: str) -> None: 84 data_type = exp.DataType.build(to) 85 cast = exp.Cast(this=node.copy(), to=data_type) 86 cast.type = data_type 87 node.replace(cast) 88 89 90def _replace_int_predicate(expression: exp.Expression) -> None: 91 if expression.type and expression.type.this in exp.DataType.INTEGER_TYPES: 92 expression.replace(exp.NEQ(this=expression.copy(), expression=exp.Literal.number(0)))
def
canonicalize( expression: sqlglot.expressions.Expression, identify: str = 'safe') -> sqlglot.expressions.Expression:
def canonicalize(expression: exp.Expression, identify: str = "safe") -> exp.Expression:
    """Rewrite a sql expression into a standard form.

    Type inference drives many of the rewrites, so this relies on
    annotate_types.

    Args:
        expression: The expression to canonicalize.
        identify: Forwarded to `should_identify` to decide whether an
            identifier gets marked as quoted.
    """
    # Canonicalize the children before rewriting this node.
    exp.replace_children(expression, canonicalize, identify=identify)

    rewrite_passes = (
        add_text_to_concat,
        coerce_type,
        remove_redundant_casts,
        ensure_bool_predicates,
    )
    for rewrite in rewrite_passes:
        expression = rewrite(expression)

    if isinstance(expression, exp.Identifier) and should_identify(expression.this, identify):
        expression.set("quoted", True)

    return expression
Converts a sql expression into a standard form.
This method relies on annotate_types because many of the conversions rely on type inference.
Arguments:
- expression: The expression to canonicalize.
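- identify: Controls whether identifiers are forced to be quoted; the value is passed to should_identify, and an identifier is marked as quoted when that check returns true.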
def coerce_type(node: exp.Expression) -> exp.Expression:
    """Inserts casts so that operands have compatible temporal types.

    - `Binary`: the two operands are passed to `_coerce_date`.
    - `Between`: the tested expression and the `low` bound are passed to
      `_coerce_date`.
    - `Extract`: the operand is cast to "datetime" when its annotated type is
      not in `exp.DataType.TEMPORAL_TYPES`.

    The casts are applied in place on the children; `node` itself is returned.
    """
    if isinstance(node, exp.Binary):
        _coerce_date(node.left, node.right)
    elif isinstance(node, exp.Between):
        _coerce_date(node.this, node.args["low"])
    elif isinstance(node, exp.Extract):
        operand_type = node.expression.type
        # An operand without a type annotation is left alone instead of raising
        # AttributeError on `None.this`.
        if operand_type is not None and operand_type.this not in exp.DataType.TEMPORAL_TYPES:
            _replace_cast(node.expression, "datetime")
    return node
def
remove_redundant_casts( expression: sqlglot.expressions.Expression) -> sqlglot.expressions.Expression:
def
ensure_bool_predicates( expression: sqlglot.expressions.Expression) -> sqlglot.expressions.Expression:
def ensure_bool_predicates(expression: exp.Expression) -> exp.Expression:
    """Run `_replace_int_predicate` on the predicate operands of `expression`.

    For a `Connector` both the left and right operands are processed, in that
    order. For a `Where` or `Having` the condition (`this`) is processed. Any
    other node is returned untouched. The given `expression` is always the
    return value.
    """
    if isinstance(expression, exp.Connector):
        # Each side is fetched right before it is processed.
        for side in ("left", "right"):
            _replace_int_predicate(getattr(expression, side))
        return expression

    if isinstance(expression, (exp.Where, exp.Having)):
        _replace_int_predicate(expression.this)

    return expression