From 374a0f6318bcf423b1b784d30b25a8327c65cb24 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 6 Jul 2023 09:28:12 +0200 Subject: Merging upstream version 17.2.0. Signed-off-by: Daniel Baumann --- docs/sqlglot/planner.html | 2116 +++++++++++++++++++++++---------------------- 1 file changed, 1081 insertions(+), 1035 deletions(-) (limited to 'docs/sqlglot/planner.html') diff --git a/docs/sqlglot/planner.html b/docs/sqlglot/planner.html index 275b324..4fcafea 100644 --- a/docs/sqlglot/planner.html +++ b/docs/sqlglot/planner.html @@ -216,412 +216,429 @@ 23 while nodes: 24 node = nodes.pop() 25 dag[node] = set() - 26 for dep in node.dependencies: - 27 dag[node].add(dep) - 28 nodes.add(dep) - 29 self._dag = dag + 26 + 27 for dep in node.dependencies: + 28 dag[node].add(dep) + 29 nodes.add(dep) 30 - 31 return self._dag + 31 self._dag = dag 32 - 33 @property - 34 def leaves(self) -> t.Iterator[Step]: - 35 return (node for node, deps in self.dag.items() if not deps) - 36 - 37 def __repr__(self) -> str: - 38 return f"Plan\n----\n{repr(self.root)}" - 39 - 40 - 41class Step: - 42 @classmethod - 43 def from_expression( - 44 cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None - 45 ) -> Step: - 46 """ - 47 Builds a DAG of Steps from a SQL expression so that it's easier to execute in an engine. - 48 Note: the expression's tables and subqueries must be aliased for this method to work. For - 49 example, given the following expression: - 50 - 51 SELECT - 52 x.a, - 53 SUM(x.b) - 54 FROM x AS x - 55 JOIN y AS y - 56 ON x.a = y.a - 57 GROUP BY x.a - 58 - 59 the following DAG is produced (the expression IDs might differ per execution): + 33 return self._dag + 34 + 35 @property + 36 def leaves(self) -> t.Iterator[Step]: + 37 return (node for node, deps in self.dag.items() if not deps) + 38 + 39 def __repr__(self) -> str: + 40 return f"Plan\n----\n{repr(self.root)}" + 41 + 42 + 43class Step: + 44 @classmethod + 45 def from_expression( + 46 cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None + 47 ) -> Step: + 48 """ + 49 Builds a DAG of Steps from a SQL expression so that it's easier to execute in an engine. + 50 Note: the expression's tables and subqueries must be aliased for this method to work. For + 51 example, given the following expression: + 52 + 53 SELECT + 54 x.a, + 55 SUM(x.b) + 56 FROM x AS x + 57 JOIN y AS y + 58 ON x.a = y.a + 59 GROUP BY x.a 60 - 61 - Aggregate: x (4347984624) - 62 Context: - 63 Aggregations: - 64 - SUM(x.b) - 65 Group: - 66 - x.a - 67 Projections: - 68 - x.a - 69 - "x"."" - 70 Dependencies: - 71 - Join: x (4347985296) - 72 Context: - 73 y: - 74 On: x.a = y.a - 75 Projections: - 76 Dependencies: - 77 - Scan: x (4347983136) - 78 Context: - 79 Source: x AS x - 80 Projections: - 81 - Scan: y (4343416624) - 82 Context: - 83 Source: y AS y - 84 Projections: - 85 - 86 Args: - 87 expression: the expression to build the DAG from. - 88 ctes: a dictionary that maps CTEs to their corresponding Step DAG by name. - 89 - 90 Returns: - 91 A Step DAG corresponding to `expression`. - 92 """ - 93 ctes = ctes or {} - 94 expression = expression.unnest() - 95 with_ = expression.args.get("with") - 96 - 97 # CTEs break the mold of scope and introduce themselves to all in the context. - 98 if with_: - 99 ctes = ctes.copy() -100 for cte in with_.expressions: -101 step = Step.from_expression(cte.this, ctes) -102 step.name = cte.alias -103 ctes[step.name] = step # type: ignore -104 -105 from_ = expression.args.get("from") + 61 the following DAG is produced (the expression IDs might differ per execution): + 62 + 63 - Aggregate: x (4347984624) + 64 Context: + 65 Aggregations: + 66 - SUM(x.b) + 67 Group: + 68 - x.a + 69 Projections: + 70 - x.a + 71 - "x"."" + 72 Dependencies: + 73 - Join: x (4347985296) + 74 Context: + 75 y: + 76 On: x.a = y.a + 77 Projections: + 78 Dependencies: + 79 - Scan: x (4347983136) + 80 Context: + 81 Source: x AS x + 82 Projections: + 83 - Scan: y (4343416624) + 84 Context: + 85 Source: y AS y + 86 Projections: + 87 + 88 Args: + 89 expression: the expression to build the DAG from. + 90 ctes: a dictionary that maps CTEs to their corresponding Step DAG by name. + 91 + 92 Returns: + 93 A Step DAG corresponding to `expression`. + 94 """ + 95 ctes = ctes or {} + 96 expression = expression.unnest() + 97 with_ = expression.args.get("with") + 98 + 99 # CTEs break the mold of scope and introduce themselves to all in the context. +100 if with_: +101 ctes = ctes.copy() +102 for cte in with_.expressions: +103 step = Step.from_expression(cte.this, ctes) +104 step.name = cte.alias +105 ctes[step.name] = step # type: ignore 106 -107 if isinstance(expression, exp.Select) and from_: -108 step = Scan.from_expression(from_.this, ctes) -109 elif isinstance(expression, exp.Union): -110 step = SetOperation.from_expression(expression, ctes) -111 else: -112 step = Scan() -113 -114 joins = expression.args.get("joins") +107 from_ = expression.args.get("from") +108 +109 if isinstance(expression, exp.Select) and from_: +110 step = Scan.from_expression(from_.this, ctes) +111 elif isinstance(expression, exp.Union): +112 step = SetOperation.from_expression(expression, ctes) +113 else: +114 step = Scan() 115 -116 if joins: -117 join = Join.from_joins(joins, ctes) -118 join.name = step.name -119 join.add_dependency(step) -120 step = join -121 -122 projections = [] # final selects in this chain of steps representing a select -123 operands = {} # intermediate computations of agg funcs eg x + 1 in SUM(x + 1) -124 aggregations = set() -125 next_operand_name = name_sequence("_a_") -126 -127 def extract_agg_operands(expression): -128 agg_funcs = tuple(expression.find_all(exp.AggFunc)) -129 if agg_funcs: -130 aggregations.add(expression) -131 for agg in agg_funcs: -132 for operand in agg.unnest_operands(): -133 if isinstance(operand, exp.Column): -134 continue -135 if operand not in operands: -136 operands[operand] = next_operand_name() -137 operand.replace(exp.column(operands[operand], quoted=True)) -138 return bool(agg_funcs) -139 -140 for e in expression.expressions: -141 if e.find(exp.AggFunc): -142 projections.append(exp.column(e.alias_or_name, step.name, quoted=True)) -143 extract_agg_operands(e) -144 else: -145 projections.append(e) -146 -147 where = expression.args.get("where") +116 joins = expression.args.get("joins") +117 +118 if joins: +119 join = Join.from_joins(joins, ctes) +120 join.name = step.name +121 join.add_dependency(step) +122 step = join +123 +124 projections = [] # final selects in this chain of steps representing a select +125 operands = {} # intermediate computations of agg funcs eg x + 1 in SUM(x + 1) +126 aggregations = set() +127 next_operand_name = name_sequence("_a_") +128 +129 def extract_agg_operands(expression): +130 agg_funcs = tuple(expression.find_all(exp.AggFunc)) +131 if agg_funcs: +132 aggregations.add(expression) +133 +134 for agg in agg_funcs: +135 for operand in agg.unnest_operands(): +136 if isinstance(operand, exp.Column): +137 continue +138 if operand not in operands: +139 operands[operand] = next_operand_name() +140 +141 operand.replace(exp.column(operands[operand], quoted=True)) +142 +143 return bool(agg_funcs) +144 +145 def set_ops_and_aggs(step): +146 step.operands = tuple(alias(operand, alias_) for operand, alias_ in operands.items()) +147 step.aggregations = list(aggregations) 148 -149 if where: -150 step.condition = where.this -151 -152 group = expression.args.get("group") -153 -154 if group or aggregations: -155 aggregate = Aggregate() -156 aggregate.source = step.name -157 aggregate.name = step.name -158 -159 having = expression.args.get("having") +149 for e in expression.expressions: +150 if e.find(exp.AggFunc): +151 projections.append(exp.column(e.alias_or_name, step.name, quoted=True)) +152 extract_agg_operands(e) +153 else: +154 projections.append(e) +155 +156 where = expression.args.get("where") +157 +158 if where: +159 step.condition = where.this 160 -161 if having: -162 if extract_agg_operands(exp.alias_(having.this, "_h", quoted=True)): -163 aggregate.condition = exp.column("_h", step.name, quoted=True) -164 else: -165 aggregate.condition = having.this -166 -167 aggregate.operands = tuple( -168 alias(operand, alias_) for operand, alias_ in operands.items() -169 ) -170 aggregate.aggregations = list(aggregations) -171 -172 # give aggregates names and replace projections with references to them -173 aggregate.group = { -174 f"_g{i}": e for i, e in enumerate(group.expressions if group else []) -175 } -176 -177 intermediate: t.Dict[str | exp.Expression, str] = {} -178 for k, v in aggregate.group.items(): -179 intermediate[v] = k -180 if isinstance(v, exp.Column): -181 intermediate[v.alias_or_name] = k +161 group = expression.args.get("group") +162 +163 if group or aggregations: +164 aggregate = Aggregate() +165 aggregate.source = step.name +166 aggregate.name = step.name +167 +168 having = expression.args.get("having") +169 +170 if having: +171 if extract_agg_operands(exp.alias_(having.this, "_h", quoted=True)): +172 aggregate.condition = exp.column("_h", step.name, quoted=True) +173 else: +174 aggregate.condition = having.this +175 +176 set_ops_and_aggs(aggregate) +177 +178 # give aggregates names and replace projections with references to them +179 aggregate.group = { +180 f"_g{i}": e for i, e in enumerate(group.expressions if group else []) +181 } 182 -183 for projection in projections: -184 for node, *_ in projection.walk(): -185 name = intermediate.get(node) -186 if name: -187 node.replace(exp.column(name, step.name)) -188 if aggregate.condition: -189 for node, *_ in aggregate.condition.walk(): -190 name = intermediate.get(node) or intermediate.get(node.name) -191 if name: -192 node.replace(exp.column(name, step.name)) -193 -194 aggregate.add_dependency(step) -195 step = aggregate -196 -197 order = expression.args.get("order") -198 -199 if order: -200 sort = Sort() -201 sort.name = step.name -202 sort.key = order.expressions -203 sort.add_dependency(step) -204 step = sort +183 intermediate: t.Dict[str | exp.Expression, str] = {} +184 for k, v in aggregate.group.items(): +185 intermediate[v] = k +186 if isinstance(v, exp.Column): +187 intermediate[v.name] = k +188 +189 for projection in projections: +190 for node, *_ in projection.walk(): +191 name = intermediate.get(node) +192 if name: +193 node.replace(exp.column(name, step.name)) +194 +195 if aggregate.condition: +196 for node, *_ in aggregate.condition.walk(): +197 name = intermediate.get(node) or intermediate.get(node.name) +198 if name: +199 node.replace(exp.column(name, step.name)) +200 +201 aggregate.add_dependency(step) +202 step = aggregate +203 +204 order = expression.args.get("order") 205 -206 step.projections = projections -207 -208 if isinstance(expression, exp.Select) and expression.args.get("distinct"): -209 distinct = Aggregate() -210 distinct.source = step.name -211 distinct.name = step.name -212 distinct.group = { -213 e.alias_or_name: exp.column(col=e.alias_or_name, table=step.name) -214 for e in projections or expression.expressions -215 } -216 distinct.add_dependency(step) -217 step = distinct -218 -219 limit = expression.args.get("limit") -220 -221 if limit: -222 step.limit = int(limit.text("expression")) -223 -224 return step -225 -226 def __init__(self) -> None: -227 self.name: t.Optional[str] = None -228 self.dependencies: t.Set[Step] = set() -229 self.dependents: t.Set[Step] = set() -230 self.projections: t.Sequence[exp.Expression] = [] -231 self.limit: float = math.inf -232 self.condition: t.Optional[exp.Expression] = None -233 -234 def add_dependency(self, dependency: Step) -> None: -235 self.dependencies.add(dependency) -236 dependency.dependents.add(self) +206 if order: +207 if isinstance(step, Aggregate): +208 for i, ordered in enumerate(order.expressions): +209 if extract_agg_operands(exp.alias_(ordered.this, f"_o_{i}", quoted=True)): +210 ordered.this.replace(exp.column(f"_o_{i}", step.name, quoted=True)) +211 +212 set_ops_and_aggs(aggregate) +213 +214 sort = Sort() +215 sort.name = step.name +216 sort.key = order.expressions +217 sort.add_dependency(step) +218 step = sort +219 +220 step.projections = projections +221 +222 if isinstance(expression, exp.Select) and expression.args.get("distinct"): +223 distinct = Aggregate() +224 distinct.source = step.name +225 distinct.name = step.name +226 distinct.group = { +227 e.alias_or_name: exp.column(col=e.alias_or_name, table=step.name) +228 for e in projections or expression.expressions +229 } +230 distinct.add_dependency(step) +231 step = distinct +232 +233 limit = expression.args.get("limit") +234 +235 if limit: +236 step.limit = int(limit.text("expression")) 237 -238 def __repr__(self) -> str: -239 return self.to_s() -240 -241 def to_s(self, level: int = 0) -> str: -242 indent = " " * level -243 nested = f"{indent} " -244 -245 context = self._to_s(f"{nested} ") -246 -247 if context: -248 context = [f"{nested}Context:"] + context -249 -250 lines = [ -251 f"{indent}- {self.id}", -252 *context, -253 f"{nested}Projections:", -254 ] -255 -256 for expression in self.projections: -257 lines.append(f"{nested} - {expression.sql()}") +238 return step +239 +240 def __init__(self) -> None: +241 self.name: t.Optional[str] = None +242 self.dependencies: t.Set[Step] = set() +243 self.dependents: t.Set[Step] = set() +244 self.projections: t.Sequence[exp.Expression] = [] +245 self.limit: float = math.inf +246 self.condition: t.Optional[exp.Expression] = None +247 +248 def add_dependency(self, dependency: Step) -> None: +249 self.dependencies.add(dependency) +250 dependency.dependents.add(self) +251 +252 def __repr__(self) -> str: +253 return self.to_s() +254 +255 def to_s(self, level: int = 0) -> str: +256 indent = " " * level +257 nested = f"{indent} " 258 -259 if self.condition: -260 lines.append(f"{nested}Condition: {self.condition.sql()}") -261 -262 if self.limit is not math.inf: -263 lines.append(f"{nested}Limit: {self.limit}") -264 -265 if self.dependencies: -266 lines.append(f"{nested}Dependencies:") -267 for dependency in self.dependencies: -268 lines.append(" " + dependency.to_s(level + 1)) +259 context = self._to_s(f"{nested} ") +260 +261 if context: +262 context = [f"{nested}Context:"] + context +263 +264 lines = [ +265 f"{indent}- {self.id}", +266 *context, +267 f"{nested}Projections:", +268 ] 269 -270 return "\n".join(lines) -271 -272 @property -273 def type_name(self) -> str: -274 return self.__class__.__name__ +270 for expression in self.projections: +271 lines.append(f"{nested} - {expression.sql()}") +272 +273 if self.condition: +274 lines.append(f"{nested}Condition: {self.condition.sql()}") 275 -276 @property -277 def id(self) -> str: -278 name = self.name -279 name = f" {name}" if name else "" -280 return f"{self.type_name}:{name} ({id(self)})" -281 -282 def _to_s(self, _indent: str) -> t.List[str]: -283 return [] -284 +276 if self.limit is not math.inf: +277 lines.append(f"{nested}Limit: {self.limit}") +278 +279 if self.dependencies: +280 lines.append(f"{nested}Dependencies:") +281 for dependency in self.dependencies: +282 lines.append(" " + dependency.to_s(level + 1)) +283 +284 return "\n".join(lines) 285 -286class Scan(Step): -287 @classmethod -288 def from_expression( -289 cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None -290 ) -> Step: -291 table = expression -292 alias_ = expression.alias_or_name -293 -294 if isinstance(expression, exp.Subquery): -295 table = expression.this -296 step = Step.from_expression(table, ctes) -297 step.name = alias_ -298 return step +286 @property +287 def type_name(self) -> str: +288 return self.__class__.__name__ +289 +290 @property +291 def id(self) -> str: +292 name = self.name +293 name = f" {name}" if name else "" +294 return f"{self.type_name}:{name} ({id(self)})" +295 +296 def _to_s(self, _indent: str) -> t.List[str]: +297 return [] +298 299 -300 step = Scan() -301 step.name = alias_ -302 step.source = expression -303 if ctes and table.name in ctes: -304 step.add_dependency(ctes[table.name]) -305 -306 return step +300class Scan(Step): +301 @classmethod +302 def from_expression( +303 cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None +304 ) -> Step: +305 table = expression +306 alias_ = expression.alias_or_name 307 -308 def __init__(self) -> None: -309 super().__init__() -310 self.source: t.Optional[exp.Expression] = None -311 -312 def _to_s(self, indent: str) -> t.List[str]: -313 return [f"{indent}Source: {self.source.sql() if self.source else '-static-'}"] # type: ignore -314 -315 -316class Join(Step): -317 @classmethod -318 def from_joins( -319 cls, joins: t.Iterable[exp.Join], ctes: t.Optional[t.Dict[str, Step]] = None -320 ) -> Step: -321 step = Join() -322 -323 for join in joins: -324 source_key, join_key, condition = join_condition(join) -325 step.joins[join.alias_or_name] = { -326 "side": join.side, # type: ignore -327 "join_key": join_key, -328 "source_key": source_key, -329 "condition": condition, -330 } -331 -332 step.add_dependency(Scan.from_expression(join.this, ctes)) -333 -334 return step -335 -336 def __init__(self) -> None: -337 super().__init__() -338 self.joins: t.Dict[str, t.Dict[str, t.List[str] | exp.Expression]] = {} -339 -340 def _to_s(self, indent: str) -> t.List[str]: -341 lines = [] -342 for name, join in self.joins.items(): -343 lines.append(f"{indent}{name}: {join['side']}") -344 if join.get("condition"): -345 lines.append(f"{indent}On: {join['condition'].sql()}") # type: ignore -346 return lines +308 if isinstance(expression, exp.Subquery): +309 table = expression.this +310 step = Step.from_expression(table, ctes) +311 step.name = alias_ +312 return step +313 +314 step = Scan() +315 step.name = alias_ +316 step.source = expression +317 if ctes and table.name in ctes: +318 step.add_dependency(ctes[table.name]) +319 +320 return step +321 +322 def __init__(self) -> None: +323 super().__init__() +324 self.source: t.Optional[exp.Expression] = None +325 +326 def _to_s(self, indent: str) -> t.List[str]: +327 return [f"{indent}Source: {self.source.sql() if self.source else '-static-'}"] # type: ignore +328 +329 +330class Join(Step): +331 @classmethod +332 def from_joins( +333 cls, joins: t.Iterable[exp.Join], ctes: t.Optional[t.Dict[str, Step]] = None +334 ) -> Step: +335 step = Join() +336 +337 for join in joins: +338 source_key, join_key, condition = join_condition(join) +339 step.joins[join.alias_or_name] = { +340 "side": join.side, # type: ignore +341 "join_key": join_key, +342 "source_key": source_key, +343 "condition": condition, +344 } +345 +346 step.add_dependency(Scan.from_expression(join.this, ctes)) 347 -348 -349class Aggregate(Step): +348 return step +349 350 def __init__(self) -> None: 351 super().__init__() -352 self.aggregations: t.List[exp.Expression] = [] -353 self.operands: t.Tuple[exp.Expression, ...] = () -354 self.group: t.Dict[str, exp.Expression] = {} -355 self.source: t.Optional[str] = None -356 -357 def _to_s(self, indent: str) -> t.List[str]: -358 lines = [f"{indent}Aggregations:"] -359 -360 for expression in self.aggregations: -361 lines.append(f"{indent} - {expression.sql()}") -362 -363 if self.group: -364 lines.append(f"{indent}Group:") -365 for expression in self.group.values(): -366 lines.append(f"{indent} - {expression.sql()}") -367 if self.condition: -368 lines.append(f"{indent}Having:") -369 lines.append(f"{indent} - {self.condition.sql()}") -370 if self.operands: -371 lines.append(f"{indent}Operands:") -372 for expression in self.operands: -373 lines.append(f"{indent} - {expression.sql()}") -374 -375 return lines +352 self.joins: t.Dict[str, t.Dict[str, t.List[str] | exp.Expression]] = {} +353 +354 def _to_s(self, indent: str) -> t.List[str]: +355 lines = [] +356 for name, join in self.joins.items(): +357 lines.append(f"{indent}{name}: {join['side'] or 'INNER'}") +358 join_key = ", ".join(str(key) for key in t.cast(list, join.get("join_key") or [])) +359 if join_key: +360 lines.append(f"{indent}Key: {join_key}") +361 if join.get("condition"): +362 lines.append(f"{indent}On: {join['condition'].sql()}") # type: ignore +363 return lines +364 +365 +366class Aggregate(Step): +367 def __init__(self) -> None: +368 super().__init__() +369 self.aggregations: t.List[exp.Expression] = [] +370 self.operands: t.Tuple[exp.Expression, ...] = () +371 self.group: t.Dict[str, exp.Expression] = {} +372 self.source: t.Optional[str] = None +373 +374 def _to_s(self, indent: str) -> t.List[str]: +375 lines = [f"{indent}Aggregations:"] 376 -377 -378class Sort(Step): -379 def __init__(self) -> None: -380 super().__init__() -381 self.key = None -382 -383 def _to_s(self, indent: str) -> t.List[str]: -384 lines = [f"{indent}Key:"] -385 -386 for expression in self.key: # type: ignore -387 lines.append(f"{indent} - {expression.sql()}") -388 -389 return lines -390 +377 for expression in self.aggregations: +378 lines.append(f"{indent} - {expression.sql()}") +379 +380 if self.group: +381 lines.append(f"{indent}Group:") +382 for expression in self.group.values(): +383 lines.append(f"{indent} - {expression.sql()}") +384 if self.condition: +385 lines.append(f"{indent}Having:") +386 lines.append(f"{indent} - {self.condition.sql()}") +387 if self.operands: +388 lines.append(f"{indent}Operands:") +389 for expression in self.operands: +390 lines.append(f"{indent} - {expression.sql()}") 391 -392class SetOperation(Step): -393 def __init__( -394 self, -395 op: t.Type[exp.Expression], -396 left: str | None, -397 right: str | None, -398 distinct: bool = False, -399 ) -> None: -400 super().__init__() -401 self.op = op -402 self.left = left -403 self.right = right -404 self.distinct = distinct +392 return lines +393 +394 +395class Sort(Step): +396 def __init__(self) -> None: +397 super().__init__() +398 self.key = None +399 +400 def _to_s(self, indent: str) -> t.List[str]: +401 lines = [f"{indent}Key:"] +402 +403 for expression in self.key: # type: ignore +404 lines.append(f"{indent} - {expression.sql()}") 405 -406 @classmethod -407 def from_expression( -408 cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None -409 ) -> Step: -410 assert isinstance(expression, exp.Union) -411 left = Step.from_expression(expression.left, ctes) -412 right = Step.from_expression(expression.right, ctes) -413 step = cls( -414 op=expression.__class__, -415 left=left.name, -416 right=right.name, -417 distinct=bool(expression.args.get("distinct")), -418 ) -419 step.add_dependency(left) -420 step.add_dependency(right) -421 return step +406 return lines +407 +408 +409class SetOperation(Step): +410 def __init__( +411 self, +412 op: t.Type[exp.Expression], +413 left: str | None, +414 right: str | None, +415 distinct: bool = False, +416 ) -> None: +417 super().__init__() +418 self.op = op +419 self.left = left +420 self.right = right +421 self.distinct = distinct 422 -423 def _to_s(self, indent: str) -> t.List[str]: -424 lines = [] -425 if self.distinct: -426 lines.append(f"{indent}Distinct: {self.distinct}") -427 return lines -428 -429 @property -430 def type_name(self) -> str: -431 return self.op.__name__ +423 @classmethod +424 def from_expression( +425 cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None +426 ) -> Step: +427 assert isinstance(expression, exp.Union) +428 left = Step.from_expression(expression.left, ctes) +429 right = Step.from_expression(expression.right, ctes) +430 step = cls( +431 op=expression.__class__, +432 left=left.name, +433 right=right.name, +434 distinct=bool(expression.args.get("distinct")), +435 ) +436 step.add_dependency(left) +437 step.add_dependency(right) +438 return step +439 +440 def _to_s(self, indent: str) -> t.List[str]: +441 lines = [] +442 if self.distinct: +443 lines.append(f"{indent}Distinct: {self.distinct}") +444 return lines +445 +446 @property +447 def type_name(self) -> str: +448 return self.op.__name__ @@ -652,19 +669,21 @@ 24 while nodes: 25 node = nodes.pop() 26 dag[node] = set() -27 for dep in node.dependencies: -28 dag[node].add(dep) -29 nodes.add(dep) -30 self._dag = dag +27 +28 for dep in node.dependencies: +29 dag[node].add(dep) +30 nodes.add(dep) 31 -32 return self._dag +32 self._dag = dag 33 -34 @property -35 def leaves(self) -> t.Iterator[Step]: -36 return (node for node, deps in self.dag.items() if not deps) -37 -38 def __repr__(self) -> str: -39 return f"Plan\n----\n{repr(self.root)}" +34 return self._dag +35 +36 @property +37 def leaves(self) -> t.Iterator[Step]: +38 return (node for node, deps in self.dag.items() if not deps) +39 +40 def __repr__(self) -> str: +41 return f"Plan\n----\n{repr(self.root)}" @@ -746,249 +765,261 @@ -
 42class Step:
- 43    @classmethod
- 44    def from_expression(
- 45        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
- 46    ) -> Step:
- 47        """
- 48        Builds a DAG of Steps from a SQL expression so that it's easier to execute in an engine.
- 49        Note: the expression's tables and subqueries must be aliased for this method to work. For
- 50        example, given the following expression:
- 51
- 52        SELECT
- 53          x.a,
- 54          SUM(x.b)
- 55        FROM x AS x
- 56        JOIN y AS y
- 57          ON x.a = y.a
- 58        GROUP BY x.a
- 59
- 60        the following DAG is produced (the expression IDs might differ per execution):
+            
 44class Step:
+ 45    @classmethod
+ 46    def from_expression(
+ 47        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
+ 48    ) -> Step:
+ 49        """
+ 50        Builds a DAG of Steps from a SQL expression so that it's easier to execute in an engine.
+ 51        Note: the expression's tables and subqueries must be aliased for this method to work. For
+ 52        example, given the following expression:
+ 53
+ 54        SELECT
+ 55          x.a,
+ 56          SUM(x.b)
+ 57        FROM x AS x
+ 58        JOIN y AS y
+ 59          ON x.a = y.a
+ 60        GROUP BY x.a
  61
- 62        - Aggregate: x (4347984624)
- 63            Context:
- 64              Aggregations:
- 65                - SUM(x.b)
- 66              Group:
- 67                - x.a
- 68            Projections:
- 69              - x.a
- 70              - "x".""
- 71            Dependencies:
- 72            - Join: x (4347985296)
- 73              Context:
- 74                y:
- 75                On: x.a = y.a
- 76              Projections:
- 77              Dependencies:
- 78              - Scan: x (4347983136)
- 79                Context:
- 80                  Source: x AS x
- 81                Projections:
- 82              - Scan: y (4343416624)
- 83                Context:
- 84                  Source: y AS y
- 85                Projections:
- 86
- 87        Args:
- 88            expression: the expression to build the DAG from.
- 89            ctes: a dictionary that maps CTEs to their corresponding Step DAG by name.
- 90
- 91        Returns:
- 92            A Step DAG corresponding to `expression`.
- 93        """
- 94        ctes = ctes or {}
- 95        expression = expression.unnest()
- 96        with_ = expression.args.get("with")
- 97
- 98        # CTEs break the mold of scope and introduce themselves to all in the context.
- 99        if with_:
-100            ctes = ctes.copy()
-101            for cte in with_.expressions:
-102                step = Step.from_expression(cte.this, ctes)
-103                step.name = cte.alias
-104                ctes[step.name] = step  # type: ignore
-105
-106        from_ = expression.args.get("from")
+ 62        the following DAG is produced (the expression IDs might differ per execution):
+ 63
+ 64        - Aggregate: x (4347984624)
+ 65            Context:
+ 66              Aggregations:
+ 67                - SUM(x.b)
+ 68              Group:
+ 69                - x.a
+ 70            Projections:
+ 71              - x.a
+ 72              - "x".""
+ 73            Dependencies:
+ 74            - Join: x (4347985296)
+ 75              Context:
+ 76                y:
+ 77                On: x.a = y.a
+ 78              Projections:
+ 79              Dependencies:
+ 80              - Scan: x (4347983136)
+ 81                Context:
+ 82                  Source: x AS x
+ 83                Projections:
+ 84              - Scan: y (4343416624)
+ 85                Context:
+ 86                  Source: y AS y
+ 87                Projections:
+ 88
+ 89        Args:
+ 90            expression: the expression to build the DAG from.
+ 91            ctes: a dictionary that maps CTEs to their corresponding Step DAG by name.
+ 92
+ 93        Returns:
+ 94            A Step DAG corresponding to `expression`.
+ 95        """
+ 96        ctes = ctes or {}
+ 97        expression = expression.unnest()
+ 98        with_ = expression.args.get("with")
+ 99
+100        # CTEs break the mold of scope and introduce themselves to all in the context.
+101        if with_:
+102            ctes = ctes.copy()
+103            for cte in with_.expressions:
+104                step = Step.from_expression(cte.this, ctes)
+105                step.name = cte.alias
+106                ctes[step.name] = step  # type: ignore
 107
-108        if isinstance(expression, exp.Select) and from_:
-109            step = Scan.from_expression(from_.this, ctes)
-110        elif isinstance(expression, exp.Union):
-111            step = SetOperation.from_expression(expression, ctes)
-112        else:
-113            step = Scan()
-114
-115        joins = expression.args.get("joins")
+108        from_ = expression.args.get("from")
+109
+110        if isinstance(expression, exp.Select) and from_:
+111            step = Scan.from_expression(from_.this, ctes)
+112        elif isinstance(expression, exp.Union):
+113            step = SetOperation.from_expression(expression, ctes)
+114        else:
+115            step = Scan()
 116
-117        if joins:
-118            join = Join.from_joins(joins, ctes)
-119            join.name = step.name
-120            join.add_dependency(step)
-121            step = join
-122
-123        projections = []  # final selects in this chain of steps representing a select
-124        operands = {}  # intermediate computations of agg funcs eg x + 1 in SUM(x + 1)
-125        aggregations = set()
-126        next_operand_name = name_sequence("_a_")
-127
-128        def extract_agg_operands(expression):
-129            agg_funcs = tuple(expression.find_all(exp.AggFunc))
-130            if agg_funcs:
-131                aggregations.add(expression)
-132            for agg in agg_funcs:
-133                for operand in agg.unnest_operands():
-134                    if isinstance(operand, exp.Column):
-135                        continue
-136                    if operand not in operands:
-137                        operands[operand] = next_operand_name()
-138                    operand.replace(exp.column(operands[operand], quoted=True))
-139            return bool(agg_funcs)
-140
-141        for e in expression.expressions:
-142            if e.find(exp.AggFunc):
-143                projections.append(exp.column(e.alias_or_name, step.name, quoted=True))
-144                extract_agg_operands(e)
-145            else:
-146                projections.append(e)
-147
-148        where = expression.args.get("where")
+117        joins = expression.args.get("joins")
+118
+119        if joins:
+120            join = Join.from_joins(joins, ctes)
+121            join.name = step.name
+122            join.add_dependency(step)
+123            step = join
+124
+125        projections = []  # final selects in this chain of steps representing a select
+126        operands = {}  # intermediate computations of agg funcs eg x + 1 in SUM(x + 1)
+127        aggregations = set()
+128        next_operand_name = name_sequence("_a_")
+129
+130        def extract_agg_operands(expression):
+131            agg_funcs = tuple(expression.find_all(exp.AggFunc))
+132            if agg_funcs:
+133                aggregations.add(expression)
+134
+135            for agg in agg_funcs:
+136                for operand in agg.unnest_operands():
+137                    if isinstance(operand, exp.Column):
+138                        continue
+139                    if operand not in operands:
+140                        operands[operand] = next_operand_name()
+141
+142                    operand.replace(exp.column(operands[operand], quoted=True))
+143
+144            return bool(agg_funcs)
+145
+146        def set_ops_and_aggs(step):
+147            step.operands = tuple(alias(operand, alias_) for operand, alias_ in operands.items())
+148            step.aggregations = list(aggregations)
 149
-150        if where:
-151            step.condition = where.this
-152
-153        group = expression.args.get("group")
-154
-155        if group or aggregations:
-156            aggregate = Aggregate()
-157            aggregate.source = step.name
-158            aggregate.name = step.name
-159
-160            having = expression.args.get("having")
+150        for e in expression.expressions:
+151            if e.find(exp.AggFunc):
+152                projections.append(exp.column(e.alias_or_name, step.name, quoted=True))
+153                extract_agg_operands(e)
+154            else:
+155                projections.append(e)
+156
+157        where = expression.args.get("where")
+158
+159        if where:
+160            step.condition = where.this
 161
-162            if having:
-163                if extract_agg_operands(exp.alias_(having.this, "_h", quoted=True)):
-164                    aggregate.condition = exp.column("_h", step.name, quoted=True)
-165                else:
-166                    aggregate.condition = having.this
-167
-168            aggregate.operands = tuple(
-169                alias(operand, alias_) for operand, alias_ in operands.items()
-170            )
-171            aggregate.aggregations = list(aggregations)
-172
-173            # give aggregates names and replace projections with references to them
-174            aggregate.group = {
-175                f"_g{i}": e for i, e in enumerate(group.expressions if group else [])
-176            }
-177
-178            intermediate: t.Dict[str | exp.Expression, str] = {}
-179            for k, v in aggregate.group.items():
-180                intermediate[v] = k
-181                if isinstance(v, exp.Column):
-182                    intermediate[v.alias_or_name] = k
+162        group = expression.args.get("group")
+163
+164        if group or aggregations:
+165            aggregate = Aggregate()
+166            aggregate.source = step.name
+167            aggregate.name = step.name
+168
+169            having = expression.args.get("having")
+170
+171            if having:
+172                if extract_agg_operands(exp.alias_(having.this, "_h", quoted=True)):
+173                    aggregate.condition = exp.column("_h", step.name, quoted=True)
+174                else:
+175                    aggregate.condition = having.this
+176
+177            set_ops_and_aggs(aggregate)
+178
+179            # give aggregates names and replace projections with references to them
+180            aggregate.group = {
+181                f"_g{i}": e for i, e in enumerate(group.expressions if group else [])
+182            }
 183
-184            for projection in projections:
-185                for node, *_ in projection.walk():
-186                    name = intermediate.get(node)
-187                    if name:
-188                        node.replace(exp.column(name, step.name))
-189            if aggregate.condition:
-190                for node, *_ in aggregate.condition.walk():
-191                    name = intermediate.get(node) or intermediate.get(node.name)
-192                    if name:
-193                        node.replace(exp.column(name, step.name))
-194
-195            aggregate.add_dependency(step)
-196            step = aggregate
-197
-198        order = expression.args.get("order")
-199
-200        if order:
-201            sort = Sort()
-202            sort.name = step.name
-203            sort.key = order.expressions
-204            sort.add_dependency(step)
-205            step = sort
+184            intermediate: t.Dict[str | exp.Expression, str] = {}
+185            for k, v in aggregate.group.items():
+186                intermediate[v] = k
+187                if isinstance(v, exp.Column):
+188                    intermediate[v.name] = k
+189
+190            for projection in projections:
+191                for node, *_ in projection.walk():
+192                    name = intermediate.get(node)
+193                    if name:
+194                        node.replace(exp.column(name, step.name))
+195
+196            if aggregate.condition:
+197                for node, *_ in aggregate.condition.walk():
+198                    name = intermediate.get(node) or intermediate.get(node.name)
+199                    if name:
+200                        node.replace(exp.column(name, step.name))
+201
+202            aggregate.add_dependency(step)
+203            step = aggregate
+204
+205        order = expression.args.get("order")
 206
-207        step.projections = projections
-208
-209        if isinstance(expression, exp.Select) and expression.args.get("distinct"):
-210            distinct = Aggregate()
-211            distinct.source = step.name
-212            distinct.name = step.name
-213            distinct.group = {
-214                e.alias_or_name: exp.column(col=e.alias_or_name, table=step.name)
-215                for e in projections or expression.expressions
-216            }
-217            distinct.add_dependency(step)
-218            step = distinct
-219
-220        limit = expression.args.get("limit")
-221
-222        if limit:
-223            step.limit = int(limit.text("expression"))
-224
-225        return step
-226
-227    def __init__(self) -> None:
-228        self.name: t.Optional[str] = None
-229        self.dependencies: t.Set[Step] = set()
-230        self.dependents: t.Set[Step] = set()
-231        self.projections: t.Sequence[exp.Expression] = []
-232        self.limit: float = math.inf
-233        self.condition: t.Optional[exp.Expression] = None
-234
-235    def add_dependency(self, dependency: Step) -> None:
-236        self.dependencies.add(dependency)
-237        dependency.dependents.add(self)
+207        if order:
+208            if isinstance(step, Aggregate):
+209                for i, ordered in enumerate(order.expressions):
+210                    if extract_agg_operands(exp.alias_(ordered.this, f"_o_{i}", quoted=True)):
+211                        ordered.this.replace(exp.column(f"_o_{i}", step.name, quoted=True))
+212
+213                set_ops_and_aggs(aggregate)
+214
+215            sort = Sort()
+216            sort.name = step.name
+217            sort.key = order.expressions
+218            sort.add_dependency(step)
+219            step = sort
+220
+221        step.projections = projections
+222
+223        if isinstance(expression, exp.Select) and expression.args.get("distinct"):
+224            distinct = Aggregate()
+225            distinct.source = step.name
+226            distinct.name = step.name
+227            distinct.group = {
+228                e.alias_or_name: exp.column(col=e.alias_or_name, table=step.name)
+229                for e in projections or expression.expressions
+230            }
+231            distinct.add_dependency(step)
+232            step = distinct
+233
+234        limit = expression.args.get("limit")
+235
+236        if limit:
+237            step.limit = int(limit.text("expression"))
 238
-239    def __repr__(self) -> str:
-240        return self.to_s()
-241
-242    def to_s(self, level: int = 0) -> str:
-243        indent = "  " * level
-244        nested = f"{indent}    "
-245
-246        context = self._to_s(f"{nested}  ")
-247
-248        if context:
-249            context = [f"{nested}Context:"] + context
-250
-251        lines = [
-252            f"{indent}- {self.id}",
-253            *context,
-254            f"{nested}Projections:",
-255        ]
-256
-257        for expression in self.projections:
-258            lines.append(f"{nested}  - {expression.sql()}")
+239        return step
+240
+241    def __init__(self) -> None:
+242        self.name: t.Optional[str] = None
+243        self.dependencies: t.Set[Step] = set()
+244        self.dependents: t.Set[Step] = set()
+245        self.projections: t.Sequence[exp.Expression] = []
+246        self.limit: float = math.inf
+247        self.condition: t.Optional[exp.Expression] = None
+248
+249    def add_dependency(self, dependency: Step) -> None:
+250        self.dependencies.add(dependency)
+251        dependency.dependents.add(self)
+252
+253    def __repr__(self) -> str:
+254        return self.to_s()
+255
+256    def to_s(self, level: int = 0) -> str:
+257        indent = "  " * level
+258        nested = f"{indent}    "
 259
-260        if self.condition:
-261            lines.append(f"{nested}Condition: {self.condition.sql()}")
-262
-263        if self.limit is not math.inf:
-264            lines.append(f"{nested}Limit: {self.limit}")
-265
-266        if self.dependencies:
-267            lines.append(f"{nested}Dependencies:")
-268            for dependency in self.dependencies:
-269                lines.append("  " + dependency.to_s(level + 1))
+260        context = self._to_s(f"{nested}  ")
+261
+262        if context:
+263            context = [f"{nested}Context:"] + context
+264
+265        lines = [
+266            f"{indent}- {self.id}",
+267            *context,
+268            f"{nested}Projections:",
+269        ]
 270
-271        return "\n".join(lines)
-272
-273    @property
-274    def type_name(self) -> str:
-275        return self.__class__.__name__
+271        for expression in self.projections:
+272            lines.append(f"{nested}  - {expression.sql()}")
+273
+274        if self.condition:
+275            lines.append(f"{nested}Condition: {self.condition.sql()}")
 276
-277    @property
-278    def id(self) -> str:
-279        name = self.name
-280        name = f" {name}" if name else ""
-281        return f"{self.type_name}:{name} ({id(self)})"
-282
-283    def _to_s(self, _indent: str) -> t.List[str]:
-284        return []
+277        if self.limit is not math.inf:
+278            lines.append(f"{nested}Limit: {self.limit}")
+279
+280        if self.dependencies:
+281            lines.append(f"{nested}Dependencies:")
+282            for dependency in self.dependencies:
+283                lines.append("  " + dependency.to_s(level + 1))
+284
+285        return "\n".join(lines)
+286
+287    @property
+288    def type_name(self) -> str:
+289        return self.__class__.__name__
+290
+291    @property
+292    def id(self) -> str:
+293        name = self.name
+294        name = f" {name}" if name else ""
+295        return f"{self.type_name}:{name} ({id(self)})"
+296
+297    def _to_s(self, _indent: str) -> t.List[str]:
+298        return []
 
@@ -1006,189 +1037,201 @@
-
 43    @classmethod
- 44    def from_expression(
- 45        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
- 46    ) -> Step:
- 47        """
- 48        Builds a DAG of Steps from a SQL expression so that it's easier to execute in an engine.
- 49        Note: the expression's tables and subqueries must be aliased for this method to work. For
- 50        example, given the following expression:
- 51
- 52        SELECT
- 53          x.a,
- 54          SUM(x.b)
- 55        FROM x AS x
- 56        JOIN y AS y
- 57          ON x.a = y.a
- 58        GROUP BY x.a
- 59
- 60        the following DAG is produced (the expression IDs might differ per execution):
+            
 45    @classmethod
+ 46    def from_expression(
+ 47        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
+ 48    ) -> Step:
+ 49        """
+ 50        Builds a DAG of Steps from a SQL expression so that it's easier to execute in an engine.
+ 51        Note: the expression's tables and subqueries must be aliased for this method to work. For
+ 52        example, given the following expression:
+ 53
+ 54        SELECT
+ 55          x.a,
+ 56          SUM(x.b)
+ 57        FROM x AS x
+ 58        JOIN y AS y
+ 59          ON x.a = y.a
+ 60        GROUP BY x.a
  61
- 62        - Aggregate: x (4347984624)
- 63            Context:
- 64              Aggregations:
- 65                - SUM(x.b)
- 66              Group:
- 67                - x.a
- 68            Projections:
- 69              - x.a
- 70              - "x".""
- 71            Dependencies:
- 72            - Join: x (4347985296)
- 73              Context:
- 74                y:
- 75                On: x.a = y.a
- 76              Projections:
- 77              Dependencies:
- 78              - Scan: x (4347983136)
- 79                Context:
- 80                  Source: x AS x
- 81                Projections:
- 82              - Scan: y (4343416624)
- 83                Context:
- 84                  Source: y AS y
- 85                Projections:
- 86
- 87        Args:
- 88            expression: the expression to build the DAG from.
- 89            ctes: a dictionary that maps CTEs to their corresponding Step DAG by name.
- 90
- 91        Returns:
- 92            A Step DAG corresponding to `expression`.
- 93        """
- 94        ctes = ctes or {}
- 95        expression = expression.unnest()
- 96        with_ = expression.args.get("with")
- 97
- 98        # CTEs break the mold of scope and introduce themselves to all in the context.
- 99        if with_:
-100            ctes = ctes.copy()
-101            for cte in with_.expressions:
-102                step = Step.from_expression(cte.this, ctes)
-103                step.name = cte.alias
-104                ctes[step.name] = step  # type: ignore
-105
-106        from_ = expression.args.get("from")
+ 62        the following DAG is produced (the expression IDs might differ per execution):
+ 63
+ 64        - Aggregate: x (4347984624)
+ 65            Context:
+ 66              Aggregations:
+ 67                - SUM(x.b)
+ 68              Group:
+ 69                - x.a
+ 70            Projections:
+ 71              - x.a
+ 72              - "x".""
+ 73            Dependencies:
+ 74            - Join: x (4347985296)
+ 75              Context:
+ 76                y:
+ 77                On: x.a = y.a
+ 78              Projections:
+ 79              Dependencies:
+ 80              - Scan: x (4347983136)
+ 81                Context:
+ 82                  Source: x AS x
+ 83                Projections:
+ 84              - Scan: y (4343416624)
+ 85                Context:
+ 86                  Source: y AS y
+ 87                Projections:
+ 88
+ 89        Args:
+ 90            expression: the expression to build the DAG from.
+ 91            ctes: a dictionary that maps CTEs to their corresponding Step DAG by name.
+ 92
+ 93        Returns:
+ 94            A Step DAG corresponding to `expression`.
+ 95        """
+ 96        ctes = ctes or {}
+ 97        expression = expression.unnest()
+ 98        with_ = expression.args.get("with")
+ 99
+100        # CTEs break the mold of scope and introduce themselves to all in the context.
+101        if with_:
+102            ctes = ctes.copy()
+103            for cte in with_.expressions:
+104                step = Step.from_expression(cte.this, ctes)
+105                step.name = cte.alias
+106                ctes[step.name] = step  # type: ignore
 107
-108        if isinstance(expression, exp.Select) and from_:
-109            step = Scan.from_expression(from_.this, ctes)
-110        elif isinstance(expression, exp.Union):
-111            step = SetOperation.from_expression(expression, ctes)
-112        else:
-113            step = Scan()
-114
-115        joins = expression.args.get("joins")
+108        from_ = expression.args.get("from")
+109
+110        if isinstance(expression, exp.Select) and from_:
+111            step = Scan.from_expression(from_.this, ctes)
+112        elif isinstance(expression, exp.Union):
+113            step = SetOperation.from_expression(expression, ctes)
+114        else:
+115            step = Scan()
 116
-117        if joins:
-118            join = Join.from_joins(joins, ctes)
-119            join.name = step.name
-120            join.add_dependency(step)
-121            step = join
-122
-123        projections = []  # final selects in this chain of steps representing a select
-124        operands = {}  # intermediate computations of agg funcs eg x + 1 in SUM(x + 1)
-125        aggregations = set()
-126        next_operand_name = name_sequence("_a_")
-127
-128        def extract_agg_operands(expression):
-129            agg_funcs = tuple(expression.find_all(exp.AggFunc))
-130            if agg_funcs:
-131                aggregations.add(expression)
-132            for agg in agg_funcs:
-133                for operand in agg.unnest_operands():
-134                    if isinstance(operand, exp.Column):
-135                        continue
-136                    if operand not in operands:
-137                        operands[operand] = next_operand_name()
-138                    operand.replace(exp.column(operands[operand], quoted=True))
-139            return bool(agg_funcs)
-140
-141        for e in expression.expressions:
-142            if e.find(exp.AggFunc):
-143                projections.append(exp.column(e.alias_or_name, step.name, quoted=True))
-144                extract_agg_operands(e)
-145            else:
-146                projections.append(e)
-147
-148        where = expression.args.get("where")
+117        joins = expression.args.get("joins")
+118
+119        if joins:
+120            join = Join.from_joins(joins, ctes)
+121            join.name = step.name
+122            join.add_dependency(step)
+123            step = join
+124
+125        projections = []  # final selects in this chain of steps representing a select
+126        operands = {}  # intermediate computations of agg funcs eg x + 1 in SUM(x + 1)
+127        aggregations = set()
+128        next_operand_name = name_sequence("_a_")
+129
+130        def extract_agg_operands(expression):
+131            agg_funcs = tuple(expression.find_all(exp.AggFunc))
+132            if agg_funcs:
+133                aggregations.add(expression)
+134
+135            for agg in agg_funcs:
+136                for operand in agg.unnest_operands():
+137                    if isinstance(operand, exp.Column):
+138                        continue
+139                    if operand not in operands:
+140                        operands[operand] = next_operand_name()
+141
+142                    operand.replace(exp.column(operands[operand], quoted=True))
+143
+144            return bool(agg_funcs)
+145
+146        def set_ops_and_aggs(step):
+147            step.operands = tuple(alias(operand, alias_) for operand, alias_ in operands.items())
+148            step.aggregations = list(aggregations)
 149
-150        if where:
-151            step.condition = where.this
-152
-153        group = expression.args.get("group")
-154
-155        if group or aggregations:
-156            aggregate = Aggregate()
-157            aggregate.source = step.name
-158            aggregate.name = step.name
-159
-160            having = expression.args.get("having")
+150        for e in expression.expressions:
+151            if e.find(exp.AggFunc):
+152                projections.append(exp.column(e.alias_or_name, step.name, quoted=True))
+153                extract_agg_operands(e)
+154            else:
+155                projections.append(e)
+156
+157        where = expression.args.get("where")
+158
+159        if where:
+160            step.condition = where.this
 161
-162            if having:
-163                if extract_agg_operands(exp.alias_(having.this, "_h", quoted=True)):
-164                    aggregate.condition = exp.column("_h", step.name, quoted=True)
-165                else:
-166                    aggregate.condition = having.this
-167
-168            aggregate.operands = tuple(
-169                alias(operand, alias_) for operand, alias_ in operands.items()
-170            )
-171            aggregate.aggregations = list(aggregations)
-172
-173            # give aggregates names and replace projections with references to them
-174            aggregate.group = {
-175                f"_g{i}": e for i, e in enumerate(group.expressions if group else [])
-176            }
-177
-178            intermediate: t.Dict[str | exp.Expression, str] = {}
-179            for k, v in aggregate.group.items():
-180                intermediate[v] = k
-181                if isinstance(v, exp.Column):
-182                    intermediate[v.alias_or_name] = k
+162        group = expression.args.get("group")
+163
+164        if group or aggregations:
+165            aggregate = Aggregate()
+166            aggregate.source = step.name
+167            aggregate.name = step.name
+168
+169            having = expression.args.get("having")
+170
+171            if having:
+172                if extract_agg_operands(exp.alias_(having.this, "_h", quoted=True)):
+173                    aggregate.condition = exp.column("_h", step.name, quoted=True)
+174                else:
+175                    aggregate.condition = having.this
+176
+177            set_ops_and_aggs(aggregate)
+178
+179            # give aggregates names and replace projections with references to them
+180            aggregate.group = {
+181                f"_g{i}": e for i, e in enumerate(group.expressions if group else [])
+182            }
 183
-184            for projection in projections:
-185                for node, *_ in projection.walk():
-186                    name = intermediate.get(node)
-187                    if name:
-188                        node.replace(exp.column(name, step.name))
-189            if aggregate.condition:
-190                for node, *_ in aggregate.condition.walk():
-191                    name = intermediate.get(node) or intermediate.get(node.name)
-192                    if name:
-193                        node.replace(exp.column(name, step.name))
-194
-195            aggregate.add_dependency(step)
-196            step = aggregate
-197
-198        order = expression.args.get("order")
-199
-200        if order:
-201            sort = Sort()
-202            sort.name = step.name
-203            sort.key = order.expressions
-204            sort.add_dependency(step)
-205            step = sort
+184            intermediate: t.Dict[str | exp.Expression, str] = {}
+185            for k, v in aggregate.group.items():
+186                intermediate[v] = k
+187                if isinstance(v, exp.Column):
+188                    intermediate[v.name] = k
+189
+190            for projection in projections:
+191                for node, *_ in projection.walk():
+192                    name = intermediate.get(node)
+193                    if name:
+194                        node.replace(exp.column(name, step.name))
+195
+196            if aggregate.condition:
+197                for node, *_ in aggregate.condition.walk():
+198                    name = intermediate.get(node) or intermediate.get(node.name)
+199                    if name:
+200                        node.replace(exp.column(name, step.name))
+201
+202            aggregate.add_dependency(step)
+203            step = aggregate
+204
+205        order = expression.args.get("order")
 206
-207        step.projections = projections
-208
-209        if isinstance(expression, exp.Select) and expression.args.get("distinct"):
-210            distinct = Aggregate()
-211            distinct.source = step.name
-212            distinct.name = step.name
-213            distinct.group = {
-214                e.alias_or_name: exp.column(col=e.alias_or_name, table=step.name)
-215                for e in projections or expression.expressions
-216            }
-217            distinct.add_dependency(step)
-218            step = distinct
-219
-220        limit = expression.args.get("limit")
-221
-222        if limit:
-223            step.limit = int(limit.text("expression"))
-224
-225        return step
+207        if order:
+208            if isinstance(step, Aggregate):
+209                for i, ordered in enumerate(order.expressions):
+210                    if extract_agg_operands(exp.alias_(ordered.this, f"_o_{i}", quoted=True)):
+211                        ordered.this.replace(exp.column(f"_o_{i}", step.name, quoted=True))
+212
+213                set_ops_and_aggs(aggregate)
+214
+215            sort = Sort()
+216            sort.name = step.name
+217            sort.key = order.expressions
+218            sort.add_dependency(step)
+219            step = sort
+220
+221        step.projections = projections
+222
+223        if isinstance(expression, exp.Select) and expression.args.get("distinct"):
+224            distinct = Aggregate()
+225            distinct.source = step.name
+226            distinct.name = step.name
+227            distinct.group = {
+228                e.alias_or_name: exp.column(col=e.alias_or_name, table=step.name)
+229                for e in projections or expression.expressions
+230            }
+231            distinct.add_dependency(step)
+232            step = distinct
+233
+234        limit = expression.args.get("limit")
+235
+236        if limit:
+237            step.limit = int(limit.text("expression"))
+238
+239        return step
 
@@ -1330,9 +1373,9 @@ Projections:
-
235    def add_dependency(self, dependency: Step) -> None:
-236        self.dependencies.add(dependency)
-237        dependency.dependents.add(self)
+            
249    def add_dependency(self, dependency: Step) -> None:
+250        self.dependencies.add(dependency)
+251        dependency.dependents.add(self)
 
@@ -1350,36 +1393,36 @@ Projections:
-
242    def to_s(self, level: int = 0) -> str:
-243        indent = "  " * level
-244        nested = f"{indent}    "
-245
-246        context = self._to_s(f"{nested}  ")
-247
-248        if context:
-249            context = [f"{nested}Context:"] + context
-250
-251        lines = [
-252            f"{indent}- {self.id}",
-253            *context,
-254            f"{nested}Projections:",
-255        ]
-256
-257        for expression in self.projections:
-258            lines.append(f"{nested}  - {expression.sql()}")
+            
256    def to_s(self, level: int = 0) -> str:
+257        indent = "  " * level
+258        nested = f"{indent}    "
 259
-260        if self.condition:
-261            lines.append(f"{nested}Condition: {self.condition.sql()}")
-262
-263        if self.limit is not math.inf:
-264            lines.append(f"{nested}Limit: {self.limit}")
-265
-266        if self.dependencies:
-267            lines.append(f"{nested}Dependencies:")
-268            for dependency in self.dependencies:
-269                lines.append("  " + dependency.to_s(level + 1))
+260        context = self._to_s(f"{nested}  ")
+261
+262        if context:
+263            context = [f"{nested}Context:"] + context
+264
+265        lines = [
+266            f"{indent}- {self.id}",
+267            *context,
+268            f"{nested}Projections:",
+269        ]
 270
-271        return "\n".join(lines)
+271        for expression in self.projections:
+272            lines.append(f"{nested}  - {expression.sql()}")
+273
+274        if self.condition:
+275            lines.append(f"{nested}Condition: {self.condition.sql()}")
+276
+277        if self.limit is not math.inf:
+278            lines.append(f"{nested}Limit: {self.limit}")
+279
+280        if self.dependencies:
+281            lines.append(f"{nested}Dependencies:")
+282            for dependency in self.dependencies:
+283                lines.append("  " + dependency.to_s(level + 1))
+284
+285        return "\n".join(lines)
 
@@ -1420,34 +1463,34 @@ Projections:
-
287class Scan(Step):
-288    @classmethod
-289    def from_expression(
-290        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
-291    ) -> Step:
-292        table = expression
-293        alias_ = expression.alias_or_name
-294
-295        if isinstance(expression, exp.Subquery):
-296            table = expression.this
-297            step = Step.from_expression(table, ctes)
-298            step.name = alias_
-299            return step
-300
-301        step = Scan()
-302        step.name = alias_
-303        step.source = expression
-304        if ctes and table.name in ctes:
-305            step.add_dependency(ctes[table.name])
-306
-307        return step
+            
301class Scan(Step):
+302    @classmethod
+303    def from_expression(
+304        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
+305    ) -> Step:
+306        table = expression
+307        alias_ = expression.alias_or_name
 308
-309    def __init__(self) -> None:
-310        super().__init__()
-311        self.source: t.Optional[exp.Expression] = None
-312
-313    def _to_s(self, indent: str) -> t.List[str]:
-314        return [f"{indent}Source: {self.source.sql() if self.source else '-static-'}"]  # type: ignore
+309        if isinstance(expression, exp.Subquery):
+310            table = expression.this
+311            step = Step.from_expression(table, ctes)
+312            step.name = alias_
+313            return step
+314
+315        step = Scan()
+316        step.name = alias_
+317        step.source = expression
+318        if ctes and table.name in ctes:
+319            step.add_dependency(ctes[table.name])
+320
+321        return step
+322
+323    def __init__(self) -> None:
+324        super().__init__()
+325        self.source: t.Optional[exp.Expression] = None
+326
+327    def _to_s(self, indent: str) -> t.List[str]:
+328        return [f"{indent}Source: {self.source.sql() if self.source else '-static-'}"]  # type: ignore
 
@@ -1465,26 +1508,26 @@ Projections:
-
288    @classmethod
-289    def from_expression(
-290        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
-291    ) -> Step:
-292        table = expression
-293        alias_ = expression.alias_or_name
-294
-295        if isinstance(expression, exp.Subquery):
-296            table = expression.this
-297            step = Step.from_expression(table, ctes)
-298            step.name = alias_
-299            return step
-300
-301        step = Scan()
-302        step.name = alias_
-303        step.source = expression
-304        if ctes and table.name in ctes:
-305            step.add_dependency(ctes[table.name])
-306
-307        return step
+            
302    @classmethod
+303    def from_expression(
+304        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
+305    ) -> Step:
+306        table = expression
+307        alias_ = expression.alias_or_name
+308
+309        if isinstance(expression, exp.Subquery):
+310            table = expression.this
+311            step = Step.from_expression(table, ctes)
+312            step.name = alias_
+313            return step
+314
+315        step = Scan()
+316        step.name = alias_
+317        step.source = expression
+318        if ctes and table.name in ctes:
+319            step.add_dependency(ctes[table.name])
+320
+321        return step
 
@@ -1590,37 +1633,40 @@ Projections:
-
317class Join(Step):
-318    @classmethod
-319    def from_joins(
-320        cls, joins: t.Iterable[exp.Join], ctes: t.Optional[t.Dict[str, Step]] = None
-321    ) -> Step:
-322        step = Join()
-323
-324        for join in joins:
-325            source_key, join_key, condition = join_condition(join)
-326            step.joins[join.alias_or_name] = {
-327                "side": join.side,  # type: ignore
-328                "join_key": join_key,
-329                "source_key": source_key,
-330                "condition": condition,
-331            }
-332
-333            step.add_dependency(Scan.from_expression(join.this, ctes))
-334
-335        return step
-336
-337    def __init__(self) -> None:
-338        super().__init__()
-339        self.joins: t.Dict[str, t.Dict[str, t.List[str] | exp.Expression]] = {}
-340
-341    def _to_s(self, indent: str) -> t.List[str]:
-342        lines = []
-343        for name, join in self.joins.items():
-344            lines.append(f"{indent}{name}: {join['side']}")
-345            if join.get("condition"):
-346                lines.append(f"{indent}On: {join['condition'].sql()}")  # type: ignore
-347        return lines
+            
331class Join(Step):
+332    @classmethod
+333    def from_joins(
+334        cls, joins: t.Iterable[exp.Join], ctes: t.Optional[t.Dict[str, Step]] = None
+335    ) -> Step:
+336        step = Join()
+337
+338        for join in joins:
+339            source_key, join_key, condition = join_condition(join)
+340            step.joins[join.alias_or_name] = {
+341                "side": join.side,  # type: ignore
+342                "join_key": join_key,
+343                "source_key": source_key,
+344                "condition": condition,
+345            }
+346
+347            step.add_dependency(Scan.from_expression(join.this, ctes))
+348
+349        return step
+350
+351    def __init__(self) -> None:
+352        super().__init__()
+353        self.joins: t.Dict[str, t.Dict[str, t.List[str] | exp.Expression]] = {}
+354
+355    def _to_s(self, indent: str) -> t.List[str]:
+356        lines = []
+357        for name, join in self.joins.items():
+358            lines.append(f"{indent}{name}: {join['side'] or 'INNER'}")
+359            join_key = ", ".join(str(key) for key in t.cast(list, join.get("join_key") or []))
+360            if join_key:
+361                lines.append(f"{indent}Key: {join_key}")
+362            if join.get("condition"):
+363                lines.append(f"{indent}On: {join['condition'].sql()}")  # type: ignore
+364        return lines
 
@@ -1638,24 +1684,24 @@ Projections:
-
318    @classmethod
-319    def from_joins(
-320        cls, joins: t.Iterable[exp.Join], ctes: t.Optional[t.Dict[str, Step]] = None
-321    ) -> Step:
-322        step = Join()
-323
-324        for join in joins:
-325            source_key, join_key, condition = join_condition(join)
-326            step.joins[join.alias_or_name] = {
-327                "side": join.side,  # type: ignore
-328                "join_key": join_key,
-329                "source_key": source_key,
-330                "condition": condition,
-331            }
-332
-333            step.add_dependency(Scan.from_expression(join.this, ctes))
-334
-335        return step
+            
332    @classmethod
+333    def from_joins(
+334        cls, joins: t.Iterable[exp.Join], ctes: t.Optional[t.Dict[str, Step]] = None
+335    ) -> Step:
+336        step = Join()
+337
+338        for join in joins:
+339            source_key, join_key, condition = join_condition(join)
+340            step.joins[join.alias_or_name] = {
+341                "side": join.side,  # type: ignore
+342                "join_key": join_key,
+343                "source_key": source_key,
+344                "condition": condition,
+345            }
+346
+347            step.add_dependency(Scan.from_expression(join.this, ctes))
+348
+349        return step
 
@@ -1704,33 +1750,33 @@ Projections:
-
350class Aggregate(Step):
-351    def __init__(self) -> None:
-352        super().__init__()
-353        self.aggregations: t.List[exp.Expression] = []
-354        self.operands: t.Tuple[exp.Expression, ...] = ()
-355        self.group: t.Dict[str, exp.Expression] = {}
-356        self.source: t.Optional[str] = None
-357
-358    def _to_s(self, indent: str) -> t.List[str]:
-359        lines = [f"{indent}Aggregations:"]
-360
-361        for expression in self.aggregations:
-362            lines.append(f"{indent}  - {expression.sql()}")
-363
-364        if self.group:
-365            lines.append(f"{indent}Group:")
-366            for expression in self.group.values():
-367                lines.append(f"{indent}  - {expression.sql()}")
-368        if self.condition:
-369            lines.append(f"{indent}Having:")
-370            lines.append(f"{indent}  - {self.condition.sql()}")
-371        if self.operands:
-372            lines.append(f"{indent}Operands:")
-373            for expression in self.operands:
-374                lines.append(f"{indent}  - {expression.sql()}")
-375
-376        return lines
+            
367class Aggregate(Step):
+368    def __init__(self) -> None:
+369        super().__init__()
+370        self.aggregations: t.List[exp.Expression] = []
+371        self.operands: t.Tuple[exp.Expression, ...] = ()
+372        self.group: t.Dict[str, exp.Expression] = {}
+373        self.source: t.Optional[str] = None
+374
+375    def _to_s(self, indent: str) -> t.List[str]:
+376        lines = [f"{indent}Aggregations:"]
+377
+378        for expression in self.aggregations:
+379            lines.append(f"{indent}  - {expression.sql()}")
+380
+381        if self.group:
+382            lines.append(f"{indent}Group:")
+383            for expression in self.group.values():
+384                lines.append(f"{indent}  - {expression.sql()}")
+385        if self.condition:
+386            lines.append(f"{indent}Having:")
+387            lines.append(f"{indent}  - {self.condition.sql()}")
+388        if self.operands:
+389            lines.append(f"{indent}Operands:")
+390            for expression in self.operands:
+391                lines.append(f"{indent}  - {expression.sql()}")
+392
+393        return lines
 
@@ -1811,18 +1857,18 @@ Projections:
-
379class Sort(Step):
-380    def __init__(self) -> None:
-381        super().__init__()
-382        self.key = None
-383
-384    def _to_s(self, indent: str) -> t.List[str]:
-385        lines = [f"{indent}Key:"]
-386
-387        for expression in self.key:  # type: ignore
-388            lines.append(f"{indent}  - {expression.sql()}")
-389
-390        return lines
+            
396class Sort(Step):
+397    def __init__(self) -> None:
+398        super().__init__()
+399        self.key = None
+400
+401    def _to_s(self, indent: str) -> t.List[str]:
+402        lines = [f"{indent}Key:"]
+403
+404        for expression in self.key:  # type: ignore
+405            lines.append(f"{indent}  - {expression.sql()}")
+406
+407        return lines
 
@@ -1870,46 +1916,46 @@ Projections:
-
393class SetOperation(Step):
-394    def __init__(
-395        self,
-396        op: t.Type[exp.Expression],
-397        left: str | None,
-398        right: str | None,
-399        distinct: bool = False,
-400    ) -> None:
-401        super().__init__()
-402        self.op = op
-403        self.left = left
-404        self.right = right
-405        self.distinct = distinct
-406
-407    @classmethod
-408    def from_expression(
-409        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
-410    ) -> Step:
-411        assert isinstance(expression, exp.Union)
-412        left = Step.from_expression(expression.left, ctes)
-413        right = Step.from_expression(expression.right, ctes)
-414        step = cls(
-415            op=expression.__class__,
-416            left=left.name,
-417            right=right.name,
-418            distinct=bool(expression.args.get("distinct")),
-419        )
-420        step.add_dependency(left)
-421        step.add_dependency(right)
-422        return step
+            
410class SetOperation(Step):
+411    def __init__(
+412        self,
+413        op: t.Type[exp.Expression],
+414        left: str | None,
+415        right: str | None,
+416        distinct: bool = False,
+417    ) -> None:
+418        super().__init__()
+419        self.op = op
+420        self.left = left
+421        self.right = right
+422        self.distinct = distinct
 423
-424    def _to_s(self, indent: str) -> t.List[str]:
-425        lines = []
-426        if self.distinct:
-427            lines.append(f"{indent}Distinct: {self.distinct}")
-428        return lines
-429
-430    @property
-431    def type_name(self) -> str:
-432        return self.op.__name__
+424    @classmethod
+425    def from_expression(
+426        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
+427    ) -> Step:
+428        assert isinstance(expression, exp.Union)
+429        left = Step.from_expression(expression.left, ctes)
+430        right = Step.from_expression(expression.right, ctes)
+431        step = cls(
+432            op=expression.__class__,
+433            left=left.name,
+434            right=right.name,
+435            distinct=bool(expression.args.get("distinct")),
+436        )
+437        step.add_dependency(left)
+438        step.add_dependency(right)
+439        return step
+440
+441    def _to_s(self, indent: str) -> t.List[str]:
+442        lines = []
+443        if self.distinct:
+444            lines.append(f"{indent}Distinct: {self.distinct}")
+445        return lines
+446
+447    @property
+448    def type_name(self) -> str:
+449        return self.op.__name__
 
@@ -1925,18 +1971,18 @@ Projections:
-
394    def __init__(
-395        self,
-396        op: t.Type[exp.Expression],
-397        left: str | None,
-398        right: str | None,
-399        distinct: bool = False,
-400    ) -> None:
-401        super().__init__()
-402        self.op = op
-403        self.left = left
-404        self.right = right
-405        self.distinct = distinct
+            
411    def __init__(
+412        self,
+413        op: t.Type[exp.Expression],
+414        left: str | None,
+415        right: str | None,
+416        distinct: bool = False,
+417    ) -> None:
+418        super().__init__()
+419        self.op = op
+420        self.left = left
+421        self.right = right
+422        self.distinct = distinct
 
@@ -1999,22 +2045,22 @@ Projections:
-
407    @classmethod
-408    def from_expression(
-409        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
-410    ) -> Step:
-411        assert isinstance(expression, exp.Union)
-412        left = Step.from_expression(expression.left, ctes)
-413        right = Step.from_expression(expression.right, ctes)
-414        step = cls(
-415            op=expression.__class__,
-416            left=left.name,
-417            right=right.name,
-418            distinct=bool(expression.args.get("distinct")),
-419        )
-420        step.add_dependency(left)
-421        step.add_dependency(right)
-422        return step
+            
424    @classmethod
+425    def from_expression(
+426        cls, expression: exp.Expression, ctes: t.Optional[t.Dict[str, Step]] = None
+427    ) -> Step:
+428        assert isinstance(expression, exp.Union)
+429        left = Step.from_expression(expression.left, ctes)
+430        right = Step.from_expression(expression.right, ctes)
+431        step = cls(
+432            op=expression.__class__,
+433            left=left.name,
+434            right=right.name,
+435            distinct=bool(expression.args.get("distinct")),
+436        )
+437        step.add_dependency(left)
+438        step.add_dependency(right)
+439        return step
 
-- cgit v1.2.3