sqlglot.optimizer.scope
1import itertools 2from collections import defaultdict 3from enum import Enum, auto 4 5from sqlglot import exp 6from sqlglot.errors import OptimizeError 7 8 9class ScopeType(Enum): 10 ROOT = auto() 11 SUBQUERY = auto() 12 DERIVED_TABLE = auto() 13 CTE = auto() 14 UNION = auto() 15 UDTF = auto() 16 17 18class Scope: 19 """ 20 Selection scope. 21 22 Attributes: 23 expression (exp.Select|exp.Union): Root expression of this scope 24 sources (dict[str, exp.Table|Scope]): Mapping of source name to either 25 a Table expression or another Scope instance. For example: 26 SELECT * FROM x {"x": Table(this="x")} 27 SELECT * FROM x AS y {"y": Table(this="x")} 28 SELECT * FROM (SELECT ...) AS y {"y": Scope(...)} 29 outer_column_list (list[str]): If this is a derived table or CTE, and the outer query 30 defines a column list of it's alias of this scope, this is that list of columns. 31 For example: 32 SELECT * FROM (SELECT ...) AS y(col1, col2) 33 The inner query would have `["col1", "col2"]` for its `outer_column_list` 34 parent (Scope): Parent scope 35 scope_type (ScopeType): Type of this scope, relative to it's parent 36 subquery_scopes (list[Scope]): List of all child scopes for subqueries 37 cte_scopes = (list[Scope]) List of all child scopes for CTEs 38 derived_table_scopes = (list[Scope]) List of all child scopes for derived_tables 39 union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be 40 a list of the left and right child scopes. 41 """ 42 43 def __init__( 44 self, 45 expression, 46 sources=None, 47 outer_column_list=None, 48 parent=None, 49 scope_type=ScopeType.ROOT, 50 ): 51 self.expression = expression 52 self.sources = sources or {} 53 self.outer_column_list = outer_column_list or [] 54 self.parent = parent 55 self.scope_type = scope_type 56 self.subquery_scopes = [] 57 self.derived_table_scopes = [] 58 self.cte_scopes = [] 59 self.union_scopes = [] 60 self.clear_cache() 61 62 def clear_cache(self): 63 self._collected = False 64 self._raw_columns = None 65 self._derived_tables = None 66 self._tables = None 67 self._ctes = None 68 self._subqueries = None 69 self._selected_sources = None 70 self._columns = None 71 self._external_columns = None 72 self._join_hints = None 73 74 def branch(self, expression, scope_type, chain_sources=None, **kwargs): 75 """Branch from the current scope to a new, inner scope""" 76 return Scope( 77 expression=expression.unnest(), 78 sources={**self.cte_sources, **(chain_sources or {})}, 79 parent=self, 80 scope_type=scope_type, 81 **kwargs, 82 ) 83 84 def _collect(self): 85 self._tables = [] 86 self._ctes = [] 87 self._subqueries = [] 88 self._derived_tables = [] 89 self._raw_columns = [] 90 self._join_hints = [] 91 92 for node, parent, _ in self.walk(bfs=False): 93 if node is self.expression: 94 continue 95 elif isinstance(node, exp.Column) and not isinstance(node.this, exp.Star): 96 self._raw_columns.append(node) 97 elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint): 98 self._tables.append(node) 99 elif isinstance(node, exp.JoinHint): 100 self._join_hints.append(node) 101 elif isinstance(node, exp.UDTF): 102 self._derived_tables.append(node) 103 elif isinstance(node, exp.CTE): 104 self._ctes.append(node) 105 elif isinstance(node, exp.Subquery) and isinstance(parent, (exp.From, exp.Join)): 106 self._derived_tables.append(node) 107 elif isinstance(node, exp.Subqueryable): 108 self._subqueries.append(node) 109 110 self._collected = True 111 112 def _ensure_collected(self): 113 if not self._collected: 114 self._collect() 115 116 def walk(self, bfs=True): 117 return walk_in_scope(self.expression, bfs=bfs) 118 119 def find(self, *expression_types, bfs=True): 120 """ 121 Returns the first node in this scope which matches at least one of the specified types. 122 123 This does NOT traverse into subscopes. 124 125 Args: 126 expression_types (type): the expression type(s) to match. 127 bfs (bool): True to use breadth-first search, False to use depth-first. 128 129 Returns: 130 exp.Expression: the node which matches the criteria or None if no node matching 131 the criteria was found. 132 """ 133 return next(self.find_all(*expression_types, bfs=bfs), None) 134 135 def find_all(self, *expression_types, bfs=True): 136 """ 137 Returns a generator object which visits all nodes in this scope and only yields those that 138 match at least one of the specified expression types. 139 140 This does NOT traverse into subscopes. 141 142 Args: 143 expression_types (type): the expression type(s) to match. 144 bfs (bool): True to use breadth-first search, False to use depth-first. 145 146 Yields: 147 exp.Expression: nodes 148 """ 149 for expression, _, _ in self.walk(bfs=bfs): 150 if isinstance(expression, expression_types): 151 yield expression 152 153 def replace(self, old, new): 154 """ 155 Replace `old` with `new`. 156 157 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 158 159 Args: 160 old (exp.Expression): old node 161 new (exp.Expression): new node 162 """ 163 old.replace(new) 164 self.clear_cache() 165 166 @property 167 def tables(self): 168 """ 169 List of tables in this scope. 170 171 Returns: 172 list[exp.Table]: tables 173 """ 174 self._ensure_collected() 175 return self._tables 176 177 @property 178 def ctes(self): 179 """ 180 List of CTEs in this scope. 181 182 Returns: 183 list[exp.CTE]: ctes 184 """ 185 self._ensure_collected() 186 return self._ctes 187 188 @property 189 def derived_tables(self): 190 """ 191 List of derived tables in this scope. 192 193 For example: 194 SELECT * FROM (SELECT ...) <- that's a derived table 195 196 Returns: 197 list[exp.Subquery]: derived tables 198 """ 199 self._ensure_collected() 200 return self._derived_tables 201 202 @property 203 def subqueries(self): 204 """ 205 List of subqueries in this scope. 206 207 For example: 208 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 209 210 Returns: 211 list[exp.Subqueryable]: subqueries 212 """ 213 self._ensure_collected() 214 return self._subqueries 215 216 @property 217 def columns(self): 218 """ 219 List of columns in this scope. 220 221 Returns: 222 list[exp.Column]: Column instances in this scope, plus any 223 Columns that reference this scope from correlated subqueries. 224 """ 225 if self._columns is None: 226 self._ensure_collected() 227 columns = self._raw_columns 228 229 external_columns = [ 230 column for scope in self.subquery_scopes for column in scope.external_columns 231 ] 232 233 named_selects = set(self.expression.named_selects) 234 235 self._columns = [] 236 for column in columns + external_columns: 237 ancestor = column.find_ancestor(exp.Qualify, exp.Order, exp.Having, exp.Hint) 238 if ( 239 not ancestor 240 # Window functions can have an ORDER BY clause 241 or not isinstance(ancestor.parent, exp.Select) 242 or column.table 243 or (column.name not in named_selects and not isinstance(ancestor, exp.Hint)) 244 ): 245 self._columns.append(column) 246 247 return self._columns 248 249 @property 250 def selected_sources(self): 251 """ 252 Mapping of nodes and sources that are actually selected from in this scope. 253 254 That is, all tables in a schema are selectable at any point. But a 255 table only becomes a selected source if it's included in a FROM or JOIN clause. 256 257 Returns: 258 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 259 """ 260 if self._selected_sources is None: 261 referenced_names = [] 262 263 for table in self.tables: 264 referenced_names.append((table.alias_or_name, table)) 265 for derived_table in self.derived_tables: 266 referenced_names.append((derived_table.alias, derived_table.unnest())) 267 268 result = {} 269 270 for name, node in referenced_names: 271 if name in self.sources: 272 result[name] = (node, self.sources[name]) 273 274 self._selected_sources = result 275 return self._selected_sources 276 277 @property 278 def cte_sources(self): 279 """ 280 Sources that are CTEs. 281 282 Returns: 283 dict[str, Scope]: Mapping of source alias to Scope 284 """ 285 return { 286 alias: scope 287 for alias, scope in self.sources.items() 288 if isinstance(scope, Scope) and scope.is_cte 289 } 290 291 @property 292 def selects(self): 293 """ 294 Select expressions of this scope. 295 296 For example, for the following expression: 297 SELECT 1 as a, 2 as b FROM x 298 299 The outputs are the "1 as a" and "2 as b" expressions. 300 301 Returns: 302 list[exp.Expression]: expressions 303 """ 304 if isinstance(self.expression, exp.Union): 305 return self.expression.unnest().selects 306 return self.expression.selects 307 308 @property 309 def external_columns(self): 310 """ 311 Columns that appear to reference sources in outer scopes. 312 313 Returns: 314 list[exp.Column]: Column instances that don't reference 315 sources in the current scope. 316 """ 317 if self._external_columns is None: 318 self._external_columns = [ 319 c for c in self.columns if c.table not in self.selected_sources 320 ] 321 return self._external_columns 322 323 @property 324 def unqualified_columns(self): 325 """ 326 Unqualified columns in the current scope. 327 328 Returns: 329 list[exp.Column]: Unqualified columns 330 """ 331 return [c for c in self.columns if not c.table] 332 333 @property 334 def join_hints(self): 335 """ 336 Hints that exist in the scope that reference tables 337 338 Returns: 339 list[exp.JoinHint]: Join hints that are referenced within the scope 340 """ 341 if self._join_hints is None: 342 return [] 343 return self._join_hints 344 345 def source_columns(self, source_name): 346 """ 347 Get all columns in the current scope for a particular source. 348 349 Args: 350 source_name (str): Name of the source 351 Returns: 352 list[exp.Column]: Column instances that reference `source_name` 353 """ 354 return [column for column in self.columns if column.table == source_name] 355 356 @property 357 def is_subquery(self): 358 """Determine if this scope is a subquery""" 359 return self.scope_type == ScopeType.SUBQUERY 360 361 @property 362 def is_derived_table(self): 363 """Determine if this scope is a derived table""" 364 return self.scope_type == ScopeType.DERIVED_TABLE 365 366 @property 367 def is_union(self): 368 """Determine if this scope is a union""" 369 return self.scope_type == ScopeType.UNION 370 371 @property 372 def is_cte(self): 373 """Determine if this scope is a common table expression""" 374 return self.scope_type == ScopeType.CTE 375 376 @property 377 def is_root(self): 378 """Determine if this is the root scope""" 379 return self.scope_type == ScopeType.ROOT 380 381 @property 382 def is_udtf(self): 383 """Determine if this scope is a UDTF (User Defined Table Function)""" 384 return self.scope_type == ScopeType.UDTF 385 386 @property 387 def is_correlated_subquery(self): 388 """Determine if this scope is a correlated subquery""" 389 return bool(self.is_subquery and self.external_columns) 390 391 def rename_source(self, old_name, new_name): 392 """Rename a source in this scope""" 393 columns = self.sources.pop(old_name or "", []) 394 self.sources[new_name] = columns 395 396 def add_source(self, name, source): 397 """Add a source to this scope""" 398 self.sources[name] = source 399 self.clear_cache() 400 401 def remove_source(self, name): 402 """Remove a source from this scope""" 403 self.sources.pop(name, None) 404 self.clear_cache() 405 406 def __repr__(self): 407 return f"Scope<{self.expression.sql()}>" 408 409 def traverse(self): 410 """ 411 Traverse the scope tree from this node. 412 413 Yields: 414 Scope: scope instances in depth-first-search post-order 415 """ 416 for child_scope in itertools.chain( 417 self.cte_scopes, self.union_scopes, self.derived_table_scopes, self.subquery_scopes 418 ): 419 yield from child_scope.traverse() 420 yield self 421 422 def ref_count(self): 423 """ 424 Count the number of times each scope in this tree is referenced. 425 426 Returns: 427 dict[int, int]: Mapping of Scope instance ID to reference count 428 """ 429 scope_ref_count = defaultdict(lambda: 0) 430 431 for scope in self.traverse(): 432 for _, source in scope.selected_sources.values(): 433 scope_ref_count[id(source)] += 1 434 435 return scope_ref_count 436 437 438def traverse_scope(expression): 439 """ 440 Traverse an expression by it's "scopes". 441 442 "Scope" represents the current context of a Select statement. 443 444 This is helpful for optimizing queries, where we need more information than 445 the expression tree itself. For example, we might care about the source 446 names within a subquery. Returns a list because a generator could result in 447 incomplete properties which is confusing. 448 449 Examples: 450 >>> import sqlglot 451 >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") 452 >>> scopes = traverse_scope(expression) 453 >>> scopes[0].expression.sql(), list(scopes[0].sources) 454 ('SELECT a FROM x', ['x']) 455 >>> scopes[1].expression.sql(), list(scopes[1].sources) 456 ('SELECT a FROM (SELECT a FROM x) AS y', ['y']) 457 458 Args: 459 expression (exp.Expression): expression to traverse 460 Returns: 461 list[Scope]: scope instances 462 """ 463 return list(_traverse_scope(Scope(expression))) 464 465 466def build_scope(expression): 467 """ 468 Build a scope tree. 469 470 Args: 471 expression (exp.Expression): expression to build the scope tree for 472 Returns: 473 Scope: root scope 474 """ 475 return traverse_scope(expression)[-1] 476 477 478def _traverse_scope(scope): 479 if isinstance(scope.expression, exp.Select): 480 yield from _traverse_select(scope) 481 elif isinstance(scope.expression, exp.Union): 482 yield from _traverse_union(scope) 483 elif isinstance(scope.expression, exp.UDTF): 484 _set_udtf_scope(scope) 485 elif isinstance(scope.expression, exp.Subquery): 486 yield from _traverse_subqueries(scope) 487 else: 488 raise OptimizeError(f"Unexpected expression type: {type(scope.expression)}") 489 yield scope 490 491 492def _traverse_select(scope): 493 yield from _traverse_derived_tables(scope.ctes, scope, ScopeType.CTE) 494 yield from _traverse_derived_tables(scope.derived_tables, scope, ScopeType.DERIVED_TABLE) 495 yield from _traverse_subqueries(scope) 496 _add_table_sources(scope) 497 498 499def _traverse_union(scope): 500 yield from _traverse_derived_tables(scope.ctes, scope, scope_type=ScopeType.CTE) 501 502 # The last scope to be yield should be the top most scope 503 left = None 504 for left in _traverse_scope(scope.branch(scope.expression.left, scope_type=ScopeType.UNION)): 505 yield left 506 507 right = None 508 for right in _traverse_scope(scope.branch(scope.expression.right, scope_type=ScopeType.UNION)): 509 yield right 510 511 scope.union_scopes = [left, right] 512 513 514def _set_udtf_scope(scope): 515 parent = scope.expression.parent 516 from_ = parent.args.get("from") 517 518 if not from_: 519 return 520 521 for table in from_.expressions: 522 if isinstance(table, exp.Table): 523 scope.tables.append(table) 524 elif isinstance(table, exp.Subquery): 525 scope.subqueries.append(table) 526 _add_table_sources(scope) 527 _traverse_subqueries(scope) 528 529 530def _traverse_derived_tables(derived_tables, scope, scope_type): 531 sources = {} 532 is_cte = scope_type == ScopeType.CTE 533 534 for derived_table in derived_tables: 535 recursive_scope = None 536 537 # if the scope is a recursive cte, it must be in the form of 538 # base_case UNION recursive. thus the recursive scope is the first 539 # section of the union. 540 if is_cte and scope.expression.args["with"].recursive: 541 union = derived_table.this 542 543 if isinstance(union, exp.Union): 544 recursive_scope = scope.branch(union.this, scope_type=ScopeType.CTE) 545 546 for child_scope in _traverse_scope( 547 scope.branch( 548 derived_table if isinstance(derived_table, exp.UDTF) else derived_table.this, 549 chain_sources=sources if scope_type == ScopeType.CTE else None, 550 outer_column_list=derived_table.alias_column_names, 551 scope_type=ScopeType.UDTF if isinstance(derived_table, exp.UDTF) else scope_type, 552 ) 553 ): 554 yield child_scope 555 556 # Tables without aliases will be set as "" 557 # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything. 558 # Until then, this means that only a single, unaliased derived table is allowed (rather, 559 # the latest one wins. 560 alias = derived_table.alias 561 sources[alias] = child_scope 562 563 if recursive_scope: 564 child_scope.add_source(alias, recursive_scope) 565 566 # append the final child_scope yielded 567 if is_cte: 568 scope.cte_scopes.append(child_scope) 569 else: 570 scope.derived_table_scopes.append(child_scope) 571 572 scope.sources.update(sources) 573 574 575def _add_table_sources(scope): 576 sources = {} 577 for table in scope.tables: 578 table_name = table.name 579 580 if table.alias: 581 source_name = table.alias 582 else: 583 source_name = table_name 584 585 if table_name in scope.sources: 586 # This is a reference to a parent source (e.g. a CTE), not an actual table. 587 scope.sources[source_name] = scope.sources[table_name] 588 else: 589 sources[source_name] = table 590 591 scope.sources.update(sources) 592 593 594def _traverse_subqueries(scope): 595 for subquery in scope.subqueries: 596 top = None 597 for child_scope in _traverse_scope(scope.branch(subquery, scope_type=ScopeType.SUBQUERY)): 598 yield child_scope 599 top = child_scope 600 scope.subquery_scopes.append(top) 601 602 603def walk_in_scope(expression, bfs=True): 604 """ 605 Returns a generator object which visits all nodes in the syntrax tree, stopping at 606 nodes that start child scopes. 607 608 Args: 609 expression (exp.Expression): 610 bfs (bool): if set to True the BFS traversal order will be applied, 611 otherwise the DFS traversal will be used instead. 612 613 Yields: 614 tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key 615 """ 616 # We'll use this variable to pass state into the dfs generator. 617 # Whenever we set it to True, we exclude a subtree from traversal. 618 prune = False 619 620 for node, parent, key in expression.walk(bfs=bfs, prune=lambda *_: prune): 621 prune = False 622 623 yield node, parent, key 624 625 if node is expression: 626 continue 627 elif isinstance(node, exp.CTE): 628 prune = True 629 elif isinstance(node, exp.Subquery) and isinstance(parent, (exp.From, exp.Join)): 630 prune = True 631 elif isinstance(node, exp.Subqueryable): 632 prune = True
10class ScopeType(Enum): 11 ROOT = auto() 12 SUBQUERY = auto() 13 DERIVED_TABLE = auto() 14 CTE = auto() 15 UNION = auto() 16 UDTF = auto()
An enumeration.
Inherited Members
- enum.Enum
- name
- value
19class Scope: 20 """ 21 Selection scope. 22 23 Attributes: 24 expression (exp.Select|exp.Union): Root expression of this scope 25 sources (dict[str, exp.Table|Scope]): Mapping of source name to either 26 a Table expression or another Scope instance. For example: 27 SELECT * FROM x {"x": Table(this="x")} 28 SELECT * FROM x AS y {"y": Table(this="x")} 29 SELECT * FROM (SELECT ...) AS y {"y": Scope(...)} 30 outer_column_list (list[str]): If this is a derived table or CTE, and the outer query 31 defines a column list of it's alias of this scope, this is that list of columns. 32 For example: 33 SELECT * FROM (SELECT ...) AS y(col1, col2) 34 The inner query would have `["col1", "col2"]` for its `outer_column_list` 35 parent (Scope): Parent scope 36 scope_type (ScopeType): Type of this scope, relative to it's parent 37 subquery_scopes (list[Scope]): List of all child scopes for subqueries 38 cte_scopes = (list[Scope]) List of all child scopes for CTEs 39 derived_table_scopes = (list[Scope]) List of all child scopes for derived_tables 40 union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be 41 a list of the left and right child scopes. 42 """ 43 44 def __init__( 45 self, 46 expression, 47 sources=None, 48 outer_column_list=None, 49 parent=None, 50 scope_type=ScopeType.ROOT, 51 ): 52 self.expression = expression 53 self.sources = sources or {} 54 self.outer_column_list = outer_column_list or [] 55 self.parent = parent 56 self.scope_type = scope_type 57 self.subquery_scopes = [] 58 self.derived_table_scopes = [] 59 self.cte_scopes = [] 60 self.union_scopes = [] 61 self.clear_cache() 62 63 def clear_cache(self): 64 self._collected = False 65 self._raw_columns = None 66 self._derived_tables = None 67 self._tables = None 68 self._ctes = None 69 self._subqueries = None 70 self._selected_sources = None 71 self._columns = None 72 self._external_columns = None 73 self._join_hints = None 74 75 def branch(self, expression, scope_type, chain_sources=None, **kwargs): 76 """Branch from the current scope to a new, inner scope""" 77 return Scope( 78 expression=expression.unnest(), 79 sources={**self.cte_sources, **(chain_sources or {})}, 80 parent=self, 81 scope_type=scope_type, 82 **kwargs, 83 ) 84 85 def _collect(self): 86 self._tables = [] 87 self._ctes = [] 88 self._subqueries = [] 89 self._derived_tables = [] 90 self._raw_columns = [] 91 self._join_hints = [] 92 93 for node, parent, _ in self.walk(bfs=False): 94 if node is self.expression: 95 continue 96 elif isinstance(node, exp.Column) and not isinstance(node.this, exp.Star): 97 self._raw_columns.append(node) 98 elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint): 99 self._tables.append(node) 100 elif isinstance(node, exp.JoinHint): 101 self._join_hints.append(node) 102 elif isinstance(node, exp.UDTF): 103 self._derived_tables.append(node) 104 elif isinstance(node, exp.CTE): 105 self._ctes.append(node) 106 elif isinstance(node, exp.Subquery) and isinstance(parent, (exp.From, exp.Join)): 107 self._derived_tables.append(node) 108 elif isinstance(node, exp.Subqueryable): 109 self._subqueries.append(node) 110 111 self._collected = True 112 113 def _ensure_collected(self): 114 if not self._collected: 115 self._collect() 116 117 def walk(self, bfs=True): 118 return walk_in_scope(self.expression, bfs=bfs) 119 120 def find(self, *expression_types, bfs=True): 121 """ 122 Returns the first node in this scope which matches at least one of the specified types. 123 124 This does NOT traverse into subscopes. 125 126 Args: 127 expression_types (type): the expression type(s) to match. 128 bfs (bool): True to use breadth-first search, False to use depth-first. 129 130 Returns: 131 exp.Expression: the node which matches the criteria or None if no node matching 132 the criteria was found. 133 """ 134 return next(self.find_all(*expression_types, bfs=bfs), None) 135 136 def find_all(self, *expression_types, bfs=True): 137 """ 138 Returns a generator object which visits all nodes in this scope and only yields those that 139 match at least one of the specified expression types. 140 141 This does NOT traverse into subscopes. 142 143 Args: 144 expression_types (type): the expression type(s) to match. 145 bfs (bool): True to use breadth-first search, False to use depth-first. 146 147 Yields: 148 exp.Expression: nodes 149 """ 150 for expression, _, _ in self.walk(bfs=bfs): 151 if isinstance(expression, expression_types): 152 yield expression 153 154 def replace(self, old, new): 155 """ 156 Replace `old` with `new`. 157 158 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 159 160 Args: 161 old (exp.Expression): old node 162 new (exp.Expression): new node 163 """ 164 old.replace(new) 165 self.clear_cache() 166 167 @property 168 def tables(self): 169 """ 170 List of tables in this scope. 171 172 Returns: 173 list[exp.Table]: tables 174 """ 175 self._ensure_collected() 176 return self._tables 177 178 @property 179 def ctes(self): 180 """ 181 List of CTEs in this scope. 182 183 Returns: 184 list[exp.CTE]: ctes 185 """ 186 self._ensure_collected() 187 return self._ctes 188 189 @property 190 def derived_tables(self): 191 """ 192 List of derived tables in this scope. 193 194 For example: 195 SELECT * FROM (SELECT ...) <- that's a derived table 196 197 Returns: 198 list[exp.Subquery]: derived tables 199 """ 200 self._ensure_collected() 201 return self._derived_tables 202 203 @property 204 def subqueries(self): 205 """ 206 List of subqueries in this scope. 207 208 For example: 209 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 210 211 Returns: 212 list[exp.Subqueryable]: subqueries 213 """ 214 self._ensure_collected() 215 return self._subqueries 216 217 @property 218 def columns(self): 219 """ 220 List of columns in this scope. 221 222 Returns: 223 list[exp.Column]: Column instances in this scope, plus any 224 Columns that reference this scope from correlated subqueries. 225 """ 226 if self._columns is None: 227 self._ensure_collected() 228 columns = self._raw_columns 229 230 external_columns = [ 231 column for scope in self.subquery_scopes for column in scope.external_columns 232 ] 233 234 named_selects = set(self.expression.named_selects) 235 236 self._columns = [] 237 for column in columns + external_columns: 238 ancestor = column.find_ancestor(exp.Qualify, exp.Order, exp.Having, exp.Hint) 239 if ( 240 not ancestor 241 # Window functions can have an ORDER BY clause 242 or not isinstance(ancestor.parent, exp.Select) 243 or column.table 244 or (column.name not in named_selects and not isinstance(ancestor, exp.Hint)) 245 ): 246 self._columns.append(column) 247 248 return self._columns 249 250 @property 251 def selected_sources(self): 252 """ 253 Mapping of nodes and sources that are actually selected from in this scope. 254 255 That is, all tables in a schema are selectable at any point. But a 256 table only becomes a selected source if it's included in a FROM or JOIN clause. 257 258 Returns: 259 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 260 """ 261 if self._selected_sources is None: 262 referenced_names = [] 263 264 for table in self.tables: 265 referenced_names.append((table.alias_or_name, table)) 266 for derived_table in self.derived_tables: 267 referenced_names.append((derived_table.alias, derived_table.unnest())) 268 269 result = {} 270 271 for name, node in referenced_names: 272 if name in self.sources: 273 result[name] = (node, self.sources[name]) 274 275 self._selected_sources = result 276 return self._selected_sources 277 278 @property 279 def cte_sources(self): 280 """ 281 Sources that are CTEs. 282 283 Returns: 284 dict[str, Scope]: Mapping of source alias to Scope 285 """ 286 return { 287 alias: scope 288 for alias, scope in self.sources.items() 289 if isinstance(scope, Scope) and scope.is_cte 290 } 291 292 @property 293 def selects(self): 294 """ 295 Select expressions of this scope. 296 297 For example, for the following expression: 298 SELECT 1 as a, 2 as b FROM x 299 300 The outputs are the "1 as a" and "2 as b" expressions. 301 302 Returns: 303 list[exp.Expression]: expressions 304 """ 305 if isinstance(self.expression, exp.Union): 306 return self.expression.unnest().selects 307 return self.expression.selects 308 309 @property 310 def external_columns(self): 311 """ 312 Columns that appear to reference sources in outer scopes. 313 314 Returns: 315 list[exp.Column]: Column instances that don't reference 316 sources in the current scope. 317 """ 318 if self._external_columns is None: 319 self._external_columns = [ 320 c for c in self.columns if c.table not in self.selected_sources 321 ] 322 return self._external_columns 323 324 @property 325 def unqualified_columns(self): 326 """ 327 Unqualified columns in the current scope. 328 329 Returns: 330 list[exp.Column]: Unqualified columns 331 """ 332 return [c for c in self.columns if not c.table] 333 334 @property 335 def join_hints(self): 336 """ 337 Hints that exist in the scope that reference tables 338 339 Returns: 340 list[exp.JoinHint]: Join hints that are referenced within the scope 341 """ 342 if self._join_hints is None: 343 return [] 344 return self._join_hints 345 346 def source_columns(self, source_name): 347 """ 348 Get all columns in the current scope for a particular source. 349 350 Args: 351 source_name (str): Name of the source 352 Returns: 353 list[exp.Column]: Column instances that reference `source_name` 354 """ 355 return [column for column in self.columns if column.table == source_name] 356 357 @property 358 def is_subquery(self): 359 """Determine if this scope is a subquery""" 360 return self.scope_type == ScopeType.SUBQUERY 361 362 @property 363 def is_derived_table(self): 364 """Determine if this scope is a derived table""" 365 return self.scope_type == ScopeType.DERIVED_TABLE 366 367 @property 368 def is_union(self): 369 """Determine if this scope is a union""" 370 return self.scope_type == ScopeType.UNION 371 372 @property 373 def is_cte(self): 374 """Determine if this scope is a common table expression""" 375 return self.scope_type == ScopeType.CTE 376 377 @property 378 def is_root(self): 379 """Determine if this is the root scope""" 380 return self.scope_type == ScopeType.ROOT 381 382 @property 383 def is_udtf(self): 384 """Determine if this scope is a UDTF (User Defined Table Function)""" 385 return self.scope_type == ScopeType.UDTF 386 387 @property 388 def is_correlated_subquery(self): 389 """Determine if this scope is a correlated subquery""" 390 return bool(self.is_subquery and self.external_columns) 391 392 def rename_source(self, old_name, new_name): 393 """Rename a source in this scope""" 394 columns = self.sources.pop(old_name or "", []) 395 self.sources[new_name] = columns 396 397 def add_source(self, name, source): 398 """Add a source to this scope""" 399 self.sources[name] = source 400 self.clear_cache() 401 402 def remove_source(self, name): 403 """Remove a source from this scope""" 404 self.sources.pop(name, None) 405 self.clear_cache() 406 407 def __repr__(self): 408 return f"Scope<{self.expression.sql()}>" 409 410 def traverse(self): 411 """ 412 Traverse the scope tree from this node. 413 414 Yields: 415 Scope: scope instances in depth-first-search post-order 416 """ 417 for child_scope in itertools.chain( 418 self.cte_scopes, self.union_scopes, self.derived_table_scopes, self.subquery_scopes 419 ): 420 yield from child_scope.traverse() 421 yield self 422 423 def ref_count(self): 424 """ 425 Count the number of times each scope in this tree is referenced. 426 427 Returns: 428 dict[int, int]: Mapping of Scope instance ID to reference count 429 """ 430 scope_ref_count = defaultdict(lambda: 0) 431 432 for scope in self.traverse(): 433 for _, source in scope.selected_sources.values(): 434 scope_ref_count[id(source)] += 1 435 436 return scope_ref_count
Selection scope.
Attributes:
- expression (exp.Select|exp.Union): Root expression of this scope
- sources (dict[str, exp.Table|Scope]): Mapping of source name to either a Table expression or another Scope instance. For example: SELECT * FROM x {"x": Table(this="x")} SELECT * FROM x AS y {"y": Table(this="x")} SELECT * FROM (SELECT ...) AS y {"y": Scope(...)}
- outer_column_list (list[str]): If this is a derived table or CTE, and the outer query
defines a column list of it's alias of this scope, this is that list of columns.
For example:
SELECT * FROM (SELECT ...) AS y(col1, col2)
The inner query would have
["col1", "col2"]
for itsouter_column_list
- parent (Scope): Parent scope
- scope_type (ScopeType): Type of this scope, relative to it's parent
- subquery_scopes (list[Scope]): List of all child scopes for subqueries
- cte_scopes = (list[Scope]) List of all child scopes for CTEs
- derived_table_scopes = (list[Scope]) List of all child scopes for derived_tables
- union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be a list of the left and right child scopes.
44 def __init__( 45 self, 46 expression, 47 sources=None, 48 outer_column_list=None, 49 parent=None, 50 scope_type=ScopeType.ROOT, 51 ): 52 self.expression = expression 53 self.sources = sources or {} 54 self.outer_column_list = outer_column_list or [] 55 self.parent = parent 56 self.scope_type = scope_type 57 self.subquery_scopes = [] 58 self.derived_table_scopes = [] 59 self.cte_scopes = [] 60 self.union_scopes = [] 61 self.clear_cache()
75 def branch(self, expression, scope_type, chain_sources=None, **kwargs): 76 """Branch from the current scope to a new, inner scope""" 77 return Scope( 78 expression=expression.unnest(), 79 sources={**self.cte_sources, **(chain_sources or {})}, 80 parent=self, 81 scope_type=scope_type, 82 **kwargs, 83 )
Branch from the current scope to a new, inner scope
120 def find(self, *expression_types, bfs=True): 121 """ 122 Returns the first node in this scope which matches at least one of the specified types. 123 124 This does NOT traverse into subscopes. 125 126 Args: 127 expression_types (type): the expression type(s) to match. 128 bfs (bool): True to use breadth-first search, False to use depth-first. 129 130 Returns: 131 exp.Expression: the node which matches the criteria or None if no node matching 132 the criteria was found. 133 """ 134 return next(self.find_all(*expression_types, bfs=bfs), None)
Returns the first node in this scope which matches at least one of the specified types.
This does NOT traverse into subscopes.
Arguments:
- expression_types (type): the expression type(s) to match.
- bfs (bool): True to use breadth-first search, False to use depth-first.
Returns:
exp.Expression: the node which matches the criteria or None if no node matching the criteria was found.
136 def find_all(self, *expression_types, bfs=True): 137 """ 138 Returns a generator object which visits all nodes in this scope and only yields those that 139 match at least one of the specified expression types. 140 141 This does NOT traverse into subscopes. 142 143 Args: 144 expression_types (type): the expression type(s) to match. 145 bfs (bool): True to use breadth-first search, False to use depth-first. 146 147 Yields: 148 exp.Expression: nodes 149 """ 150 for expression, _, _ in self.walk(bfs=bfs): 151 if isinstance(expression, expression_types): 152 yield expression
Returns a generator object which visits all nodes in this scope and only yields those that match at least one of the specified expression types.
This does NOT traverse into subscopes.
Arguments:
- expression_types (type): the expression type(s) to match.
- bfs (bool): True to use breadth-first search, False to use depth-first.
Yields:
exp.Expression: nodes
154 def replace(self, old, new): 155 """ 156 Replace `old` with `new`. 157 158 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 159 160 Args: 161 old (exp.Expression): old node 162 new (exp.Expression): new node 163 """ 164 old.replace(new) 165 self.clear_cache()
Replace old
with new
.
This can be used instead of exp.Expression.replace
to ensure the Scope
is kept up-to-date.
Arguments:
- old (exp.Expression): old node
- new (exp.Expression): new node
List of derived tables in this scope.
For example:
SELECT * FROM (SELECT ...) <- that's a derived table
Returns:
list[exp.Subquery]: derived tables
List of subqueries in this scope.
For example:
SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
Returns:
list[exp.Subqueryable]: subqueries
List of columns in this scope.
Returns:
list[exp.Column]: Column instances in this scope, plus any Columns that reference this scope from correlated subqueries.
Mapping of nodes and sources that are actually selected from in this scope.
That is, all tables in a schema are selectable at any point. But a table only becomes a selected source if it's included in a FROM or JOIN clause.
Returns:
dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
Select expressions of this scope.
For example, for the following expression: SELECT 1 as a, 2 as b FROM x
The outputs are the "1 as a" and "2 as b" expressions.
Returns:
list[exp.Expression]: expressions
Columns that appear to reference sources in outer scopes.
Returns:
list[exp.Column]: Column instances that don't reference sources in the current scope.
Unqualified columns in the current scope.
Returns:
list[exp.Column]: Unqualified columns
Hints that exist in the scope that reference tables
Returns:
list[exp.JoinHint]: Join hints that are referenced within the scope
346 def source_columns(self, source_name): 347 """ 348 Get all columns in the current scope for a particular source. 349 350 Args: 351 source_name (str): Name of the source 352 Returns: 353 list[exp.Column]: Column instances that reference `source_name` 354 """ 355 return [column for column in self.columns if column.table == source_name]
Get all columns in the current scope for a particular source.
Arguments:
- source_name (str): Name of the source
Returns:
list[exp.Column]: Column instances that reference
source_name
392 def rename_source(self, old_name, new_name): 393 """Rename a source in this scope""" 394 columns = self.sources.pop(old_name or "", []) 395 self.sources[new_name] = columns
Rename a source in this scope
397 def add_source(self, name, source): 398 """Add a source to this scope""" 399 self.sources[name] = source 400 self.clear_cache()
Add a source to this scope
402 def remove_source(self, name): 403 """Remove a source from this scope""" 404 self.sources.pop(name, None) 405 self.clear_cache()
Remove a source from this scope
410 def traverse(self): 411 """ 412 Traverse the scope tree from this node. 413 414 Yields: 415 Scope: scope instances in depth-first-search post-order 416 """ 417 for child_scope in itertools.chain( 418 self.cte_scopes, self.union_scopes, self.derived_table_scopes, self.subquery_scopes 419 ): 420 yield from child_scope.traverse() 421 yield self
Traverse the scope tree from this node.
Yields:
Scope: scope instances in depth-first-search post-order
423 def ref_count(self): 424 """ 425 Count the number of times each scope in this tree is referenced. 426 427 Returns: 428 dict[int, int]: Mapping of Scope instance ID to reference count 429 """ 430 scope_ref_count = defaultdict(lambda: 0) 431 432 for scope in self.traverse(): 433 for _, source in scope.selected_sources.values(): 434 scope_ref_count[id(source)] += 1 435 436 return scope_ref_count
Count the number of times each scope in this tree is referenced.
Returns:
dict[int, int]: Mapping of Scope instance ID to reference count
439def traverse_scope(expression): 440 """ 441 Traverse an expression by it's "scopes". 442 443 "Scope" represents the current context of a Select statement. 444 445 This is helpful for optimizing queries, where we need more information than 446 the expression tree itself. For example, we might care about the source 447 names within a subquery. Returns a list because a generator could result in 448 incomplete properties which is confusing. 449 450 Examples: 451 >>> import sqlglot 452 >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") 453 >>> scopes = traverse_scope(expression) 454 >>> scopes[0].expression.sql(), list(scopes[0].sources) 455 ('SELECT a FROM x', ['x']) 456 >>> scopes[1].expression.sql(), list(scopes[1].sources) 457 ('SELECT a FROM (SELECT a FROM x) AS y', ['y']) 458 459 Args: 460 expression (exp.Expression): expression to traverse 461 Returns: 462 list[Scope]: scope instances 463 """ 464 return list(_traverse_scope(Scope(expression)))
Traverse an expression by it's "scopes".
"Scope" represents the current context of a Select statement.
This is helpful for optimizing queries, where we need more information than the expression tree itself. For example, we might care about the source names within a subquery. Returns a list because a generator could result in incomplete properties which is confusing.
Examples:
>>> import sqlglot >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") >>> scopes = traverse_scope(expression) >>> scopes[0].expression.sql(), list(scopes[0].sources) ('SELECT a FROM x', ['x']) >>> scopes[1].expression.sql(), list(scopes[1].sources) ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
Arguments:
- expression (exp.Expression): expression to traverse
Returns:
list[Scope]: scope instances
467def build_scope(expression): 468 """ 469 Build a scope tree. 470 471 Args: 472 expression (exp.Expression): expression to build the scope tree for 473 Returns: 474 Scope: root scope 475 """ 476 return traverse_scope(expression)[-1]
Build a scope tree.
Arguments:
- expression (exp.Expression): expression to build the scope tree for
Returns:
Scope: root scope
604def walk_in_scope(expression, bfs=True): 605 """ 606 Returns a generator object which visits all nodes in the syntrax tree, stopping at 607 nodes that start child scopes. 608 609 Args: 610 expression (exp.Expression): 611 bfs (bool): if set to True the BFS traversal order will be applied, 612 otherwise the DFS traversal will be used instead. 613 614 Yields: 615 tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key 616 """ 617 # We'll use this variable to pass state into the dfs generator. 618 # Whenever we set it to True, we exclude a subtree from traversal. 619 prune = False 620 621 for node, parent, key in expression.walk(bfs=bfs, prune=lambda *_: prune): 622 prune = False 623 624 yield node, parent, key 625 626 if node is expression: 627 continue 628 elif isinstance(node, exp.CTE): 629 prune = True 630 elif isinstance(node, exp.Subquery) and isinstance(parent, (exp.From, exp.Join)): 631 prune = True 632 elif isinstance(node, exp.Subqueryable): 633 prune = True
Returns a generator object which visits all nodes in the syntrax tree, stopping at nodes that start child scopes.
Arguments:
- expression (exp.Expression):
- bfs (bool): if set to True the BFS traversal order will be applied, otherwise the DFS traversal will be used instead.
Yields:
tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key