# sqlglot/optimizer/qualify.py

from __future__ import annotations

import typing as t

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, DialectType
from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
from sqlglot.optimizer.qualify_columns import (
    pushdown_cte_alias_columns as pushdown_cte_alias_columns_func,
    qualify_columns as qualify_columns_func,
    quote_identifiers as quote_identifiers_func,
    validate_qualify_columns as validate_qualify_columns_func,
)
from sqlglot.optimizer.qualify_tables import qualify_tables
from sqlglot.schema import Schema, ensure_schema


def qualify(
    expression: exp.Expression,
    dialect: DialectType = None,
    db: t.Optional[str] = None,
    catalog: t.Optional[str] = None,
    schema: t.Optional[dict | Schema] = None,
    expand_alias_refs: bool = True,
    expand_stars: bool = True,
    infer_schema: t.Optional[bool] = None,
    isolate_tables: bool = False,
    qualify_columns: bool = True,
    validate_qualify_columns: bool = True,
    quote_identifiers: bool = True,
    identify: bool = True,
) -> exp.Expression:
    """
    Normalize a sqlglot AST and qualify its tables and columns.

    Every later SQLGlot optimization relies on this step having been run.

    The steps run in a fixed order: identifier normalization, table
    qualification, optional table-select isolation, CTE alias column pushdown
    (only for dialects that set ``PREFER_CTE_ALIAS_COLUMN``), optional column
    qualification, optional identifier quoting and, last, optional validation.

    Example:
        >>> import sqlglot
        >>> schema = {"tbl": {"col": "INT"}}
        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
        >>> qualify(expression, schema=schema).sql()
        'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'

    Args:
        expression: Expression to qualify.
        dialect: Dialect passed to schema construction, identifier normalization
            and identifier quoting; it also decides whether CTE alias columns
            are pushed down.
        db: Default database name for tables.
        catalog: Default catalog name for tables.
        schema: Schema to infer column names and types.
        expand_alias_refs: Whether or not to expand references to aliases.
        expand_stars: Whether or not to expand star queries. Most optimizer rules
            need this to work, so only set it to False if you know what you're
            doing!
        infer_schema: Whether or not to infer the schema if missing.
        isolate_tables: Whether or not to isolate table selects.
        qualify_columns: Whether or not to qualify columns.
        validate_qualify_columns: Whether or not to validate columns.
        quote_identifiers: Whether or not to run the quote_identifiers step.
            The step is needed for correctness of case sensitive queries; the
            flag exists so that it can be deferred to a later time.
        identify: If True, quote all identifiers, else only necessary ones.

    Returns:
        The qualified expression.
    """
    # Accept either a plain mapping or a Schema instance from the caller.
    resolved_schema = ensure_schema(schema, dialect=dialect)

    # Identifiers are normalized first, then tables are qualified.
    qualified = normalize_identifiers(expression, dialect=dialect)
    qualified = qualify_tables(
        qualified,
        db=db,
        catalog=catalog,
        schema=resolved_schema,
    )

    if isolate_tables:
        qualified = isolate_table_selects(qualified, schema=resolved_schema)

    # The pushdown is dialect-driven and runs regardless of `qualify_columns`.
    active_dialect = Dialect.get_or_raise(dialect)
    if active_dialect.PREFER_CTE_ALIAS_COLUMN:
        qualified = pushdown_cte_alias_columns_func(qualified)

    if qualify_columns:
        qualified = qualify_columns_func(
            qualified,
            resolved_schema,
            expand_alias_refs=expand_alias_refs,
            expand_stars=expand_stars,
            infer_schema=infer_schema,
        )

    if quote_identifiers:
        qualified = quote_identifiers_func(
            qualified,
            dialect=dialect,
            identify=identify,
        )

    # Validation runs last, on the final tree; its return value is not used.
    if validate_qualify_columns:
        validate_qualify_columns_func(qualified)

    return qualified