from __future__ import annotations

import typing as t

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, DialectType
from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
from sqlglot.optimizer.qualify_columns import (
    pushdown_cte_alias_columns as pushdown_cte_alias_columns_func,
    qualify_columns as qualify_columns_func,
    quote_identifiers as quote_identifiers_func,
    validate_qualify_columns as validate_qualify_columns_func,
)
from sqlglot.optimizer.qualify_tables import qualify_tables
from sqlglot.schema import Schema, ensure_schema


def qualify(
    expression: exp.Expression,
    dialect: DialectType = None,
    db: t.Optional[str] = None,
    catalog: t.Optional[str] = None,
    schema: t.Optional[dict | Schema] = None,
    expand_alias_refs: bool = True,
    expand_stars: bool = True,
    infer_schema: t.Optional[bool] = None,
    isolate_tables: bool = False,
    qualify_columns: bool = True,
    validate_qualify_columns: bool = True,
    quote_identifiers: bool = True,
    identify: bool = True,
) -> exp.Expression:
    """
    Normalize a sqlglot AST and fully qualify its tables and columns.

    Every later SQLGlot optimization expects its input to have gone through
    this step first.

    Example:
        >>> import sqlglot
        >>> schema = {"tbl": {"col": "INT"}}
        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
        >>> qualify(expression, schema=schema).sql()
        'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'

    Args:
        expression: Expression to qualify.
        dialect: Dialect used when building the schema, normalizing identifiers,
            qualifying tables and quoting identifiers. Its PREFER_CTE_ALIAS_COLUMN
            setting also decides whether CTE alias columns are pushed down.
        db: Default database name for tables.
        catalog: Default catalog name for tables.
        schema: Schema to infer column names and types.
        expand_alias_refs: Whether to expand references to aliases.
        expand_stars: Whether to expand star queries. Most optimizer rules need
            this to work; do not set to False unless you know what you're doing!
        infer_schema: Whether to infer the schema if missing.
        isolate_tables: Whether to isolate table selects.
        qualify_columns: Whether to qualify columns.
        validate_qualify_columns: Whether to validate columns.
        quote_identifiers: Whether to run the quote_identifiers step.
            Case sensitive queries need this step to stay correct; the flag
            exists so that it can be deferred and performed at a later time.
        identify: If True, quote all identifiers, else only necessary ones.

    Returns:
        The qualified expression.
    """
    resolved_schema = ensure_schema(schema, dialect=dialect)

    # Identifiers are normalized first, then tables are qualified against the schema.
    tree = normalize_identifiers(expression, dialect=dialect)
    tree = qualify_tables(
        tree,
        db=db,
        catalog=catalog,
        schema=resolved_schema,
        dialect=dialect,
    )

    if isolate_tables:
        tree = isolate_table_selects(tree, schema=resolved_schema)

    # This pass is driven by a dialect setting rather than by a caller flag.
    active_dialect = Dialect.get_or_raise(dialect)
    if active_dialect.PREFER_CTE_ALIAS_COLUMN:
        tree = pushdown_cte_alias_columns_func(tree)

    if qualify_columns:
        tree = qualify_columns_func(
            tree,
            resolved_schema,
            expand_alias_refs=expand_alias_refs,
            expand_stars=expand_stars,
            infer_schema=infer_schema,
        )

    if quote_identifiers:
        tree = quote_identifiers_func(tree, dialect=dialect, identify=identify)

    # The validator's return value is not used; the tree is returned as-is afterwards.
    if validate_qualify_columns:
        validate_qualify_columns_func(tree)

    return tree