diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-06-02 23:59:11 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2023-06-02 23:59:11 +0000 |
commit | caea5267cb8e1fea3702adbdf6f68fd37d13b3b7 (patch) | |
tree | f06f1da1ab3b6906beca1c3c7222d28ff00766ac /sqlglot/dataframe/sql/dataframe.py | |
parent | Adding upstream version 12.2.0. (diff) | |
download | sqlglot-caea5267cb8e1fea3702adbdf6f68fd37d13b3b7.tar.xz sqlglot-caea5267cb8e1fea3702adbdf6f68fd37d13b3b7.zip |
Adding upstream version 15.0.0. (tag: upstream/15.0.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dataframe/sql/dataframe.py')
-rw-r--r-- | sqlglot/dataframe/sql/dataframe.py | 12 |
1 file changed, 6 insertions, 6 deletions
```diff
diff --git a/sqlglot/dataframe/sql/dataframe.py b/sqlglot/dataframe/sql/dataframe.py
index f3a6f6f..3fc9232 100644
--- a/sqlglot/dataframe/sql/dataframe.py
+++ b/sqlglot/dataframe/sql/dataframe.py
@@ -127,7 +127,7 @@ class DataFrame:
         sequence_id: t.Optional[str] = None,
         **kwargs,
     ) -> t.Tuple[exp.CTE, str]:
-        name = self.spark._random_name
+        name = self._create_hash_from_expression(expression)
         expression_to_cte = expression.copy()
         expression_to_cte.set("with", None)
         cte = exp.Select().with_(name, as_=expression_to_cte, **kwargs).ctes[0]
@@ -263,7 +263,7 @@ class DataFrame:
         return [Column(x) for x in (expression.find(exp.Select) or exp.Select()).expressions]
 
     @classmethod
-    def _create_hash_from_expression(cls, expression: exp.Select):
+    def _create_hash_from_expression(cls, expression: exp.Expression) -> str:
         value = expression.sql(dialect="spark").encode("utf-8")
         return f"t{zlib.crc32(value)}"[:6]
 
@@ -299,7 +299,7 @@ class DataFrame:
         for expression_type, select_expression in select_expressions:
             select_expression = select_expression.transform(replace_id_value, replacement_mapping)
             if optimize:
-                select_expression = optimize_func(select_expression, identify="always")
+                select_expression = t.cast(exp.Select, optimize_func(select_expression))
             select_expression = df._replace_cte_names_with_hashes(select_expression)
             expression: t.Union[exp.Select, exp.Cache, exp.Drop]
             if expression_type == exp.Cache:
@@ -570,9 +570,9 @@ class DataFrame:
                 r_expressions.append(l_column)
                 r_columns_unused.remove(l_column)
             else:
-                r_expressions.append(exp.alias_(exp.Null(), l_column))
+                r_expressions.append(exp.alias_(exp.Null(), l_column, copy=False))
         for r_column in r_columns_unused:
-            l_expressions.append(exp.alias_(exp.Null(), r_column))
+            l_expressions.append(exp.alias_(exp.Null(), r_column, copy=False))
             r_expressions.append(r_column)
         r_df = (
             other.copy()._convert_leaf_to_cte().select(*self._ensure_list_of_columns(r_expressions))
@@ -761,7 +761,7 @@ class DataFrame:
             raise ValueError("Tried to rename a column that doesn't exist")
         for existing_column in existing_columns:
             if isinstance(existing_column, exp.Column):
-                existing_column.replace(exp.alias_(existing_column.copy(), new))
+                existing_column.replace(exp.alias_(existing_column, new))
             else:
                 existing_column.set("alias", exp.to_identifier(new))
         return self.copy(expression=expression)
```