summary refs log tree commit diff stats
path: root/sqlglot/dataframe/sql/dataframe.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglot/dataframe/sql/dataframe.py')
-rw-r--r-- sqlglot/dataframe/sql/dataframe.py 12
1 files changed, 6 insertions, 6 deletions
diff --git a/sqlglot/dataframe/sql/dataframe.py b/sqlglot/dataframe/sql/dataframe.py
index f3a6f6f..3fc9232 100644
--- a/sqlglot/dataframe/sql/dataframe.py
+++ b/sqlglot/dataframe/sql/dataframe.py
@@ -127,7 +127,7 @@ class DataFrame:
sequence_id: t.Optional[str] = None,
**kwargs,
) -> t.Tuple[exp.CTE, str]:
- name = self.spark._random_name
+ name = self._create_hash_from_expression(expression)
expression_to_cte = expression.copy()
expression_to_cte.set("with", None)
cte = exp.Select().with_(name, as_=expression_to_cte, **kwargs).ctes[0]
@@ -263,7 +263,7 @@ class DataFrame:
return [Column(x) for x in (expression.find(exp.Select) or exp.Select()).expressions]
@classmethod
- def _create_hash_from_expression(cls, expression: exp.Select):
+ def _create_hash_from_expression(cls, expression: exp.Expression) -> str:
value = expression.sql(dialect="spark").encode("utf-8")
return f"t{zlib.crc32(value)}"[:6]
@@ -299,7 +299,7 @@ class DataFrame:
for expression_type, select_expression in select_expressions:
select_expression = select_expression.transform(replace_id_value, replacement_mapping)
if optimize:
- select_expression = optimize_func(select_expression, identify="always")
+ select_expression = t.cast(exp.Select, optimize_func(select_expression))
select_expression = df._replace_cte_names_with_hashes(select_expression)
expression: t.Union[exp.Select, exp.Cache, exp.Drop]
if expression_type == exp.Cache:
@@ -570,9 +570,9 @@ class DataFrame:
r_expressions.append(l_column)
r_columns_unused.remove(l_column)
else:
- r_expressions.append(exp.alias_(exp.Null(), l_column))
+ r_expressions.append(exp.alias_(exp.Null(), l_column, copy=False))
for r_column in r_columns_unused:
- l_expressions.append(exp.alias_(exp.Null(), r_column))
+ l_expressions.append(exp.alias_(exp.Null(), r_column, copy=False))
r_expressions.append(r_column)
r_df = (
other.copy()._convert_leaf_to_cte().select(*self._ensure_list_of_columns(r_expressions))
@@ -761,7 +761,7 @@ class DataFrame:
raise ValueError("Tried to rename a column that doesn't exist")
for existing_column in existing_columns:
if isinstance(existing_column, exp.Column):
- existing_column.replace(exp.alias_(existing_column.copy(), new))
+ existing_column.replace(exp.alias_(existing_column, new))
else:
existing_column.set("alias", exp.to_identifier(new))
return self.copy(expression=expression)