summary | refs | log | tree | commit | diff | stats
path: root/sqlglot/dataframe/sql
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-22 18:53:34 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-06-22 18:53:34 +0000
commit: 8f88a01462641cbf930b3c43b780565d0fb7d37e (patch)
tree: e211588c29e6ce6d16fbbfd33d8cda63237c2e6e /sqlglot/dataframe/sql
parent: Releasing debian version 16.2.1-1. (diff)
download: sqlglot-8f88a01462641cbf930b3c43b780565d0fb7d37e.tar.xz
          sqlglot-8f88a01462641cbf930b3c43b780565d0fb7d37e.zip
Merging upstream version 16.4.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sqlglot/dataframe/sql')
-rw-r--r--  sqlglot/dataframe/sql/column.py      5
-rw-r--r--  sqlglot/dataframe/sql/dataframe.py   1
-rw-r--r--  sqlglot/dataframe/sql/normalize.py   2
-rw-r--r--  sqlglot/dataframe/sql/readwriter.py  14
4 files changed, 14 insertions, 8 deletions
diff --git a/sqlglot/dataframe/sql/column.py b/sqlglot/dataframe/sql/column.py
index a8b89d1..f4cfeba 100644
--- a/sqlglot/dataframe/sql/column.py
+++ b/sqlglot/dataframe/sql/column.py
@@ -5,6 +5,7 @@ import typing as t
import sqlglot
from sqlglot import expressions as exp
from sqlglot.dataframe.sql.types import DataType
+from sqlglot.dialects import Spark
from sqlglot.helper import flatten, is_iterable
if t.TYPE_CHECKING:
@@ -22,6 +23,10 @@ class Column:
expression = sqlglot.maybe_parse(expression, dialect="spark")
if expression is None:
raise ValueError(f"Could not parse {expression}")
+
+ if isinstance(expression, exp.Column):
+ expression.transform(Spark.normalize_identifier, copy=False)
+
self.expression: exp.Expression = expression
def __repr__(self):
diff --git a/sqlglot/dataframe/sql/dataframe.py b/sqlglot/dataframe/sql/dataframe.py
index 3fc9232..64cceea 100644
--- a/sqlglot/dataframe/sql/dataframe.py
+++ b/sqlglot/dataframe/sql/dataframe.py
@@ -316,6 +316,7 @@ class DataFrame:
expression.alias_or_name: expression.type.sql("spark")
for expression in select_expression.expressions
},
+ dialect="spark",
)
cache_storage_level = select_expression.args["cache_storage_level"]
options = [
diff --git a/sqlglot/dataframe/sql/normalize.py b/sqlglot/dataframe/sql/normalize.py
index 75feba7..4eec782 100644
--- a/sqlglot/dataframe/sql/normalize.py
+++ b/sqlglot/dataframe/sql/normalize.py
@@ -5,6 +5,7 @@ import typing as t
from sqlglot import expressions as exp
from sqlglot.dataframe.sql.column import Column
from sqlglot.dataframe.sql.util import get_tables_from_expression_with_join
+from sqlglot.dialects import Spark
from sqlglot.helper import ensure_list
NORMALIZE_INPUT = t.TypeVar("NORMALIZE_INPUT", bound=t.Union[str, exp.Expression, Column])
@@ -19,6 +20,7 @@ def normalize(spark: SparkSession, expression_context: exp.Select, expr: t.List[
for expression in expressions:
identifiers = expression.find_all(exp.Identifier)
for identifier in identifiers:
+ Spark.normalize_identifier(identifier)
replace_alias_name_with_cte_name(spark, expression_context, identifier)
replace_branch_and_sequence_ids_with_cte_name(spark, expression_context, identifier)
diff --git a/sqlglot/dataframe/sql/readwriter.py b/sqlglot/dataframe/sql/readwriter.py
index cc2f181..9d87d4a 100644
--- a/sqlglot/dataframe/sql/readwriter.py
+++ b/sqlglot/dataframe/sql/readwriter.py
@@ -4,7 +4,8 @@ import typing as t
import sqlglot
from sqlglot import expressions as exp
-from sqlglot.helper import object_to_dict, should_identify
+from sqlglot.dialects import Spark
+from sqlglot.helper import object_to_dict
if t.TYPE_CHECKING:
from sqlglot.dataframe.sql.dataframe import DataFrame
@@ -18,17 +19,14 @@ class DataFrameReader:
def table(self, tableName: str) -> DataFrame:
from sqlglot.dataframe.sql.dataframe import DataFrame
- sqlglot.schema.add_table(tableName)
+ sqlglot.schema.add_table(tableName, dialect="spark")
return DataFrame(
self.spark,
exp.Select()
- .from_(tableName)
+ .from_(exp.to_table(tableName, dialect="spark").transform(Spark.normalize_identifier))
.select(
- *(
- column if should_identify(column, "safe") else f'"{column}"'
- for column in sqlglot.schema.column_names(tableName)
- )
+ *(column for column in sqlglot.schema.column_names(tableName, dialect="spark"))
),
)
@@ -73,7 +71,7 @@ class DataFrameWriter:
)
df = self._df.copy(output_expression_container=output_expression_container)
if self._by_name:
- columns = sqlglot.schema.column_names(tableName, only_visible=True)
+ columns = sqlglot.schema.column_names(tableName, only_visible=True, dialect="spark")
df = df._convert_leaf_to_cte().select(*columns)
return self.copy(_df=df)