diff options
Diffstat (limited to 'sqlglot/dialects/hive.py')
-rw-r--r-- | sqlglot/dialects/hive.py | 41 |
1 files changed, 39 insertions, 2 deletions
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py index fbd626a..650a1e1 100644 --- a/sqlglot/dialects/hive.py +++ b/sqlglot/dialects/hive.py @@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import ( create_with_partitions_sql, format_time_lambda, if_sql, + left_to_substring_sql, locate_to_strposition, max_or_greatest, min_or_least, @@ -17,6 +18,7 @@ from sqlglot.dialects.dialect import ( no_safe_divide_sql, no_trycast_sql, rename_func, + right_to_substring_sql, strposition_to_locate_sql, struct_extract_sql, timestrtotime_sql, @@ -89,7 +91,7 @@ def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> s annotate_types(this) - if this.type.is_type(exp.DataType.Type.JSON): + if this.type.is_type("json"): return self.sql(this) return self.func("TO_JSON", this, expression.args.get("options")) @@ -149,6 +151,7 @@ def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str class Hive(Dialect): alias_post_tablesample = True + identifiers_can_start_with_digit = True time_mapping = { "y": "%Y", @@ -190,7 +193,6 @@ class Hive(Dialect): IDENTIFIERS = ["`"] STRING_ESCAPES = ["\\"] ENCODE = "utf-8" - IDENTIFIER_CAN_START_WITH_DIGIT = True KEYWORDS = { **tokens.Tokenizer.KEYWORDS, @@ -276,6 +278,39 @@ class Hive(Dialect): "cluster": lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), } + def _parse_types( + self, check_func: bool = False, schema: bool = False + ) -> t.Optional[exp.Expression]: + """ + Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to + STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: + + spark-sql (default)> select cast(1234 as varchar(2)); + 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support + char/varchar type and simply treats them as string type. Please use string type + directly to avoid confusion.
Otherwise, you can set spark.sql.legacy.charVarcharAsString + to true, so that Spark treat them as string type as same as Spark 3.0 and earlier + + 1234 + Time taken: 4.265 seconds, Fetched 1 row(s) + + This shows that Spark doesn't truncate the value into '12', which is inconsistent with + what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. + + Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html + """ + this = super()._parse_types(check_func=check_func, schema=schema) + + if this and not schema: + return this.transform( + lambda node: node.replace(exp.DataType.build("text")) + if isinstance(node, exp.DataType) and node.is_type("char", "varchar") + else node, + copy=False, + ) + + return this + class Generator(generator.Generator): LIMIT_FETCH = "LIMIT" TABLESAMPLE_WITH_METHOD = False @@ -323,6 +358,7 @@ class Hive(Dialect): exp.JSONExtract: rename_func("GET_JSON_OBJECT"), exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), exp.JSONFormat: _json_format_sql, + exp.Left: left_to_substring_sql, exp.Map: var_map_sql, exp.Max: max_or_greatest, exp.Min: min_or_least, @@ -332,6 +368,7 @@ class Hive(Dialect): exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), exp.RegexpSplit: rename_func("SPLIT"), + exp.Right: right_to_substring_sql, exp.SafeDivide: no_safe_divide_sql, exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), exp.SetAgg: rename_func("COLLECT_SET"), |