summaryrefslogtreecommitdiffstats
path: root/sqlglot/dialects/hive.py
diff options
context:
space:
mode:
Diffstat (limited to 'sqlglot/dialects/hive.py')
-rw-r--r--sqlglot/dialects/hive.py49
1 files changed, 48 insertions, 1 deletions
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py
index f968f6a..e131434 100644
--- a/sqlglot/dialects/hive.py
+++ b/sqlglot/dialects/hive.py
@@ -18,6 +18,7 @@ from sqlglot.dialects.dialect import (
no_safe_divide_sql,
no_trycast_sql,
regexp_extract_sql,
+ regexp_replace_sql,
rename_func,
right_to_substring_sql,
strposition_to_locate_sql,
@@ -211,6 +212,7 @@ class Hive(Dialect):
"ADD JAR": TokenType.COMMAND,
"ADD JARS": TokenType.COMMAND,
"MSCK REPAIR": TokenType.COMMAND,
+ "REFRESH": TokenType.COMMAND,
"WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
}
@@ -270,6 +272,11 @@ class Hive(Dialect):
"YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
}
+ FUNCTION_PARSERS = {
+ **parser.Parser.FUNCTION_PARSERS,
+ "TRANSFORM": lambda self: self._parse_transform(),
+ }
+
PROPERTY_PARSERS = {
**parser.Parser.PROPERTY_PARSERS,
"WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
@@ -277,6 +284,40 @@ class Hive(Dialect):
),
}
+ def _parse_transform(self) -> exp.Transform | exp.QueryTransform:
+ args = self._parse_csv(self._parse_lambda)
+ self._match_r_paren()
+
+ row_format_before = self._parse_row_format(match_row=True)
+
+ record_writer = None
+ if self._match_text_seq("RECORDWRITER"):
+ record_writer = self._parse_string()
+
+ if not self._match(TokenType.USING):
+ return exp.Transform.from_arg_list(args)
+
+ command_script = self._parse_string()
+
+ self._match(TokenType.ALIAS)
+ schema = self._parse_schema()
+
+ row_format_after = self._parse_row_format(match_row=True)
+ record_reader = None
+ if self._match_text_seq("RECORDREADER"):
+ record_reader = self._parse_string()
+
+ return self.expression(
+ exp.QueryTransform,
+ expressions=args,
+ command_script=command_script,
+ schema=schema,
+ row_format_before=row_format_before,
+ record_writer=record_writer,
+ row_format_after=row_format_after,
+ record_reader=record_reader,
+ )
+
def _parse_types(
self, check_func: bool = False, schema: bool = False
) -> t.Optional[exp.Expression]:
@@ -363,11 +404,13 @@ class Hive(Dialect):
exp.Max: max_or_greatest,
exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
exp.Min: min_or_least,
+ exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
exp.VarMap: var_map_sql,
exp.Create: create_with_partitions_sql,
exp.Quantile: rename_func("PERCENTILE"),
exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
exp.RegexpExtract: regexp_extract_sql,
+ exp.RegexpReplace: regexp_replace_sql,
exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
exp.RegexpSplit: rename_func("SPLIT"),
exp.Right: right_to_substring_sql,
@@ -396,7 +439,6 @@ class Hive(Dialect):
exp.UnixToTime: rename_func("FROM_UNIXTIME"),
exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
- exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
exp.NumberToStr: rename_func("FORMAT_NUMBER"),
exp.LastDateOfMonth: rename_func("LAST_DAY"),
@@ -410,6 +452,11 @@ class Hive(Dialect):
exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
}
+ def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
+ serde_props = self.sql(expression, "serde_properties")
+ serde_props = f" {serde_props}" if serde_props else ""
+ return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
+
def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
return self.func(
"COLLECT_LIST",