8 files changed, 114 insertions, 51 deletions
diff --git a/sqlglot/dialects/bigquery.py b/sqlglot/dialects/bigquery.py
index 40298e7..86e46cf 100644
--- a/sqlglot/dialects/bigquery.py
+++ b/sqlglot/dialects/bigquery.py
@@ -135,6 +135,7 @@ class BigQuery(Dialect):
             exp.DateSub: _date_add_sql("DATE", "SUB"),
             exp.DatetimeAdd: _date_add_sql("DATETIME", "ADD"),
             exp.DatetimeSub: _date_add_sql("DATETIME", "SUB"),
+            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
             exp.ILike: no_ilike_sql,
             exp.TimeAdd: _date_add_sql("TIME", "ADD"),
             exp.TimeSub: _date_add_sql("TIME", "SUB"),
@@ -172,12 +173,11 @@ class BigQuery(Dialect):
             exp.AnonymousProperty,
         }
 
+        EXPLICIT_UNION = True
+
         def in_unnest_op(self, unnest):
             return self.sql(unnest)
 
-        def union_op(self, expression):
-            return f"UNION{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"
-
         def except_op(self, expression):
             if not expression.args.get("distinct", False):
                 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py
index 55dad7a..da5c856 100644
--- a/sqlglot/dialects/clickhouse.py
+++ b/sqlglot/dialects/clickhouse.py
@@ -1,10 +1,16 @@
 from sqlglot import exp
-from sqlglot.dialects.dialect import Dialect, inline_array_sql
+from sqlglot.dialects.dialect import Dialect, inline_array_sql, var_map_sql
 from sqlglot.generator import Generator
-from sqlglot.parser import Parser
+from sqlglot.helper import csv
+from sqlglot.parser import Parser, parse_var_map
 from sqlglot.tokens import Tokenizer, TokenType
 
 
+def _lower_func(sql):
+    index = sql.index("(")
+    return sql[:index].lower() + sql[index:]
+
+
 class ClickHouse(Dialect):
     normalize_functions = None
     null_ordering = "nulls_are_last"
@@ -14,17 +20,23 @@ class ClickHouse(Dialect):
 
         KEYWORDS = {
             **Tokenizer.KEYWORDS,
-            "NULLABLE": TokenType.NULLABLE,
             "FINAL": TokenType.FINAL,
+            "DATETIME64": TokenType.DATETIME,
             "INT8": TokenType.TINYINT,
             "INT16": TokenType.SMALLINT,
             "INT32": TokenType.INT,
             "INT64": TokenType.BIGINT,
             "FLOAT32": TokenType.FLOAT,
             "FLOAT64": TokenType.DOUBLE,
+            "TUPLE": TokenType.STRUCT,
         }
 
     class Parser(Parser):
+        FUNCTIONS = {
+            **Parser.FUNCTIONS,
+            "MAP": parse_var_map,
+        }
+
         def _parse_table(self, schema=False):
             this = super()._parse_table(schema)
 
@@ -39,10 +51,25 @@ class ClickHouse(Dialect):
         TYPE_MAPPING = {
             **Generator.TYPE_MAPPING,
             exp.DataType.Type.NULLABLE: "Nullable",
+            exp.DataType.Type.DATETIME: "DateTime64",
+            exp.DataType.Type.MAP: "Map",
+            exp.DataType.Type.ARRAY: "Array",
+            exp.DataType.Type.STRUCT: "Tuple",
+            exp.DataType.Type.TINYINT: "Int8",
+            exp.DataType.Type.SMALLINT: "Int16",
+            exp.DataType.Type.INT: "Int32",
+            exp.DataType.Type.BIGINT: "Int64",
+            exp.DataType.Type.FLOAT: "Float32",
+            exp.DataType.Type.DOUBLE: "Float64",
         }
 
         TRANSFORMS = {
             **Generator.TRANSFORMS,
             exp.Array: inline_array_sql,
+            exp.StrPosition: lambda self, e: f"position({csv(self.sql(e, 'this'), self.sql(e, 'substr'), self.sql(e, 'position'))})",
             exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
+            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
+            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
         }
+
+        EXPLICIT_UNION = True
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
index 98dc330..f7c6cb5 100644
--- a/sqlglot/dialects/dialect.py
+++ b/sqlglot/dialects/dialect.py
@@ -77,7 +77,6 @@ class Dialect(metaclass=_Dialect):
     alias_post_tablesample = False
     normalize_functions = "upper"
     null_ordering = "nulls_are_small"
-    wrap_derived_values = True
 
     date_format = "'%Y-%m-%d'"
     dateint_format = "'%Y%m%d'"
@@ -170,7 +169,6 @@ class Dialect(metaclass=_Dialect):
                 "alias_post_tablesample": self.alias_post_tablesample,
                 "normalize_functions": self.normalize_functions,
                 "null_ordering": self.null_ordering,
-                "wrap_derived_values": self.wrap_derived_values,
                 **opts,
             }
         )
@@ -271,6 +269,21 @@ def struct_extract_sql(self, expression):
     return f"{this}.{struct_key}"
 
 
+def var_map_sql(self, expression):
+    keys = expression.args["keys"]
+    values = expression.args["values"]
+
+    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
+        self.unsupported("Cannot convert array columns into map.")
+        return f"MAP({self.sql(keys)}, {self.sql(values)})"
+
+    args = []
+    for key, value in zip(keys.expressions, values.expressions):
+        args.append(self.sql(key))
+        args.append(self.sql(value))
+    return f"MAP({csv(*args)})"
+
+
 def format_time_lambda(exp_class, dialect, default=None):
     """Helper used for time expressions.
 
diff --git a/sqlglot/dialects/hive.py b/sqlglot/dialects/hive.py
index 7a27bb3..55d7bcc 100644
--- a/sqlglot/dialects/hive.py
+++ b/sqlglot/dialects/hive.py
@@ -11,40 +11,14 @@ from sqlglot.dialects.dialect import (
     no_trycast_sql,
     rename_func,
     struct_extract_sql,
+    var_map_sql,
 )
 from sqlglot.generator import Generator
 from sqlglot.helper import csv, list_get
-from sqlglot.parser import Parser
+from sqlglot.parser import Parser, parse_var_map
 from sqlglot.tokens import Tokenizer
 
 
-def _parse_map(args):
-    keys = []
-    values = []
-    for i in range(0, len(args), 2):
-        keys.append(args[i])
-        values.append(args[i + 1])
-    return HiveMap(
-        keys=exp.Array(expressions=keys),
-        values=exp.Array(expressions=values),
-    )
-
-
-def _map_sql(self, expression):
-    keys = expression.args["keys"]
-    values = expression.args["values"]
-
-    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
-        self.unsupported("Cannot convert array columns into map use SparkSQL instead.")
-        return f"MAP({self.sql(keys)}, {self.sql(values)})"
-
-    args = []
-    for key, value in zip(keys.expressions, values.expressions):
-        args.append(self.sql(key))
-        args.append(self.sql(value))
-    return f"MAP({csv(*args)})"
-
-
 def _array_sort(self, expression):
     if expression.expression:
         self.unsupported("Hive SORT_ARRAY does not support a comparator")
@@ -122,10 +96,6 @@ def _index_sql(self, expression):
     return f"{this} ON TABLE {table} {columns}"
 
 
-class HiveMap(exp.Map):
-    is_var_len_args = True
-
-
 class Hive(Dialect):
     alias_post_tablesample = True
 
@@ -206,7 +176,7 @@ class Hive(Dialect):
                 position=list_get(args, 2),
             ),
             "LOG": (lambda args: exp.Log.from_arg_list(args) if len(args) > 1 else exp.Ln.from_arg_list(args)),
-            "MAP": _parse_map,
+            "MAP": parse_var_map,
             "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
             "PERCENTILE": exp.Quantile.from_arg_list,
             "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
@@ -245,8 +215,8 @@ class Hive(Dialect):
             exp.Join: _unnest_to_explode_sql,
             exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
             exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
-            exp.Map: _map_sql,
-            HiveMap: _map_sql,
+            exp.Map: var_map_sql,
+            exp.VarMap: var_map_sql,
             exp.Create: create_with_partitions_sql,
             exp.Quantile: rename_func("PERCENTILE"),
             exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
diff --git a/sqlglot/dialects/oracle.py b/sqlglot/dialects/oracle.py
index 91e30b2..8041ff0 100644
--- a/sqlglot/dialects/oracle.py
+++ b/sqlglot/dialects/oracle.py
@@ -10,6 +10,32 @@ def _limit_sql(self, expression):
 
 
 class Oracle(Dialect):
+    # https://docs.oracle.com/database/121/SQLRF/sql_elements004.htm#SQLRF00212
+    # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
+    time_mapping = {
+        "AM": "%p",  # Meridian indicator with or without periods
+        "A.M.": "%p",  # Meridian indicator with or without periods
+        "PM": "%p",  # Meridian indicator with or without periods
+        "P.M.": "%p",  # Meridian indicator with or without periods
+        "D": "%u",  # Day of week (1-7)
+        "DAY": "%A",  # name of day
+        "DD": "%d",  # day of month (1-31)
+        "DDD": "%j",  # day of year (1-366)
+        "DY": "%a",  # abbreviated name of day
+        "HH": "%I",  # Hour of day (1-12)
+        "HH12": "%I",  # alias for HH
+        "HH24": "%H",  # Hour of day (0-23)
+        "IW": "%V",  # Calendar week of year (1-52 or 1-53), as defined by the ISO 8601 standard
+        "MI": "%M",  # Minute (0-59)
+        "MM": "%m",  # Month (01-12; January = 01)
+        "MON": "%b",  # Abbreviated name of month
+        "MONTH": "%B",  # Name of month
+        "SS": "%S",  # Second (0-59)
+        "WW": "%W",  # Week of year (1-53)
+        "YY": "%y",  # 15
+        "YYYY": "%Y",  # 2015
+    }
+
     class Generator(Generator):
         TYPE_MAPPING = {
             **Generator.TYPE_MAPPING,
@@ -30,6 +56,9 @@ class Oracle(Dialect):
             **transforms.UNALIAS_GROUP,
             exp.ILike: no_ilike_sql,
             exp.Limit: _limit_sql,
+            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
+            exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
+            exp.UnixToTime: lambda self, e: f"TO_DATE('1970-01-01','YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)",
         }
 
         def query_modifiers(self, expression, *sqls):
diff --git a/sqlglot/dialects/postgres.py b/sqlglot/dialects/postgres.py
index aaa07a1..731e28e 100644
--- a/sqlglot/dialects/postgres.py
+++ b/sqlglot/dialects/postgres.py
@@ -118,13 +118,22 @@ def _serial_to_generated(expression):
     return expression
 
 
+def _to_timestamp(args):
+    # TO_TIMESTAMP accepts either a single double argument or (text, text)
+    if len(args) == 1 and args[0].is_number:
+        # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE
+        return exp.UnixToTime.from_arg_list(args)
+    # https://www.postgresql.org/docs/current/functions-formatting.html
+    return format_time_lambda(exp.StrToTime, "postgres")(args)
+
+
 class Postgres(Dialect):
     null_ordering = "nulls_are_large"
     time_format = "'YYYY-MM-DD HH24:MI:SS'"
     time_mapping = {
         "AM": "%p",
         "PM": "%p",
-        "D": "%w",  # 1-based day of week
+        "D": "%u",  # 1-based day of week
         "DD": "%d",  # day of month
         "DDD": "%j",  # zero padded day of year
         "FMDD": "%-d",  # - is no leading zero for Python; same for FM in postgres
@@ -172,7 +181,7 @@ class Postgres(Dialect):
 
         FUNCTIONS = {
             **Parser.FUNCTIONS,
-            "TO_TIMESTAMP": format_time_lambda(exp.StrToTime, "postgres"),
+            "TO_TIMESTAMP": _to_timestamp,
             "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
         }
 
@@ -211,4 +220,5 @@ class Postgres(Dialect):
             exp.TableSample: no_tablesample_sql,
             exp.Trim: _trim_sql,
             exp.TryCast: no_trycast_sql,
+            exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
         }
diff --git a/sqlglot/dialects/snowflake.py b/sqlglot/dialects/snowflake.py
index fb2d900..19a427c 100644
--- a/sqlglot/dialects/snowflake.py
+++ b/sqlglot/dialects/snowflake.py
@@ -121,6 +121,7 @@ class Snowflake(Dialect):
         FUNC_TOKENS = {
             *Parser.FUNC_TOKENS,
             TokenType.RLIKE,
+            TokenType.TABLE,
         }
 
         COLUMN_OPERATORS = {
@@ -143,7 +144,7 @@ class Snowflake(Dialect):
 
         SINGLE_TOKENS = {
             **Tokenizer.SINGLE_TOKENS,
-            "$": TokenType.DOLLAR,  # needed to break for quotes
+            "$": TokenType.PARAMETER,
         }
 
         KEYWORDS = {
@@ -164,6 +165,8 @@ class Snowflake(Dialect):
             exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
             exp.UnixToTime: _unix_to_time,
             exp.Array: inline_array_sql,
+            exp.StrPosition: rename_func("POSITION"),
+            exp.Parameter: lambda self, e: f"${self.sql(e, 'this')}",
             exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'value')}",
         }
 
diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py
index e8da07a..95a7ab4 100644
--- a/sqlglot/dialects/spark.py
+++ b/sqlglot/dialects/spark.py
@@ -4,8 +4,9 @@ from sqlglot.dialects.dialect import (
     no_ilike_sql,
     rename_func,
 )
-from sqlglot.dialects.hive import Hive, HiveMap
+from sqlglot.dialects.hive import Hive
 from sqlglot.helper import list_get
+from sqlglot.parser import Parser
 
 
 def _create_sql(self, e):
@@ -47,8 +48,6 @@ def _unix_to_time(self, expression):
 
 
 class Spark(Hive):
-    wrap_derived_values = False
-
     class Parser(Hive.Parser):
         FUNCTIONS = {
             **Hive.Parser.FUNCTIONS,
@@ -78,8 +77,19 @@ class Spark(Hive):
             "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
         }
 
-    class Generator(Hive.Generator):
+        FUNCTION_PARSERS = {
+            **Parser.FUNCTION_PARSERS,
+            "BROADCAST": lambda self: self._parse_join_hint("BROADCAST"),
+            "BROADCASTJOIN": lambda self: self._parse_join_hint("BROADCASTJOIN"),
+            "MAPJOIN": lambda self: self._parse_join_hint("MAPJOIN"),
+            "MERGE": lambda self: self._parse_join_hint("MERGE"),
+            "SHUFFLEMERGE": lambda self: self._parse_join_hint("SHUFFLEMERGE"),
+            "MERGEJOIN": lambda self: self._parse_join_hint("MERGEJOIN"),
+            "SHUFFLE_HASH": lambda self: self._parse_join_hint("SHUFFLE_HASH"),
+            "SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"),
+        }
 
+    class Generator(Hive.Generator):
         TYPE_MAPPING = {
             **Hive.Generator.TYPE_MAPPING,
             exp.DataType.Type.TINYINT: "BYTE",
@@ -102,8 +112,9 @@ class Spark(Hive):
             exp.Map: _map_sql,
             exp.Reduce: rename_func("AGGREGATE"),
             exp.StructKwarg: lambda self, e: f"{self.sql(e, 'this')}: {self.sql(e, 'expression')}",
-            HiveMap: _map_sql,
         }
 
+        WRAP_DERIVED_VALUES = False
+
     class Tokenizer(Hive.Tokenizer):
         HEX_STRINGS = [("X'", "'")]