summaryrefslogtreecommitdiffstats
path: root/src/arrow/cpp/src/gandiva/precompiled
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/cpp/src/gandiva/precompiled')
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/CMakeLists.txt142
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops.cc274
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc180
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/bitmap.cc60
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/bitmap_test.cc62
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc723
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/decimal_ops.h90
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/decimal_ops_test.cc1095
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/decimal_wrapper.cc433
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/epoch_time_point.h118
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/epoch_time_point_test.cc103
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/extended_math_ops.cc410
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/extended_math_ops_test.cc349
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/hash.cc407
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/hash_test.cc122
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/print.cc28
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/string_ops.cc2198
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/string_ops_test.cc1758
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/testing.h43
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/time.cc894
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/time_constants.h30
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/time_fields.h35
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/time_test.cc953
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc283
-rw-r--r--src/arrow/cpp/src/gandiva/precompiled/types.h592
25 files changed, 11382 insertions, 0 deletions
diff --git a/src/arrow/cpp/src/gandiva/precompiled/CMakeLists.txt b/src/arrow/cpp/src/gandiva/precompiled/CMakeLists.txt
new file mode 100644
index 000000000..650b80f6b
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -0,0 +1,142 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+project(gandiva)
+
+# Sources compiled to LLVM bitcode by the foreach() loop below. All of them live
+# in this directory except basic_decimal.cc, which is taken from arrow/util.
+set(PRECOMPILED_SRCS
+ arithmetic_ops.cc
+ bitmap.cc
+ decimal_ops.cc
+ decimal_wrapper.cc
+ extended_math_ops.cc
+ hash.cc
+ print.cc
+ string_ops.cc
+ time.cc
+ timestamp_arithmetic.cc
+ ../../arrow/util/basic_decimal.cc)
+
+# PLATFORM_CLANG_OPTIONS: language-standard and compatibility flags handed to clang
+# for every bitcode compilation.
+if(MSVC)
+ # clang pretends to be a particular version of MSVC. 191[0-9] is
+ # Visual Studio 2017, and the standard library uses C++14 features,
+ # so we have to use that -std version to get the IR compilation to work
+ if(MSVC_VERSION MATCHES "^191[0-9]$")
+ set(FMS_COMPATIBILITY 19.10)
+ else()
+ # Any other MSVC_VERSION has no compatibility mapping here, so configuration stops.
+ message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}")
+ endif()
+ set(PLATFORM_CLANG_OPTIONS -std=c++14 -fms-compatibility
+ -fms-compatibility-version=${FMS_COMPATIBILITY})
+else()
+ # Non-MSVC toolchains compile the bitcode as C++11.
+ set(PLATFORM_CLANG_OPTIONS -std=c++11)
+endif()
+
+# Headers in this directory that the precompiled sources may include. They are
+# listed as dependencies of every bitcode file so that editing one of them
+# regenerates the bitcode. Headers outside this directory are still not tracked.
+set(PRECOMPILED_HDRS
+    ${CMAKE_CURRENT_SOURCE_DIR}/decimal_ops.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/epoch_time_point.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/time_constants.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/time_fields.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/types.h)
+
+# Start from an empty list so that a BC_FILES value inherited from an enclosing
+# scope cannot leak into the link step.
+set(BC_FILES)
+
+# Create bitcode for each of the source files.
+foreach(SRC_FILE ${PRECOMPILED_SRCS})
+  get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE)
+  get_filename_component(ABSOLUTE_SRC ${SRC_FILE} ABSOLUTE)
+  set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SRC_BASE}.bc)
+  # The command line is rebuilt from scratch for every source file.
+  set(PRECOMPILE_COMMAND)
+  if(CMAKE_OSX_SYSROOT)
+    # Run clang through "cmake -E env" so that SDKROOT is set for the compilation.
+    list(APPEND
+         PRECOMPILE_COMMAND
+         ${CMAKE_COMMAND}
+         -E
+         env
+         SDKROOT=${CMAKE_OSX_SYSROOT})
+  endif()
+  list(APPEND
+       PRECOMPILE_COMMAND
+       ${CLANG_EXECUTABLE}
+       ${PLATFORM_CLANG_OPTIONS}
+       -DGANDIVA_IR
+       -DNDEBUG # DCHECK macros not implemented in precompiled code
+       -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols
+       -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols
+       -fno-use-cxa-atexit # Workaround for unresolved __dso_handle
+       -emit-llvm
+       -O3
+       -c
+       ${ABSOLUTE_SRC}
+       -o
+       ${BC_FILE}
+       ${ARROW_GANDIVA_PC_CXX_FLAGS}
+       -I${CMAKE_SOURCE_DIR}/src
+       -I${ARROW_BINARY_DIR}/src)
+
+  if(NOT ARROW_USE_NATIVE_INT128)
+    # Without a native 128-bit integer the sources also need the Boost headers.
+    list(APPEND PRECOMPILE_COMMAND -I${Boost_INCLUDE_DIR})
+  endif()
+  add_custom_command(OUTPUT ${BC_FILE}
+                     COMMAND ${PRECOMPILE_COMMAND}
+                     DEPENDS ${SRC_FILE} ${PRECOMPILED_HDRS})
+  list(APPEND BC_FILES ${BC_FILE})
+endforeach()
+
+# link all of the bitcode files into a single bitcode file.
+add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_BC_PATH}
+ COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_PRECOMPILED_BC_PATH}
+ ${BC_FILES}
+ DEPENDS ${BC_FILES})
+
+# turn the bitcode file into a C++ static data variable.
+# make_precompiled_bitcode.py receives the .cc input file (CC_IN), the linked
+# bitcode file and the output path, in that order.
+add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_CC_PATH}
+ COMMAND ${PYTHON_EXECUTABLE}
+ "${CMAKE_CURRENT_SOURCE_DIR}/../make_precompiled_bitcode.py"
+ ${GANDIVA_PRECOMPILED_CC_IN_PATH}
+ ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH}
+ DEPENDS ${GANDIVA_PRECOMPILED_CC_IN_PATH}
+ ${GANDIVA_PRECOMPILED_BC_PATH})
+
+# "precompiled" is part of the default build (ALL) and depends on both generated
+# files, which is what triggers the two rules above.
+add_custom_target(precompiled ALL DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH}
+ ${GANDIVA_PRECOMPILED_CC_PATH})
+
+# testing
+# The precompiled sources are compiled natively, together with their unit tests
+# and the helper sources they need, into a single test executable.
+if(ARROW_BUILD_TESTS)
+  add_executable(gandiva-precompiled-test
+                 ../context_helper.cc
+                 bitmap_test.cc
+                 bitmap.cc
+                 epoch_time_point_test.cc
+                 time_test.cc
+                 time.cc
+                 timestamp_arithmetic.cc
+                 ../cast_time.cc
+                 ../../arrow/vendored/datetime/tz.cpp
+                 hash_test.cc
+                 hash.cc
+                 string_ops_test.cc
+                 string_ops.cc
+                 arithmetic_ops_test.cc
+                 arithmetic_ops.cc
+                 extended_math_ops_test.cc
+                 extended_math_ops.cc
+                 decimal_ops_test.cc
+                 decimal_ops.cc
+                 ../decimal_type_util.cc
+                 ../decimal_xlarge.cc)
+  target_include_directories(gandiva-precompiled-test PRIVATE ${CMAKE_SOURCE_DIR}/src)
+  target_link_libraries(gandiva-precompiled-test PRIVATE ${ARROW_TEST_LINK_LIBS})
+  target_compile_definitions(gandiva-precompiled-test PRIVATE GANDIVA_UNIT_TEST=1
+                                                              ARROW_STATIC GANDIVA_STATIC)
+  # Register the test through its target name so CTest resolves the executable
+  # location itself instead of relying on the legacy EXECUTABLE_OUTPUT_PATH variable.
+  add_test(NAME gandiva-precompiled-test COMMAND gandiva-precompiled-test)
+  set_property(TEST gandiva-precompiled-test
+               APPEND
+               PROPERTY LABELS "unittest;gandiva-tests")
+  # Building the aggregate gandiva-tests target also builds this executable.
+  add_dependencies(gandiva-tests gandiva-precompiled-test)
+endif()
diff --git a/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops.cc
new file mode 100644
index 000000000..c736c38d3
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops.cc
@@ -0,0 +1,274 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern "C" {
+
+#include <math.h>
+#include "./types.h"
+
+// Expand inner macro for all numeric types.
+#define NUMERIC_TYPES(INNER, NAME, OP) \
+ INNER(NAME, int8, OP) \
+ INNER(NAME, int16, OP) \
+ INNER(NAME, int32, OP) \
+ INNER(NAME, int64, OP) \
+ INNER(NAME, uint8, OP) \
+ INNER(NAME, uint16, OP) \
+ INNER(NAME, uint32, OP) \
+ INNER(NAME, uint64, OP) \
+ INNER(NAME, float32, OP) \
+ INNER(NAME, float64, OP)
+
+// Expand inner macros for all date/time types.
+#define DATE_TYPES(INNER, NAME, OP) \
+ INNER(NAME, date64, OP) \
+ INNER(NAME, date32, OP) \
+ INNER(NAME, timestamp, OP) \
+ INNER(NAME, time32, OP)
+
+// Expand inner macro for the numeric types followed by the date/time types.
+#define NUMERIC_DATE_TYPES(INNER, NAME, OP) \
+ NUMERIC_TYPES(INNER, NAME, OP) \
+ DATE_TYPES(INNER, NAME, OP)
+
+// Expand inner macro for the numeric types, the date/time types and boolean.
+#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME, OP) \
+ NUMERIC_TYPES(INNER, NAME, OP) \
+ DATE_TYPES(INNER, NAME, OP) \
+ INNER(NAME, boolean, OP)
+
+// Integer modulo with possibly different operand widths.
+//  - A zero divisor returns the dividend (converted to the output type) instead of
+//    trapping.
+//  - A divisor of -1 returns 0 without evaluating `left % right`: the mathematical
+//    result is always 0, but the expression is undefined behaviour when `left` is
+//    the most negative value of its type.
+#define MOD_OP(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE NAME##_##IN_TYPE1##_##IN_TYPE2(gdv_##IN_TYPE1 left, \
+                                                gdv_##IN_TYPE2 right) { \
+    if (right == 0) { \
+      return static_cast<gdv_##OUT_TYPE>(left); \
+    } \
+    if (right == -1) { \
+      return static_cast<gdv_##OUT_TYPE>(0); \
+    } \
+    return static_cast<gdv_##OUT_TYPE>(left % right); \
+  }
+
+// Symmetric binary fns : left, right params and return type are same.
+// Generates NAME_TYPE_TYPE(left, right), which applies OP and casts the result back
+// to gdv_TYPE.
+#define BINARY_SYMMETRIC(NAME, TYPE, OP) \
+ FORCE_INLINE \
+ gdv_##TYPE NAME##_##TYPE##_##TYPE(gdv_##TYPE left, gdv_##TYPE right) { \
+ return static_cast<gdv_##TYPE>(left OP right); \
+ }
+
+// add/subtract/multiply exist for every numeric type; the bitwise operators are
+// generated for int32 and int64 only.
+NUMERIC_TYPES(BINARY_SYMMETRIC, add, +)
+NUMERIC_TYPES(BINARY_SYMMETRIC, subtract, -)
+NUMERIC_TYPES(BINARY_SYMMETRIC, multiply, *)
+BINARY_SYMMETRIC(bitwise_and, int32, &)
+BINARY_SYMMETRIC(bitwise_and, int64, &)
+BINARY_SYMMETRIC(bitwise_or, int32, |)
+BINARY_SYMMETRIC(bitwise_or, int64, |)
+BINARY_SYMMETRIC(bitwise_xor, int32, ^)
+BINARY_SYMMETRIC(bitwise_xor, int64, ^)
+
+#undef BINARY_SYMMETRIC
+
+MOD_OP(mod, int64, int32, int32)
+MOD_OP(mod, int64, int64, int64)
+
+#undef MOD_OP
+
+// Floating-point remainder of x / y, computed with fmod(). A zero divisor records
+// "divide by zero error" on the execution context and yields 0.0.
+gdv_float64 mod_float64_float64(int64_t context, gdv_float64 x, gdv_float64 y) {
+  if (y != 0.0) {
+    return fmod(x, y);
+  }
+  gdv_fn_context_set_error_msg(context, "divide by zero error");
+  return 0.0;
+}
+
+// Relational binary fns : left, right params are same, return is bool.
+// Generates NAME_TYPE_TYPE(left, right), which returns `left OP right`.
+#define BINARY_RELATIONAL(NAME, TYPE, OP) \
+ FORCE_INLINE \
+ bool NAME##_##TYPE##_##TYPE(gdv_##TYPE left, gdv_##TYPE right) { return left OP right; }
+
+// Equality and inequality also cover boolean; the ordering comparisons are generated
+// for the numeric and date/time types only.
+NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL, equal, ==)
+NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL, not_equal, !=)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, less_than, <)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, less_than_or_equal_to, <=)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, greater_than, >)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, greater_than_or_equal_to, >=)
+
+#undef BINARY_RELATIONAL
+
+// cast fns : takes one param type, returns another type.
+// Generates NAME_IN_TYPE(in), a plain static_cast from gdv_IN_TYPE to gdv_OUT_TYPE.
+#define CAST_UNARY(NAME, IN_TYPE, OUT_TYPE) \
+ FORCE_INLINE \
+ gdv_##OUT_TYPE NAME##_##IN_TYPE(gdv_##IN_TYPE in) { \
+ return static_cast<gdv_##OUT_TYPE>(in); \
+ }
+
+CAST_UNARY(castBIGINT, int32, int64)
+CAST_UNARY(castINT, int64, int32)
+CAST_UNARY(castFLOAT4, int32, float32)
+CAST_UNARY(castFLOAT4, int64, float32)
+CAST_UNARY(castFLOAT8, int32, float64)
+CAST_UNARY(castFLOAT8, int64, float64)
+CAST_UNARY(castFLOAT8, float32, float64)
+CAST_UNARY(castFLOAT4, float64, float32)
+
+#undef CAST_UNARY
+
+// cast float types to int types.
+// The input is passed through round() before the static_cast, so the result is the
+// nearest integer rather than the truncated value.
+// NOTE(review): inputs whose rounded value does not fit the output type are not
+// checked here - confirm callers guarantee the range.
+#define CAST_INT_FLOAT(NAME, IN_TYPE, OUT_TYPE) \
+ FORCE_INLINE \
+ gdv_##OUT_TYPE NAME##_##IN_TYPE(gdv_##IN_TYPE in) { \
+ gdv_##OUT_TYPE out = static_cast<gdv_##OUT_TYPE>(round(in)); \
+ return out; \
+ }
+
+CAST_INT_FLOAT(castBIGINT, float32, int64)
+CAST_INT_FLOAT(castBIGINT, float64, int64)
+CAST_INT_FLOAT(castINT, float32, int32)
+CAST_INT_FLOAT(castINT, float64, int32)
+
+#undef CAST_INT_FLOAT
+
+// simple nullable functions, result value = fn(input validity)
+// The data value `in` is ignored; OP is applied to is_valid only. `!` inverts the
+// flag (isnull), while unary `+` leaves it unchanged (isnotnull, isnumeric).
+#define VALIDITY_OP(NAME, TYPE, OP) \
+ FORCE_INLINE \
+ bool NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { return OP is_valid; }
+
+NUMERIC_BOOL_DATE_TYPES(VALIDITY_OP, isnull, !)
+NUMERIC_BOOL_DATE_TYPES(VALIDITY_OP, isnotnull, +)
+NUMERIC_TYPES(VALIDITY_OP, isnumeric, +)
+
+#undef VALIDITY_OP
+
+// Single-argument variants of the type lists: each one expands INNER(TYPE) once per
+// listed type.
+
+// All numeric types.
+#define NUMERIC_FUNCTION(INNER) \
+ INNER(int8) \
+ INNER(int16) \
+ INNER(int32) \
+ INNER(int64) \
+ INNER(uint8) \
+ INNER(uint16) \
+ INNER(uint32) \
+ INNER(uint64) \
+ INNER(float32) \
+ INNER(float64)
+
+// All date/time types.
+#define DATE_FUNCTION(INNER) \
+ INNER(date32) \
+ INNER(date64) \
+ INNER(timestamp) \
+ INNER(time32)
+
+// Numeric types, date/time types and boolean.
+#define NUMERIC_BOOL_DATE_FUNCTION(INNER) \
+ NUMERIC_FUNCTION(INNER) \
+ DATE_FUNCTION(INNER) \
+ INNER(boolean)
+
+// Logical negation of a boolean value.
+FORCE_INLINE
+gdv_boolean not_boolean(gdv_boolean in) { return !in; }
+
+// is_distinct_from
+// Null-aware inequality: true when exactly one side is null, false when both are
+// null, and `in1 != in2` when both are valid.
+#define IS_DISTINCT_FROM(TYPE) \
+  FORCE_INLINE \
+  bool is_distinct_from_##TYPE##_##TYPE(gdv_##TYPE in1, gdv_boolean is_valid1, \
+                                        gdv_##TYPE in2, gdv_boolean is_valid2) { \
+    return (is_valid1 != is_valid2) || (is_valid1 && (in1 != in2)); \
+  }
+
+// is_not_distinct_from
+// Null-aware equality: false when exactly one side is null, true when both are
+// null, and `in1 == in2` when both are valid.
+#define IS_NOT_DISTINCT_FROM(TYPE) \
+  FORCE_INLINE \
+  bool is_not_distinct_from_##TYPE##_##TYPE(gdv_##TYPE in1, gdv_boolean is_valid1, \
+                                            gdv_##TYPE in2, gdv_boolean is_valid2) { \
+    return (is_valid1 == is_valid2) && (!is_valid1 || (in1 == in2)); \
+  }
+
+NUMERIC_BOOL_DATE_FUNCTION(IS_DISTINCT_FROM)
+NUMERIC_BOOL_DATE_FUNCTION(IS_NOT_DISTINCT_FROM)
+
+#undef IS_DISTINCT_FROM
+#undef IS_NOT_DISTINCT_FROM
+
+// Division for every numeric type. A zero divisor sets "divide by zero error" on the
+// execution context and returns 0 instead of performing the division.
+// NOTE(review): for the 32- and 64-bit signed types, (minimum value) / -1 is not
+// guarded and overflows - confirm whether callers can pass that combination.
+#define DIVIDE(TYPE) \
+ FORCE_INLINE \
+ gdv_##TYPE divide_##TYPE##_##TYPE(gdv_int64 context, gdv_##TYPE in1, gdv_##TYPE in2) { \
+ if (in2 == 0) { \
+ char const* err_msg = "divide by zero error"; \
+ gdv_fn_context_set_error_msg(context, err_msg); \
+ return 0; \
+ } \
+ return static_cast<gdv_##TYPE>(in1 / in2); \
+ }
+
+NUMERIC_FUNCTION(DIVIDE)
+
+#undef DIVIDE
+
+// Integer `div`, generated for int32 and int64. The body matches DIVIDE: a zero
+// divisor sets "divide by zero error" on the context and returns 0, otherwise the
+// result is `in1 / in2`.
+#define DIV(TYPE) \
+ FORCE_INLINE \
+ gdv_##TYPE div_##TYPE##_##TYPE(gdv_int64 context, gdv_##TYPE in1, gdv_##TYPE in2) { \
+ if (in2 == 0) { \
+ char const* err_msg = "divide by zero error"; \
+ gdv_fn_context_set_error_msg(context, err_msg); \
+ return 0; \
+ } \
+ return static_cast<gdv_##TYPE>(in1 / in2); \
+ }
+
+DIV(int32)
+DIV(int64)
+
+#undef DIV
+
+// Floating-point `div`: the quotient is passed through trunc(), which discards its
+// fractional part. A zero divisor sets "divide by zero error" on the context and
+// returns 0.
+#define DIV_FLOAT(TYPE) \
+ FORCE_INLINE \
+ gdv_##TYPE div_##TYPE##_##TYPE(gdv_int64 context, gdv_##TYPE in1, gdv_##TYPE in2) { \
+ if (in2 == 0) { \
+ char const* err_msg = "divide by zero error"; \
+ gdv_fn_context_set_error_msg(context, err_msg); \
+ return 0; \
+ } \
+ return static_cast<gdv_##TYPE>(::trunc(in1 / in2)); \
+ }
+
+DIV_FLOAT(float32)
+DIV_FLOAT(float64)
+
+#undef DIV_FLOAT
+
+// Bitwise complement (~in), generated for int32 and int64.
+#define BITWISE_NOT(TYPE) \
+ FORCE_INLINE \
+ gdv_##TYPE bitwise_not_##TYPE(gdv_##TYPE in) { return static_cast<gdv_##TYPE>(~in); }
+
+BITWISE_NOT(int32)
+BITWISE_NOT(int64)
+
+#undef BITWISE_NOT
+
+// Remove every type-list helper macro defined in this file, including
+// NUMERIC_BOOL_DATE_FUNCTION, so none of them stays defined after this point.
+#undef DATE_FUNCTION
+#undef DATE_TYPES
+#undef NUMERIC_BOOL_DATE_FUNCTION
+#undef NUMERIC_BOOL_DATE_TYPES
+#undef NUMERIC_DATE_TYPES
+#undef NUMERIC_FUNCTION
+#undef NUMERIC_TYPES
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc
new file mode 100644
index 000000000..36b50bcfd
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "../execution_context.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Covers the four validity combinations for both null-aware comparison functions.
+TEST(TestArithmeticOps, TestIsDistinctFrom) {
+  // Exactly one side null: distinct.
+  EXPECT_TRUE(is_distinct_from_timestamp_timestamp(1000, true, 1000, false));
+  EXPECT_TRUE(is_distinct_from_timestamp_timestamp(1000, false, 1000, true));
+  // Both null, or both valid with equal values: not distinct.
+  EXPECT_FALSE(is_distinct_from_timestamp_timestamp(1000, false, 1000, false));
+  EXPECT_FALSE(is_distinct_from_timestamp_timestamp(1000, true, 1000, true));
+
+  // The negated function gives the opposite answer in every case.
+  EXPECT_FALSE(is_not_distinct_from_int32_int32(1000, true, 1000, false));
+  EXPECT_FALSE(is_not_distinct_from_int32_int32(1000, false, 1000, true));
+  EXPECT_TRUE(is_not_distinct_from_int32_int32(1000, false, 1000, false));
+  EXPECT_TRUE(is_not_distinct_from_int32_int32(1000, true, 1000, true));
+}
+
+// Checks integer mod with a zero divisor and float64 mod, including the error that
+// a zero divisor records on the execution context.
+TEST(TestArithmeticOps, TestMod) {
+ gandiva::ExecutionContext context;
+ EXPECT_EQ(mod_int64_int32(10, 0), 10);
+
+ const double acceptable_abs_error = 0.00000000001; // 1e-11
+
+ EXPECT_DOUBLE_EQ(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 2.5, 0.0),
+ 0.0);
+ EXPECT_TRUE(context.has_error());
+ EXPECT_EQ(context.get_error(), "divide by zero error");
+
+ context.Reset();
+ EXPECT_NEAR(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 2.5, 1.2), 0.1,
+ acceptable_abs_error);
+ EXPECT_FALSE(context.has_error());
+
+ context.Reset();
+ EXPECT_DOUBLE_EQ(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 2.5, 2.5),
+ 0.0);
+ EXPECT_FALSE(context.has_error());
+
+ context.Reset();
+ EXPECT_NEAR(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 9.2, 3.7), 1.8,
+ acceptable_abs_error);
+ EXPECT_FALSE(context.has_error());
+}
+
+TEST(TestArithmeticOps, TestDivide) {
+ gandiva::ExecutionContext context;
+ EXPECT_EQ(divide_int64_int64(reinterpret_cast<gdv_int64>(&context), 10, 0), 0);
+ EXPECT_EQ(context.has_error(), true);
+ EXPECT_EQ(context.get_error(), "divide by zero error");
+
+ context.Reset();
+ EXPECT_EQ(divide_int64_int64(reinterpret_cast<gdv_int64>(&context), 10, 2), 5);
+ EXPECT_EQ(context.has_error(), false);
+}
+
+TEST(TestArithmeticOps, TestDiv) {
+ gandiva::ExecutionContext context;
+ EXPECT_EQ(div_int64_int64(reinterpret_cast<gdv_int64>(&context), 101, 0), 0);
+ EXPECT_EQ(context.has_error(), true);
+ EXPECT_EQ(context.get_error(), "divide by zero error");
+ context.Reset();
+
+ EXPECT_EQ(div_int64_int64(reinterpret_cast<gdv_int64>(&context), 101, 111), 0);
+ EXPECT_EQ(context.has_error(), false);
+ context.Reset();
+
+ EXPECT_EQ(div_float64_float64(reinterpret_cast<gdv_int64>(&context), 1010.1010, 2.1),
+ 481.0);
+ EXPECT_EQ(context.has_error(), false);
+ context.Reset();
+
+ EXPECT_EQ(
+ div_float64_float64(reinterpret_cast<gdv_int64>(&context), 1010.1010, 0.00000),
+ 0.0);
+ EXPECT_EQ(context.has_error(), true);
+ EXPECT_EQ(context.get_error(), "divide by zero error");
+ context.Reset();
+
+ EXPECT_EQ(div_float32_float32(reinterpret_cast<gdv_int64>(&context), 1010.1010f, 2.1f),
+ 481.0f);
+ EXPECT_EQ(context.has_error(), false);
+ context.Reset();
+}
+
+// Checks bitwise and/or/xor/not for int32 and int64 against fixed hexadecimal
+// results.
+// NOTE(review): literals such as 0xFFFFFFCC do not fit a signed 32-bit value and are
+// converted implicitly at the call - presumably a shorthand for negative inputs;
+// confirm.
+TEST(TestArithmeticOps, TestBitwiseOps) {
+ // bitwise AND
+ EXPECT_EQ(bitwise_and_int32_int32(0x0147D, 0x17159), 0x01059);
+ EXPECT_EQ(bitwise_and_int32_int32(0xFFFFFFCC, 0x00000297), 0x00000284);
+ EXPECT_EQ(bitwise_and_int32_int32(0x000, 0x285), 0x000);
+ EXPECT_EQ(bitwise_and_int64_int64(0x563672F83, 0x0D9FCF85B), 0x041642803);
+ EXPECT_EQ(bitwise_and_int64_int64(0xFFFFFFFFFFDA8F6A, 0xFFFFFFFFFFFF791C),
+ 0xFFFFFFFFFFDA0908);
+ EXPECT_EQ(bitwise_and_int64_int64(0x6A5B1, 0x00000), 0x00000);
+
+ // bitwise OR
+ EXPECT_EQ(bitwise_or_int32_int32(0x0147D, 0x17159), 0x1757D);
+ EXPECT_EQ(bitwise_or_int32_int32(0xFFFFFFCC, 0x00000297), 0xFFFFFFDF);
+ EXPECT_EQ(bitwise_or_int32_int32(0x000, 0x285), 0x285);
+ EXPECT_EQ(bitwise_or_int64_int64(0x563672F83, 0x0D9FCF85B), 0x5FBFFFFDB);
+ EXPECT_EQ(bitwise_or_int64_int64(0xFFFFFFFFFFDA8F6A, 0xFFFFFFFFFFFF791C),
+ 0xFFFFFFFFFFFFFF7E);
+ EXPECT_EQ(bitwise_or_int64_int64(0x6A5B1, 0x00000), 0x6A5B1);
+
+ // bitwise XOR
+ EXPECT_EQ(bitwise_xor_int32_int32(0x0147D, 0x17159), 0x16524);
+ EXPECT_EQ(bitwise_xor_int32_int32(0xFFFFFFCC, 0x00000297), 0XFFFFFD5B);
+ EXPECT_EQ(bitwise_xor_int32_int32(0x000, 0x285), 0x285);
+ EXPECT_EQ(bitwise_xor_int64_int64(0x563672F83, 0x0D9FCF85B), 0x5BA9BD7D8);
+ EXPECT_EQ(bitwise_xor_int64_int64(0xFFFFFFFFFFDA8F6A, 0xFFFFFFFFFFFF791C), 0X25F676);
+ EXPECT_EQ(bitwise_xor_int64_int64(0x6A5B1, 0x00000), 0x6A5B1);
+ EXPECT_EQ(bitwise_xor_int64_int64(0x6A5B1, 0x6A5B1), 0x00000);
+
+ // bitwise NOT
+ EXPECT_EQ(bitwise_not_int32(0x00017159), 0xFFFE8EA6);
+ EXPECT_EQ(bitwise_not_int32(0xFFFFF226), 0x00000DD9);
+ EXPECT_EQ(bitwise_not_int64(0x000000008BCAE9B4), 0xFFFFFFFF7435164B);
+ EXPECT_EQ(bitwise_not_int64(0xFFFFFF966C8D7997), 0x0000006993728668);
+ EXPECT_EQ(bitwise_not_int64(0x0000000000000000), 0xFFFFFFFFFFFFFFFF);
+}
+
+// castINT from float32/float64: values are rounded to the nearest integer.
+TEST(TestArithmeticOps, TestIntCastFloatDouble) {
+  // castINT from floats
+  EXPECT_EQ(castINT_float32(6.6f), 7);
+  EXPECT_EQ(castINT_float32(-6.6f), -7);
+  EXPECT_EQ(castINT_float32(-6.3f), -6);
+  EXPECT_EQ(castINT_float32(0.0f), 0);
+  // Negative zero has to be a floating-point literal: the integer expression -0 is
+  // simply 0 and would only repeat the previous case.
+  EXPECT_EQ(castINT_float32(-0.0f), 0);
+
+  // castINT from doubles
+  EXPECT_EQ(castINT_float64(6.6), 7);
+  EXPECT_EQ(castINT_float64(-6.6), -7);
+  EXPECT_EQ(castINT_float64(-6.3), -6);
+  EXPECT_EQ(castINT_float64(0.0), 0);
+  EXPECT_EQ(castINT_float64(-0.0), 0);
+  EXPECT_EQ(castINT_float64(999999.99999999999999999999999), 1000000);
+  EXPECT_EQ(castINT_float64(-999999.99999999999999999999999), -1000000);
+  EXPECT_EQ(castINT_float64(INT32_MAX), 2147483647);
+  EXPECT_EQ(castINT_float64(-2147483647), -2147483647);
+}
+
+// castBIGINT from float32/float64: values are rounded to the nearest integer.
+TEST(TestArithmeticOps, TestBigIntCastFloatDouble) {
+  // castBIGINT from floats
+  EXPECT_EQ(castBIGINT_float32(6.6f), 7);
+  EXPECT_EQ(castBIGINT_float32(-6.6f), -7);
+  EXPECT_EQ(castBIGINT_float32(-6.3f), -6);
+  EXPECT_EQ(castBIGINT_float32(0.0f), 0);
+  // Negative zero has to be a floating-point literal: the integer expression -0 is
+  // simply 0 and would only repeat the previous case.
+  EXPECT_EQ(castBIGINT_float32(-0.0f), 0);
+
+  // castBIGINT from doubles
+  EXPECT_EQ(castBIGINT_float64(6.6), 7);
+  EXPECT_EQ(castBIGINT_float64(-6.6), -7);
+  EXPECT_EQ(castBIGINT_float64(-6.3), -6);
+  EXPECT_EQ(castBIGINT_float64(0.0), 0);
+  EXPECT_EQ(castBIGINT_float64(-0.0), 0);
+  EXPECT_EQ(castBIGINT_float64(999999.99999999999999999999999), 1000000);
+  EXPECT_EQ(castBIGINT_float64(-999999.99999999999999999999999), -1000000);
+  EXPECT_EQ(castBIGINT_float64(INT32_MAX), 2147483647);
+  EXPECT_EQ(castBIGINT_float64(-2147483647), -2147483647);
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/bitmap.cc b/src/arrow/cpp/src/gandiva/precompiled/bitmap.cc
new file mode 100644
index 000000000..332f08dbe
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/bitmap.cc
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// BitMap functions
+
+#include "arrow/util/bit_util.h"
+
+extern "C" {
+
+#include "./types.h"
+
+// Size and index helpers for bitmaps. Every macro argument is parenthesized so that
+// expression arguments (for example `a + b` or `x | y`) expand with the intended
+// precedence.
+#define BITS_TO_BYTES(x) (((x) + 7) / 8)
+#define BITS_TO_WORDS(x) (((x) + 63) / 64)
+
+#define POS_TO_BYTE_INDEX(p) ((p) / 8)
+#define POS_TO_BIT_INDEX(p) ((p) % 8)
+
+// Returns the bit at `position` in `bmap`; forwards to arrow::BitUtil::GetBit.
+FORCE_INLINE
+bool bitMapGetBit(const uint8_t* bmap, int64_t position) {
+ return arrow::BitUtil::GetBit(bmap, position);
+}
+
+// Validity lookup: a null bitmap pointer means that every entry is valid; otherwise
+// the bit at `position` decides.
+FORCE_INLINE
+bool bitMapValidityGetBit(const uint8_t* bmap, int64_t position) {
+  return bmap == nullptr || bitMapGetBit(bmap, position);
+}
+
+// Writes `value` to the bit at `position` in `bmap`; forwards to
+// arrow::BitUtil::SetBitTo.
+FORCE_INLINE
+void bitMapSetBit(uint8_t* bmap, int64_t position, bool value) {
+ arrow::BitUtil::SetBitTo(bmap, position, value);
+}
+
+// Clears the bit at `position` when `value` is false; a true `value` leaves the
+// bitmap untouched.
+FORCE_INLINE
+void bitMapClearBitIfFalse(uint8_t* bmap, int64_t position, bool value) {
+  if (value) {
+    return;
+  }
+  arrow::BitUtil::ClearBit(bmap, position);
+}
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/bitmap_test.cc b/src/arrow/cpp/src/gandiva/precompiled/bitmap_test.cc
new file mode 100644
index 000000000..ac3084ade
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/bitmap_test.cc
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Round trip of a single bit: initially clear, then set, then cleared again.
+TEST(TestBitMap, TestSimple) {
+  static const int kNumBytes = 16;
+  uint8_t bit_map[kNumBytes];
+  memset(bit_map, 0, kNumBytes);
+
+  EXPECT_FALSE(bitMapGetBit(bit_map, 100));
+
+  // set 100th bit and verify
+  bitMapSetBit(bit_map, 100, true);
+  EXPECT_TRUE(bitMapGetBit(bit_map, 100));
+
+  // clear 100th bit and verify
+  bitMapSetBit(bit_map, 100, false);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 100));
+}
+
+// bitMapClearBitIfFalse only ever clears: a true value never changes the bit, a
+// false value clears it, and clearing an already clear bit is harmless.
+TEST(TestBitMap, TestClearIfFalse) {
+  static const int kNumBytes = 32;
+  uint8_t bit_map[kNumBytes];
+  memset(bit_map, 0, kNumBytes);
+
+  bitMapSetBit(bit_map, 24, true);
+
+  // bit should remain unchanged.
+  bitMapClearBitIfFalse(bit_map, 24, true);
+  EXPECT_TRUE(bitMapGetBit(bit_map, 24));
+
+  // bit should be cleared.
+  bitMapClearBitIfFalse(bit_map, 24, false);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 24));
+
+  // this function should have no impact if the bit is already clear.
+  bitMapClearBitIfFalse(bit_map, 24, true);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 24));
+
+  bitMapClearBitIfFalse(bit_map, 24, false);
+  EXPECT_FALSE(bitMapGetBit(bit_map, 24));
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc
new file mode 100644
index 000000000..61cac6062
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc
@@ -0,0 +1,723 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Algorithms adapted from Apache Impala
+
+#include "gandiva/precompiled/decimal_ops.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "arrow/util/logging.h"
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/decimal_xlarge.h"
+#include "gandiva/gdv_function_stubs.h"
+
+// Several operations (multiply, divide, mod, ..) require converting to 256-bit, and we
+// use the boost library for doing 256-bit operations. To avoid references to boost from
+// the precompiled-to-ir code (this causes issues with symbol resolution at runtime), we
+// use a wrapper exported from the CPP code. The wrapper functions are named gdv_xlarge_xx
+
+namespace gandiva {
+namespace decimalops {
+
+using arrow::BasicDecimal128;
+
+/// Return 'in' passed through IncreaseScaleBy(delta) when delta is positive; otherwise
+/// return 'in' unchanged.
+static BasicDecimal128 CheckAndIncreaseScale(const BasicDecimal128& in, int32_t delta) {
+  if (delta > 0) {
+    return in.IncreaseScaleBy(delta);
+  }
+  return in;
+}
+
+/// Return 'in' passed through ReduceScaleBy(delta) when delta is positive; otherwise
+/// return 'in' unchanged.
+static BasicDecimal128 CheckAndReduceScale(const BasicDecimal128& in, int32_t delta) {
+  if (delta > 0) {
+    return in.ReduceScaleBy(delta);
+  }
+  return in;
+}
+
+/// Bring x and y to the larger of their two scales and return the plain sum at that
+/// scale. 'out_scale' is accepted but not used by this function.
+static BasicDecimal128 AddFastPath(const BasicDecimalScalar128& x,
+                                   const BasicDecimalScalar128& y, int32_t out_scale) {
+  const auto common_scale = std::max(x.scale(), y.scale());
+  const auto lhs = CheckAndIncreaseScale(x.value(), common_scale - x.scale());
+  const auto rhs = CheckAndIncreaseScale(y.value(), common_scale - y.scale());
+  return lhs + rhs;
+}
+
+/// Add x and y at their common (higher) scale, then reduce the sum to 'out_scale'.
+/// The caller must already have established that the addition cannot overflow.
+static BasicDecimal128 AddNoOverflow(const BasicDecimalScalar128& x,
+                                     const BasicDecimalScalar128& y, int32_t out_scale) {
+  const auto sum_at_common_scale = AddFastPath(x, y, out_scale);
+  const auto common_scale = std::max(x.scale(), y.scale());
+  return CheckAndReduceScale(sum_at_common_scale, common_scale - out_scale);
+}
+
+/// Add two non-negative decimals by summing whole and fractional parts separately.
+/// Both x.value() and y.value() must be >= 0 (checked via DCHECK).
+static BasicDecimal128 AddLargePositive(const BasicDecimalScalar128& x,
+                                        const BasicDecimalScalar128& y,
+                                        int32_t out_scale) {
+  DCHECK_GE(x.value(), 0);
+  DCHECK_GE(y.value(), 0);
+
+  // Split each operand into its whole and fractional parts.
+  BasicDecimal128 x_whole, x_frac, y_whole, y_frac;
+  x.value().GetWholeAndFraction(x.scale(), &x_whole, &x_frac);
+  y.value().GetWholeAndFraction(y.scale(), &y_whole, &y_frac);
+
+  // Express both fractions at the larger of the two scales.
+  const auto common_scale = std::max(x.scale(), y.scale());
+  const auto x_frac_scaled = CheckAndIncreaseScale(x_frac, common_scale - x.scale());
+  const auto y_frac_scaled = CheckAndIncreaseScale(y_frac, common_scale - y.scale());
+
+  // 'headroom' is how much can be added to y's fraction before it reaches one whole
+  // unit. Comparing against it detects the carry without forming the full sum first.
+  const auto one_unit = BasicDecimal128::GetScaleMultiplier(common_scale);
+  const auto headroom = one_unit - y_frac_scaled;
+  const bool carries = x_frac_scaled >= headroom;
+
+  BasicDecimal128 frac_sum =
+      carries ? x_frac_scaled - headroom : x_frac_scaled + y_frac_scaled;
+  frac_sum = CheckAndReduceScale(frac_sum, common_scale - out_scale);
+
+  const BasicDecimal128 carry(carries ? 1 : 0);
+  const auto whole_sum = x_whole + y_whole + carry;
+  return (whole_sum * BasicDecimal128::GetScaleMultiplier(out_scale)) + frac_sum;
+}
+
+/// Add two decimals of opposite sign by summing whole and fractional parts separately.
+/// Neither value may be 0, and exactly one of them must be negative (checked via
+/// DCHECK).
+static BasicDecimal128 AddLargeNegative(const BasicDecimalScalar128& x,
+                                        const BasicDecimalScalar128& y,
+                                        int32_t out_scale) {
+  DCHECK_NE(x.value(), 0);
+  DCHECK_NE(y.value(), 0);
+  DCHECK((x.value() < 0 && y.value() > 0) || (x.value() > 0 && y.value() < 0));
+
+  // Split each operand into its whole and fractional parts.
+  BasicDecimal128 x_whole, x_frac, y_whole, y_frac;
+  x.value().GetWholeAndFraction(x.scale(), &x_whole, &x_frac);
+  y.value().GetWholeAndFraction(y.scale(), &y_whole, &y_frac);
+
+  // Express both fractions at the larger of the two scales.
+  const auto common_scale = std::max(x.scale(), y.scale());
+  x_frac = CheckAndIncreaseScale(x_frac, common_scale - x.scale());
+  y_frac = CheckAndIncreaseScale(y_frac, common_scale - y.scale());
+
+  // The operands have opposite signs, so neither partial sum can overflow.
+  auto whole = x_whole + y_whole;
+  auto frac = x_frac + y_frac;
+
+  // When the whole and fractional sums disagree in sign, move one whole unit between
+  // them so that both carry the sign of the whole part. Nothing is needed when either
+  // of them is zero.
+  const auto one_unit = BasicDecimal128::GetScaleMultiplier(common_scale);
+  if (whole < 0 && frac > 0) {
+    whole += 1;
+    frac -= one_unit;
+  } else if (whole > 0 && frac < 0) {
+    whole -= 1;
+    frac += one_unit;
+  }
+
+  frac = CheckAndReduceScale(frac, common_scale - out_scale);
+  return (whole * BasicDecimal128::GetScaleMultiplier(out_scale)) + frac;
+}
+
+/// Slow-path addition: dispatch on the signs of the operands to the helper that adds
+/// whole and fractional parts separately.
+static BasicDecimal128 AddLarge(const BasicDecimalScalar128& x,
+                                const BasicDecimalScalar128& y, int32_t out_scale) {
+  // Both operands are positive or zero.
+  if (x.value() >= 0 && y.value() >= 0) {
+    return AddLargePositive(x, y, out_scale);
+  }
+
+  // Both operands are negative or zero: add the magnitudes and negate the result.
+  if (x.value() <= 0 && y.value() <= 0) {
+    const BasicDecimalScalar128 x_magnitude(-x.value(), x.precision(), x.scale());
+    const BasicDecimalScalar128 y_magnitude(-y.value(), y.precision(), y.scale());
+    return -AddLargePositive(x_magnitude, y_magnitude, out_scale);
+  }
+
+  // One operand is positive and the other negative.
+  return AddLargeNegative(x, y, out_scale);
+}
+
+// Suppose we have a number that requires x bits to be represented and we scale it up by
+// 10^scale_by. Let's say now y bits are required to represent it. This function returns
+// the maximum possible y - x for a given 'scale_by', looked up from a precomputed table.
+// 'scale_by' is used directly as the table index and must lie in [0, 76]; this is
+// checked only through DCHECK.
+inline int32_t MaxBitsRequiredIncreaseAfterScaling(int32_t scale_by) {
+ // We rely on the following formula:
+ // bits_required(x * 10^y) <= bits_required(x) + floor(log2(10^y)) + 1
+ // The table holds floor(log2(10^x)) + 1 for x = 1, 2...75, 76. The entry for x = 0 is
+ // stored as 0 rather than 1: scaling by 10^0 leaves the value unchanged, so no extra
+ // bits are needed.
+ DCHECK_GE(scale_by, 0);
+ DCHECK_LE(scale_by, 76);
+ static const int32_t floor_log2_plus_one[] = {
+ 0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40, 44, 47, 50,
+ 54, 57, 60, 64, 67, 70, 74, 77, 80, 84, 87, 90, 94, 97, 100, 103,
+ 107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157,
+ 160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210,
+ 213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 246, 250, 253};
+ return floor_log2_plus_one[scale_by];
+}
+
+// Given a number with 'num_lz' leading zero bits, return the smallest number of leading
+// zeros it can still have after being scaled up by 10^scale_by. The result may be
+// negative when the scaled value might not fit.
+inline int32_t MinLeadingZerosAfterScaling(int32_t num_lz, int32_t scale_by) {
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  const int32_t bits_consumed = MaxBitsRequiredIncreaseAfterScaling(scale_by);
+  return num_lz - bits_consumed;
+}
+
+// Upper bound on the number of bits needed to represent num * 10^scale_by: the bits
+// occupied by |num| plus the worst-case growth caused by the scaling.
+inline int32_t MaxBitsRequiredAfterScaling(const BasicDecimalScalar128& num,
+                                           int32_t scale_by) {
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+
+  const auto magnitude = BasicDecimal128::Abs(num.value());
+  const int32_t occupied_bits = 128 - magnitude.CountLeadingBinaryZeros();
+  return occupied_bits + MaxBitsRequiredIncreaseAfterScaling(scale_by);
+}
+
+// Lower bound on the leading zero bits of |x| and |y| once the operand with the smaller
+// scale has been scaled up to match the other one.
+inline int32_t MinLeadingZeros(const BasicDecimalScalar128& x,
+                               const BasicDecimalScalar128& y) {
+  int32_t x_zeros = BasicDecimal128::Abs(x.value()).CountLeadingBinaryZeros();
+  int32_t y_zeros = BasicDecimal128::Abs(y.value()).CountLeadingBinaryZeros();
+
+  // Only the operand that has to be scaled up loses leading zeros.
+  const int32_t scale_gap = x.scale() - y.scale();
+  if (scale_gap < 0) {
+    x_zeros = MinLeadingZerosAfterScaling(x_zeros, -scale_gap);
+  } else if (scale_gap > 0) {
+    y_zeros = MinLeadingZerosAfterScaling(y_zeros, scale_gap);
+  }
+  return std::min(x_zeros, y_zeros);
+}
+
+/// Return x + y. Picks the cheapest strategy that is safe for the given output
+/// precision and the magnitudes of the operands.
+BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y,
+                    int32_t out_precision, int32_t out_scale) {
+  // Below the maximum precision the operands can simply be aligned and added.
+  if (out_precision < DecimalTypeUtil::kMaxPrecision) {
+    return AddFastPath(x, y, out_scale);
+  }
+
+  // If both operands keep at least 3 leading zero bits after scale alignment, their sum
+  // keeps at least 2. That guarantees it fits into the maximum decimal, because
+  // 2^126 - 1 < 10^38 - 1, so the operands can be added directly.
+  constexpr int32_t kSafeLeadingZeros = 3;
+  if (MinLeadingZeros(x, y) >= kSafeLeadingZeros) {
+    return AddNoOverflow(x, y, out_scale);
+  }
+
+  // Slower version: add whole and fractional parts separately, then combine.
+  return AddLarge(x, y, out_scale);
+}
+
+/// Return x - y, computed as x + (-y) through Add.
+BasicDecimal128 Subtract(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y,
+                         int32_t out_precision, int32_t out_scale) {
+  const BasicDecimalScalar128 negated_y(-y.value(), y.precision(), y.scale());
+  return Add(x, negated_y, out_precision, out_scale);
+}
+
+// Multiply when the out_precision is 38 and the scale is not trimmed, i.e. the
+// intermediate product is also the final value. Sets *overflow and returns a
+// default-constructed value when the product would exceed the maximum decimal value.
+// The caller must ensure y is non-zero, since |y| is used as a divisor.
+static BasicDecimal128 MultiplyMaxPrecisionNoScaleDown(const BasicDecimalScalar128& x,
+                                                       const BasicDecimalScalar128& y,
+                                                       int32_t out_scale,
+                                                       bool* overflow) {
+  DCHECK_EQ(x.scale() + y.scale(), out_scale);
+
+  const auto x_magnitude = BasicDecimal128::Abs(x.value());
+  const auto y_magnitude = BasicDecimal128::Abs(y.value());
+
+  // |x| * |y| exceeds the maximum exactly when |x| exceeds max / |y|.
+  *overflow = x_magnitude > BasicDecimal128::GetMaxValue() / y_magnitude;
+  if (*overflow) {
+    return BasicDecimal128();
+  }
+
+  // The product is now known to fit.
+  return x.value() * y.value();
+}
+
+// Multiply when the out_precision is 38, and there is trimming of the scale i.e
+// the intermediate value could be larger than the final value.
+//
+// The product is scaled down by delta_scale = x.scale() + y.scale() - out_scale, which
+// must be > 0. *overflow is reset to false on entry; on the 256-bit path it is passed to
+// gdv_xlarge_multiply_and_scale_down, which may update it.
+static BasicDecimal128 MultiplyMaxPrecisionAndScaleDown(const BasicDecimalScalar128& x,
+ const BasicDecimalScalar128& y,
+ int32_t out_scale,
+ bool* overflow) {
+ auto delta_scale = x.scale() + y.scale() - out_scale;
+ DCHECK_GT(delta_scale, 0);
+
+ *overflow = false;
+ BasicDecimal128 result;
+ auto x_abs = BasicDecimal128::Abs(x.value());
+ auto y_abs = BasicDecimal128::Abs(y.value());
+
+ // It's possible that the intermediate value does not fit in 128-bits, but the
+ // final value will (after scaling down).
+ bool needs_int256 = false;
+ int32_t total_leading_zeros =
+ x_abs.CountLeadingBinaryZeros() + y_abs.CountLeadingBinaryZeros();
+ // This check is quick, but conservative. In some cases it will indicate that
+ // converting to 256 bits is necessary, when it's not actually the case.
+ needs_int256 = total_leading_zeros <= 128;
+ if (ARROW_PREDICT_FALSE(needs_int256)) {
+ int64_t result_high;
+ uint64_t result_low;
+
+ // This requires converting to 256-bit, and we use the boost library for that. To
+ // avoid references to boost from the precompiled-to-ir code (this causes issues
+ // with symbol resolution at runtime), we use a wrapper exported from the CPP code.
+ gdv_xlarge_multiply_and_scale_down(x.value().high_bits(), x.value().low_bits(),
+ y.value().high_bits(), y.value().low_bits(),
+ delta_scale, &result_high, &result_low, overflow);
+ result = BasicDecimal128(result_high, result_low);
+ } else {
+ if (ARROW_PREDICT_TRUE(delta_scale <= 38)) {
+ // The largest value that result can have here is (2^64 - 1) * (2^63 - 1), which is
+ // greater than BasicDecimal128::kMaxValue.
+ result = x.value() * y.value();
+ // Since delta_scale is greater than zero, result can now be at most
+ // ((2^64 - 1) * (2^63 - 1)) / 10, which is less than BasicDecimal128::kMaxValue, so
+ // there cannot be any overflow.
+ result = result.ReduceScaleBy(delta_scale);
+ } else {
+ // We are multiplying decimal(38, 38) by decimal(38, 38). The result should be a
+ // decimal(38, 37), so delta scale = 38 + 38 - 37 = 39. Since we are not in the
+ // 256 bit intermediate value case and we are scaling down by 39, then we are
+ // guaranteed that the result is 0 (even if we try to round). The largest possible
+ // intermediate result is 38 "9"s. If we scale down by 39, the leftmost 9 is now
+ // two digits to the right of the rightmost "visible" one. The reason why we have
+ // to handle this case separately is because a scale multiplier with a delta_scale
+ // 39 does not fit into 128 bit.
+ DCHECK_EQ(delta_scale, 39);
+ result = 0;
+ }
+ }
+ return result;
+}
+
+// Multiply when the out_precision is 38. Dispatches on whether the product's natural
+// scale (x.scale() + y.scale()) has to be reduced to reach 'out_scale'.
+static BasicDecimal128 MultiplyMaxPrecision(const BasicDecimalScalar128& x,
+                                            const BasicDecimalScalar128& y,
+                                            int32_t out_scale, bool* overflow) {
+  const auto excess_scale = x.scale() + y.scale() - out_scale;
+  DCHECK_GE(excess_scale, 0);
+
+  if (excess_scale != 0) {
+    return MultiplyMaxPrecisionAndScaleDown(x, y, out_scale, overflow);
+  }
+  return MultiplyMaxPrecisionNoScaleDown(x, y, out_scale, overflow);
+}
+
+/// Return x * y at 'out_scale'. *overflow is always written: it starts as false and is
+/// updated by the max-precision path when that path is taken.
+BasicDecimal128 Multiply(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y,
+                         int32_t out_precision, int32_t out_scale, bool* overflow) {
+  *overflow = false;
+  BasicDecimal128 product;
+
+  const bool below_max_precision = out_precision < DecimalTypeUtil::kMaxPrecision;
+  if (below_max_precision) {
+    // Fast path: a plain 128-bit multiply.
+    product = x.value() * y.value();
+    DCHECK_EQ(x.scale() + y.scale(), out_scale);
+    DCHECK_LE(BasicDecimal128::Abs(product), BasicDecimal128::GetMaxValue());
+  } else if (x.value() != 0 && y.value() != 0) {
+    product = MultiplyMaxPrecision(x, y, out_scale, overflow);
+  } else {
+    // A zero operand is handled here to avoid divide-by-zero errors in the
+    // max-precision path.
+    product = BasicDecimal128(0, 0);
+  }
+
+  DCHECK(*overflow || BasicDecimal128::Abs(product) <= BasicDecimal128::GetMaxValue());
+  return product;
+}
+
+/// Divide 'x' by 'y' and return the quotient at scale 'out_scale'.
+///
+/// \param context  execution context; receives an error message when y is zero.
+/// \param overflow out-parameter, written on every path. It is true when the scaled
+///                 dividend cannot be handled even in 256 bits, or when the 256-bit
+///                 helper reports overflow.
+/// \return the quotient; 0 when y is zero; a default-constructed value on overflow.
+///
+/// The quotient is adjusted by one away from zero when twice the remainder is at least
+/// |y|.
+BasicDecimal128 Divide(int64_t context, const BasicDecimalScalar128& x,
+                       const BasicDecimalScalar128& y, int32_t out_precision,
+                       int32_t out_scale, bool* overflow) {
+  // Give the out-parameter a defined value on every path, including the
+  // divide-by-zero early return below (matches what Multiply does).
+  *overflow = false;
+
+  if (y.value() == 0) {
+    char const* err_msg = "divide by zero error";
+    gdv_fn_context_set_error_msg(context, err_msg);
+    return 0;
+  }
+
+  // scale up to the output scale, and do an integer division.
+  int32_t delta_scale = out_scale + y.scale() - x.scale();
+  DCHECK_GE(delta_scale, 0);
+
+  BasicDecimal128 result;
+  auto num_bits_required_after_scaling = MaxBitsRequiredAfterScaling(x, delta_scale);
+  if (num_bits_required_after_scaling <= 127) {
+    // fast-path. The dividend fits in 128-bit after scaling too.
+
+    // do the division.
+    auto x_scaled = CheckAndIncreaseScale(x.value(), delta_scale);
+    BasicDecimal128 remainder;
+    auto status = x_scaled.Divide(y.value(), &result, &remainder);
+    DCHECK_EQ(status, arrow::DecimalStatus::kSuccess);
+
+    // round-up. Sign() is +1 or -1, so (sx ^ sy) + 1 evaluates to +1 when the signs
+    // match and to -1 when they differ.
+    if (BasicDecimal128::Abs(2 * remainder) >= BasicDecimal128::Abs(y.value())) {
+      result += (x.value().Sign() ^ y.value().Sign()) + 1;
+    }
+  } else {
+    // convert to 256-bit and do the divide.
+    *overflow = delta_scale > 38 && num_bits_required_after_scaling > 255;
+    if (!*overflow) {
+      int64_t result_high;
+      uint64_t result_low;
+
+      gdv_xlarge_scale_up_and_divide(x.value().high_bits(), x.value().low_bits(),
+                                     y.value().high_bits(), y.value().low_bits(),
+                                     delta_scale, &result_high, &result_low, overflow);
+      result = BasicDecimal128(result_high, result_low);
+    }
+  }
+  return result;
+}
+
+/// Divide 'x' by 'y' and return the remainder, expressed at the higher of the two
+/// input scales.
+///
+/// \param context  execution context; receives an error message when y is zero.
+/// \param overflow out-parameter, written on every path; this function always sets it
+///                 to false.
+/// \return the remainder, or 0 when y is zero.
+BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x,
+                    const BasicDecimalScalar128& y, int32_t out_precision,
+                    int32_t out_scale, bool* overflow) {
+  // Give the out-parameter a defined value on every path, including the
+  // divide-by-zero early return below.
+  *overflow = false;
+
+  if (y.value() == 0) {
+    char const* err_msg = "divide by zero error";
+    gdv_fn_context_set_error_msg(context, err_msg);
+    return 0;
+  }
+
+  // Adjust x and y to the same scale (the higher one), and then do an integer mod.
+  BasicDecimal128 result;
+  int32_t min_lz = MinLeadingZeros(x, y);
+  if (min_lz >= 2) {
+    // Both operands still fit in 128 bits after scale alignment.
+    auto higher_scale = std::max(x.scale(), y.scale());
+    auto x_scaled = CheckAndIncreaseScale(x.value(), higher_scale - x.scale());
+    auto y_scaled = CheckAndIncreaseScale(y.value(), higher_scale - y.scale());
+    result = x_scaled % y_scaled;
+    DCHECK_LE(BasicDecimal128::Abs(result), BasicDecimal128::GetMaxValue());
+  } else {
+    // Scale alignment may not fit in 128 bits: delegate to the extra-large helper.
+    int64_t result_high;
+    uint64_t result_low;
+
+    gdv_xlarge_mod(x.value().high_bits(), x.value().low_bits(), x.scale(),
+                   y.value().high_bits(), y.value().low_bits(), y.scale(), &result_high,
+                   &result_low);
+    result = BasicDecimal128(result_high, result_low);
+  }
+  DCHECK(BasicDecimal128::Abs(result) <= BasicDecimal128::Abs(x.value()) ||
+         BasicDecimal128::Abs(result) <= BasicDecimal128::Abs(y.value()));
+  return result;
+}
+
+/// Three-way comparison of two values that share the same scale:
+/// 0 if x == y, -1 if x < y, 1 otherwise.
+int32_t CompareSameScale(const BasicDecimal128& x, const BasicDecimal128& y) {
+  if (x == y) {
+    return 0;
+  }
+  return (x < y) ? -1 : 1;
+}
+
+/// Three-way comparison of two decimals that may have different scales:
+/// 0 if x == y, -1 if x < y, 1 if x > y.
+int32_t Compare(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y) {
+  const int32_t scale_gap = x.scale() - y.scale();
+
+  // Fast path: the scales already match.
+  if (scale_gap == 0) {
+    return CompareSameScale(x.value(), y.value());
+  }
+
+  // Scaling an operand up adds |scale_gap| digits to its precision. If that exceeds
+  // the maximum precision, the scaled value may not fit in 128 bits, so the comparison
+  // is delegated to the extra-large helper.
+  const bool x_needs_xlarge =
+      scale_gap < 0 && (x.precision() - scale_gap > DecimalTypeUtil::kMaxPrecision);
+  const bool y_needs_xlarge =
+      y.precision() + scale_gap > DecimalTypeUtil::kMaxPrecision;
+  if (x_needs_xlarge || y_needs_xlarge) {
+    return gdv_xlarge_compare(x.value().high_bits(), x.value().low_bits(), x.scale(),
+                              y.value().high_bits(), y.value().low_bits(), y.scale());
+  }
+
+  // Scale up whichever operand has the smaller scale, then compare directly.
+  if (scale_gap < 0) {
+    return CompareSameScale(x.value().IncreaseScaleBy(-scale_gap), y.value());
+  }
+  return CompareSameScale(x.value(), y.value().IncreaseScaleBy(scale_gap));
+}
+
+// Early-return helper for functions that report overflow through a 'bool*'.
+// If *overflow is already true, or 'condition' holds, it sets *overflow to true and
+// makes the enclosing function return 0. Note that *overflow is read before it is
+// written, so the flag must hold a defined value when the macro is reached.
+#define DECIMAL_OVERFLOW_IF(condition, overflow) \
+ do { \
+ if (*overflow || (condition)) { \
+ *overflow = true; \
+ return 0; \
+ } \
+ } while (0)
+
+/// Return the scale multiplier for 'precision' minus one, i.e. the largest magnitude
+/// that fits in 'precision' decimal digits.
+static BasicDecimal128 GetMaxValue(int32_t precision) {
+  const auto power_of_ten = BasicDecimal128::GetScaleMultiplier(precision);
+  return power_of_ten - 1;
+}
+
+// Compute the double scale multipliers once.
+// kDoubleScaleMultipliers[i] holds 10^i as a double for i in [0, kMaxPrecision]; each
+// entry is obtained by multiplying the previous one by 10. The table is filled by an
+// immediately-invoked lambda during static initialization.
+// NOTE(review): std::array is used here but <array> is not among this file's direct
+// includes; presumably it arrives transitively through another header - confirm.
+static std::array<double, DecimalTypeUtil::kMaxPrecision + 1> kDoubleScaleMultipliers =
+ ([]() -> std::array<double, DecimalTypeUtil::kMaxPrecision + 1> {
+ std::array<double, DecimalTypeUtil::kMaxPrecision + 1> values;
+ values[0] = 1.0;
+ for (int32_t idx = 1; idx <= DecimalTypeUtil::kMaxPrecision; idx++) {
+ values[idx] = values[idx - 1] * 10;
+ }
+ return values;
+ })();
+
+/// Convert a double to a decimal with the given precision and scale.
+///
+/// The input is multiplied by 10^scale and rounded with std::round. *overflow is set
+/// (and 0 returned) when the scaled value is NaN, does not fit in a signed 128-bit
+/// integer, or exceeds the largest value allowed by 'precision'. *overflow is also read
+/// on entry (through DECIMAL_OVERFLOW_IF), so it must hold a defined value.
+/// 'scale' indexes kDoubleScaleMultipliers directly and is not range-checked here.
+BasicDecimal128 FromDouble(double in, int32_t precision, int32_t scale, bool* overflow) {
+  // Multiply decimal with the scale
+  auto unscaled = in * kDoubleScaleMultipliers[scale];
+  DECIMAL_OVERFLOW_IF(std::isnan(unscaled), overflow);
+
+  unscaled = std::round(unscaled);
+
+  // convert scaled double to int128
+  int32_t sign = unscaled < 0 ? -1 : 1;
+  auto unscaled_abs = std::abs(unscaled);
+
+  // overflow if >= 2^127. The bound is tested inclusively against 2^127 itself because
+  // 2^127 - 1 is not representable as a double (it rounds to 2^127); a strict '>' test
+  // against that rounded value would let exactly 2^127 through, and high_bits below
+  // would then wrap to a negative int64_t.
+  DECIMAL_OVERFLOW_IF(unscaled_abs >= std::ldexp(static_cast<double>(1), 127), overflow);
+
+  // Split the magnitude into the upper and lower 64-bit halves.
+  uint64_t high_bits = static_cast<uint64_t>(std::ldexp(unscaled_abs, -64));
+  uint64_t low_bits = static_cast<uint64_t>(
+      unscaled_abs - std::ldexp(static_cast<double>(high_bits), 64));
+
+  auto result = BasicDecimal128(static_cast<int64_t>(high_bits), low_bits);
+
+  // overflow if > max value based on precision
+  DECIMAL_OVERFLOW_IF(result > GetMaxValue(precision), overflow);
+  return result * sign;
+}
+
+/// Convert a decimal to a double: the magnitude is rebuilt from its two 64-bit halves,
+/// the sign is re-applied, and the result is divided by 10^scale.
+/// 'overflow' is accepted but never written by this function.
+double ToDouble(const BasicDecimalScalar128& in, bool* overflow) {
+  const auto magnitude = BasicDecimal128::Abs(in.value());
+
+  // Magnitude as a double: low half plus high half shifted up by 64 bits.
+  const double low_part = static_cast<double>(magnitude.low_bits());
+  const double high_part = std::ldexp(static_cast<double>(magnitude.high_bits()), 64);
+  const double unscaled = low_part + high_part;
+
+  // Re-apply the sign, then apply the scale.
+  const int64_t sign = in.value().Sign();
+  return (unscaled * sign) / kDoubleScaleMultipliers[in.scale()];
+}
+
+/// Convert a 64-bit integer to a decimal with the given precision and scale.
+/// Sets *overflow (and returns 0) when |in| needs more than 'precision - scale' whole
+/// digits. *overflow is also read on entry (through DECIMAL_OVERFLOW_IF), so it must
+/// hold a defined value.
+BasicDecimal128 FromInt64(int64_t in, int32_t precision, int32_t scale, bool* overflow) {
+  // Widen to 128 bits before taking the magnitude: std::abs on the most negative
+  // int64_t has undefined behavior, whereas the 128-bit magnitude is well defined.
+  const BasicDecimal128 value(in);
+
+  // check if multiplying by scale will cause an overflow.
+  DECIMAL_OVERFLOW_IF(BasicDecimal128::Abs(value) > GetMaxValue(precision - scale),
+                      overflow);
+  return value * BasicDecimal128::GetScaleMultiplier(scale);
+}
+
+// Re-express x at 'out_scale' and check that it fits in 'out_precision' digits.
+// No rounding is performed when digits are dropped; callers handle rounding themselves.
+// On overflow, *overflow is set and 0 is returned (via DECIMAL_OVERFLOW_IF).
+static BasicDecimal128 ModifyScaleAndPrecision(const BasicDecimalScalar128& x,
+                                               int32_t out_precision, int32_t out_scale,
+                                               bool* overflow) {
+  const int32_t scale_change = out_scale - x.scale();
+
+  if (scale_change < 0) {
+    // Scale goes down: drop the trailing digits without rounding, then check the
+    // result against the output precision.
+    const auto reduced = x.value().ReduceScaleBy(-scale_change, false);
+    DECIMAL_OVERFLOW_IF(BasicDecimal128::Abs(reduced) > GetMaxValue(out_precision),
+                        overflow);
+    return reduced;
+  }
+
+  // Scale stays or goes up: make sure the multiplication cannot overflow before
+  // performing it.
+  DECIMAL_OVERFLOW_IF(
+      BasicDecimal128::Abs(x.value()) > GetMaxValue(out_precision - scale_change),
+      overflow);
+  return x.value().IncreaseScaleBy(scale_change);
+}
+
+// Rounding modes understood by ComputeRoundingDelta. Each mode decides which
+// adjustment (-1, 0 or +1) is applied after the trailing digits have been dropped.
+enum RoundType {
+ kRoundTypeCeil, // +1 if +ve and trailing value is > 0, else no rounding.
+ kRoundTypeFloor, // -1 if -ve and trailing value is < 0, else no rounding.
+ kRoundTypeTrunc, // no rounding, truncate the trailing digits.
+ kRoundTypeHalfRoundUp, // if +ve and trailing value is >= half of base, +1.
+ // else if -ve and trailing value is >= half of base, -1.
+};
+
+// Compute the rounding delta (-1, 0 or +1) to apply after x, held at scale 'x_scale',
+// has had its trailing digits dropped to reach 'out_scale'. Returns 0 for truncation
+// and whenever no digits are dropped (out_scale >= x_scale).
+static int32_t ComputeRoundingDelta(const BasicDecimal128& x, int32_t x_scale,
+                                    int32_t out_scale, RoundType type) {
+  const bool drops_digits = out_scale < x_scale;
+  if (type == kRoundTypeTrunc || !drops_digits) {
+    return 0;
+  }
+
+  // 'trailing' is the part of x that gets dropped; 'base' is one unit of the last
+  // retained digit, expressed at x's scale.
+  const auto base = BasicDecimal128::GetScaleMultiplier(x_scale - out_scale);
+  const auto trailing = x % base;
+
+  // With nothing dropped, no mode adjusts the value.
+  if (trailing == 0) {
+    return 0;
+  }
+
+  switch (type) {
+    case kRoundTypeHalfRoundUp:
+      // Round away from zero once the dropped part reaches half of 'base'.
+      if (BasicDecimal128::Abs(trailing) < base / 2) {
+        return 0;
+      }
+      return (x < 0) ? -1 : 1;
+
+    case kRoundTypeCeil:
+      // Negative values are left as they are; non-negative ones move up.
+      return (x < 0) ? 0 : 1;
+
+    case kRoundTypeFloor:
+      // Positive values are left as they are; non-positive ones move down.
+      return (x > 0) ? 0 : -1;
+
+    case kRoundTypeTrunc:
+      break;
+  }
+  return 0;
+}
+
+// Re-express x at 'out_scale' (which must be >= 0) and apply the requested rounding.
+// On overflow, *overflow is set and 0 is returned.
+static BasicDecimal128 RoundWithPositiveScale(const BasicDecimalScalar128& x,
+                                              int32_t out_precision, int32_t out_scale,
+                                              RoundType round_type, bool* overflow) {
+  DCHECK_GE(out_scale, 0);
+
+  const auto rescaled = ModifyScaleAndPrecision(x, out_precision, out_scale, overflow);
+  if (*overflow) {
+    return 0;
+  }
+
+  const auto adjustment =
+      ComputeRoundingDelta(x.value(), x.scale(), out_scale, round_type);
+  if (adjustment != 0) {
+    // A non-zero adjustment implies that the output scale is smaller than the input
+    // scale, so at least one digit after the decimal point was dropped. The adjustment
+    // can add at most one digit before the decimal point, hence overflow is only
+    // possible when the output precision is smaller than the input precision.
+    DCHECK_GT(x.scale(), out_scale);
+    const auto rounded = rescaled + adjustment;
+    DECIMAL_OVERFLOW_IF(out_precision < x.precision() &&
+                            BasicDecimal128::Abs(rounded) > GetMaxValue(out_precision),
+                        overflow);
+    return rounded;
+  }
+  return rescaled;
+}
+
+// Modify scale to drop all digits to the right of the decimal and round.
+// Then, zero out 'rounding_scale' number of digits to the left of the decimal point.
+// 'rounding_scale' must be negative. On overflow, *overflow is set and 0 is returned
+// (via DECIMAL_OVERFLOW_IF).
+static BasicDecimal128 RoundWithNegativeScale(const BasicDecimalScalar128& x,
+ int32_t out_precision,
+ int32_t rounding_scale,
+ RoundType round_type, bool* overflow) {
+ DCHECK_LT(rounding_scale, 0);
+
+ // get rid of the fractional part.
+ // An overflow reported by this call is not checked right away; it is picked up by the
+ // DECIMAL_OVERFLOW_IF below, which also tests *overflow.
+ auto scaled = ModifyScaleAndPrecision(x, out_precision, 0, overflow);
+ // NOTE(review): ComputeRoundingDelta returns 0 whenever out_scale >= x_scale. Here
+ // x_scale is 0 and out_scale is -rounding_scale (> 0), so rounding_delta looks like it
+ // is always 0, and every round type then behaves like truncation for negative scales.
+ // Confirm whether 'rounding_scale' (without the negation) was intended.
+ auto rounding_delta = ComputeRoundingDelta(scaled, 0, -rounding_scale, round_type);
+
+ // 'delta' removes the digits below 'base' and adds the rounding adjustment, if any.
+ auto base = BasicDecimal128::GetScaleMultiplier(-rounding_scale);
+ auto delta = rounding_delta * base - (scaled % base);
+ DECIMAL_OVERFLOW_IF(BasicDecimal128::Abs(scaled) >
+ GetMaxValue(out_precision) - BasicDecimal128::Abs(delta),
+ overflow);
+ return scaled + delta;
+}
+
+/// Round x to 'rounding_scale' digits after the decimal point using half-round-up.
+/// A negative 'rounding_scale' is handled by RoundWithNegativeScale.
+BasicDecimal128 Round(const BasicDecimalScalar128& x, int32_t out_precision,
+                      int32_t out_scale, int32_t rounding_scale, bool* overflow) {
+  const bool negative_rounding = rounding_scale < 0;
+
+  // Nothing to do when the target scale equals the argument's scale.
+  if (!negative_rounding && x.scale() == out_scale) {
+    return x.value();
+  }
+
+  constexpr RoundType kMode = RoundType::kRoundTypeHalfRoundUp;
+  if (negative_rounding) {
+    return RoundWithNegativeScale(x, out_precision, rounding_scale, kMode, overflow);
+  }
+  return RoundWithPositiveScale(x, out_precision, rounding_scale, kMode, overflow);
+}
+
+/// Truncate x to 'rounding_scale' digits after the decimal point (no rounding).
+/// A negative 'rounding_scale' is handled by RoundWithNegativeScale.
+BasicDecimal128 Truncate(const BasicDecimalScalar128& x, int32_t out_precision,
+                         int32_t out_scale, int32_t rounding_scale, bool* overflow) {
+  const bool negative_rounding = rounding_scale < 0;
+
+  // Nothing to do when the target scale equals the argument's scale.
+  if (!negative_rounding && x.scale() == out_scale) {
+    return x.value();
+  }
+
+  constexpr RoundType kMode = RoundType::kRoundTypeTrunc;
+  if (negative_rounding) {
+    return RoundWithNegativeScale(x, out_precision, rounding_scale, kMode, overflow);
+  }
+  return RoundWithPositiveScale(x, out_precision, rounding_scale, kMode, overflow);
+}
+
+/// Round x to scale 0 using the ceiling rule, keeping x's own precision.
+BasicDecimal128 Ceil(const BasicDecimalScalar128& x, bool* overflow) {
+  constexpr int32_t kIntegerScale = 0;
+  return RoundWithPositiveScale(x, x.precision(), kIntegerScale,
+                                RoundType::kRoundTypeCeil, overflow);
+}
+
+/// Round x to scale 0 using the floor rule, keeping x's own precision.
+BasicDecimal128 Floor(const BasicDecimalScalar128& x, bool* overflow) {
+  constexpr int32_t kIntegerScale = 0;
+  return RoundWithPositiveScale(x, x.precision(), kIntegerScale,
+                                RoundType::kRoundTypeFloor, overflow);
+}
+
+/// Convert x to a decimal with 'out_precision' and 'out_scale', using half-round-up
+/// when digits are dropped. On overflow, *overflow is set and 0 is returned.
+/// The requested precision and scale are validated only through DCHECKs.
+BasicDecimal128 Convert(const BasicDecimalScalar128& x, int32_t out_precision,
+                        int32_t out_scale, bool* overflow) {
+  DCHECK_GE(out_scale, 0);
+  DCHECK_LE(out_scale, DecimalTypeUtil::kMaxScale);
+  DCHECK_GT(out_precision, 0);
+  // The precision is bounded by the maximum precision, not by the maximum scale.
+  DCHECK_LE(out_precision, DecimalTypeUtil::kMaxPrecision);
+
+  return RoundWithPositiveScale(x, out_precision, out_scale,
+                                RoundType::kRoundTypeHalfRoundUp, overflow);
+}
+
+/// Convert a decimal to a 64-bit integer: round half-up to scale 0, then check that
+/// the result lies within the int64_t range. On overflow, *overflow is set and 0 is
+/// returned.
+int64_t ToInt64(const BasicDecimalScalar128& in, bool* overflow) {
+  constexpr int32_t kIntegerScale = 0;
+  const auto whole = RoundWithPositiveScale(in, in.precision(), kIntegerScale,
+                                            RoundType::kRoundTypeHalfRoundUp, overflow);
+
+  const bool above_range = whole > std::numeric_limits<int64_t>::max();
+  const bool below_range = whole < std::numeric_limits<int64_t>::min();
+  DECIMAL_OVERFLOW_IF(above_range || below_range, overflow);
+
+  // In range, so the low 64 bits carry the whole value.
+  return static_cast<int64_t>(whole.low_bits());
+}
+
+} // namespace decimalops
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.h b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.h
new file mode 100644
index 000000000..292dce220
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.h
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include "gandiva/basic_decimal_scalar.h"
+
+namespace gandiva {
+namespace decimalops {
+
+/// Return the sum of 'x' and 'y'.
+/// out_precision and out_scale are passed along for efficiency, they must match
+/// the rules in DecimalTypeSql::GetResultType.
+arrow::BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y,
+ int32_t out_precision, int32_t out_scale);
+
+/// Subtract 'y' from 'x', and return the result.
+arrow::BasicDecimal128 Subtract(const BasicDecimalScalar128& x,
+ const BasicDecimalScalar128& y, int32_t out_precision,
+ int32_t out_scale);
+
+/// Multiply 'x' by 'y', and return the result.
+/// 'overflow' is an out-parameter that reports whether the product overflowed.
+arrow::BasicDecimal128 Multiply(const BasicDecimalScalar128& x,
+ const BasicDecimalScalar128& y, int32_t out_precision,
+ int32_t out_scale, bool* overflow);
+
+/// Divide 'x' by 'y', and return the result.
+/// If 'y' is zero, an error message is set on 'context' and 0 is returned.
+arrow::BasicDecimal128 Divide(int64_t context, const BasicDecimalScalar128& x,
+ const BasicDecimalScalar128& y, int32_t out_precision,
+ int32_t out_scale, bool* overflow);
+
+/// Divide 'x' by 'y', and return the remainder.
+/// If 'y' is zero, an error message is set on 'context' and 0 is returned.
+arrow::BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x,
+ const BasicDecimalScalar128& y, int32_t out_precision,
+ int32_t out_scale, bool* overflow);
+
+/// Compare two decimals. Returns :
+/// 0 if x == y
+/// 1 if x > y
+/// -1 if x < y
+int32_t Compare(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y);
+
+/// Convert to decimal from double.
+/// 'overflow' is set when the value does not fit in the requested precision.
+BasicDecimal128 FromDouble(double in, int32_t precision, int32_t scale, bool* overflow);
+
+/// Convert from decimal to double.
+double ToDouble(const BasicDecimalScalar128& in, bool* overflow);
+
+/// Convert to decimal from gdv_int64.
+/// 'overflow' is set when the value does not fit in the requested precision and scale.
+BasicDecimal128 FromInt64(int64_t in, int32_t precision, int32_t scale, bool* overflow);
+
+/// Convert from decimal to gdv_int64.
+/// 'overflow' is set when the rounded value is outside the int64 range.
+int64_t ToInt64(const BasicDecimalScalar128& in, bool* overflow);
+
+/// Convert from one decimal scale/precision to another.
+BasicDecimal128 Convert(const BasicDecimalScalar128& x, int32_t out_precision,
+ int32_t out_scale, bool* overflow);
+
+/// Round decimal to 'rounding_scale' digits after the decimal point. A negative
+/// 'rounding_scale' zeroes out digits to the left of the decimal point instead.
+BasicDecimal128 Round(const BasicDecimalScalar128& x, int32_t out_precision,
+ int32_t out_scale, int32_t rounding_scale, bool* overflow);
+
+/// Truncate decimal to 'rounding_scale' digits after the decimal point. A negative
+/// 'rounding_scale' zeroes out digits to the left of the decimal point instead.
+BasicDecimal128 Truncate(const BasicDecimalScalar128& x, int32_t out_precision,
+ int32_t out_scale, int32_t rounding_scale, bool* overflow);
+
+/// Ceil decimal: round to scale 0 using the ceiling rule.
+BasicDecimal128 Ceil(const BasicDecimalScalar128& x, bool* overflow);
+
+/// Floor decimal: round to scale 0 using the floor rule.
+BasicDecimal128 Floor(const BasicDecimalScalar128& x, bool* overflow);
+
+} // namespace decimalops
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops_test.cc
new file mode 100644
index 000000000..be8a1fe8a
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops_test.cc
@@ -0,0 +1,1095 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "arrow/testing/gtest_util.h"
+#include "gandiva/decimal_scalar.h"
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/execution_context.h"
+#include "gandiva/precompiled/decimal_ops.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Large Decimal128 operands made of 35, 36 and 38 consecutive '9' digits. They are
+// used by the tests below to exercise values at or near the 38-digit precision limit
+// used throughout this file.
+const arrow::Decimal128 kThirtyFive9s(std::string(35, '9'));
+const arrow::Decimal128 kThirtySix9s(std::string(36, '9'));
+const arrow::Decimal128 kThirtyEight9s(std::string(38, '9'));
+
+// Fixture for the decimalops arithmetic tests. Each "...AndVerify" helper forwards to
+// Verify() or VerifyAllSign() with the matching DecimalTypeUtil operation.
+class TestDecimalSql : public ::testing::Test {
+ protected:
+  // Applies 'op' to (x, y) and checks the overflow flag and, when no overflow is
+  // expected, the computed value against 'expected_result'.
+  static void Verify(DecimalTypeUtil::Op op, const DecimalScalar128& x,
+                     const DecimalScalar128& y, const DecimalScalar128& expected_result,
+                     bool expected_overflow);
+
+  // Calls Verify() once for each sign combination of 'left' and 'right'.
+  static void VerifyAllSign(DecimalTypeUtil::Op op, const DecimalScalar128& left,
+                            const DecimalScalar128& right,
+                            const DecimalScalar128& expected_output,
+                            bool expected_overflow);
+
+  // x + y; never expects overflow.
+  void AddAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
+                    const DecimalScalar128& expected_result) {
+    // TODO: overflow checks
+    Verify(DecimalTypeUtil::kOpAdd, x, y, expected_result,
+           /*expected_overflow=*/false);
+  }
+
+  // x - y; never expects overflow.
+  void SubtractAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
+                         const DecimalScalar128& expected_result) {
+    // TODO: overflow checks
+    Verify(DecimalTypeUtil::kOpSubtract, x, y, expected_result,
+           /*expected_overflow=*/false);
+  }
+
+  // x * y for the operands exactly as given.
+  void MultiplyAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
+                         const DecimalScalar128& expected_result,
+                         bool expected_overflow) {
+    Verify(DecimalTypeUtil::kOpMultiply, x, y, expected_result, expected_overflow);
+  }
+
+  // x * y for every sign combination of the operands.
+  void MultiplyAndVerifyAllSign(const DecimalScalar128& x, const DecimalScalar128& y,
+                                const DecimalScalar128& expected_result,
+                                bool expected_overflow) {
+    VerifyAllSign(DecimalTypeUtil::kOpMultiply, x, y, expected_result,
+                  expected_overflow);
+  }
+
+  // x / y for the operands exactly as given.
+  void DivideAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
+                       const DecimalScalar128& expected_result, bool expected_overflow) {
+    Verify(DecimalTypeUtil::kOpDivide, x, y, expected_result, expected_overflow);
+  }
+
+  // x / y for every sign combination of the operands.
+  void DivideAndVerifyAllSign(const DecimalScalar128& x, const DecimalScalar128& y,
+                              const DecimalScalar128& expected_result,
+                              bool expected_overflow) {
+    VerifyAllSign(DecimalTypeUtil::kOpDivide, x, y, expected_result,
+                  expected_overflow);
+  }
+
+  // x mod y for the operands exactly as given.
+  void ModAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
+                    const DecimalScalar128& expected_result, bool expected_overflow) {
+    Verify(DecimalTypeUtil::kOpMod, x, y, expected_result, expected_overflow);
+  }
+
+  // x mod y for every sign combination of the operands.
+  void ModAndVerifyAllSign(const DecimalScalar128& x, const DecimalScalar128& y,
+                           const DecimalScalar128& expected_result,
+                           bool expected_overflow) {
+    VerifyAllSign(DecimalTypeUtil::kOpMod, x, y, expected_result, expected_overflow);
+  }
+};
+
+#define EXPECT_DECIMAL_EQ(op, x, y, expected_result, expected_overflow, actual_result, \
+ actual_overflow) \
+ { \
+ EXPECT_TRUE(expected_overflow == actual_overflow) \
+ << op << "(" << (x).ToString() << " and " << (y).ToString() << ")" \
+ << " expected overflow : " << expected_overflow \
+ << " actual overflow : " << actual_overflow; \
+ if (!expected_overflow) { \
+ EXPECT_TRUE(expected_result == actual_result) \
+ << op << "(" << (x).ToString() << " and " << (y).ToString() << ")" \
+ << " expected : " << expected_result.ToString() \
+ << " actual : " << actual_result.ToString(); \
+ } \
+ }
+
+// Applies the binary decimal operation 'op' to 'x' and 'y' and checks the outcome.
+//
+// The output precision/scale are derived with DecimalTypeUtil::GetResultType from the
+// operand types. The overflow flag reported by the operation must equal
+// 'expected_overflow'; the value is compared with 'expected_result' only when no
+// overflow is expected. Add and Subtract do not report overflow, so for them the flag
+// stays false. An unsupported 'op' is a fatal test failure.
+void TestDecimalSql::Verify(DecimalTypeUtil::Op op, const DecimalScalar128& x,
+                            const DecimalScalar128& y,
+                            const DecimalScalar128& expected_result,
+                            bool expected_overflow) {
+  auto t1 = std::make_shared<arrow::Decimal128Type>(x.precision(), x.scale());
+  auto t2 = std::make_shared<arrow::Decimal128Type>(y.precision(), y.scale());
+  bool overflow = false;
+  // No execution context is supplied; these cases never divide by zero.
+  int64_t context = 0;
+
+  Decimal128TypePtr out_type;
+  // Must be fatal: out_type is dereferenced below, so continuing after a failed
+  // lookup would dereference a null pointer instead of reporting the failure.
+  ASSERT_OK(DecimalTypeUtil::GetResultType(op, {t1, t2}, &out_type));
+
+  arrow::BasicDecimal128 out_value;
+  std::string op_name;
+  switch (op) {
+    case DecimalTypeUtil::kOpAdd:
+      op_name = "add";
+      out_value = decimalops::Add(x, y, out_type->precision(), out_type->scale());
+      break;
+
+    case DecimalTypeUtil::kOpSubtract:
+      op_name = "subtract";
+      out_value = decimalops::Subtract(x, y, out_type->precision(), out_type->scale());
+      break;
+
+    case DecimalTypeUtil::kOpMultiply:
+      op_name = "multiply";
+      out_value =
+          decimalops::Multiply(x, y, out_type->precision(), out_type->scale(), &overflow);
+      break;
+
+    case DecimalTypeUtil::kOpDivide:
+      op_name = "divide";
+      out_value = decimalops::Divide(context, x, y, out_type->precision(),
+                                     out_type->scale(), &overflow);
+      break;
+
+    case DecimalTypeUtil::kOpMod:
+      op_name = "mod";
+      out_value = decimalops::Mod(context, x, y, out_type->precision(), out_type->scale(),
+                                  &overflow);
+      break;
+
+    default:
+      // not implemented.
+      FAIL() << "unsupported decimal operation";
+  }
+  EXPECT_DECIMAL_EQ(op_name, x, y, expected_result, expected_overflow,
+                    DecimalScalar128(out_value, out_type->precision(), out_type->scale()),
+                    overflow);
+}
+
+// Runs Verify() for all four sign combinations of 'left' and 'right'.
+//
+// 'expected_output' is the result for two positive operands. For mod the expected sign
+// follows the left operand only; for multiply and divide it is negative exactly when
+// one operand is negative. Any other operation is a fatal failure, raised after the
+// first two checks have run.
+void TestDecimalSql::VerifyAllSign(DecimalTypeUtil::Op op, const DecimalScalar128& left,
+                                   const DecimalScalar128& right,
+                                   const DecimalScalar128& expected_output,
+                                   bool expected_overflow) {
+  const DecimalScalar128 neg_left = -left;
+  const DecimalScalar128 neg_right = -right;
+  const DecimalScalar128 neg_expected = -expected_output;
+
+  // both +ve
+  Verify(op, left, right, expected_output, expected_overflow);
+
+  // left -ve
+  Verify(op, neg_left, right, neg_expected, expected_overflow);
+
+  const bool is_mod = (op == DecimalTypeUtil::kOpMod);
+  if (!is_mod) {
+    ASSERT_TRUE(op == DecimalTypeUtil::kOpMultiply || op == DecimalTypeUtil::kOpDivide);
+  }
+
+  // right -ve
+  Verify(op, left, neg_right, is_mod ? expected_output : neg_expected,
+         expected_overflow);
+
+  // both -ve
+  Verify(op, neg_left, neg_right, is_mod ? neg_expected : expected_output,
+         expected_overflow);
+}
+
+// Addition: equal-scale operands, mixed scales, negative operands, and operands at the
+// 38-digit precision limit. Each DecimalScalar128 is {value, precision, scale}; the
+// precision/scale of the expected value is the result type derived for the operands.
+TEST_F(TestDecimalSql, Add) {
+ // fast-path
+ AddAndVerify(DecimalScalar128{"201", 30, 3}, // x
+ DecimalScalar128{"301", 30, 3}, // y
+ DecimalScalar128{"502", 31, 3}); // expected
+
+ // max precision
+ AddAndVerify(DecimalScalar128{"09999999999999999999999999999999000000", 38, 5}, // x
+ DecimalScalar128{"100", 38, 7}, // y
+ DecimalScalar128{"99999999999999999999999999999990000010", 38, 6});
+
+ // Both -ve
+ AddAndVerify(DecimalScalar128{"-201", 30, 3}, // x
+ DecimalScalar128{"-301", 30, 2}, // y
+ DecimalScalar128{"-3211", 32, 3}); // expected
+
+ // -ve and max precision
+ AddAndVerify(DecimalScalar128{"-09999999999999999999999999999999000000", 38, 5}, // x
+ DecimalScalar128{"-100", 38, 7}, // y
+ DecimalScalar128{"-99999999999999999999999999999990000010", 38, 6});
+}
+
+// Subtraction: mirrors the Add cases (equal scales, mixed scales, negative operands,
+// and operands at the 38-digit precision limit).
+TEST_F(TestDecimalSql, Subtract) {
+ // fast-path
+ SubtractAndVerify(DecimalScalar128{"201", 30, 3}, // x
+ DecimalScalar128{"301", 30, 3}, // y
+ DecimalScalar128{"-100", 31, 3}); // expected
+
+ // max precision
+ SubtractAndVerify(
+ DecimalScalar128{"09999999999999999999999999999999000000", 38, 5}, // x
+ DecimalScalar128{"100", 38, 7}, // y
+ DecimalScalar128{"99999999999999999999999999999989999990", 38, 6});
+
+ // Both -ve
+ SubtractAndVerify(DecimalScalar128{"-201", 30, 3}, // x
+ DecimalScalar128{"-301", 30, 2}, // y
+ DecimalScalar128{"2809", 32, 3}); // expected
+
+ // -ve and max precision
+ SubtractAndVerify(
+ DecimalScalar128{"-09999999999999999999999999999999000000", 38, 5}, // x
+ DecimalScalar128{"-100", 38, 7}, // y
+ DecimalScalar128{"-99999999999999999999999999999989999990", 38, 6});
+}
+
+// Multiplication: covers results below and at the 38-digit output precision, zero
+// operands, cases where the output scale is trimmed, and cases expected to overflow.
+// When overflow is expected the "expected" value is a placeholder: the helper only
+// compares values when no overflow is expected.
+TEST_F(TestDecimalSql, Multiply) {
+ // fast-path : out_precision < 38
+ MultiplyAndVerifyAllSign(DecimalScalar128{"201", 10, 3}, // x
+ DecimalScalar128{"301", 10, 2}, // y
+ DecimalScalar128{"60501", 21, 5}, // expected
+ false); // overflow
+
+ // right 0
+ MultiplyAndVerify(DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{"0", 20, 2}, // y
+ DecimalScalar128{"0", 38, 5}, // expected
+ false); // overflow
+
+ // left 0
+ MultiplyAndVerify(DecimalScalar128{"0", 20, 3}, // x
+ DecimalScalar128{"301", 20, 2}, // y
+ DecimalScalar128{"0", 38, 5}, // expected
+ false); // overflow
+
+ // out_precision == 38, small input values, no trimming of scale (scale <= 6 doesn't
+ // get trimmed).
+ MultiplyAndVerify(DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{"301", 20, 2}, // y
+ DecimalScalar128{"60501", 38, 5}, // expected
+ false); // overflow
+
+ // out_precision == 38, large values, no trimming of scale (scale <= 6 doesn't
+ // get trimmed).
+ MultiplyAndVerifyAllSign(
+ DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{kThirtyFive9s, 35, 2}, // y
+ DecimalScalar128{"20099999999999999999999999999999999799", 38, 5}, // expected
+ false); // overflow
+
+ // out_precision == 38, very large values, no trimming of scale (scale <= 6 doesn't
+ // get trimmed). overflow expected.
+ MultiplyAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{kThirtySix9s, 35, 2}, // y
+ DecimalScalar128{"0", 38, 5}, // expected
+ true); // overflow
+
+ MultiplyAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{kThirtyEight9s, 35, 2}, // y
+ DecimalScalar128{"0", 38, 5}, // expected
+ true); // overflow
+
+ // out_precision == 38, small input values, trimming of scale.
+ MultiplyAndVerifyAllSign(DecimalScalar128{"201", 20, 5}, // x
+ DecimalScalar128{"301", 20, 5}, // y
+ DecimalScalar128{"61", 38, 7}, // expected
+ false); // overflow
+
+ // out_precision == 38, large values, trimming of scale.
+ MultiplyAndVerifyAllSign(
+ DecimalScalar128{"201", 20, 5}, // x
+ DecimalScalar128{kThirtyFive9s, 35, 5}, // y
+ DecimalScalar128{"2010000000000000000000000000000000", 38, 6}, // expected
+ false); // overflow
+
+ // out_precision == 38, very large values, trimming of scale (requires convert to 256).
+ MultiplyAndVerifyAllSign(
+ DecimalScalar128{kThirtyFive9s, 38, 20}, // x
+ DecimalScalar128{kThirtySix9s, 38, 20}, // y
+ DecimalScalar128{"9999999999999999999999999999999999890", 38, 6}, // expected
+ false); // overflow
+
+ // out_precision == 38, very large values, trimming of scale (requires convert to 256).
+ // should cause overflow.
+ MultiplyAndVerifyAllSign(DecimalScalar128{kThirtyFive9s, 38, 4}, // x
+ DecimalScalar128{kThirtySix9s, 38, 4}, // y
+ DecimalScalar128{"0", 38, 6}, // expected
+ true); // overflow
+
+ // corner cases.
+ MultiplyAndVerifyAllSign(
+ DecimalScalar128{0, UINT64_MAX, 38, 4}, // x
+ DecimalScalar128{0, UINT64_MAX, 38, 4}, // y
+ DecimalScalar128{"3402823669209384634264811192843491082", 38, 6}, // expected
+ false); // overflow
+
+ MultiplyAndVerifyAllSign(
+ DecimalScalar128{0, UINT64_MAX, 38, 4}, // x
+ DecimalScalar128{0, INT64_MAX, 38, 4}, // y
+ DecimalScalar128{"1701411834604692317040171876053197783", 38, 6}, // expected
+ false); // overflow
+
+ MultiplyAndVerifyAllSign(DecimalScalar128{"201", 38, 38}, // x
+ DecimalScalar128{"301", 38, 38}, // y
+ DecimalScalar128{"0", 38, 37}, // expected
+ false); // overflow
+
+ MultiplyAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 38}, // x
+ DecimalScalar128{0, UINT64_MAX, 38, 38}, // y
+ DecimalScalar128{"0", 38, 37}, // expected
+ false); // overflow
+
+ MultiplyAndVerifyAllSign(
+ DecimalScalar128{kThirtyFive9s, 38, 38}, // x
+ DecimalScalar128{kThirtySix9s, 38, 38}, // y
+ DecimalScalar128{"100000000000000000000000000000000", 38, 37}, // expected
+ false); // overflow
+}
+
+// Division: every case is checked for all sign combinations. Covers small and large
+// operands, 64-bit boundary values, and one case expected to overflow (where the
+// "expected" value is a placeholder that is not compared).
+TEST_F(TestDecimalSql, Divide) {
+ DivideAndVerifyAllSign(DecimalScalar128{"201", 10, 3}, // x
+ DecimalScalar128{"301", 10, 2}, // y
+ DecimalScalar128{"6677740863787", 23, 14}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{"301", 20, 2}, // y
+ DecimalScalar128{"667774086378737542", 38, 19}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{kThirtyFive9s, 35, 2}, // y
+ DecimalScalar128{"0", 38, 19}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(
+ DecimalScalar128{kThirtyFive9s, 35, 6}, // x
+ DecimalScalar128{"201", 20, 3}, // y
+ DecimalScalar128{"497512437810945273631840796019900493", 38, 6}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(DecimalScalar128{kThirtyEight9s, 38, 20}, // x
+ DecimalScalar128{kThirtyFive9s, 38, 20}, // y
+ DecimalScalar128{"1000000000", 38, 6}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(DecimalScalar128{"31939128063561476055", 38, 8}, // x
+ DecimalScalar128{"10000", 20, 0}, // y
+ DecimalScalar128{"3193912806356148", 38, 8}, // expected
+ false); // overflow
+
+ // Corner cases
+ DivideAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x
+ DecimalScalar128{0, UINT64_MAX, 38, 4}, // y
+ DecimalScalar128{"1000000", 38, 6}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x
+ DecimalScalar128{0, INT64_MAX, 38, 4}, // y
+ DecimalScalar128{"2000000", 38, 6}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 19, 5}, // x
+ DecimalScalar128{0, INT64_MAX, 19, 5}, // y
+ DecimalScalar128{"20000000000000000001", 38, 19}, // expected
+ false); // overflow
+
+ DivideAndVerifyAllSign(DecimalScalar128{kThirtyFive9s, 38, 37}, // x
+ DecimalScalar128{kThirtyFive9s, 38, 38}, // y
+ DecimalScalar128{"10000000", 38, 6}, // expected
+ false); // overflow
+
+ // overflow
+ DivideAndVerifyAllSign(DecimalScalar128{kThirtyEight9s, 38, 6}, // x
+ DecimalScalar128{"201", 20, 3}, // y
+ DecimalScalar128{"0", 38, 6}, // expected
+ true); // overflow
+}
+
+// Remainder: most cases are checked for all sign combinations, where the expected
+// remainder takes the sign of the left operand. None of the cases expects overflow.
+TEST_F(TestDecimalSql, Mod) {
+ ModAndVerifyAllSign(DecimalScalar128{"201", 10, 3}, // x
+ DecimalScalar128{"301", 10, 2}, // y
+ DecimalScalar128{"201", 10, 3}, // expected
+ false); // overflow
+
+ ModAndVerify(DecimalScalar128{"201", 20, 2}, // x
+ DecimalScalar128{"301", 20, 3}, // y
+ DecimalScalar128{"204", 20, 3}, // expected
+ false); // overflow
+
+ ModAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x
+ DecimalScalar128{kThirtyFive9s, 35, 2}, // y
+ DecimalScalar128{"201", 20, 3}, // expected
+ false); // overflow
+
+ ModAndVerifyAllSign(DecimalScalar128{kThirtyFive9s, 35, 6}, // x
+ DecimalScalar128{"201", 20, 3}, // y
+ DecimalScalar128{"180999", 23, 6}, // expected
+ false); // overflow
+
+ ModAndVerifyAllSign(DecimalScalar128{kThirtyEight9s, 38, 20}, // x
+ DecimalScalar128{kThirtyFive9s, 38, 21}, // y
+ DecimalScalar128{"9990", 38, 21}, // expected
+ false); // overflow
+
+ ModAndVerifyAllSign(DecimalScalar128{"31939128063561476055", 38, 8}, // x
+ DecimalScalar128{"10000", 20, 0}, // y
+ DecimalScalar128{"63561476055", 28, 8}, // expected
+ false); // overflow
+
+ ModAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x
+ DecimalScalar128{0, UINT64_MAX, 38, 4}, // y
+ DecimalScalar128{"0", 38, 4}, // expected
+ false); // overflow
+
+ ModAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x
+ DecimalScalar128{0, INT64_MAX, 38, 4}, // y
+ DecimalScalar128{"1", 38, 4}, // expected
+ false); // overflow
+}
+
+TEST_F(TestDecimalSql, DivideByZero) {
+ gandiva::ExecutionContext context;
+ int32_t result_precision;
+ int32_t result_scale;
+ bool overflow;
+
+ // divide-by-zero should cause an error.
+ context.Reset();
+ result_precision = 38;
+ result_scale = 19;
+ decimalops::Divide(reinterpret_cast<gdv_int64>(&context),
+ DecimalScalar128{"201", 20, 3}, DecimalScalar128{"0", 20, 2},
+ result_precision, result_scale, &overflow);
+ EXPECT_TRUE(context.has_error());
+ EXPECT_EQ(context.get_error(), "divide by zero error");
+
+ // divide-by-nonzero should not cause an error.
+ context.Reset();
+ decimalops::Divide(reinterpret_cast<gdv_int64>(&context),
+ DecimalScalar128{"201", 20, 3}, DecimalScalar128{"1", 20, 2},
+ result_precision, result_scale, &overflow);
+ EXPECT_FALSE(context.has_error());
+
+ // mod-by-zero should cause an error.
+ context.Reset();
+ result_precision = 20;
+ result_scale = 3;
+ decimalops::Mod(reinterpret_cast<gdv_int64>(&context), DecimalScalar128{"201", 20, 3},
+ DecimalScalar128{"0", 20, 2}, result_precision, result_scale,
+ &overflow);
+ EXPECT_TRUE(context.has_error());
+ EXPECT_EQ(context.get_error(), "divide by zero error");
+
+ // mod-by-nonzero should not cause an error.
+ context.Reset();
+ decimalops::Mod(reinterpret_cast<gdv_int64>(&context), DecimalScalar128{"201", 20, 3},
+ DecimalScalar128{"1", 20, 2}, result_precision, result_scale,
+ &overflow);
+ EXPECT_FALSE(context.has_error());
+}
+
+// Compare: expects 0 / 1 / -1 for equal / greater / smaller. Covers operands with equal
+// scales, operands whose scales differ in either direction (at several precisions),
+// negative operands, and values at the 38-digit limit.
+TEST_F(TestDecimalSql, Compare) {
+ // x.scale == y.scale
+ EXPECT_EQ(
+ 0, decimalops::Compare(DecimalScalar128{100, 38, 6}, DecimalScalar128{100, 38, 6}));
+ EXPECT_EQ(
+ 1, decimalops::Compare(DecimalScalar128{200, 38, 6}, DecimalScalar128{100, 38, 6}));
+ EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{100, 38, 6},
+ DecimalScalar128{200, 38, 6}));
+
+ // x.scale == y.scale, with -ve.
+ EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{-100, 38, 6},
+ DecimalScalar128{-100, 38, 6}));
+ EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{-200, 38, 6},
+ DecimalScalar128{-100, 38, 6}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{-100, 38, 6},
+ DecimalScalar128{-200, 38, 6}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{100, 38, 6},
+ DecimalScalar128{-200, 38, 6}));
+
+ for (int32_t precision : {16, 36, 38}) {
+ // x.scale > y.scale
+ EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{10000, precision, 6},
+ DecimalScalar128{100, precision, 4}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{20000, precision, 6},
+ DecimalScalar128{100, precision, 4}));
+ EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{10000, precision, 6},
+ DecimalScalar128{200, precision, 4}));
+
+ // x.scale > y.scale, with -ve
+ EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{-10000, precision, 6},
+ DecimalScalar128{-100, precision, 4}));
+ EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{-20000, precision, 6},
+ DecimalScalar128{-100, precision, 4}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{-10000, precision, 6},
+ DecimalScalar128{-200, precision, 4}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{10000, precision, 6},
+ DecimalScalar128{-200, precision, 4}));
+
+ // x.scale < y.scale
+ EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{100, precision, 4},
+ DecimalScalar128{10000, precision, 6}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{200, precision, 4},
+ DecimalScalar128{10000, precision, 6}));
+ EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{100, precision, 4},
+ DecimalScalar128{20000, precision, 6}));
+
+ // x.scale < y.scale, with -ve
+ EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{-100, precision, 4},
+ DecimalScalar128{-10000, precision, 6}));
+ EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{-200, precision, 4},
+ DecimalScalar128{-10000, precision, 6}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{-100, precision, 4},
+ DecimalScalar128{-20000, precision, 6}));
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{100, precision, 4},
+ DecimalScalar128{-200, precision, 6}));
+ }
+
+ // large cases.
+ EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{kThirtyEight9s, 38, 6},
+ DecimalScalar128{kThirtyEight9s, 38, 6}));
+
+ EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{kThirtyEight9s, 38, 6},
+ DecimalScalar128{kThirtySix9s, 38, 4}));
+
+ EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{kThirtyEight9s, 38, 6},
+ DecimalScalar128{kThirtyEight9s, 38, 4}));
+}
+
+// Table-driven test for decimalops::Round. Each row holds the expected result (whose
+// precision and scale are passed as the output type), the input, the rounding scale
+// and whether overflow is expected.
+TEST_F(TestDecimalSql, Round) {
+  // expected, input, rounding_scale, overflow
+  using TupleType = std::tuple<DecimalScalar128, DecimalScalar128, int32_t, bool>;
+  std::vector<TupleType> test_values = {
+      // examples from
+      // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_round
+      std::make_tuple(DecimalScalar128{-1, 36, 0}, DecimalScalar128{-123, 38, 2}, 0,
+                      false),
+      std::make_tuple(DecimalScalar128{-2, 36, 0}, DecimalScalar128{-158, 38, 2}, 0,
+                      false),
+      std::make_tuple(DecimalScalar128{2, 36, 0}, DecimalScalar128{158, 38, 2}, 0, false),
+      std::make_tuple(DecimalScalar128{-13, 36, 1}, DecimalScalar128{-1298, 38, 3}, 1,
+                      false),
+      std::make_tuple(DecimalScalar128{-1, 35, 0}, DecimalScalar128{-1298, 38, 3}, 0,
+                      false),
+      std::make_tuple(DecimalScalar128{20, 35, 0}, DecimalScalar128{23298, 38, 3}, -1,
+                      false),
+      std::make_tuple(DecimalScalar128{100, 38, 0}, DecimalScalar128{122, 38, 0}, -2,
+                      false),
+      std::make_tuple(DecimalScalar128{3, 37, 0}, DecimalScalar128{25, 38, 1}, 0, false),
+
+      // border cases
+      std::make_tuple(DecimalScalar128{INT64_MIN / 100, 36, 0},
+                      DecimalScalar128{INT64_MIN, 38, 2}, 0, false),
+
+      std::make_tuple(DecimalScalar128{INT64_MIN, 38, 0},
+                      DecimalScalar128{INT64_MIN, 38, 0}, 0, false),
+      std::make_tuple(DecimalScalar128{0, 0, 36, 0}, DecimalScalar128{0, 0, 38, 2}, 0,
+                      false),
+      std::make_tuple(DecimalScalar128{INT64_MAX, 38, 0},
+                      DecimalScalar128{INT64_MAX, 38, 0}, 0, false),
+
+      std::make_tuple(DecimalScalar128{INT64_MAX / 100, 36, 0},
+                      DecimalScalar128{INT64_MAX, 38, 2}, 0, false),
+
+      // large scales
+      std::make_tuple(DecimalScalar128{0, 0, 22, 0}, DecimalScalar128{12345, 38, 16}, 0,
+                      false),
+
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128{124}, 22, 0},
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, 0, false),
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128{-124}, 22, 0},
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, 0,
+          false),
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128{124}, 6, 0},
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, 0, false),
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128{-124}, 6, 0},
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, 0,
+          false),
+
+      // scale bigger than arg
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32},
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32}, 35,
+          false),
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32},
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32}, 35,
+          false),
+
+      // overflow
+      std::make_tuple(DecimalScalar128{0, 0, 1, 0}, DecimalScalar128{99, 2, 1}, 0, true),
+  };
+
+  for (const auto& test_case : test_values) {
+    const DecimalScalar128& expected = std::get<0>(test_case);
+    const DecimalScalar128& input = std::get<1>(test_case);
+    const int32_t rounding_scale = std::get<2>(test_case);
+    const bool expected_overflow = std::get<3>(test_case);
+    bool overflow = false;
+
+    // The value check is non-fatal; a wrong overflow flag aborts the test.
+    EXPECT_EQ(expected.value(),
+              decimalops::Round(input, expected.precision(), expected.scale(),
+                                rounding_scale, &overflow))
+        << " failed on input " << input << " rounding scale " << rounding_scale;
+    if (!expected_overflow) {
+      ASSERT_FALSE(overflow) << "overflow not expected for input " << input;
+    } else {
+      ASSERT_TRUE(overflow) << "overflow expected for input " << input;
+    }
+  }
+}
+
+// Table-driven test for decimalops::Truncate. Each row holds the expected result (whose
+// precision and scale are passed as the output type), the input, the rounding scale
+// and whether overflow is expected. No row in this table expects overflow.
+TEST_F(TestDecimalSql, Truncate) {
+ // expected, input, rounding_scale, overflow
+ using TupleType = std::tuple<DecimalScalar128, DecimalScalar128, int32_t, bool>;
+ std::vector<TupleType> test_values = {
+ // examples from
+ // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_truncate
+ std::make_tuple(DecimalScalar128{12, 36, 1}, DecimalScalar128{1223, 38, 3}, 1,
+ false),
+ std::make_tuple(DecimalScalar128{19, 36, 1}, DecimalScalar128{1999, 38, 3}, 1,
+ false),
+ std::make_tuple(DecimalScalar128{1, 35, 0}, DecimalScalar128{1999, 38, 3}, 0,
+ false),
+ std::make_tuple(DecimalScalar128{-19, 36, 1}, DecimalScalar128{-1999, 38, 3}, 1,
+ false),
+ std::make_tuple(DecimalScalar128{100, 38, 0}, DecimalScalar128{122, 38, 0}, -2,
+ false),
+ std::make_tuple(DecimalScalar128{1028, 38, 0}, DecimalScalar128{1028, 38, 0}, 0,
+ false),
+
+ // border cases
+ std::make_tuple(DecimalScalar128{BasicDecimal128{INT64_MIN / 100}, 36, 0},
+ DecimalScalar128{INT64_MIN, 38, 2}, 0, false),
+
+ std::make_tuple(DecimalScalar128{INT64_MIN, 38, 0},
+ DecimalScalar128{INT64_MIN, 38, 0}, 0, false),
+ std::make_tuple(DecimalScalar128{0, 0, 38, 0}, DecimalScalar128{0, 0, 38, 2}, 0,
+ false),
+ std::make_tuple(DecimalScalar128{INT64_MAX, 38, 0},
+ DecimalScalar128{INT64_MAX, 38, 0}, 0, false),
+
+ std::make_tuple(DecimalScalar128{BasicDecimal128(INT64_MAX / 100), 36, 0},
+ DecimalScalar128{INT64_MAX, 38, 2}, 0, false),
+
+ // large scales
+ std::make_tuple(DecimalScalar128{BasicDecimal128{0, 0}, 22, 0},
+ DecimalScalar128{12345, 38, 16}, 0, false),
+ std::make_tuple(
+ DecimalScalar128{BasicDecimal128{123}, 22, 0},
+ DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, 0, false),
+ std::make_tuple(
+ DecimalScalar128{BasicDecimal128{-123}, 22, 0},
+ DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, 0,
+ false),
+ std::make_tuple(
+ DecimalScalar128{BasicDecimal128{123}, 6, 0},
+ DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, 0, false),
+ std::make_tuple(
+ DecimalScalar128{BasicDecimal128{-123}, 6, 0},
+ DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, 0,
+ false),
+
+ // rounding scale bigger than the input scale: the value is expected back unchanged,
+ // with no overflow
+ std::make_tuple(
+ DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32},
+ DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32}, 35,
+ false),
+ std::make_tuple(
+ DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32},
+ DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32}, 35,
+ false),
+ };
+
+ for (auto iter : test_values) {
+ auto expected = std::get<0>(iter);
+ auto input = std::get<1>(iter);
+ auto rounding_scale = std::get<2>(iter);
+ auto expected_overflow = std::get<3>(iter);
+ bool overflow = false;
+
+ EXPECT_EQ(expected.value(),
+ decimalops::Truncate(input, expected.precision(), expected.scale(),
+ rounding_scale, &overflow))
+ << " failed on input " << input << " rounding scale " << rounding_scale;
+ if (expected_overflow) {
+ ASSERT_TRUE(overflow) << "overflow expected for input " << input;
+ } else {
+ ASSERT_FALSE(overflow) << "overflow not expected for input " << input;
+ }
+ }
+}
+
+// Table-driven test for decimalops::Ceil. Each row holds the expected value, the input
+// and whether overflow is expected.
+TEST_F(TestDecimalSql, Ceil) {
+  // expected, input, overflow
+  std::vector<std::tuple<BasicDecimal128, DecimalScalar128, bool>> test_values = {
+      // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_ceil
+      std::make_tuple(2, DecimalScalar128{123, 38, 2}, false),
+      std::make_tuple(-1, DecimalScalar128{-123, 38, 2}, false),
+
+      // border cases
+      std::make_tuple(BasicDecimal128{INT64_MIN / 100},
+                      DecimalScalar128{INT64_MIN, 38, 2}, false),
+
+      std::make_tuple(INT64_MIN, DecimalScalar128{INT64_MIN, 38, 0}, false),
+      std::make_tuple(BasicDecimal128{0, 0}, DecimalScalar128{0, 0, 38, 2}, false),
+      std::make_tuple(INT64_MAX, DecimalScalar128{INT64_MAX, 38, 0}, false),
+
+      std::make_tuple(BasicDecimal128(INT64_MAX / 100 + 1),
+                      DecimalScalar128{INT64_MAX, 38, 2}, false),
+
+      // large scales
+      std::make_tuple(BasicDecimal128{0, 1}, DecimalScalar128{12345, 38, 16}, false),
+      std::make_tuple(
+          BasicDecimal128{124},
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, false),
+      std::make_tuple(
+          BasicDecimal128{-123},
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, false),
+      std::make_tuple(
+          BasicDecimal128{124},
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, false),
+      std::make_tuple(
+          BasicDecimal128{-123},
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, false),
+  };
+
+  for (const auto& test_case : test_values) {
+    const BasicDecimal128& expected = std::get<0>(test_case);
+    const DecimalScalar128& input = std::get<1>(test_case);
+    const bool expected_overflow = std::get<2>(test_case);
+    bool overflow = false;
+
+    // The value check is non-fatal; a wrong overflow flag aborts the test.
+    EXPECT_EQ(expected, decimalops::Ceil(input, &overflow))
+        << " failed on input " << input;
+    if (!expected_overflow) {
+      ASSERT_FALSE(overflow) << "overflow not expected for input " << input;
+    } else {
+      ASSERT_TRUE(overflow) << "overflow expected for input " << input;
+    }
+  }
+}
+
+// Table-driven test for decimalops::Floor. Each row holds the expected value, the input
+// and whether overflow is expected.
+TEST_F(TestDecimalSql, Floor) {
+  // expected, input, overflow
+  std::vector<std::tuple<BasicDecimal128, DecimalScalar128, bool>> test_values = {
+      // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_floor
+      std::make_tuple(1, DecimalScalar128{123, 38, 2}, false),
+      std::make_tuple(-2, DecimalScalar128{-123, 38, 2}, false),
+
+      // border cases
+      std::make_tuple(BasicDecimal128{INT64_MIN / 100 - 1},
+                      DecimalScalar128{INT64_MIN, 38, 2}, false),
+
+      std::make_tuple(INT64_MIN, DecimalScalar128{INT64_MIN, 38, 0}, false),
+      std::make_tuple(BasicDecimal128{0, 0}, DecimalScalar128{0, 0, 38, 2}, false),
+      std::make_tuple(INT64_MAX, DecimalScalar128{INT64_MAX, 38, 0}, false),
+
+      std::make_tuple(BasicDecimal128{INT64_MAX / 100},
+                      DecimalScalar128{INT64_MAX, 38, 2}, false),
+
+      // large scales
+      std::make_tuple(BasicDecimal128{0, 0}, DecimalScalar128{12345, 38, 16}, false),
+      std::make_tuple(
+          BasicDecimal128{123},
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, false),
+      std::make_tuple(
+          BasicDecimal128{-124},
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, false),
+      std::make_tuple(
+          BasicDecimal128{123},
+          DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, false),
+      std::make_tuple(
+          BasicDecimal128{-124},
+          DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, false),
+  };
+
+  for (const auto& test_case : test_values) {
+    const BasicDecimal128& expected = std::get<0>(test_case);
+    const DecimalScalar128& input = std::get<1>(test_case);
+    const bool expected_overflow = std::get<2>(test_case);
+    bool overflow = false;
+
+    // The value check is non-fatal; a wrong overflow flag aborts the test.
+    EXPECT_EQ(expected, decimalops::Floor(input, &overflow))
+        << " failed on input " << input;
+    if (!expected_overflow) {
+      ASSERT_FALSE(overflow) << "overflow not expected for input " << input;
+    } else {
+      ASSERT_TRUE(overflow) << "overflow expected for input " << input;
+    }
+  }
+}
+
+// Table-driven test for decimalops::Convert. Each row holds the expected result (whose
+// precision and scale are passed as the target type), the input and whether overflow
+// is expected.
+TEST_F(TestDecimalSql, Convert) {
+  // expected, input, overflow
+  std::vector<std::tuple<DecimalScalar128, DecimalScalar128, bool>> test_values = {
+      // simple cases
+      std::make_tuple(DecimalScalar128{12, 38, 1}, DecimalScalar128{123, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{1230, 38, 3}, DecimalScalar128{123, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{123, 38, 2}, DecimalScalar128{123, 38, 2}, false),
+
+      std::make_tuple(DecimalScalar128{-12, 38, 1}, DecimalScalar128{-123, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{-1230, 38, 3}, DecimalScalar128{-123, 38, 2},
+                      false),
+      std::make_tuple(DecimalScalar128{-123, 38, 2}, DecimalScalar128{-123, 38, 2},
+                      false),
+
+      // border cases
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128(INT64_MIN).ReduceScaleBy(1), 38, 1},
+          DecimalScalar128{INT64_MIN, 38, 2}, false),
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128(INT64_MIN).IncreaseScaleBy(1), 38, 3},
+          DecimalScalar128{INT64_MIN, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{-3, 38, 1}, DecimalScalar128{-32, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{0, 0, 38, 1}, DecimalScalar128{0, 0, 38, 2},
+                      false),
+      std::make_tuple(DecimalScalar128{3, 38, 1}, DecimalScalar128{32, 38, 2}, false),
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128(INT64_MAX).ReduceScaleBy(1), 38, 1},
+          DecimalScalar128{INT64_MAX, 38, 2}, false),
+      std::make_tuple(
+          DecimalScalar128{BasicDecimal128(INT64_MAX).IncreaseScaleBy(1), 38, 3},
+          DecimalScalar128{INT64_MAX, 38, 2}, false),
+
+      // large scales
+      std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(16), 38, 18},
+                      DecimalScalar128{123, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(16), 38, 18},
+                      DecimalScalar128{-123, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(30), 38, 32},
+                      DecimalScalar128{123, 38, 2}, false),
+      std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(30), 38, 32},
+                      DecimalScalar128{-123, 38, 2}, false),
+
+      // overflow due to scaling up.
+      std::make_tuple(DecimalScalar128{0, 0, 38, 36}, DecimalScalar128{12345, 38, 2},
+                      true),
+      std::make_tuple(DecimalScalar128{0, 0, 38, 36}, DecimalScalar128{-12345, 38, 2},
+                      true),
+
+      // overflow due to precision.
+      std::make_tuple(DecimalScalar128{0, 0, 5, 3}, DecimalScalar128{12345, 5, 2}, true),
+  };
+
+  for (const auto& test_case : test_values) {
+    const DecimalScalar128& expected = std::get<0>(test_case);
+    const DecimalScalar128& input = std::get<1>(test_case);
+    const bool expected_overflow = std::get<2>(test_case);
+    bool overflow = false;
+
+    // The value check is non-fatal; a wrong overflow flag aborts the test.
+    EXPECT_EQ(expected.value(), decimalops::Convert(input, expected.precision(),
+                                                    expected.scale(), &overflow))
+        << " failed on input " << input;
+
+    if (!expected_overflow) {
+      ASSERT_FALSE(overflow) << "overflow not expected for input " << input;
+    } else {
+      ASSERT_TRUE(overflow) << "overflow expected for input " << input;
+    }
+  }
+}
+
+// 2^53: double can store up to this integer value without losing precision.
+static const int64_t kMaxDoubleInt = static_cast<int64_t>(1) << 53;
+
+// Checks decimalops::FromDouble against a table of cases. The precision and
+// scale of the expected scalar are used as the conversion target.
+TEST_F(TestDecimalSql, FromDouble) {
+  // columns: expected result, input double, whether overflow must be reported
+  std::vector<std::tuple<DecimalScalar128, double, bool>> test_values = {
+      // simple cases
+      std::make_tuple(DecimalScalar128{-16285, 38, 3}, -16.285, false),
+      std::make_tuple(DecimalScalar128{-162850, 38, 4}, -16.285, false),
+      std::make_tuple(DecimalScalar128{-1629, 38, 2}, -16.285, false),
+
+      std::make_tuple(DecimalScalar128{16285, 38, 3}, 16.285, false),
+      std::make_tuple(DecimalScalar128{162850, 38, 4}, 16.285, false),
+      std::make_tuple(DecimalScalar128{1629, 38, 2}, 16.285, false),
+
+      // round up
+      std::make_tuple(DecimalScalar128{1, 18, 0}, 1.15470053838, false),
+      std::make_tuple(DecimalScalar128{-1, 18, 0}, -1.15470053838, false),
+      std::make_tuple(DecimalScalar128{2, 18, 0}, 1.55470053838, false),
+      std::make_tuple(DecimalScalar128{-2, 18, 0}, -1.55470053838, false),
+
+      // border cases
+      std::make_tuple(DecimalScalar128{-kMaxDoubleInt, 38, 0},
+                      static_cast<double>(-kMaxDoubleInt), false),
+      std::make_tuple(DecimalScalar128{-32, 38, 0}, -32, false),
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, 0, false),
+      std::make_tuple(DecimalScalar128{32, 38, 0}, 32, false),
+      std::make_tuple(DecimalScalar128{kMaxDoubleInt, 38, 0},
+                      static_cast<double>(kMaxDoubleInt), false),
+
+      // large scales
+      std::make_tuple(DecimalScalar128{123, 38, 16}, 1.23E-14, false),
+      std::make_tuple(DecimalScalar128{123, 38, 32}, 1.23E-30, false),
+      std::make_tuple(DecimalScalar128{1230, 38, 33}, 1.23E-30, false),
+      std::make_tuple(DecimalScalar128{123, 38, 38}, 1.23E-36, false),
+
+      // very small doubles
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, std::numeric_limits<double>::min(),
+                      false),
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, -std::numeric_limits<double>::min(),
+                      false),
+
+      // overflow due to large -ve double
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, -std::numeric_limits<double>::max(),
+                      true),
+      // overflow due to large +ve double
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, std::numeric_limits<double>::max(),
+                      true),
+      // overflow due to scaling up.
+      std::make_tuple(DecimalScalar128{0, 0, 38, 36}, 123.45, true),
+      // overflow due to precision.
+      std::make_tuple(DecimalScalar128{0, 0, 4, 2}, 12345.67, true),
+  };
+
+  for (const auto& row : test_values) {
+    auto want = std::get<0>(row);
+    auto source = std::get<1>(row);
+    auto want_overflow = std::get<2>(row);
+    bool overflow = false;
+
+    // The value check is non-fatal; the overflow check aborts the test on mismatch.
+    EXPECT_EQ(want.value(), decimalops::FromDouble(source, want.precision(),
+                                                   want.scale(), &overflow))
+        << " failed on input " << source;
+
+    if (!want_overflow) {
+      ASSERT_FALSE(overflow) << "overflow not expected for input " << source;
+    } else {
+      ASSERT_TRUE(overflow) << "overflow expected for input " << source;
+    }
+  }
+}
+
+// Non-fatal check that x and y differ by at most 0.00001 in either direction.
+// The previous form tested only `x - y <= 0.00001`, so any y larger than x
+// passed regardless of how far apart the two values were. EXPECT_NEAR compares
+// the absolute difference and prints both operands on failure.
+#define EXPECT_FUZZY_EQ(x, y) EXPECT_NEAR((x), (y), 0.00001)
+
+// Checks decimalops::ToDouble against a table of cases; none of them is
+// expected to report overflow.
+TEST_F(TestDecimalSql, ToDouble) {
+  // columns: expected double, input decimal
+  std::vector<std::tuple<double, DecimalScalar128>> test_values = {
+      // simple ones
+      std::make_tuple(-16.285, DecimalScalar128{-16285, 38, 3}),
+      std::make_tuple(-162.85, DecimalScalar128{-16285, 38, 2}),
+      std::make_tuple(-1.6285, DecimalScalar128{-16285, 38, 4}),
+
+      // large scales
+      std::make_tuple(1.23E-14, DecimalScalar128{123, 38, 16}),
+      std::make_tuple(1.23E-30, DecimalScalar128{123, 38, 32}),
+      std::make_tuple(1.23E-36, DecimalScalar128{123, 38, 38}),
+
+      // border cases
+      std::make_tuple(static_cast<double>(-kMaxDoubleInt),
+                      DecimalScalar128{-kMaxDoubleInt, 38, 0}),
+      std::make_tuple(-32, DecimalScalar128{-32, 38, 0}),
+      std::make_tuple(0, DecimalScalar128{0, 0, 38, 0}),
+      std::make_tuple(32, DecimalScalar128{32, 38, 0}),
+      std::make_tuple(static_cast<double>(kMaxDoubleInt),
+                      DecimalScalar128{kMaxDoubleInt, 38, 0}),
+  };
+  for (const auto& row : test_values) {
+    auto source = std::get<1>(row);
+    bool overflow = false;
+
+    EXPECT_FUZZY_EQ(std::get<0>(row), decimalops::ToDouble(source, &overflow));
+    ASSERT_FALSE(overflow) << "overflow not expected for input " << source;
+  }
+}
+
+// Checks decimalops::FromInt64 against a table of cases. The precision and
+// scale of the expected scalar are used as the conversion target.
+TEST_F(TestDecimalSql, FromInt64) {
+  // columns: expected result, input integer, whether overflow must be reported
+  std::vector<std::tuple<DecimalScalar128, int64_t, bool>> test_values = {
+      // simple cases
+      std::make_tuple(DecimalScalar128{-16000, 38, 3}, -16, false),
+      std::make_tuple(DecimalScalar128{-160000, 38, 4}, -16, false),
+      std::make_tuple(DecimalScalar128{-1600, 38, 2}, -16, false),
+
+      std::make_tuple(DecimalScalar128{16000, 38, 3}, 16, false),
+      std::make_tuple(DecimalScalar128{160000, 38, 4}, 16, false),
+      std::make_tuple(DecimalScalar128{1600, 38, 2}, 16, false),
+
+      // border cases
+      std::make_tuple(DecimalScalar128{INT64_MIN, 38, 0}, INT64_MIN, false),
+      std::make_tuple(DecimalScalar128{-32, 38, 0}, -32, false),
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, 0, false),
+      std::make_tuple(DecimalScalar128{32, 38, 0}, 32, false),
+      std::make_tuple(DecimalScalar128{INT64_MAX, 38, 0}, INT64_MAX, false),
+
+      // large scales
+      std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(16), 38, 16},
+                      123, false),
+      std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(32), 38, 32},
+                      123, false),
+      std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(16), 38, 16},
+                      -123, false),
+      std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(32), 38, 32},
+                      -123, false),
+
+      // overflow due to scaling up.
+      std::make_tuple(DecimalScalar128{0, 0, 38, 36}, 123, true),
+      // overflow due to precision.
+      std::make_tuple(DecimalScalar128{0, 0, 4, 2}, 12345, true),
+  };
+
+  for (const auto& row : test_values) {
+    auto want = std::get<0>(row);
+    auto source = std::get<1>(row);
+    auto want_overflow = std::get<2>(row);
+    bool overflow = false;
+
+    // The value check is non-fatal; the overflow check aborts the test on mismatch.
+    EXPECT_EQ(want.value(), decimalops::FromInt64(source, want.precision(),
+                                                  want.scale(), &overflow))
+        << " failed on input " << source;
+
+    if (!want_overflow) {
+      ASSERT_FALSE(overflow) << "overflow not expected for input " << source;
+    } else {
+      ASSERT_TRUE(overflow) << "overflow expected for input " << source;
+    }
+  }
+}
+
+// Checks decimalops::ToInt64 against a table of cases.
+TEST_F(TestDecimalSql, ToInt64) {
+  // columns: expected integer, input decimal, whether overflow must be reported
+  std::vector<std::tuple<int64_t, DecimalScalar128, bool>> test_values = {
+      // simple ones
+      std::make_tuple(-16, DecimalScalar128{-16285, 38, 3}, false),
+      std::make_tuple(-163, DecimalScalar128{-16285, 38, 2}, false),
+      std::make_tuple(-2, DecimalScalar128{-16285, 38, 4}, false),
+
+      // border cases
+      std::make_tuple(INT64_MIN, DecimalScalar128{INT64_MIN, 38, 0}, false),
+      std::make_tuple(-32, DecimalScalar128{-32, 38, 0}, false),
+      std::make_tuple(0, DecimalScalar128{0, 0, 38, 0}, false),
+      std::make_tuple(32, DecimalScalar128{32, 38, 0}, false),
+      std::make_tuple(INT64_MAX, DecimalScalar128{INT64_MAX, 38, 0}, false),
+
+      // large scales
+      std::make_tuple(0, DecimalScalar128{123, 38, 16}, false),
+      std::make_tuple(0, DecimalScalar128{123, 38, 32}, false),
+      std::make_tuple(0, DecimalScalar128{123, 38, 38}, false),
+
+      // overflow test cases
+      // very large
+      std::make_tuple(0, DecimalScalar128{32768, 16, 38, 2}, true),
+      std::make_tuple(0, DecimalScalar128{INT64_MAX, UINT64_MAX, 38, 10}, true),
+      // very small
+      std::make_tuple(0, -DecimalScalar128{32768, 16, 38, 2}, true),
+      std::make_tuple(0, -DecimalScalar128{INT64_MAX, UINT64_MAX, 38, 10}, true),
+  };
+
+  for (const auto& row : test_values) {
+    auto want = std::get<0>(row);
+    auto source = std::get<1>(row);
+    auto want_overflow = std::get<2>(row);
+    bool overflow = false;
+
+    // The value check is non-fatal; the overflow check aborts the test on mismatch.
+    EXPECT_EQ(want, decimalops::ToInt64(source, &overflow))
+        << " failed on input " << source;
+    if (!want_overflow) {
+      ASSERT_FALSE(overflow) << "overflow not expected for input " << source;
+    } else {
+      ASSERT_TRUE(overflow) << "overflow expected for input " << source;
+    }
+  }
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/src/arrow/cpp/src/gandiva/precompiled/decimal_wrapper.cc
new file mode 100644
index 000000000..082d5832d
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_wrapper.cc
@@ -0,0 +1,433 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/precompiled/decimal_ops.h"
+#include "gandiva/precompiled/types.h"
+
+extern "C" {
+
+// Adds two decimal128 operands, each passed as (high, low, precision, scale),
+// via decimalops::Add and writes the two halves of the result through
+// out_high / out_low.
+FORCE_INLINE
+void add_large_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                     int32_t x_scale, int64_t y_high, uint64_t y_low,
+                                     int32_t y_precision, int32_t y_scale,
+                                     int32_t out_precision, int32_t out_scale,
+                                     int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 lhs(x_high, x_low, x_precision, x_scale);
+  gandiva::BasicDecimalScalar128 rhs(y_high, y_low, y_precision, y_scale);
+
+  arrow::BasicDecimal128 sum =
+      gandiva::decimalops::Add(lhs, rhs, out_precision, out_scale);
+  *out_low = sum.low_bits();
+  *out_high = sum.high_bits();
+}
+
+// Multiplies two decimal128 operands, each passed as (high, low, precision,
+// scale), via decimalops::Multiply and writes the two halves of the result
+// through out_high / out_low. The overflow flag is currently discarded.
+FORCE_INLINE
+void multiply_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                    int32_t x_scale, int64_t y_high, uint64_t y_low,
+                                    int32_t y_precision, int32_t y_scale,
+                                    int32_t out_precision, int32_t out_scale,
+                                    int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale);
+  gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale);
+  // Initialized like the other wrappers in this file, so the callee never
+  // sees an indeterminate flag.
+  bool overflow = false;
+
+  // TODO ravindra: generate error on overflows (ARROW-4570).
+  arrow::BasicDecimal128 out =
+      gandiva::decimalops::Multiply(x, y, out_precision, out_scale, &overflow);
+  *out_high = out.high_bits();
+  *out_low = out.low_bits();
+}
+
+// Divides x by y, each passed as (high, low, precision, scale), via
+// decimalops::Divide and writes the two halves of the result through
+// out_high / out_low. `context` is forwarded to decimalops::Divide unchanged.
+// The overflow flag is currently discarded.
+FORCE_INLINE
+void divide_decimal128_decimal128(int64_t context, int64_t x_high, uint64_t x_low,
+                                  int32_t x_precision, int32_t x_scale, int64_t y_high,
+                                  uint64_t y_low, int32_t y_precision, int32_t y_scale,
+                                  int32_t out_precision, int32_t out_scale,
+                                  int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale);
+  gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale);
+  // Initialized like the other wrappers in this file, so the callee never
+  // sees an indeterminate flag.
+  bool overflow = false;
+
+  // TODO ravindra: generate error on overflows (ARROW-4570).
+  arrow::BasicDecimal128 out =
+      gandiva::decimalops::Divide(context, x, y, out_precision, out_scale, &overflow);
+  *out_high = out.high_bits();
+  *out_low = out.low_bits();
+}
+
+// Computes x mod y, each passed as (high, low, precision, scale), via
+// decimalops::Mod and writes the two halves of the result through
+// out_high / out_low. `context` is forwarded to decimalops::Mod unchanged.
+// The overflow flag is currently discarded.
+FORCE_INLINE
+void mod_decimal128_decimal128(int64_t context, int64_t x_high, uint64_t x_low,
+                               int32_t x_precision, int32_t x_scale, int64_t y_high,
+                               uint64_t y_low, int32_t y_precision, int32_t y_scale,
+                               int32_t out_precision, int32_t out_scale,
+                               int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale);
+  gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale);
+  // Initialized like the other wrappers in this file, so the callee never
+  // sees an indeterminate flag.
+  bool overflow = false;
+
+  // TODO ravindra: generate error on overflows (ARROW-4570).
+  arrow::BasicDecimal128 out =
+      gandiva::decimalops::Mod(context, x, y, out_precision, out_scale, &overflow);
+  *out_high = out.high_bits();
+  *out_low = out.low_bits();
+}
+
+// Builds two decimal scalars from their raw parts and returns the result of
+// decimalops::Compare on them.
+FORCE_INLINE
+int32_t compare_decimal128_decimal128_internal(int64_t x_high, uint64_t x_low,
+                                               int32_t x_precision, int32_t x_scale,
+                                               int64_t y_high, uint64_t y_low,
+                                               int32_t y_precision, int32_t y_scale) {
+  gandiva::BasicDecimalScalar128 lhs(x_high, x_low, x_precision, x_scale);
+  gandiva::BasicDecimalScalar128 rhs(y_high, y_low, y_precision, y_scale);
+
+  const int32_t ordering = gandiva::decimalops::Compare(lhs, rhs);
+  return ordering;
+}
+
+// Writes the absolute value of the 128-bit decimal (x_high, x_low) through
+// out_high / out_low. The precision and scale arguments are not used.
+FORCE_INLINE
+void abs_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale,
+                    int32_t out_precision, int32_t out_scale, int64_t* out_high,
+                    uint64_t* out_low) {
+  gandiva::BasicDecimal128 magnitude(x_high, x_low);
+  magnitude.Abs();
+  *out_low = magnitude.low_bits();
+  *out_high = magnitude.high_bits();
+}
+
+// Applies decimalops::Ceil to the input decimal and writes the two halves of
+// the result through out_high / out_low. The overflow flag is discarded and
+// out_precision / out_scale are not used.
+FORCE_INLINE
+void ceil_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale,
+                     int32_t out_precision, int32_t out_scale, int64_t* out_high,
+                     uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  auto ceiling = gandiva::decimalops::Ceil(operand, &overflow);
+  *out_low = ceiling.low_bits();
+  *out_high = ceiling.high_bits();
+}
+
+// Applies decimalops::Floor to the input decimal and writes the two halves of
+// the result through out_high / out_low. The overflow flag is discarded and
+// out_precision / out_scale are not used.
+FORCE_INLINE
+void floor_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                      int32_t x_scale, int32_t out_precision, int32_t out_scale,
+                      int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  auto floored = gandiva::decimalops::Floor(operand, &overflow);
+  *out_low = floored.low_bits();
+  *out_high = floored.high_bits();
+}
+
+// Rounds the input decimal via decimalops::Round with both the output scale
+// and the rounding scale fixed at 0, then writes the two halves of the result
+// through out_high / out_low. The overflow flag is discarded and out_scale is
+// not used.
+FORCE_INLINE
+void round_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                      int32_t x_scale, int32_t out_precision, int32_t out_scale,
+                      int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  auto rounded = gandiva::decimalops::Round(operand, out_precision, /*out_scale=*/0,
+                                            /*rounding_scale=*/0, &overflow);
+  *out_low = rounded.low_bits();
+  *out_high = rounded.high_bits();
+}
+
+// Rounds the input decimal via decimalops::Round using the caller-supplied
+// rounding scale and output precision/scale, then writes the two halves of
+// the result through out_high / out_low. The overflow flag is discarded.
+FORCE_INLINE
+void round_decimal128_int32(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                            int32_t x_scale, int32_t rounding_scale,
+                            int32_t out_precision, int32_t out_scale, int64_t* out_high,
+                            uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  auto rounded = gandiva::decimalops::Round(operand, out_precision, out_scale,
+                                            rounding_scale, &overflow);
+  *out_low = rounded.low_bits();
+  *out_high = rounded.high_bits();
+}
+
+// Truncates the input decimal via decimalops::Truncate with both the output
+// scale and the rounding scale fixed at 0, then writes the two halves of the
+// result through out_high / out_low. The overflow flag is discarded and
+// out_scale is not used.
+FORCE_INLINE
+void truncate_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                         int32_t x_scale, int32_t out_precision, int32_t out_scale,
+                         int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  auto truncated = gandiva::decimalops::Truncate(operand, out_precision, /*out_scale=*/0,
+                                                 /*rounding_scale=*/0, &overflow);
+  *out_low = truncated.low_bits();
+  *out_high = truncated.high_bits();
+}
+
+// Truncates the input decimal via decimalops::Truncate using the
+// caller-supplied rounding scale and output precision/scale, then writes the
+// two halves of the result through out_high / out_low. The overflow flag is
+// discarded.
+FORCE_INLINE
+void truncate_decimal128_int32(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                               int32_t x_scale, int32_t rounding_scale,
+                               int32_t out_precision, int32_t out_scale,
+                               int64_t* out_high, uint64_t* out_low) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  auto truncated = gandiva::decimalops::Truncate(operand, out_precision, out_scale,
+                                                 rounding_scale, &overflow);
+  *out_low = truncated.low_bits();
+  *out_high = truncated.high_bits();
+}
+
+// Returns the result of decimalops::ToDouble for the input decimal. The
+// overflow flag is discarded.
+FORCE_INLINE
+double castFLOAT8_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                             int32_t x_scale) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  const double as_double = gandiva::decimalops::ToDouble(operand, &overflow);
+  return as_double;
+}
+
+// Returns the result of decimalops::ToInt64 for the input decimal. The
+// overflow flag is discarded.
+FORCE_INLINE
+int64_t castBIGINT_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                              int32_t x_scale) {
+  gandiva::BasicDecimalScalar128 operand({x_high, x_low}, x_precision, x_scale);
+
+  bool overflow = false;
+  const int64_t as_int = gandiva::decimalops::ToInt64(operand, &overflow);
+  return as_int;
+}
+
+// Converts a 64-bit integer to a decimal of the given precision/scale via
+// decimalops::FromInt64 and writes the two halves of the result through
+// out_high / out_low. The overflow flag is discarded.
+FORCE_INLINE
+void castDECIMAL_int64(int64_t in, int32_t x_precision, int32_t x_scale,
+                       int64_t* out_high, uint64_t* out_low) {
+  bool overflow = false;
+  auto converted = gandiva::decimalops::FromInt64(in, x_precision, x_scale, &overflow);
+  *out_low = converted.low_bits();
+  *out_high = converted.high_bits();
+}
+
+// 32-bit entry point: widens the input to 64 bits and delegates to
+// castDECIMAL_int64.
+FORCE_INLINE
+void castDECIMAL_int32(int32_t in, int32_t x_precision, int32_t x_scale,
+                       int64_t* out_high, uint64_t* out_low) {
+  const int64_t widened = in;
+  castDECIMAL_int64(widened, x_precision, x_scale, out_high, out_low);
+}
+
+// Converts a double to a decimal of the given precision/scale via
+// decimalops::FromDouble and writes the two halves of the result through
+// out_high / out_low. The overflow flag is discarded.
+FORCE_INLINE
+void castDECIMAL_float64(double in, int32_t x_precision, int32_t x_scale,
+                         int64_t* out_high, uint64_t* out_low) {
+  bool overflow = false;
+  auto converted = gandiva::decimalops::FromDouble(in, x_precision, x_scale, &overflow);
+  *out_low = converted.low_bits();
+  *out_high = converted.high_bits();
+}
+
+// Single-precision entry point: widens the input to double and delegates to
+// castDECIMAL_float64.
+FORCE_INLINE
+void castDECIMAL_float32(float in, int32_t x_precision, int32_t x_scale,
+                         int64_t* out_high, uint64_t* out_low) {
+  const double widened = in;
+  castDECIMAL_float64(widened, x_precision, x_scale, out_high, out_low);
+}
+
+// Converts the input decimal to (out_precision, out_scale) via
+// decimalops::Convert, writes the two halves of the result through
+// out_high / out_low, and returns the overflow flag set by Convert.
+// NOTE(review): out_low is int64_t* here while the other wrappers in this file
+// take uint64_t* -- confirm this matches the declaration callers rely on.
+FORCE_INLINE
+bool castDecimal_internal(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                          int32_t x_scale, int32_t out_precision, int32_t out_scale,
+                          int64_t* out_high, int64_t* out_low) {
+  gandiva::BasicDecimalScalar128 source({x_high, x_low}, x_precision, x_scale);
+  bool overflowed = false;
+  auto converted =
+      gandiva::decimalops::Convert(source, out_precision, out_scale, &overflowed);
+  *out_low = converted.low_bits();
+  *out_high = converted.high_bits();
+  return overflowed;
+}
+
+// Decimal-to-decimal cast: delegates to castDecimal_internal and ignores the
+// overflow indication it returns.
+FORCE_INLINE
+void castDECIMAL_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                            int32_t x_scale, int32_t out_precision, int32_t out_scale,
+                            int64_t* out_high, int64_t* out_low) {
+  const bool overflowed = castDecimal_internal(
+      x_high, x_low, x_precision, x_scale, out_precision, out_scale, out_high, out_low);
+  (void)overflowed;
+}
+
+// Decimal-to-decimal cast that reports validity through out_valid: the result
+// is marked invalid when the input is invalid (no conversion is attempted and
+// out_high / out_low are left untouched) or when castDecimal_internal reports
+// overflow.
+FORCE_INLINE
+void castDECIMALNullOnOverflow_decimal128(int64_t x_high, uint64_t x_low,
+                                          int32_t x_precision, int32_t x_scale,
+                                          bool x_isvalid, bool* out_valid,
+                                          int32_t out_precision, int32_t out_scale,
+                                          int64_t* out_high, int64_t* out_low) {
+  if (!x_isvalid) {
+    *out_valid = false;
+    return;
+  }
+
+  const bool overflowed = castDecimal_internal(
+      x_high, x_low, x_precision, x_scale, out_precision, out_scale, out_high, out_low);
+  *out_valid = !overflowed;
+}
+
+// 32-bit hash of the decimal's 16-byte representation with seed 0; returns 0
+// for an invalid input. Precision and scale do not take part in the hash.
+FORCE_INLINE
+int32_t hash32_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                          int32_t x_scale, gdv_boolean x_isvalid) {
+  if (!x_isvalid) {
+    return 0;
+  }
+  auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+  return hash32_buf(bytes.data(), 16, 0);
+}
+
+// Alias for hash32_decimal128: forwards every argument and returns its result.
+FORCE_INLINE
+int32_t hash_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                        int32_t x_scale, gdv_boolean x_isvalid) {
+  const int32_t digest =
+      hash32_decimal128(x_high, x_low, x_precision, x_scale, x_isvalid);
+  return digest;
+}
+
+// 64-bit hash of the decimal's 16-byte representation with seed 0; returns 0
+// for an invalid input. Precision and scale do not take part in the hash.
+FORCE_INLINE
+int64_t hash64_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                          int32_t x_scale, gdv_boolean x_isvalid) {
+  if (!x_isvalid) {
+    return 0;
+  }
+  auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+  return hash64_buf(bytes.data(), 16, 0);
+}
+
+// Seeded 32-bit hash of the decimal's 16-byte representation. An invalid
+// input returns the seed unchanged; seed_isvalid is not consulted.
+FORCE_INLINE
+int32_t hash32WithSeed_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                  int32_t x_scale, gdv_boolean x_isvalid, int32_t seed,
+                                  gdv_boolean seed_isvalid) {
+  if (x_isvalid) {
+    auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+    return hash32_buf(bytes.data(), 16, seed);
+  }
+  return seed;
+}
+
+// Seeded 64-bit hash of the decimal's 16-byte representation. An invalid
+// input returns the seed unchanged; seed_isvalid is not consulted.
+FORCE_INLINE
+int64_t hash64WithSeed_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                  int32_t x_scale, gdv_boolean x_isvalid, int64_t seed,
+                                  gdv_boolean seed_isvalid) {
+  if (x_isvalid) {
+    auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+    return hash64_buf(bytes.data(), 16, seed);
+  }
+  return seed;
+}
+
+// 32-bit hash with seed 0; returns 0 for an invalid input. Despite the
+// "AsDouble" name, the body hashes the decimal's 16-byte representation and
+// performs no conversion to double.
+FORCE_INLINE
+int32_t hash32AsDouble_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                  int32_t x_scale, gdv_boolean x_isvalid) {
+  if (!x_isvalid) {
+    return 0;
+  }
+  auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+  return hash32_buf(bytes.data(), 16, 0);
+}
+
+// 64-bit hash with seed 0; returns 0 for an invalid input. Despite the
+// "AsDouble" name, the body hashes the decimal's 16-byte representation and
+// performs no conversion to double.
+FORCE_INLINE
+int64_t hash64AsDouble_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                  int32_t x_scale, gdv_boolean x_isvalid) {
+  if (!x_isvalid) {
+    return 0;
+  }
+  auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+  return hash64_buf(bytes.data(), 16, 0);
+}
+
+// Seeded 32-bit hash; an invalid input returns the seed unchanged and
+// seed_isvalid is not consulted. Despite the "AsDouble" name, the body hashes
+// the decimal's 16-byte representation and performs no conversion to double.
+FORCE_INLINE
+int32_t hash32AsDoubleWithSeed_decimal128(int64_t x_high, uint64_t x_low,
+                                          int32_t x_precision, int32_t x_scale,
+                                          gdv_boolean x_isvalid, int32_t seed,
+                                          gdv_boolean seed_isvalid) {
+  if (x_isvalid) {
+    auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+    return hash32_buf(bytes.data(), 16, seed);
+  }
+  return seed;
+}
+
+// Seeded 64-bit hash; an invalid input returns the seed unchanged and
+// seed_isvalid is not consulted. Despite the "AsDouble" name, the body hashes
+// the decimal's 16-byte representation and performs no conversion to double.
+FORCE_INLINE
+int64_t hash64AsDoubleWithSeed_decimal128(int64_t x_high, uint64_t x_low,
+                                          int32_t x_precision, int32_t x_scale,
+                                          gdv_boolean x_isvalid, int64_t seed,
+                                          gdv_boolean seed_isvalid) {
+  if (x_isvalid) {
+    auto bytes = gandiva::BasicDecimal128(x_high, x_low).ToBytes();
+    return hash64_buf(bytes.data(), 16, seed);
+  }
+  return seed;
+}
+
+// Returns the negation of the validity flag; the decimal value itself is not
+// inspected.
+FORCE_INLINE
+gdv_boolean isnull_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                              int32_t x_scale, gdv_boolean x_isvalid) {
+  const gdv_boolean is_null = !x_isvalid;
+  return is_null;
+}
+
+// Returns the validity flag as-is; the decimal value itself is not inspected.
+FORCE_INLINE
+gdv_boolean isnotnull_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                 int32_t x_scale, gdv_boolean x_isvalid) {
+  const gdv_boolean is_not_null = x_isvalid;
+  return is_not_null;
+}
+
+// Returns the validity flag as-is: a decimal input is reported as numeric
+// exactly when it is valid.
+FORCE_INLINE
+gdv_boolean isnumeric_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision,
+                                 int32_t x_scale, gdv_boolean x_isvalid) {
+  const gdv_boolean is_numeric = x_isvalid;
+  return is_numeric;
+}
+
+// Null-aware equality: two invalid inputs are "not distinct", a valid and an
+// invalid input are distinct, and two valid inputs are compared with
+// compare_decimal128_decimal128_internal.
+FORCE_INLINE
+gdv_boolean is_not_distinct_from_decimal128_decimal128(
+    int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale,
+    gdv_boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision,
+    int32_t y_scale, gdv_boolean y_isvalid) {
+  const bool same_validity = (x_isvalid == y_isvalid);
+  if (!same_validity) {
+    return false;
+  }
+  if (x_isvalid) {
+    // Both sides are valid here, so the values decide the outcome.
+    const int32_t ordering = compare_decimal128_decimal128_internal(
+        x_high, x_low, x_precision, x_scale, y_high, y_low, y_precision, y_scale);
+    return ordering == 0;
+  }
+  return true;
+}
+
+// Logical negation of is_not_distinct_from_decimal128_decimal128 for the same
+// arguments.
+FORCE_INLINE
+gdv_boolean is_distinct_from_decimal128_decimal128(int64_t x_high, uint64_t x_low,
+                                                   int32_t x_precision, int32_t x_scale,
+                                                   gdv_boolean x_isvalid, int64_t y_high,
+                                                   uint64_t y_low, int32_t y_precision,
+                                                   int32_t y_scale,
+                                                   gdv_boolean y_isvalid) {
+  const gdv_boolean not_distinct = is_not_distinct_from_decimal128_decimal128(
+      x_high, x_low, x_precision, x_scale, x_isvalid, y_high, y_low, y_precision,
+      y_scale, y_isvalid);
+  return !not_distinct;
+}
+
+// Parses a decimal from the string (in, in_length) with gdv_fn_dec_from_string,
+// converts it to (out_precision, out_scale) via decimalops::Convert, and writes
+// the two halves of the result through out_high / out_low. The overflow flag
+// from Convert is discarded.
+//
+// If the parse helper returns a non-zero status, the outputs are set to zero
+// and no conversion is attempted. Previously they were left unwritten in that
+// case, so callers could read indeterminate values.
+FORCE_INLINE
+void castDECIMAL_utf8(int64_t context, const char* in, int32_t in_length,
+                      int32_t out_precision, int32_t out_scale, int64_t* out_high,
+                      uint64_t* out_low) {
+  // Give the outputs a defined value before anything can return early.
+  *out_high = 0;
+  *out_low = 0;
+
+  int64_t dec_high_from_str;
+  uint64_t dec_low_from_str;
+  int32_t precision_from_str;
+  int32_t scale_from_str;
+  int32_t status =
+      gdv_fn_dec_from_string(context, in, in_length, &precision_from_str, &scale_from_str,
+                             &dec_high_from_str, &dec_low_from_str);
+  if (status != 0) {
+    return;
+  }
+
+  gandiva::BasicDecimalScalar128 x({dec_high_from_str, dec_low_from_str},
+                                   precision_from_str, scale_from_str);
+  bool overflow = false;
+  auto out = gandiva::decimalops::Convert(x, out_precision, out_scale, &overflow);
+  *out_high = out.high_bits();
+  *out_low = out.low_bits();
+}
+
+// Renders the decimal as a string with gdv_fn_dec_to_string and returns that
+// buffer. *out_length is the smaller of out_len_param and the full string
+// length, so the caller sees at most out_len_param characters.
+// NOTE(review): a negative out_len_param is passed through and produces a
+// negative *out_length -- confirm callers never pass one.
+FORCE_INLINE
+char* castVARCHAR_decimal128_int64(int64_t context, int64_t x_high, uint64_t x_low,
+                                   int32_t x_precision, int32_t x_scale,
+                                   int64_t out_len_param, int32_t* out_length) {
+  int32_t rendered_len;
+  char* rendered = gdv_fn_dec_to_string(context, x_high, x_low, x_scale, &rendered_len);
+
+  int32_t visible_len;
+  if (out_len_param < rendered_len) {
+    visible_len = out_len_param;
+  } else {
+    visible_len = rendered_len;
+  }
+  *out_length = visible_len;
+  return rendered;
+}
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point.h b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point.h
new file mode 100644
index 000000000..45cfb28ca
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point.h
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+// TODO(wesm): IR compilation does not have any include directories set
+#include "../../arrow/vendored/datetime/date.h"
+
+// Calendar helpers used by EpochTimePoint. Only the declarations appear in
+// this header.
+bool is_leap_year(int yy);
+bool did_days_overflow(arrow_vendored::date::year_month_day ymd);
+// NOTE(review): the parameters are named (month, year) here, but
+// EpochTimePoint::AddMonths passes (year, month) -- confirm the order against
+// the definition.
+int last_possible_day_in_month(int month, int year);
+
+// A point in time stored as milliseconds since the epoch. The Tm* accessors
+// return calendar fields using struct tm conventions (year offset from 1900,
+// zero-based month and day-of-year); the unit test compares them against
+// gmtime output.
+class EpochTimePoint {
+ public:
+  explicit EpochTimePoint(std::chrono::milliseconds millis_since_epoch)
+      : tp_(millis_since_epoch) {}
+
+  explicit EpochTimePoint(int64_t millis_since_epoch)
+      : EpochTimePoint(std::chrono::milliseconds(millis_since_epoch)) {}
+
+  // Calendar year minus 1900.
+  int TmYear() const {
+    const int calendar_year = static_cast<int>(YearMonthDay().year());
+    return calendar_year - 1900;
+  }
+
+  // Zero-based month.
+  int TmMon() const {
+    const unsigned int calendar_month = static_cast<unsigned int>(YearMonthDay().month());
+    return calendar_month - 1;
+  }
+
+  // Days elapsed since January 1st of the same year.
+  int TmYday() const {
+    namespace date = arrow_vendored::date;
+    auto today = date::floor<date::days>(tp_);
+    auto jan_first = date::sys_days{YearMonthDay().year() / date::jan / 1};
+    return (today - jan_first).count();
+  }
+
+  // Day of the month.
+  int TmMday() const {
+    const unsigned int day_of_month = static_cast<unsigned int>(YearMonthDay().day());
+    return day_of_month;
+  }
+
+  // Days since Sunday.
+  int TmWday() const {
+    namespace date = arrow_vendored::date;
+    auto today = date::floor<date::days>(tp_);
+    return (date::weekday{today} - date::Sunday).count();  // NOLINT
+  }
+
+  int TmHour() const {
+    const auto hours = TimeOfDay().hours();
+    return static_cast<int>(hours.count());
+  }
+
+  int TmMin() const {
+    const auto minutes = TimeOfDay().minutes();
+    return static_cast<int>(minutes.count());
+  }
+
+  int TmSec() const {
+    // TODO(wesm): UNIX y2k issue on int=gdv_int32 platforms
+    const auto seconds = TimeOfDay().seconds();
+    return static_cast<int>(seconds.count());
+  }
+
+  // Shifts the calendar date by num_years, keeping the time of day.
+  EpochTimePoint AddYears(int num_years) const {
+    namespace date = arrow_vendored::date;
+    auto shifted = YearMonthDay() + date::years(num_years);
+    auto instant = date::sys_days{shifted} + TimeOfDay().to_duration();  // NOLINT
+    return EpochTimePoint(instant.time_since_epoch());
+  }
+
+  // Shifts the calendar date by num_months, keeping the time of day. When
+  // did_days_overflow() reports that the shifted date overflowed, the result
+  // is adjusted by the difference between last_possible_day_in_month() and the
+  // shifted day number.
+  EpochTimePoint AddMonths(int num_months) const {
+    namespace date = arrow_vendored::date;
+    auto ymd = YearMonthDay() + date::months(num_months);
+
+    EpochTimePoint shifted(
+        (date::sys_days{ymd} + TimeOfDay().to_duration()).time_since_epoch());  // NOLINT
+    if (!did_days_overflow(ymd)) {
+      return shifted;
+    }
+
+    int days_to_offset = last_possible_day_in_month(static_cast<int>(ymd.year()),
+                                                    static_cast<unsigned int>(ymd.month())) -
+                         static_cast<unsigned int>(ymd.day());
+    return shifted.AddDays(days_to_offset);
+  }
+
+  // Shifts the calendar date by num_days, keeping the time of day.
+  EpochTimePoint AddDays(int num_days) const {
+    namespace date = arrow_vendored::date;
+    auto target_day = date::sys_days{YearMonthDay()} + date::days(num_days);  // NOLINT
+    auto instant = target_day + TimeOfDay().to_duration();
+    return EpochTimePoint(instant.time_since_epoch());
+  }
+
+  // Returns the same date with the time-of-day portion removed.
+  EpochTimePoint ClearTimeOfDay() const {
+    auto midnight = tp_ - TimeOfDay().to_duration();
+    return EpochTimePoint(midnight.time_since_epoch());
+  }
+
+  bool operator==(const EpochTimePoint& other) const { return tp_ == other.tp_; }
+
+  int64_t MillisSinceEpoch() const {
+    const auto since_epoch = tp_.time_since_epoch();
+    return since_epoch.count();
+  }
+
+  // Milliseconds elapsed since the start of the current day.
+  arrow_vendored::date::time_of_day<std::chrono::milliseconds> TimeOfDay() const {
+    namespace date = arrow_vendored::date;
+    auto start_of_day = date::floor<date::days>(tp_);
+    return date::time_of_day<std::chrono::milliseconds>(tp_ - start_of_day);
+  }
+
+ private:
+  // Calendar date containing tp_.
+  arrow_vendored::date::year_month_day YearMonthDay() const {
+    namespace date = arrow_vendored::date;
+    return date::year_month_day{date::floor<date::days>(tp_)};  // NOLINT
+  }
+
+  std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds> tp_;
+};
diff --git a/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point_test.cc b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point_test.cc
new file mode 100644
index 000000000..9180aac07
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point_test.cc
@@ -0,0 +1,103 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <ctime>
+
+#include <gtest/gtest.h>
+#include "./epoch_time_point.h"
+#include "gandiva/precompiled/testing.h"
+#include "gandiva/precompiled/types.h"
+
+#include "gandiva/date_utils.h"
+
+namespace gandiva {
+
+// Compares every Tm* accessor with the broken-down time the C library
+// produces for the same instant.
+TEST(TestEpochTimePoint, TestTm) {
+  auto millis = StringToTimestamp("2015-05-07 10:20:34");
+  EpochTimePoint point(millis);
+
+  struct tm* reference;
+#if defined(_WIN32)
+  __time64_t seconds = millis / 1000;
+  reference = _gmtime64(&seconds);
+#else
+  struct tm storage;
+  time_t seconds = millis / 1000;
+  reference = gmtime_r(&seconds, &storage);
+#endif
+
+  EXPECT_EQ(point.TmYear(), reference->tm_year);
+  EXPECT_EQ(point.TmMon(), reference->tm_mon);
+  EXPECT_EQ(point.TmYday(), reference->tm_yday);
+  EXPECT_EQ(point.TmMday(), reference->tm_mday);
+  EXPECT_EQ(point.TmWday(), reference->tm_wday);
+  EXPECT_EQ(point.TmHour(), reference->tm_hour);
+  EXPECT_EQ(point.TmMin(), reference->tm_min);
+  EXPECT_EQ(point.TmSec(), reference->tm_sec);
+}
+
+// AddYears with a positive, zero and negative offset.
+TEST(TestEpochTimePoint, TestAddYears) {
+  auto at = [](const char* text) { return EpochTimePoint(StringToTimestamp(text)); };
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddYears(2), at("2017-05-05 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddYears(0), at("2015-05-05 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddYears(-1), at("2014-05-05 10:20:34"));
+}
+
+// AddMonths with positive, zero and negative offsets, including ones that
+// cross a year boundary.
+TEST(TestEpochTimePoint, TestAddMonths) {
+  auto at = [](const char* text) { return EpochTimePoint(StringToTimestamp(text)); };
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddMonths(2), at("2015-07-05 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddMonths(11), at("2016-04-05 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddMonths(0), at("2015-05-05 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddMonths(-1), at("2015-04-05 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddMonths(-10), at("2014-07-05 10:20:34"));
+}
+
+// AddDays with positive, zero and negative offsets, including one that
+// crosses a month boundary.
+TEST(TestEpochTimePoint, TestAddDays) {
+  auto at = [](const char* text) { return EpochTimePoint(StringToTimestamp(text)); };
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddDays(2), at("2015-05-07 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddDays(11), at("2015-05-16 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddDays(0), at("2015-05-05 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddDays(-1), at("2015-05-04 10:20:34"));
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").AddDays(-10), at("2015-04-25 10:20:34"));
+}
+
+// ClearTimeOfDay must yield midnight of the same calendar day.
+TEST(TestEpochTimePoint, TestClearTimeOfDay) {
+  auto at = [](const char* text) { return EpochTimePoint(StringToTimestamp(text)); };
+
+  EXPECT_EQ(at("2015-05-05 10:20:34").ClearTimeOfDay(), at("2015-05-05 00:00:00"));
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops.cc
new file mode 100644
index 000000000..365b08a6d
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops.cc
@@ -0,0 +1,410 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#include "arrow/util/logging.h"
+#include "gandiva/precompiled/decimal_ops.h"
+
+extern "C" {
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./types.h"
+
+// Expand the inner fn for types that support extended math.
+//
+// INNER is a two-argument macro taking (IN_TYPE, OUT_TYPE). It is expanded once
+// for each of int32, uint32, int64, uint64, float32 and float64, always with the
+// same OUT_TYPE.
+#define ENUMERIC_TYPES_UNARY(INNER, OUT_TYPE) \
+ INNER(int32, OUT_TYPE) \
+ INNER(uint32, OUT_TYPE) \
+ INNER(int64, OUT_TYPE) \
+ INNER(uint64, OUT_TYPE) \
+ INNER(float32, OUT_TYPE) \
+ INNER(float64, OUT_TYPE)
+
+// Cube root: the input is widened to long double, passed to cbrtl() and the
+// result is narrowed to gdv_float64.
+#define CBRT(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE cbrt_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double widened = static_cast<long double>(in); \
+    return static_cast<gdv_float64>(cbrtl(widened)); \
+  }
+
+ENUMERIC_TYPES_UNARY(CBRT, float64)
+
+// e raised to the input: the input is widened to long double, passed to expl()
+// and the result is narrowed to gdv_float64.
+#define EXP(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE exp_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double widened = static_cast<long double>(in); \
+    return static_cast<gdv_float64>(expl(widened)); \
+  }
+
+ENUMERIC_TYPES_UNARY(EXP, float64)
+
+// Natural logarithm: the input is widened to long double, passed to logl() and
+// the result is narrowed to gdv_float64.
+#define LOG(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE log_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double widened = static_cast<long double>(in); \
+    return static_cast<gdv_float64>(logl(widened)); \
+  }
+
+ENUMERIC_TYPES_UNARY(LOG, float64)
+
+// Base-10 logarithm: the input is widened to long double, passed to log10l()
+// and the result is narrowed to gdv_float64.
+#define LOG10(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE log10_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double widened = static_cast<long double>(in); \
+    return static_cast<gdv_float64>(log10l(widened)); \
+  }
+
+// Natural logarithm of VALUE via logl(), narrowed to gdv_float64.
+#define LOGL(VALUE) static_cast<gdv_float64>(logl(static_cast<long double>(VALUE)))
+
+ENUMERIC_TYPES_UNARY(LOG10, float64)
+
+FORCE_INLINE
+void set_error_for_logbase(int64_t execution_context, double base) {
+ char const* prefix = "divide by zero error with log of base";
+ int size = static_cast<int>(strlen(prefix)) + 64;
+ char* error = reinterpret_cast<char*>(malloc(size));
+ snprintf(error, size, "%s %f", prefix, base);
+ gdv_fn_context_set_error_msg(execution_context, error);
+ free(static_cast<char*>(error));
+}
+
+// log with base: computes ln(value) / ln(base).
+//
+// When ln(base) is zero the quotient is undefined: an error is recorded on the
+// execution context via set_error_for_logbase() and 0 is returned. ln(base) is
+// evaluated once and reused for both the zero check and the division.
+#define LOG_WITH_BASE(IN_TYPE1, IN_TYPE2, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE log_##IN_TYPE1##_##IN_TYPE2(gdv_int64 context, gdv_##IN_TYPE1 base, \
+                                             gdv_##IN_TYPE2 value) { \
+    const gdv_##OUT_TYPE log_of_base = LOGL(base); \
+    if (log_of_base == 0) { \
+      set_error_for_logbase(context, static_cast<gdv_float64>(base)); \
+      return 0; \
+    } \
+    return LOGL(value) / log_of_base; \
+  }
+
+LOG_WITH_BASE(int32, int32, float64)
+LOG_WITH_BASE(uint32, uint32, float64)
+LOG_WITH_BASE(int64, int64, float64)
+LOG_WITH_BASE(uint64, uint64, float64)
+LOG_WITH_BASE(float32, float32, float64)
+LOG_WITH_BASE(float64, float64, float64)
+
+// Sine; the argument is widened to long double before the call.
+#define SIN(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE sin_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(sin(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(SIN, float64)
+
+// Arc sine; the argument is widened to long double before the call.
+#define ASIN(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE asin_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(asin(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(ASIN, float64)
+
+// Cosine; the argument is widened to long double before the call.
+#define COS(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE cos_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(cos(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(COS, float64)
+
+// Arc cosine; the argument is widened to long double before the call.
+#define ACOS(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE acos_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(acos(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(ACOS, float64)
+
+// Tangent; the argument is widened to long double before the call.
+#define TAN(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE tan_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(tan(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(TAN, float64)
+
+// Arc tangent; the argument is widened to long double before the call.
+#define ATAN(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE atan_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(atan(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(ATAN, float64)
+
+// Hyperbolic sine; the argument is widened to long double before the call.
+#define SINH(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE sinh_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(sinh(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(SINH, float64)
+
+// Hyperbolic cosine; the argument is widened to long double before the call.
+#define COSH(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE cosh_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(cosh(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(COSH, float64)
+
+// Hyperbolic tangent; the argument is widened to long double before the call.
+#define TANH(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE tanh_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double arg = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(tanh(arg)); \
+  }
+ENUMERIC_TYPES_UNARY(TANH, float64)
+
+// Two-argument arc tangent, called as atan2(in1, in2); both arguments are
+// widened to long double before the call.
+#define ATAN2(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE atan2_##IN_TYPE##_##IN_TYPE(gdv_##IN_TYPE in1, gdv_##IN_TYPE in2) { \
+    const long double first = static_cast<long double>(in1); \
+    const long double second = static_cast<long double>(in2); \
+    return static_cast<gdv_##OUT_TYPE>(atan2(first, second)); \
+  }
+ENUMERIC_TYPES_UNARY(ATAN2, float64)
+
+// Cotangent, evaluated as tan(pi/2 - in).
+#define COT(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE cot_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double complement = M_PI / 2 - static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(tan(complement)); \
+  }
+ENUMERIC_TYPES_UNARY(COT, float64)
+
+// Degrees to radians: in * pi / 180.
+#define RADIANS(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE radians_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double degrees = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(degrees * M_PI / 180.0); \
+  }
+ENUMERIC_TYPES_UNARY(RADIANS, float64)
+
+// Radians to degrees: in * 180 / pi.
+#define DEGREES(IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE degrees_##IN_TYPE(gdv_##IN_TYPE in) { \
+    const long double radians = static_cast<long double>(in); \
+    return static_cast<gdv_##OUT_TYPE>(radians * 180.0 / M_PI); \
+  }
+ENUMERIC_TYPES_UNARY(DEGREES, float64)
+
+// in1 raised to the power in2, computed with powl() and narrowed to
+// gdv_float64.
+#define POWER(IN_TYPE1, IN_TYPE2, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE power_##IN_TYPE1##_##IN_TYPE2(gdv_##IN_TYPE1 in1, gdv_##IN_TYPE2 in2) { \
+    const long double base = in1; \
+    const long double exponent = in2; \
+    return static_cast<gdv_float64>(powl(base, exponent)); \
+  }
+POWER(float64, float64, float64)
+
+// Rounding a 32-bit integer to the nearest integer is the identity.
+FORCE_INLINE
+gdv_int32 round_int32(gdv_int32 num) {
+  return num;
+}
+
+// Rounding a 64-bit integer to the nearest integer is the identity.
+FORCE_INLINE
+gdv_int64 round_int64(gdv_int64 num) {
+  return num;
+}
+
+// rounds the number to the nearest integer, with halfway cases rounded away
+// from zero.
+//
+// Uses round() instead of trunc(num +/- 0.5): adding 0.5 is itself subject to
+// floating-point rounding, so e.g. 0.49999999999999994 + 0.5 evaluates to 1.0
+// and the old form returned 1 instead of 0. It also shifted large odd
+// integers (>= 2^52) that have no fractional part.
+#define ROUND_DECIMAL(TYPE) \
+  FORCE_INLINE \
+  gdv_##TYPE round_##TYPE(gdv_##TYPE num) { \
+    return static_cast<gdv_##TYPE>(round(static_cast<gdv_float64>(num))); \
+  }
+
+ROUND_DECIMAL(float32)
+ROUND_DECIMAL(float64)
+
+// rounds the number to the given scale: the value is multiplied by
+// get_scale_multiplier(out_scale), rounded to the nearest integer (halfway
+// cases away from zero) and divided by the multiplier again.
+//
+// Uses round() instead of trunc(x +/- 0.5), because the addition of 0.5 can
+// itself round up (e.g. 0.49999999999999994 + 0.5 == 1.0) and produce a result
+// one unit too large.
+#define ROUND_DECIMAL_TO_SCALE(TYPE) \
+  FORCE_INLINE \
+  gdv_##TYPE round_##TYPE##_int32(gdv_##TYPE number, gdv_int32 out_scale) { \
+    const gdv_float64 scale_multiplier = get_scale_multiplier(out_scale); \
+    const gdv_float64 scaled = number * scale_multiplier; \
+    return static_cast<gdv_##TYPE>(round(scaled) / scale_multiplier); \
+  }
+
+ROUND_DECIMAL_TO_SCALE(float32)
+ROUND_DECIMAL_TO_SCALE(float64)
+
+// Rounds a 32-bit integer to a multiple of 10^(-precision), with halfway cases
+// rounded away from zero.
+//
+// precision >= 0 : nothing follows the decimal point, the number is returned
+//                  unchanged.
+// precision < -9 : 10^10 and above exceed every int32 magnitude by more than a
+//                  factor of two, so the rounded value is always 0.
+//
+// The arithmetic is carried out in 64 bits so that neither |INT32_MIN| nor the
+// round-up step can overflow a signed 32-bit integer (undefined behaviour).
+// A rounded value outside the int32 range (e.g. INT32_MAX with precision -1)
+// is narrowed by the final cast.
+FORCE_INLINE
+gdv_int32 round_int32_int32(gdv_int32 number, gdv_int32 precision) {
+  if (precision >= 0) {
+    return number;
+  }
+  // Compare before negating: -precision would overflow for INT32_MIN.
+  if (precision < -9) {
+    return 0;
+  }
+  const gdv_int64 power_of_10 = get_power_of_10(-precision);
+  const gdv_int64 num_sign = (number > 0) ? 1 : -1;
+  gdv_int64 abs_number = static_cast<gdv_int64>(number) * num_sign;
+  const gdv_int64 remainder = abs_number % power_of_10;
+  abs_number -= remainder;
+  // if the fractional part of the quotient >= 0.5, round to next higher integer
+  if (remainder >= power_of_10 / 2) {
+    abs_number += power_of_10;
+  }
+  return static_cast<gdv_int32>(abs_number * num_sign);
+}
+
+// Rounds a 64-bit integer to a multiple of 10^(-precision), with halfway cases
+// rounded away from zero.
+//
+// precision >= 0  : nothing follows the decimal point, the number is returned
+//                   unchanged.
+// precision < -18 : 10^19 does not fit into a signed 64-bit integer; 0 is
+//                   returned.
+//
+// The magnitude is handled as an unsigned 64-bit value so that neither
+// |INT64_MIN| nor the round-up step can overflow a signed integer (undefined
+// behaviour). The largest intermediate, 2^63 + 10^18, still fits in 64
+// unsigned bits. A rounded value outside the int64 range is narrowed by the
+// final cast.
+FORCE_INLINE
+gdv_int64 round_int64_int32(gdv_int64 number, gdv_int32 precision) {
+  if (precision >= 0) {
+    return number;
+  }
+  // Compare before negating: -precision would overflow for INT32_MIN.
+  if (precision < -18) {
+    return 0;
+  }
+  const gdv_uint64 power_of_10 = static_cast<gdv_uint64>(get_power_of_10(-precision));
+  const bool negative = number < 0;
+  // Unsigned negation is well defined, including for INT64_MIN.
+  gdv_uint64 abs_number = static_cast<gdv_uint64>(number);
+  if (negative) {
+    abs_number = 0 - abs_number;
+  }
+  const gdv_uint64 remainder = abs_number % power_of_10;
+  abs_number -= remainder;
+  // if the fractional part of the quotient >= 0.5, round to next higher integer
+  if (remainder >= power_of_10 / 2) {
+    abs_number += power_of_10;
+  }
+  return static_cast<gdv_int64>(negative ? (0 - abs_number) : abs_number);
+}
+
+// Returns 10^exp from a lookup table. exp must be within [0, 18]; this is
+// checked with DCHECKs only.
+FORCE_INLINE
+gdv_int64 get_power_of_10(gdv_int32 exp) {
+  // kPowers[i] == 10^i
+  static const gdv_int64 kPowers[] = {
+      1,                    // 10^0
+      10,                   // 10^1
+      100,                  // 10^2
+      1000,                 // 10^3
+      10000,                // 10^4
+      100000,               // 10^5
+      1000000,              // 10^6
+      10000000,             // 10^7
+      100000000,            // 10^8
+      1000000000,           // 10^9
+      10000000000,          // 10^10
+      100000000000,         // 10^11
+      1000000000000,        // 10^12
+      10000000000000,       // 10^13
+      100000000000000,      // 10^14
+      1000000000000000,     // 10^15
+      10000000000000000,    // 10^16
+      100000000000000000,   // 10^17
+      1000000000000000000,  // 10^18
+  };
+  DCHECK_GE(exp, 0);
+  DCHECK_LE(exp, 18);
+  return kPowers[exp];
+}
+
+// Truncates a 64-bit integer to `out_scale` by routing it through the decimal
+// helpers: the input becomes a decimal with precision 38 and scale 0, is passed
+// to decimalops::Truncate with `out_scale`, and is converted back with
+// decimalops::ToInt64.
+//
+// NOTE(review): `overflow` is written by the helpers but never inspected here.
+// NOTE(review): presumably a negative out_scale zeroes digits to the left of
+// the decimal point (the unit tests expect truncate(-1234, -2) == -1200) --
+// confirm against decimalops::Truncate.
+FORCE_INLINE
+gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale) {
+ bool overflow = false;
+ arrow::BasicDecimal128 decimal = gandiva::decimalops::FromInt64(in, 38, 0, &overflow);
+ arrow::BasicDecimal128 decimal_with_outscale =
+ gandiva::decimalops::Truncate(gandiva::BasicDecimalScalar128(decimal, 38, 0), 38,
+ out_scale, out_scale, &overflow);
+ // A negative out_scale is replaced by 0 for the conversion back to int64.
+ if (out_scale < 0) {
+ out_scale = 0;
+ }
+ return gandiva::decimalops::ToInt64(
+ gandiva::BasicDecimalScalar128(decimal_with_outscale, 38, out_scale), &overflow);
+}
+
+// Returns 10^scale as a double. Scales 0..19 come from a lookup table; every
+// other scale (including negative ones) is computed with
+// power_float64_float64().
+FORCE_INLINE
+gdv_float64 get_scale_multiplier(gdv_int32 scale) {
+  // kMultipliers[i] == 10^i
+  static const gdv_float64 kMultipliers[] = {
+      1.0,                     // 10^0
+      10.0,                    // 10^1
+      100.0,                   // 10^2
+      1000.0,                  // 10^3
+      10000.0,                 // 10^4
+      100000.0,                // 10^5
+      1000000.0,               // 10^6
+      10000000.0,              // 10^7
+      100000000.0,             // 10^8
+      1000000000.0,            // 10^9
+      10000000000.0,           // 10^10
+      100000000000.0,          // 10^11
+      1000000000000.0,         // 10^12
+      10000000000000.0,        // 10^13
+      100000000000000.0,       // 10^14
+      1000000000000000.0,      // 10^15
+      10000000000000000.0,     // 10^16
+      100000000000000000.0,    // 10^17
+      1000000000000000000.0,   // 10^18
+      10000000000000000000.0,  // 10^19
+  };
+  if (scale < 0 || scale >= 20) {
+    return power_float64_float64(10.0, scale);
+  }
+  return kMultipliers[scale];
+}
+
+// returns the binary representation of a given integer (e.g. 928 -> 1110100000)
+//
+// Generates bin_<IN_TYPE>(context, value, out_len):
+//  - The output buffer of 8 * sizeof(value) bytes is obtained from
+//    gdv_fn_context_arena_malloc(). No terminating NUL is written; the number
+//    of characters produced is reported through *out_len.
+//  - If the allocation fails, an error message is set on the context, *out_len
+//    stays 0 and "" is returned.
+//  - A value of 0 yields the single character '0'.
+//  - Otherwise the bits are scanned from the most significant one downwards and
+//    zeros preceding the first set bit are skipped. A negative value has its
+//    top bit set, so all 8 * sizeof(value) bits are emitted for it.
+//  - `first` becomes true once the first set bit has been seen.
+#define BIN_INTEGER(IN_TYPE) \
+ FORCE_INLINE \
+ const char* bin_##IN_TYPE(int64_t context, gdv_##IN_TYPE value, int32_t* out_len) { \
+ *out_len = 0; \
+ int32_t len = 8 * sizeof(value); \
+ char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len)); \
+ if (ret == nullptr) { \
+ gdv_fn_context_set_error_msg(context, "Could not allocate memory for output"); \
+ return ""; \
+ } \
+ /* handle case when value is zero */ \
+ if (value == 0) { \
+ *out_len = 1; \
+ ret[0] = '0'; \
+ return ret; \
+ } \
+ /* generate binary representation iteratively */ \
+ gdv_u##IN_TYPE i; \
+ int8_t count = 0; \
+ bool first = false; /* flag for not printing left zeros in positive numbers */ \
+ for (i = static_cast<gdv_u##IN_TYPE>(1) << (len - 1); i > 0; i = i / 2) { \
+ if ((value & i) != 0) { \
+ ret[count] = '1'; \
+ if (!first) first = true; \
+ } else { \
+ if (!first) continue; \
+ ret[count] = '0'; \
+ } \
+ count += 1; \
+ } \
+ *out_len = count; \
+ return ret; \
+ }
+
+BIN_INTEGER(int32)
+BIN_INTEGER(int64)
+
+#undef BIN_INTEGER
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops_test.cc
new file mode 100644
index 000000000..147b4035c
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops_test.cc
@@ -0,0 +1,349 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#include <gtest/gtest.h>
+#include <cmath>
+#include "gandiva/execution_context.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Default tolerance used by VerifyFuzzyEquals.
+static const double MAX_ERROR = 0.00005;
+
+// Expects |actual - expected| to be strictly below max_error; on failure both
+// values are printed.
+void VerifyFuzzyEquals(double actual, double expected, double max_error = MAX_ERROR) {
+  const double distance = fabs(actual - expected);
+  const bool close_enough = distance < max_error;
+  EXPECT_TRUE(close_enough) << actual << " != " << expected;
+}
+
+TEST(TestExtendedMathOps, TestCbrt) {
+  // perfect cube, every tested input type
+  const double three = 3;
+  VerifyFuzzyEquals(cbrt_int32(27), three);
+  VerifyFuzzyEquals(cbrt_int64(27), three);
+  VerifyFuzzyEquals(cbrt_float32(27), three);
+  VerifyFuzzyEquals(cbrt_float64(27), three);
+  // negative input
+  VerifyFuzzyEquals(cbrt_float64(-27), -three);
+
+  // fractional root
+  const double two_and_a_half = 2.5;
+  VerifyFuzzyEquals(cbrt_float32(15.625), two_and_a_half);
+  VerifyFuzzyEquals(cbrt_float64(15.625), two_and_a_half);
+}
+
+TEST(TestExtendedMathOps, TestExp) {
+  // e^3
+  const double e_cubed = 20.085536923187668;
+
+  VerifyFuzzyEquals(exp_int32(3), e_cubed);
+  VerifyFuzzyEquals(exp_int64(3), e_cubed);
+  VerifyFuzzyEquals(exp_float32(3), e_cubed);
+  VerifyFuzzyEquals(exp_float64(3), e_cubed);
+}
+
+TEST(TestExtendedMathOps, TestLog) {
+  // ln(64)
+  const double ln_64 = 4.1588830833596715;
+
+  VerifyFuzzyEquals(log_int32(64), ln_64);
+  VerifyFuzzyEquals(log_int64(64), ln_64);
+  VerifyFuzzyEquals(log_float32(64), ln_64);
+  VerifyFuzzyEquals(log_float64(64), ln_64);
+
+  // ln(0) is negative infinity
+  const double negative_infinity = -std::numeric_limits<double>::infinity();
+  EXPECT_EQ(log_int32(0), negative_infinity);
+}
+
+TEST(TestExtendedMathOps, TestLog10) {
+  // log10(100) for every tested input type
+  const double two = 2;
+  VerifyFuzzyEquals(log10_int32(100), two);
+  VerifyFuzzyEquals(log10_int64(100), two);
+  VerifyFuzzyEquals(log10_float32(100), two);
+  VerifyFuzzyEquals(log10_float64(100), two);
+}
+
+TEST(TestExtendedMathOps, TestPower) {
+  // fractional exponent
+  const gdv_float64 two_to_5_4 = power_float64_float64(2, 5.4);
+  VerifyFuzzyEquals(two_to_5_4, 42.22425314473263);
+
+  // fractional base
+  const gdv_float64 squared = power_float64_float64(5.4, 2);
+  VerifyFuzzyEquals(squared, 29.160000000000004);
+}
+
+TEST(TestExtendedMathOps, TestLogWithBase) {
+  // Base 1: ln(base) is 0, so an error is reported and 0 is returned.
+  gandiva::ExecutionContext context;
+  gdv_float64 out =
+      log_int32_int32(reinterpret_cast<gdv_int64>(&context), 1 /*base*/, 10 /*value*/);
+  VerifyFuzzyEquals(out, 0);
+  EXPECT_EQ(context.has_error(), true);
+  EXPECT_TRUE(context.get_error().find("divide by zero error") != std::string::npos)
+      << context.get_error();
+
+  // Valid base: the call must run against the fresh context, otherwise the
+  // has_error() check below cannot detect a spurious error.
+  gandiva::ExecutionContext context1;
+  out = log_int32_int32(reinterpret_cast<gdv_int64>(&context1), 2 /*base*/, 64 /*value*/);
+  VerifyFuzzyEquals(out, 6);
+  EXPECT_EQ(context1.has_error(), false);
+}
+
+TEST(TestExtendedMathOps, TestRoundDecimal) {
+  // float32, rounding to the nearest integer
+  EXPECT_FLOAT_EQ(round_float32(1234.245f), 1234);
+  EXPECT_FLOAT_EQ(round_float32(-11.7892f), -12);
+  EXPECT_FLOAT_EQ(round_float32(1.4999999f), 1);
+  EXPECT_EQ(std::signbit(round_float32(0)), 0);
+
+  // float32, rounding to a given scale
+  EXPECT_FLOAT_EQ(round_float32_int32(1234.789f, 2), 1234.79f);
+  EXPECT_FLOAT_EQ(round_float32_int32(1234.12345f, -3), 1000);
+  EXPECT_FLOAT_EQ(round_float32_int32(-1234.4567f, 3), -1234.457f);
+  EXPECT_FLOAT_EQ(round_float32_int32(-1234.4567f, -3), -1000);
+  EXPECT_FLOAT_EQ(round_float32_int32(1234.4567f, 0), 1234);
+  EXPECT_FLOAT_EQ(round_float32_int32(1.5499999523162842f, 1), 1.5f);
+  EXPECT_EQ(std::signbit(round_float32_int32(0, 5)), 0);
+  EXPECT_FLOAT_EQ(round_float32_int32(static_cast<float>(1.55), 1), 1.5f);
+  EXPECT_FLOAT_EQ(round_float32_int32(static_cast<float>(9.134123), 2), 9.13f);
+  EXPECT_FLOAT_EQ(round_float32_int32(static_cast<float>(-1.923), 1), -1.9f);
+
+  // float64, rounding to the nearest integer
+  VerifyFuzzyEquals(round_float64(1234.245), 1234);
+  VerifyFuzzyEquals(round_float64(-11.7892), -12);
+  VerifyFuzzyEquals(round_float64(1.4999999), 1);
+  EXPECT_EQ(std::signbit(round_float64(0)), 0);
+
+  // float64, rounding to a given scale
+  VerifyFuzzyEquals(round_float64_int32(1234.789, 2), 1234.79);
+  VerifyFuzzyEquals(round_float64_int32(1234.12345, -3), 1000);
+  VerifyFuzzyEquals(round_float64_int32(-1234.4567, 3), -1234.457);
+  VerifyFuzzyEquals(round_float64_int32(-1234.4567, -3), -1000);
+  VerifyFuzzyEquals(round_float64_int32(1234.4567, 0), 1234);
+  EXPECT_EQ(std::signbit(round_float64_int32(0, -2)), 0);
+
+  // values just outside the int range must pass through unchanged
+  const double above_int_max = static_cast<double>(INT_MAX) + 1;
+  const double below_int_min = static_cast<double>(INT_MIN) - 1;
+  VerifyFuzzyEquals(round_float64_int32(above_int_max, 0), above_int_max);
+  VerifyFuzzyEquals(round_float64_int32(below_int_min, 0), below_int_min);
+}
+
+TEST(TestExtendedMathOps, TestRound) {
+  // int32: rounding without a precision is the identity
+  EXPECT_EQ(round_int32(21134), 21134);
+  EXPECT_EQ(round_int32(-132422), -132422);
+
+  // int32: negative precision rounds to tens, hundreds, ...
+  EXPECT_EQ(round_int32_int32(7589, -1), 7590);
+  EXPECT_EQ(round_int32_int32(8532, -2), 8500);
+  EXPECT_EQ(round_int32_int32(-8579, -1), -8580);
+  EXPECT_EQ(round_int32_int32(-8612, -2), -8600);
+  // int32: non-negative precision leaves the value untouched
+  EXPECT_EQ(round_int32_int32(758, 2), 758);
+  // int32: precision wider than the number rounds to zero
+  EXPECT_EQ(round_int32_int32(8612, -5), 0);
+
+  // int64: rounding without a precision is the identity
+  EXPECT_EQ(round_int64(3453562312), 3453562312);
+  EXPECT_EQ(round_int64(-23453462343), -23453462343);
+
+  // int64: negative, non-negative and over-wide precisions
+  EXPECT_EQ(round_int64_int32(3453562312, -2), 3453562300);
+  EXPECT_EQ(round_int64_int32(3453562343, -5), 3453600000);
+  EXPECT_EQ(round_int64_int32(345353425343, 12), 345353425343);
+  EXPECT_EQ(round_int64_int32(-23453462343, -4), -23453460000);
+  EXPECT_EQ(round_int64_int32(-23453462343, -5), -23453500000);
+  EXPECT_EQ(round_int64_int32(345353425343, -12), 0);
+}
+
+TEST(TestExtendedMathOps, TestTruncate) {
+  // positive scale: integers are returned unchanged
+  EXPECT_EQ(truncate_int64_int32(1234, 4), 1234);
+  EXPECT_EQ(truncate_int64_int32(-1234, 4), -1234);
+
+  // negative scale: low-order digits are zeroed
+  EXPECT_EQ(truncate_int64_int32(1234, -4), 0);
+  EXPECT_EQ(truncate_int64_int32(-1234, -2), -1200);
+
+  // large magnitudes
+  const gdv_int64 big = 8124674407369523212;
+  EXPECT_EQ(truncate_int64_int32(big, 0), 8124674407369523212);
+  EXPECT_EQ(truncate_int64_int32(big, -2), 8124674407369523200);
+}
+
+// Compares every trigonometric wrapper against the corresponding <cmath>
+// function (or a closed-form expected value) within the default tolerance.
+TEST(TestExtendedMathOps, TestTrigonometricFunctions) {
+  auto pi_float = static_cast<float>(M_PI);
+  // Sin functions
+  VerifyFuzzyEquals(sin_float32(0), sin(0));
+  VerifyFuzzyEquals(sin_float32(pi_float / 2), sin(M_PI / 2));
+  VerifyFuzzyEquals(sin_float32(pi_float), sin(M_PI));
+  VerifyFuzzyEquals(sin_float32(-pi_float / 2), sin(-M_PI / 2));
+  VerifyFuzzyEquals(sin_float64(0), sin(0));
+  VerifyFuzzyEquals(sin_float64(M_PI / 2), sin(M_PI / 2));
+  VerifyFuzzyEquals(sin_float64(M_PI), sin(M_PI));
+  VerifyFuzzyEquals(sin_float64(-M_PI / 2), sin(-M_PI / 2));
+  VerifyFuzzyEquals(sin_int32(0), sin(0));
+  VerifyFuzzyEquals(sin_int64(0), sin(0));
+
+  // Cos functions
+  VerifyFuzzyEquals(cos_float32(0), cos(0));
+  VerifyFuzzyEquals(cos_float32(pi_float / 2), cos(M_PI / 2));
+  VerifyFuzzyEquals(cos_float32(pi_float), cos(M_PI));
+  VerifyFuzzyEquals(cos_float32(-pi_float / 2), cos(-M_PI / 2));
+  VerifyFuzzyEquals(cos_float64(0), cos(0));
+  VerifyFuzzyEquals(cos_float64(M_PI / 2), cos(M_PI / 2));
+  VerifyFuzzyEquals(cos_float64(M_PI), cos(M_PI));
+  VerifyFuzzyEquals(cos_float64(-M_PI / 2), cos(-M_PI / 2));
+  VerifyFuzzyEquals(cos_int32(0), cos(0));
+  VerifyFuzzyEquals(cos_int64(0), cos(0));
+
+  // Asin functions
+  VerifyFuzzyEquals(asin_float32(-1.0), asin(-1.0));
+  VerifyFuzzyEquals(asin_float32(1.0), asin(1.0));
+  VerifyFuzzyEquals(asin_float64(-1.0), asin(-1.0));
+  VerifyFuzzyEquals(asin_float64(1.0), asin(1.0));
+  VerifyFuzzyEquals(asin_int32(0), asin(0));
+  VerifyFuzzyEquals(asin_int64(0), asin(0));
+
+  // Acos functions
+  VerifyFuzzyEquals(acos_float32(-1.0), acos(-1.0));
+  VerifyFuzzyEquals(acos_float32(1.0), acos(1.0));
+  VerifyFuzzyEquals(acos_float64(-1.0), acos(-1.0));
+  VerifyFuzzyEquals(acos_float64(1.0), acos(1.0));
+  VerifyFuzzyEquals(acos_int32(0), acos(0));
+  VerifyFuzzyEquals(acos_int64(0), acos(0));
+
+  // Tan
+  VerifyFuzzyEquals(tan_float32(pi_float), tan(M_PI));
+  VerifyFuzzyEquals(tan_float32(-pi_float), tan(-M_PI));
+  VerifyFuzzyEquals(tan_float64(M_PI), tan(M_PI));
+  VerifyFuzzyEquals(tan_float64(-M_PI), tan(-M_PI));
+  VerifyFuzzyEquals(tan_int32(0), tan(0));
+  VerifyFuzzyEquals(tan_int64(0), tan(0));
+
+  // Atan
+  VerifyFuzzyEquals(atan_float32(pi_float), atan(M_PI));
+  VerifyFuzzyEquals(atan_float32(-pi_float), atan(-M_PI));
+  VerifyFuzzyEquals(atan_float64(M_PI), atan(M_PI));
+  VerifyFuzzyEquals(atan_float64(-M_PI), atan(-M_PI));
+  VerifyFuzzyEquals(atan_int32(0), atan(0));
+  VerifyFuzzyEquals(atan_int64(0), atan(0));
+
+  // Sinh functions
+  VerifyFuzzyEquals(sinh_float32(0), sinh(0));
+  VerifyFuzzyEquals(sinh_float32(pi_float / 2), sinh(M_PI / 2));
+  VerifyFuzzyEquals(sinh_float32(pi_float), sinh(M_PI));
+  VerifyFuzzyEquals(sinh_float32(-pi_float / 2), sinh(-M_PI / 2));
+  VerifyFuzzyEquals(sinh_float64(0), sinh(0));
+  VerifyFuzzyEquals(sinh_float64(M_PI / 2), sinh(M_PI / 2));
+  VerifyFuzzyEquals(sinh_float64(M_PI), sinh(M_PI));
+  VerifyFuzzyEquals(sinh_float64(-M_PI / 2), sinh(-M_PI / 2));
+  VerifyFuzzyEquals(sinh_int32(0), sinh(0));
+  VerifyFuzzyEquals(sinh_int64(0), sinh(0));
+
+  // Cosh functions
+  VerifyFuzzyEquals(cosh_float32(0), cosh(0));
+  VerifyFuzzyEquals(cosh_float32(pi_float / 2), cosh(M_PI / 2));
+  VerifyFuzzyEquals(cosh_float32(pi_float), cosh(M_PI));
+  VerifyFuzzyEquals(cosh_float32(-pi_float / 2), cosh(-M_PI / 2));
+  VerifyFuzzyEquals(cosh_float64(0), cosh(0));
+  VerifyFuzzyEquals(cosh_float64(M_PI / 2), cosh(M_PI / 2));
+  VerifyFuzzyEquals(cosh_float64(M_PI), cosh(M_PI));
+  VerifyFuzzyEquals(cosh_float64(-M_PI / 2), cosh(-M_PI / 2));
+  VerifyFuzzyEquals(cosh_int32(0), cosh(0));
+  VerifyFuzzyEquals(cosh_int64(0), cosh(0));
+
+  // Tanh
+  VerifyFuzzyEquals(tanh_float32(pi_float), tanh(M_PI));
+  VerifyFuzzyEquals(tanh_float32(-pi_float), tanh(-M_PI));
+  VerifyFuzzyEquals(tanh_float64(M_PI), tanh(M_PI));
+  VerifyFuzzyEquals(tanh_float64(-M_PI), tanh(-M_PI));
+  VerifyFuzzyEquals(tanh_int32(0), tanh(0));
+  VerifyFuzzyEquals(tanh_int64(0), tanh(0));
+
+  // Atan2
+  VerifyFuzzyEquals(atan2_float32_float32(1, 0), atan2(1, 0));
+  VerifyFuzzyEquals(atan2_float32_float32(-1.0, 0), atan2(-1, 0));
+  VerifyFuzzyEquals(atan2_float64_float64(1.0, 0.0), atan2(1, 0));
+  VerifyFuzzyEquals(atan2_float64_float64(-1, 0), atan2(-1, 0));
+  VerifyFuzzyEquals(atan2_int32_int32(1, 0), atan2(1, 0));
+  VerifyFuzzyEquals(atan2_int64_int64(-1, 0), atan2(-1, 0));
+
+  // Radians
+  VerifyFuzzyEquals(radians_float32(0), 0);
+  VerifyFuzzyEquals(radians_float32(180.0), M_PI);
+  VerifyFuzzyEquals(radians_float32(90.0), M_PI / 2);
+  VerifyFuzzyEquals(radians_float64(0), 0);
+  VerifyFuzzyEquals(radians_float64(180.0), M_PI);
+  VerifyFuzzyEquals(radians_float64(90.0), M_PI / 2);
+  VerifyFuzzyEquals(radians_int32(180), M_PI);
+  VerifyFuzzyEquals(radians_int64(90), M_PI / 2);
+
+  // Degrees
+  VerifyFuzzyEquals(degrees_float32(0), 0.0);
+  VerifyFuzzyEquals(degrees_float32(pi_float), 180.0);
+  VerifyFuzzyEquals(degrees_float32(pi_float / 2), 90.0);
+  VerifyFuzzyEquals(degrees_float64(0), 0.0);
+  VerifyFuzzyEquals(degrees_float64(M_PI), 180.0);
+  VerifyFuzzyEquals(degrees_float64(M_PI / 2), 90.0);
+  VerifyFuzzyEquals(degrees_int32(1), 57.2958);
+  VerifyFuzzyEquals(degrees_int64(1), 57.2958);
+
+  // Cot
+  VerifyFuzzyEquals(cot_float32(pi_float / 2), tan(M_PI / 2 - M_PI / 2));
+  VerifyFuzzyEquals(cot_float64(M_PI / 2), tan(M_PI / 2 - M_PI / 2));
+}
+
+// bin_int32 / bin_int64 produce the binary digits of the value without leading
+// zeros; negative values are rendered with all bits of their type.
+TEST(TestExtendedMathOps, TestBinRepresentation) {
+  gandiva::ExecutionContext ctx;
+  // The context handle is a signed 64-bit value, matching the int64_t context
+  // parameter of bin_int32 / bin_int64.
+  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str = bin_int32(ctx_ptr, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, 28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "110111110000110");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, -28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "11111111111111111001000001111010");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, 58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1110001100000101");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, -58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "11111111111111110001110011111011");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, INT32_MAX, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1111111111111111111111111111111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int32(ctx_ptr, INT32_MIN, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "10000000000000000000000000000000");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "0");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "110111110000110");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, -28550, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "1111111111111111111111111111111111111111111111111001000001111010");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, 58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "1110001100000101");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, -58117, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "1111111111111111111111111111111111111111111111110001110011111011");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, INT64_MAX, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "111111111111111111111111111111111111111111111111111111111111111");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = bin_int64(ctx_ptr, INT64_MIN, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "1000000000000000000000000000000000000000000000000000000000000000");
+  EXPECT_FALSE(ctx.has_error());
+}
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/hash.cc b/src/arrow/cpp/src/gandiva/precompiled/hash.cc
new file mode 100644
index 000000000..eacf36230
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/hash.cc
@@ -0,0 +1,407 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern "C" {
+
+#include <string.h>
+
+#include "./types.h"
+
+// Rotates the 64-bit value `val` left by `distance` bits. Both shift counts
+// must stay below 64, so `distance` has to be in the range (0, 64).
+static inline gdv_uint64 rotate_left(gdv_uint64 val, int distance) {
+  const gdv_uint64 shifted_up = val << distance;
+  const gdv_uint64 wrapped_around = val >> (64 - distance);
+  return shifted_up | wrapped_around;
+}
+
+//
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain.
+// See http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp
+// MurmurHash3_x64_128
+//
+// 64-bit finalization mix: two rounds of "xor with the value shifted right by
+// 33, then multiply", followed by one last xor-shift.
+static inline gdv_uint64 fmix64(gdv_uint64 k) {
+  const gdv_uint64 multipliers[] = {0xff51afd7ed558ccduLL, 0xc4ceb9fe1a85ec53uLL};
+  for (int step = 0; step < 2; ++step) {
+    k ^= k >> 33;
+    k *= multipliers[step];
+  }
+  k ^= k >> 33;
+  return k;
+}
+
+// Hashes a single 64-bit value (treated as an 8-byte key) with the given seed
+// and returns 64 bits of the result.
+static inline gdv_uint64 murmur3_64(gdv_uint64 val, gdv_int32 seed) {
+  const gdv_uint64 c1 = 0x87c37b91114253d5ull;
+  const gdv_uint64 c2 = 0x4cf5ad432745937full;
+  const int length = 8;
+
+  // mix the only key block
+  gdv_uint64 k1 = val * c1;
+  k1 = rotate_left(k1, 31) * c2;
+
+  // both halves of the state start from the seed
+  gdv_uint64 h1 = seed;
+  gdv_uint64 h2 = seed;
+  h1 ^= k1;
+
+  // finalization
+  h1 ^= length;
+  h2 ^= length;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix64(h1);
+  h2 = fmix64(h2);
+
+  // murmur3_128 would also update h2 (h2 += h1) and return the 128-bit pair
+  // (h1, h2); only the 64-bit h1 is returned here.
+  return h1 + h2;
+}
+
+// Hashes a single 64-bit value (treated as an 8-byte key) to 32 bits.
+//
+// The key is consumed as two 32-bit blocks, low half first. All arithmetic is
+// done in 64-bit variables and masked back to 32 bits after every step that
+// could carry into the upper half.
+static inline gdv_uint32 murmur3_32(gdv_uint64 val, gdv_int32 seed) {
+  // Used purely as a constant, hence const rather than a mutable static.
+  static const gdv_uint64 UINT_MASK = 0xffffffffull;
+  const gdv_uint64 c1 = 0xcc9e2d51ull;
+  const gdv_uint64 c2 = 0x1b873593ull;
+  const int length = 8;
+
+  gdv_uint64 lh1 = seed & UINT_MASK;
+  for (int i = 0; i < 2; i++) {
+    // i-th 32-bit block of the key
+    gdv_uint64 lk1 = ((val >> i * 32) & UINT_MASK);
+    lk1 *= c1;
+    lk1 &= UINT_MASK;
+
+    // 32-bit rotate left by 15
+    lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+    lk1 *= c2;
+    lk1 &= UINT_MASK;
+
+    lh1 ^= lk1;
+    // 32-bit rotate left by 13
+    lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19);
+
+    lh1 = lh1 * 5 + 0xe6546b64L;
+    lh1 = UINT_MASK & lh1;
+  }
+  lh1 ^= length;
+
+  // final avalanche
+  lh1 ^= lh1 >> 16;
+  lh1 *= 0x85ebca6bull;
+  lh1 = UINT_MASK & lh1;
+  lh1 ^= lh1 >> 13;
+  lh1 *= 0xc2b2ae35ull;
+  lh1 = UINT_MASK & lh1;
+  lh1 ^= lh1 >> 16;
+
+  return static_cast<gdv_uint32>(lh1);
+}
+
+// Returns the raw bytes of `value` as a 64-bit unsigned integer (copied with
+// memcpy, no numeric conversion).
+static inline gdv_uint64 double_to_long_bits(double value) {
+  gdv_uint64 bits;
+  memcpy(&bits, &value, sizeof(bits));
+  return bits;
+}
+
+// 64-bit hash of the bit pattern of `val`. The seed is narrowed to 32 bits
+// before it is handed to murmur3_64().
+FORCE_INLINE gdv_int64 hash64(double val, gdv_int64 seed) {
+  const gdv_int32 narrowed_seed = static_cast<gdv_int32>(seed);
+  const gdv_uint64 bits = double_to_long_bits(val);
+  return murmur3_64(bits, narrowed_seed);
+}
+
+// 32-bit hash of the bit pattern of `val`, computed with murmur3_32().
+FORCE_INLINE gdv_int32 hash32(double val, gdv_int32 seed) {
+  const gdv_uint64 bits = double_to_long_bits(val);
+  return murmur3_32(bits, seed);
+}
+
+// Wrappers for all the numeric/date/time arrow types
+
+// Seeded 64-bit hash of a value converted to double. An invalid (null) input
+// returns the seed unchanged. seed_isvalid is not consulted.
+#define HASH64_WITH_SEED_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid, gdv_int64 seed, \
+                          gdv_boolean seed_isvalid) { \
+    return is_valid ? hash64(static_cast<double>(in), seed) : seed; \
+  }
+
+// Seeded 32-bit hash of a value converted to double. An invalid (null) input
+// returns the seed unchanged. seed_isvalid is not consulted.
+#define HASH32_WITH_SEED_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid, gdv_int32 seed, \
+                          gdv_boolean seed_isvalid) { \
+    return is_valid ? hash32(static_cast<double>(in), seed) : seed; \
+  }
+
+// Unseeded 64-bit hash of a value converted to double (seed 0). An invalid
+// (null) input hashes to 0.
+#define HASH64_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { \
+    if (!is_valid) { \
+      return 0; \
+    } \
+    return hash64(static_cast<double>(in), 0); \
+  }
+
+// Unseeded 32-bit hash of a value converted to double (seed 0). An invalid
+// (null) input hashes to 0.
+#define HASH32_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { \
+    if (!is_valid) { \
+      return 0; \
+    } \
+    return hash32(static_cast<double>(in), 0); \
+  }
+
+// Expand inner macro for all numeric, boolean, date, time and timestamp types.
+//
+// INNER is a two-argument macro taking (NAME, TYPE); it is expanded once per
+// type listed below, so each use generates functions named NAME_<type>.
+#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \
+ INNER(NAME, int8) \
+ INNER(NAME, int16) \
+ INNER(NAME, int32) \
+ INNER(NAME, int64) \
+ INNER(NAME, uint8) \
+ INNER(NAME, uint16) \
+ INNER(NAME, uint32) \
+ INNER(NAME, uint64) \
+ INNER(NAME, float32) \
+ INNER(NAME, float64) \
+ INNER(NAME, boolean) \
+ INNER(NAME, date64) \
+ INNER(NAME, date32) \
+ INNER(NAME, time32) \
+ INNER(NAME, timestamp)
+
+// 32-bit hashes: `hash`, `hash32` and `hash32AsDouble` are generated from the
+// same macro and therefore share one implementation; likewise the two seeded
+// variants.
+NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash)
+NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32)
+NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32AsDouble)
+NUMERIC_BOOL_DATE_TYPES(HASH32_WITH_SEED_OP, hash32WithSeed)
+NUMERIC_BOOL_DATE_TYPES(HASH32_WITH_SEED_OP, hash32AsDoubleWithSeed)
+
+// 64-bit hashes, unseeded and seeded.
+NUMERIC_BOOL_DATE_TYPES(HASH64_OP, hash64)
+NUMERIC_BOOL_DATE_TYPES(HASH64_OP, hash64AsDouble)
+NUMERIC_BOOL_DATE_TYPES(HASH64_WITH_SEED_OP, hash64WithSeed)
+NUMERIC_BOOL_DATE_TYPES(HASH64_WITH_SEED_OP, hash64AsDoubleWithSeed)
+
+#undef NUMERIC_BOOL_DATE_TYPES
+
+static inline gdv_uint64 murmur3_64_buf(const gdv_uint8* key, gdv_int32 len,  // hashes the len bytes at key: 16-byte blocks first, then a 0-15 byte tail
+ gdv_int32 seed) {  // returns h1 only, i.e. 64 bits of the 128-bit state
+ gdv_uint64 h1 = seed;  // NOTE(review): presumably a signed 32-bit -> unsigned 64-bit conversion, so a negative seed would be sign-extended - confirm
+ gdv_uint64 h2 = seed;
+ gdv_uint64 c1 = 0x87c37b91114253d5ull;
+ gdv_uint64 c2 = 0x4cf5ad432745937full;
+
+ const gdv_uint64* blocks = reinterpret_cast<const gdv_uint64*>(key);  // NOTE(review): key is read as 64-bit words in host byte order; assumes such loads are valid for key's alignment - confirm
+ int nblocks = len / 16;  // number of complete 16-byte blocks
+ for (int i = 0; i < nblocks; i++) {
+ gdv_uint64 k1 = blocks[i * 2 + 0];  // low 8 bytes of block i
+ gdv_uint64 k2 = blocks[i * 2 + 1];  // high 8 bytes of block i
+
+ k1 *= c1;
+ k1 = rotate_left(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+ h1 = rotate_left(h1, 27);
+ h1 += h2;
+ h1 = h1 * 5 + 0x52dce729;
+ k2 *= c2;
+ k2 = rotate_left(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+ h2 = rotate_left(h2, 31);
+ h2 += h1;
+ h2 = h2 * 5 + 0x38495ab5;
+ }
+
+ // tail: the (len & 15) bytes that follow the last complete block
+ gdv_uint64 k1 = 0;
+ gdv_uint64 k2 = 0;
+
+ const gdv_uint8* tail = reinterpret_cast<const gdv_uint8*>(key + nblocks * 16);
+ switch (len & 15) {  // every case falls through on purpose: bytes accumulate from the highest index down
+ case 15:
+ k2 = static_cast<gdv_uint64>(tail[14]) << 48;
+ case 14:
+ k2 ^= static_cast<gdv_uint64>(tail[13]) << 40;
+ case 13:
+ k2 ^= static_cast<gdv_uint64>(tail[12]) << 32;
+ case 12:
+ k2 ^= static_cast<gdv_uint64>(tail[11]) << 24;
+ case 11:
+ k2 ^= static_cast<gdv_uint64>(tail[10]) << 16;
+ case 10:
+ k2 ^= static_cast<gdv_uint64>(tail[9]) << 8;
+ case 9:
+ k2 ^= static_cast<gdv_uint64>(tail[8]);
+ k2 *= c2;  // tail bytes 8..14 are mixed into h2 once they are all gathered
+ k2 = rotate_left(k2, 33);
+ k2 *= c1;
+ h2 ^= k2;
+ case 8:
+ k1 ^= static_cast<gdv_uint64>(tail[7]) << 56;
+ case 7:
+ k1 ^= static_cast<gdv_uint64>(tail[6]) << 48;
+ case 6:
+ k1 ^= static_cast<gdv_uint64>(tail[5]) << 40;
+ case 5:
+ k1 ^= static_cast<gdv_uint64>(tail[4]) << 32;
+ case 4:
+ k1 ^= static_cast<gdv_uint64>(tail[3]) << 24;
+ case 3:
+ k1 ^= static_cast<gdv_uint64>(tail[2]) << 16;
+ case 2:
+ k1 ^= static_cast<gdv_uint64>(tail[1]) << 8;
+ case 1:
+ k1 ^= static_cast<gdv_uint64>(tail[0]) << 0;
+ k1 *= c1;  // tail bytes 0..7 are mixed into h1
+ k1 = rotate_left(k1, 31);
+ k1 *= c2;
+ h1 ^= k1;
+ }
+
+ h1 ^= len;  // finalization: fold the length into both halves
+ h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ // h2 += h1;  (left disabled: h2 is not part of the returned value)
+ // returning 64-bits of the 128-bit hash.
+ return h1;
+}
+
+static gdv_uint32 murmur3_32_buf(const gdv_uint8* key, gdv_int32 len, gdv_int32 seed) {  // 32-bit hash of the len bytes at key, computed in 64-bit variables masked back to 32 bits
+ static const gdv_uint64 c1 = 0xcc9e2d51ull;
+ static const gdv_uint64 c2 = 0x1b873593ull;
+ static const gdv_uint64 UINT_MASK = 0xffffffffull;  // keeps only the low 32 bits
+ gdv_uint64 lh1 = seed;  // NOTE(review): lh1 is not masked here; if a negative seed sign-extends, the first shift-based rotate below would see non-zero upper bits - confirm intended
+ const gdv_uint32* blocks = reinterpret_cast<const gdv_uint32*>(key);  // NOTE(review): key is read as 32-bit words in host byte order; assumes such loads are valid for key's alignment - confirm
+ int nblocks = len / 4;  // number of complete 4-byte blocks
+ const gdv_uint8* tail = reinterpret_cast<const gdv_uint8*>(key + nblocks * 4);
+ for (int i = 0; i < nblocks; i++) {
+ gdv_uint64 lk1 = static_cast<gdv_uint64>(blocks[i]);
+
+ // k1 *= c1; (32-bit multiply emulated by a 64-bit multiply followed by a mask)
+ lk1 *= c1;
+ lk1 &= UINT_MASK;
+
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);  // 32-bit rotate left by 15
+
+ lk1 *= c2;
+ lk1 = lk1 & UINT_MASK;
+ lh1 ^= lk1;
+ lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19);  // rotate left by 13 when lh1 fits in 32 bits
+
+ lh1 = lh1 * 5 + 0xe6546b64ull;
+ lh1 = UINT_MASK & lh1;
+ }
+
+ // tail: the (len & 3) bytes that follow the last complete block
+ gdv_uint64 lk1 = 0;
+
+ switch (len & 3) {  // every case falls through on purpose
+ case 3:
+ lk1 = (tail[2] & 0xff) << 16;
+ case 2:
+ lk1 |= (tail[1] & 0xff) << 8;
+ case 1:
+ lk1 |= (tail[0] & 0xff);
+ lk1 *= c1;
+ lk1 = UINT_MASK & lk1;
+ lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);  // 32-bit rotate left by 15
+
+ lk1 *= c2;
+ lk1 = lk1 & UINT_MASK;
+
+ lh1 ^= lk1;
+ }
+
+ // finalization: fold in the length, then alternate xor-shift and multiply steps
+ lh1 ^= len;
+
+ lh1 ^= lh1 >> 16;
+ lh1 *= 0x85ebca6b;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 13;
+
+ lh1 *= 0xc2b2ae35;
+ lh1 = UINT_MASK & lh1;
+ lh1 ^= lh1 >> 16;
+
+ return static_cast<gdv_uint32>(lh1 & UINT_MASK);
+}
+
+// 64-bit hash of the len bytes at buf. The seed is narrowed to gdv_int32 before
+// it is handed to murmur3_64_buf.
+FORCE_INLINE gdv_int64 hash64_buf(const gdv_uint8* buf, int len, gdv_int64 seed) {
+  const gdv_int32 seed32 = static_cast<gdv_int32>(seed);
+  return murmur3_64_buf(buf, len, seed32);
+}
+
+// 32-bit hash of the len bytes at buf, delegating to murmur3_32_buf.
+FORCE_INLINE gdv_int32 hash32_buf(const gdv_uint8* buf, int len, gdv_int32 seed) {
+  const gdv_uint32 digest = murmur3_32_buf(buf, len, seed);
+  return digest;
+}
+
+// Wrappers for the varlen types
+
+// Defines NAME_TYPE(in, len, is_valid, seed, seed_isvalid) returning a 64-bit
+// hash of the len bytes at `in`. A null input yields the seed unchanged.
+// seed_isvalid is not consulted.
+#define HASH64_BUF_WITH_SEED_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid, \
+                          gdv_int64 seed, gdv_boolean seed_isvalid) { \
+    return is_valid ? hash64_buf(reinterpret_cast<const uint8_t*>(in), len, seed) \
+                    : seed; \
+  }
+
+// Defines NAME_TYPE(in, len, is_valid, seed, seed_isvalid) returning a 32-bit
+// hash of the len bytes at `in`. A null input yields the seed unchanged.
+// seed_isvalid is not consulted.
+#define HASH32_BUF_WITH_SEED_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid, \
+                          gdv_int32 seed, gdv_boolean seed_isvalid) { \
+    return is_valid ? hash32_buf(reinterpret_cast<const uint8_t*>(in), len, seed) \
+                    : seed; \
+  }
+
+// Defines NAME_TYPE(in, len, is_valid): 64-bit hash of the len bytes at `in`
+// with seed 0, or 0 when the input is null.
+#define HASH64_BUF_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
+    if (!is_valid) { \
+      return 0; \
+    } \
+    return hash64_buf(reinterpret_cast<const uint8_t*>(in), len, 0); \
+  }
+
+// Defines NAME_TYPE(in, len, is_valid): 32-bit hash of the len bytes at `in`
+// with seed 0, or 0 when the input is null.
+#define HASH32_BUF_OP(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
+    if (!is_valid) { \
+      return 0; \
+    } \
+    return hash32_buf(reinterpret_cast<const uint8_t*>(in), len, 0); \
+  }
+
+// Expand inner macro for all variable-length (non-numeric) types: utf8 and binary.
+#define VAR_LEN_TYPES(INNER, NAME) /* applies INNER(NAME, T) for T in {utf8, binary} */ \
+ INNER(NAME, utf8) \
+ INNER(NAME, binary)
+
+VAR_LEN_TYPES(HASH32_BUF_OP, hash)  // hash_utf8 / hash_binary: 32-bit, seed 0
+VAR_LEN_TYPES(HASH32_BUF_OP, hash32)
+VAR_LEN_TYPES(HASH32_BUF_OP, hash32AsDouble)  // same body as hash32_*: the bytes are hashed directly
+VAR_LEN_TYPES(HASH32_BUF_WITH_SEED_OP, hash32WithSeed)  // 32-bit, caller-supplied seed
+VAR_LEN_TYPES(HASH32_BUF_WITH_SEED_OP, hash32AsDoubleWithSeed)
+
+VAR_LEN_TYPES(HASH64_BUF_OP, hash64)  // hash64_utf8 / hash64_binary: 64-bit, seed 0
+VAR_LEN_TYPES(HASH64_BUF_OP, hash64AsDouble)
+VAR_LEN_TYPES(HASH64_BUF_WITH_SEED_OP, hash64WithSeed)  // 64-bit, caller-supplied seed
+VAR_LEN_TYPES(HASH64_BUF_WITH_SEED_OP, hash64AsDoubleWithSeed)
+
+#undef HASH32_BUF_OP  // the generator macros are not needed past this point
+#undef HASH32_BUF_WITH_SEED_OP
+#undef HASH32_OP
+#undef HASH32_WITH_SEED_OP
+#undef HASH64_BUF_OP
+#undef HASH64_BUF_WITH_SEED_OP
+#undef HASH64_OP
+#undef HASH64_WITH_SEED_OP
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/hash_test.cc b/src/arrow/cpp/src/gandiva/precompiled/hash_test.cc
new file mode 100644
index 000000000..0a51dced2
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/hash_test.cc
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <time.h>
+
+#include <gtest/gtest.h>
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Checks that hash32() is non-zero for 0, agrees across numeric types for the
+// same value, and changes when the seed changes.
+TEST(TestHash, TestHash32) {
+  // A zero of every numeric type under test.
+  const gdv_int8 zero_i8 = 0;
+  const gdv_uint8 zero_u8 = 0;
+  const gdv_int16 zero_i16 = 0;
+  const gdv_uint16 zero_u16 = 0;
+  const gdv_int32 zero_i32 = 0;
+  const gdv_uint32 zero_u32 = 0;
+  const gdv_int64 zero_i64 = 0;
+  const gdv_uint64 zero_u64 = 0;
+  const gdv_float32 zero_f32 = 0;
+  const gdv_float64 zero_f64 = 0;
+
+  // hash of 0 should be non-zero (zero is the hash value for nulls).
+  const gdv_int32 baseline = hash32(zero_i8, 0);
+  EXPECT_NE(baseline, 0);
+
+  // for a given value, all numeric types must have the same hash.
+  EXPECT_EQ(hash32(zero_u8, 0), baseline);
+  EXPECT_EQ(hash32(zero_i16, 0), baseline);
+  EXPECT_EQ(hash32(zero_u16, 0), baseline);
+  EXPECT_EQ(hash32(zero_i32, 0), baseline);
+  EXPECT_EQ(hash32(zero_u32, 0), baseline);
+  EXPECT_EQ(hash32(static_cast<double>(zero_i64), 0), baseline);
+  EXPECT_EQ(hash32(static_cast<double>(zero_u64), 0), baseline);
+  EXPECT_EQ(hash32(zero_f32, 0), baseline);
+  EXPECT_EQ(hash32(zero_f64, 0), baseline);
+
+  // hash must change with a change in seed.
+  EXPECT_NE(hash32(zero_i8, 1), baseline);
+
+  // for a given value and seed, all numeric types must have the same hash.
+  EXPECT_EQ(hash32(zero_i8, 1), hash32(zero_i16, 1));
+  EXPECT_EQ(hash32(zero_i8, 1), hash32(zero_u32, 1));
+  EXPECT_EQ(hash32(zero_i8, 1), hash32(zero_f32, 1));
+  EXPECT_EQ(hash32(zero_i8, 1), hash32(zero_f64, 1));
+}
+
+// Checks that hash64() is non-zero for 0, differs from hash32(), agrees across
+// numeric types for the same value, and changes when the seed changes.
+TEST(TestHash, TestHash64) {
+  gdv_int8 s8 = 0;
+  gdv_uint8 u8 = 0;
+  gdv_int16 s16 = 0;
+  gdv_uint16 u16 = 0;
+  gdv_int32 s32 = 0;
+  gdv_uint32 u32 = 0;
+  gdv_int64 s64 = 0;
+  gdv_uint64 u64 = 0;
+  gdv_float32 f32 = 0;
+  gdv_float64 f64 = 0;
+
+  // hash of 0 should be non-zero (zero is the hash value for nulls).
+  gdv_int64 zero_hash = hash64(s8, 0);
+  EXPECT_NE(zero_hash, 0);
+  // the 64-bit hash must not simply repeat the 32-bit one.
+  EXPECT_NE(hash64(u8, 0), hash32(u8, 0));
+
+  // for a given value, all numeric types must have the same hash.
+  EXPECT_EQ(hash64(u8, 0), zero_hash);
+  EXPECT_EQ(hash64(s16, 0), zero_hash);
+  EXPECT_EQ(hash64(u16, 0), zero_hash);
+  EXPECT_EQ(hash64(s32, 0), zero_hash);
+  EXPECT_EQ(hash64(u32, 0), zero_hash);
+  EXPECT_EQ(hash64(static_cast<double>(s64), 0), zero_hash);
+  EXPECT_EQ(hash64(static_cast<double>(u64), 0), zero_hash);
+  EXPECT_EQ(hash64(f32, 0), zero_hash);
+  EXPECT_EQ(hash64(f64, 0), zero_hash);
+
+  // hash must change with a change in seed.
+  EXPECT_NE(hash64(s8, 1), zero_hash);
+
+  // for a given value and seed, all numeric types must have the same hash.
+  EXPECT_EQ(hash64(s8, 1), hash64(s16, 1));
+  EXPECT_EQ(hash64(s8, 1), hash64(u32, 1));
+  EXPECT_EQ(hash64(s8, 1), hash64(f32, 1));
+  // float64 was covered by the 32-bit test but missing here.
+  EXPECT_EQ(hash64(s8, 1), hash64(f64, 1));
+}
+
+// Checks that the buffer hashes are non-zero and react to changes in both the
+// hashed length and the seed.
+TEST(TestHash, TestHashBuf) {
+  const char* text = "hello";
+  const int text_len = 5;
+  const gdv_uint8* bytes = (const gdv_uint8*)text;
+
+  // hash should be non-zero (zero is the hash value for nulls).
+  EXPECT_NE(hash32_buf(bytes, text_len, 0), 0);
+  EXPECT_NE(hash64_buf(bytes, text_len, 0), 0);
+
+  // hash must change if the string is changed.
+  EXPECT_NE(hash32_buf(bytes, text_len, 0), hash32_buf(bytes, text_len - 1, 0));
+  EXPECT_NE(hash64_buf(bytes, text_len, 0), hash64_buf(bytes, text_len - 1, 0));
+
+  // hash must change if the seed is changed.
+  EXPECT_NE(hash32_buf(bytes, text_len, 0), hash32_buf(bytes, text_len, 1));
+  EXPECT_NE(hash64_buf(bytes, text_len, 0), hash64_buf(bytes, text_len, 1));
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/print.cc b/src/arrow/cpp/src/gandiva/precompiled/print.cc
new file mode 100644
index 000000000..ecb90e1a3
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/print.cc
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern "C" {
+
+#include <stdio.h>
+
+#include "./types.h"
+
+// Debug helper: prints val through printf using msg as the format string and
+// returns printf's result. msg is used as a format verbatim.
+int print_double(char* msg, double val) {
+  const int written = printf(msg, val);
+  return written;
+}
+
+// Debug helper: prints val through printf using msg as the format string and
+// returns printf's result. msg is used as a format verbatim.
+int print_float(char* msg, float val) {
+  const int written = printf(msg, val);
+  return written;
+}
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/string_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/string_ops.cc
new file mode 100644
index 000000000..48c24b862
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/string_ops.cc
@@ -0,0 +1,2198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// String functions
+#include "arrow/util/value_parsing.h"
+
+extern "C" {
+
+#include <algorithm>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "./types.h"
+
+// Length of a utf8 value in bytes; the data pointer is not inspected.
+FORCE_INLINE
+gdv_int32 octet_length_utf8(const gdv_utf8 input, gdv_int32 length) {
+  return length;
+}
+
+// Length of a utf8 value in bits (8 per byte).
+FORCE_INLINE
+gdv_int32 bit_length_utf8(const gdv_utf8 input, gdv_int32 length) {
+  return 8 * length;
+}
+
+// Length of a binary value in bytes; the data pointer is not inspected.
+FORCE_INLINE
+gdv_int32 octet_length_binary(const gdv_binary input, gdv_int32 length) {
+  return length;
+}
+
+// Length of a binary value in bits (8 per byte).
+FORCE_INLINE
+gdv_int32 bit_length_binary(const gdv_binary input, gdv_int32 length) {
+  return 8 * length;
+}
+
+// Searches input[start_pos, input_len) for the first occurrence of delim.
+// Returns the index just past the match, or -1 when delim does not occur.
+FORCE_INLINE
+int match_string(const char* input, gdv_int32 input_len, gdv_int32 start_pos,
+                 const char* delim, gdv_int32 delim_len) {
+  int pos = start_pos;
+  while (pos < input_len) {
+    // Once the remaining bytes cannot hold delim, no later position can either.
+    if (input_len - pos < delim_len) {
+      break;
+    }
+    if (memcmp(input + pos, delim, delim_len) == 0) {
+      return pos + delim_len;
+    }
+    ++pos;
+  }
+  return -1;
+}
+
+// Three-way comparison of two byte ranges: the common prefix is compared with
+// memcmp, and on a tie the length difference (left_len - right_len) decides.
+FORCE_INLINE
+gdv_int32 mem_compare(const char* left, gdv_int32 left_len, const char* right,
+                      gdv_int32 right_len) {
+  const int common = (right_len < left_len) ? right_len : left_len;
+  const int prefix_order = memcmp(left, right, common);
+  return (prefix_order != 0) ? prefix_order : (left_len - right_len);
+}
+
+// Applies INNER(NAME, T, OP) for each variable-length type T (utf8, binary).
+#define VAR_LEN_OP_TYPES(INNER, NAME, OP) \
+  INNER(NAME, utf8, OP) \
+  INNER(NAME, binary, OP)
+
+// Relational operators on two values of the same varlen type. Generates
+// NAME_TYPE_TYPE(left, left_len, right, right_len), which applies OP to the
+// mem_compare() result and 0.
+#define BINARY_RELATIONAL(NAME, TYPE, OP) \
+  FORCE_INLINE \
+  bool NAME##_##TYPE##_##TYPE(const gdv_##TYPE left, gdv_int32 left_len, \
+                              const gdv_##TYPE right, gdv_int32 right_len) { \
+    const gdv_int32 ordering = mem_compare(left, left_len, right, right_len); \
+    return ordering OP 0; \
+  }
+
+VAR_LEN_OP_TYPES(BINARY_RELATIONAL, equal, ==)
+VAR_LEN_OP_TYPES(BINARY_RELATIONAL, not_equal, !=)
+VAR_LEN_OP_TYPES(BINARY_RELATIONAL, less_than, <)
+VAR_LEN_OP_TYPES(BINARY_RELATIONAL, less_than_or_equal_to, <=)
+VAR_LEN_OP_TYPES(BINARY_RELATIONAL, greater_than, >)
+VAR_LEN_OP_TYPES(BINARY_RELATIONAL, greater_than_or_equal_to, >=)
+
+#undef BINARY_RELATIONAL
+#undef VAR_LEN_OP_TYPES
+
+// Expand inner macro for all varlen types (utf8 and binary).
+#define VAR_LEN_TYPES(INNER, NAME) /* applies INNER(NAME, T) for T in {utf8, binary} */ \
+ INNER(NAME, utf8) \
+ INNER(NAME, binary)
+
+// Converts one hexadecimal digit character to its numeric value.
+// 'A'-'F' and 'a'-'f' map to 10-15; any other character yields ch - '0', so the
+// result is only meaningful for '0'-'9' in that branch (no validation is done).
+FORCE_INLINE
+int to_binary_from_hex(char ch) {
+  if ('A' <= ch && ch <= 'F') {
+    return (ch - 'A') + 10;
+  }
+  if ('a' <= ch && ch <= 'f') {
+    return (ch - 'a') + 10;
+  }
+  return ch - '0';
+}
+
+// True when the first prefix_len bytes of data equal prefix.
+FORCE_INLINE
+bool starts_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* prefix,
+                           gdv_int32 prefix_len) {
+  if (data_len < prefix_len) {
+    return false;  // data is too short to contain the prefix
+  }
+  return memcmp(data, prefix, prefix_len) == 0;
+}
+
+// True when the last suffix_len bytes of data equal suffix.
+FORCE_INLINE
+bool ends_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* suffix,
+                         gdv_int32 suffix_len) {
+  if (data_len < suffix_len) {
+    return false;  // data is too short to contain the suffix
+  }
+  const char* data_tail = data + data_len - suffix_len;
+  return memcmp(data_tail, suffix, suffix_len) == 0;
+}
+
+// True when the substr_len bytes at substr occur anywhere inside data.
+// The comparison is byte-wise.
+FORCE_INLINE
+bool is_substr_utf8_utf8(const char* data, int32_t data_len, const char* substr,
+                         int32_t substr_len) {
+  // Last offset at which substr still fits inside data.
+  const int32_t last_start = data_len - substr_len;
+  int32_t start = 0;
+  while (start <= last_start) {
+    if (memcmp(data + start, substr, substr_len) == 0) {
+      return true;
+    }
+    ++start;
+  }
+  return false;
+}
+
+// Returns the encoded length (1-4) announced by a utf8 lead byte, or 0 when c is
+// not a valid lead byte (this includes continuation bytes of the form 10xxxxxx).
+FORCE_INLINE
+gdv_int32 utf8_char_length(char c) {
+  const unsigned char lead = static_cast<unsigned char>(c);
+  if ((lead & 0x80) == 0x00) return 1;  // 0xxxxxxx: 1-byte char (0x00 ~ 0x7F)
+  if ((lead & 0xE0) == 0xC0) return 2;  // 110xxxxx: 2-byte char
+  if ((lead & 0xF0) == 0xE0) return 3;  // 1110xxxx: 3-byte char
+  if ((lead & 0xF8) == 0xF0) return 4;  // 11110xxx: 4-byte char
+  return 0;                             // invalid char
+}
+
+FORCE_INLINE
+void set_error_for_invalid_utf(int64_t execution_context, char val) {
+ char const* fmt = "unexpected byte \\%02hhx encountered while decoding utf8 string";
+ int size = static_cast<int>(strlen(fmt)) + 64;
+ char* error = reinterpret_cast<char*>(malloc(size));
+ snprintf(error, size, fmt, (unsigned char)val);
+ gdv_fn_context_set_error_msg(execution_context, error);
+ free(error);
+}
+
+// Checks that the bytes following data[char_index] are utf8 continuation bytes
+// (10xxxxxx). Offsets 1 .. data_len - 1 relative to char_index are inspected.
+// NOTE(review): data_len is used here as a byte count starting at char_index
+// (presumably the glyph length), not as the length of the whole buffer - confirm
+// against callers.
+FORCE_INLINE
+bool validate_utf8_following_bytes(const char* data, int32_t data_len,
+                                   int32_t char_index) {
+  int offset = 1;
+  while (offset < data_len) {
+    const bool is_continuation = (data[char_index + offset] & 0xC0) == 0x80;
+    if (!is_continuation) {
+      return false;
+    }
+    ++offset;
+  }
+  return true;
+}
+
+// Counts the utf8 characters (glyphs) in data[0, data_len).
+// On an invalid lead byte, a truncated glyph or a bad continuation byte, an error
+// is set on the context and 0 is returned.
+FORCE_INLINE
+gdv_int32 utf8_length(gdv_int64 context, const char* data, gdv_int32 data_len) {
+  int glyphs = 0;
+  int pos = 0;
+  while (pos < data_len) {
+    const int width = utf8_char_length(data[pos]);
+    if (width == 0 || pos + width > data_len) {  // invalid byte or incomplete glyph
+      set_error_for_invalid_utf(context, data[pos]);
+      return 0;
+    }
+    // every byte after the head byte must be a continuation byte
+    for (int k = pos + 1; k < pos + width; ++k) {
+      if ((data[k] & 0xC0) != 0x80) {
+        set_error_for_invalid_utf(context, data[k]);
+        return 0;
+      }
+    }
+    pos += width;
+    ++glyphs;
+  }
+  return glyphs;
+}
+
+// Counts the utf8 characters in data[0, data_len) without reporting errors.
+// An invalid lead byte or a glyph that would run past the end of the input is
+// counted as a single one-byte character. When a byte that should be a
+// continuation byte is not one, the current character is widened by one byte.
+//
+// The widening can push the scan window past the end of the input, so the inner
+// loop is bounded by data_len as well; the original read data[i + j] beyond the
+// buffer in that case (e.g. for the two bytes "\xC3" "A"). Counts for inputs that
+// stay in bounds are unchanged.
+FORCE_INLINE
+gdv_int32 utf8_length_ignore_invalid(const char* data, gdv_int32 data_len) {
+  int char_len = 0;
+  int count = 0;
+  for (int i = 0; i < data_len; i += char_len) {
+    char_len = utf8_char_length(data[i]);
+    if (char_len == 0 || i + char_len > data_len) {  // invalid byte or incomplete glyph
+      // if invalid byte or incomplete glyph, ignore it
+      char_len = 1;
+    }
+    for (int j = 1; j < char_len && i + j < data_len; ++j) {
+      if ((data[i + j] & 0xC0) != 0x80) {  // bytes following head-byte of glyph
+        char_len += 1;
+      }
+    }
+    ++count;
+  }
+  return count;
+}
+
+// Returns the byte offset reached after walking char_pos glyphs from the start of
+// a non-empty utf8 sequence (or str_len if the sequence ends first).
+// Only lead bytes are checked: on an invalid lead byte or a truncated glyph an
+// error is set on the context and -1 is returned.
+FORCE_INLINE
+gdv_int32 utf8_byte_pos(gdv_int64 context, const char* str, gdv_int32 str_len,
+                        gdv_int32 char_pos) {
+  int offset = 0;
+  gdv_int32 glyphs_left = char_pos;
+  while (glyphs_left > 0 && offset < str_len) {
+    const int width = utf8_char_length(str[offset]);
+    const bool truncated = (offset + width > str_len);
+    if (width == 0 || truncated) {  // invalid byte or incomplete glyph
+      set_error_for_invalid_utf(context, str[offset]);
+      return -1;
+    }
+    offset += width;
+    --glyphs_left;
+  }
+  return offset;
+}
+
+// Generates NAME_TYPE(context, in, in_len), a thin wrapper that returns the
+// utf8_length() of the input.
+#define UTF8_LENGTH(NAME, TYPE) \
+  FORCE_INLINE \
+  gdv_int32 NAME##_##TYPE(gdv_int64 context, gdv_##TYPE in, gdv_int32 in_len) { \
+    const gdv_int32 glyph_count = utf8_length(context, in, in_len); \
+    return glyph_count; \
+  }
+
+UTF8_LENGTH(char_length, utf8)
+UTF8_LENGTH(length, utf8)
+UTF8_LENGTH(lengthUtf8, binary)
+
+// Generates space_<IN_TYPE>(ctx, n, out_len): returns a string of 'n' spaces
+// allocated from the context arena. A non-positive n yields the empty string; an
+// allocation failure sets an error on the context and also yields the empty string.
+#define SPACE_STR(IN_TYPE) \
+  GANDIVA_EXPORT \
+  const char* space_##IN_TYPE(gdv_int64 ctx, gdv_##IN_TYPE n, int32_t* out_len) { \
+    const gdv_int32 count = static_cast<gdv_int32>(n); \
+    *out_len = 0; /* stays 0 on every empty-string return below */ \
+    if (count <= 0) { \
+      return ""; \
+    } \
+    char* spaces = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ctx, count)); \
+    if (spaces == nullptr) { \
+      gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string"); \
+      return ""; \
+    } \
+    memset(spaces, ' ', count); \
+    *out_len = count; \
+    return spaces; \
+  }
+
+SPACE_STR(int32)
+SPACE_STR(int64)
+
+// Reverses a utf8 sequence glyph by glyph (the bytes inside each glyph keep their
+// order). The result is allocated from the context arena.
+// On an allocation failure or malformed utf8, an error is set on the context and
+// the empty string is returned with *out_len == 0.
+FORCE_INLINE
+const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+                         int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  char* reversed =
+      reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, data_len));
+  if (reversed == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_int32 pos = 0;
+  while (pos < data_len) {
+    const gdv_int32 width = utf8_char_length(data[pos]);
+    if (width == 0 || pos + width > data_len) {  // invalid byte or incomplete glyph
+      set_error_for_invalid_utf(context, data[pos]);
+      *out_len = 0;
+      return "";
+    }
+
+    // bytes following the head byte must all be continuation bytes
+    for (gdv_int32 k = 1; k < width; ++k) {
+      if ((data[pos + k] & 0xC0) != 0x80) {
+        set_error_for_invalid_utf(context, data[pos + k]);
+        *out_len = 0;
+        return "";
+      }
+    }
+
+    // the glyph starting at pos lands at the mirrored position in the output
+    memcpy(reversed + (data_len - pos - width), data + pos, width);
+    pos += width;
+  }
+
+  *out_len = data_len;
+  return reversed;
+}
+
+// Strips leading ' ' characters from the input utf8 sequence.
+// No copy is made: the result points into data.
+FORCE_INLINE
+const char* ltrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+                       int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // count the spaces at the front of the input
+  gdv_int32 skipped = 0;
+  for (; skipped < data_len; ++skipped) {
+    if (data[skipped] != ' ') {
+      break;
+    }
+  }
+
+  *out_len = data_len - skipped;
+  return data + skipped;
+}
+
+// Strips trailing ' ' characters from the input utf8 sequence.
+// No copy is made: the result is data itself with a shorter length.
+FORCE_INLINE
+const char* rtrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+                       int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // kept is the number of bytes remaining once the trailing spaces are dropped
+  gdv_int32 kept = data_len;
+  while (kept > 0 && data[kept - 1] == ' ') {
+    --kept;
+  }
+
+  *out_len = kept;
+  return data;
+}
+
+// Strips ' ' characters from both ends of the input utf8 sequence.
+// No copy is made: the result points into data.
+FORCE_INLINE
+const char* btrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+                       int32_t* out_len) {
+  if (data_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // [first, past_last) is the half-open range of bytes that survive the trim
+  gdv_int32 first = 0;
+  gdv_int32 past_last = data_len;
+  while (first < past_last && data[first] == ' ') {
+    ++first;
+  }
+  while (past_last > first && data[past_last - 1] == ' ') {
+    --past_last;
+  }
+
+  *out_len = past_last - first;
+  return data + first;
+}
+
+// Strips from the left end of the base text every glyph whose bytes occur in the
+// trim text. No copy is made: the result points into basetext.
+// Malformed utf8 in the base text sets an error on the context and yields "".
+FORCE_INLINE
+const char* ltrim_utf8_utf8(gdv_int64 context, const char* basetext,
+                            gdv_int32 basetext_len, const char* trimtext,
+                            gdv_int32 trimtext_len, int32_t* out_len) {
+  if (basetext_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+  if (trimtext_len == 0) {  // nothing to trim
+    *out_len = basetext_len;
+    return basetext;
+  }
+
+  // advance glyph by glyph until one is found that is not part of the trim text
+  gdv_int32 offset = 0;
+  while (offset < basetext_len) {
+    const gdv_int32 width = utf8_char_length(basetext[offset]);
+    if (width == 0 || offset + width > basetext_len) {
+      // invalid byte or incomplete glyph
+      set_error_for_invalid_utf(context, basetext[offset]);
+      *out_len = 0;
+      return "";
+    }
+    const bool in_trim_set =
+        is_substr_utf8_utf8(trimtext, trimtext_len, basetext + offset, width);
+    if (!in_trim_set) {
+      break;
+    }
+    offset += width;
+  }
+
+  *out_len = basetext_len - offset;
+  return basetext + offset;
+}
+
+// Trims characters present in the trim text from the right end of the base text. Returns basetext itself with a shortened *out_len, or "" with *out_len == 0.
+FORCE_INLINE
+const char* rtrim_utf8_utf8(gdv_int64 context, const char* basetext,
+ gdv_int32 basetext_len, const char* trimtext,
+ gdv_int32 trimtext_len, int32_t* out_len) {
+ if (basetext_len == 0) {
+ *out_len = 0;
+ return "";
+ } else if (trimtext_len == 0) {  // nothing to trim: return the input unchanged
+ *out_len = basetext_len;
+ return basetext;
+ }
+
+ gdv_int32 char_len, end_ptr, byte_cnt = 1;  // byte_cnt: bytes seen so far for the glyph currently being scanned backwards
+ // scan the base text from right to left and decrement the end pointer till
+ // there is a character which is not present in the trim text
+ for (end_ptr = basetext_len - 1; end_ptr >= 0; --end_ptr) {
+ char_len = utf8_char_length(basetext[end_ptr]);
+ if (char_len == 0) { // not a lead byte: treated as a trailing byte of a multibyte character
+ ++byte_cnt;
+ continue;
+ }
+ // this is the first byte of a character, hence check if char_len == byte_cnt
+ if (byte_cnt != char_len) { // invalid byte or incomplete glyph
+ set_error_for_invalid_utf(context, basetext[end_ptr]);
+ *out_len = 0;
+ return "";
+ }
+ byte_cnt = 1; // reset the counter
+ if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + end_ptr, char_len)) {
+ break;  // end_ptr now rests on the lead byte of the last glyph to keep
+ }
+ }
+
+ // the loop ran off the front of the text without finding a character to keep
+ if (end_ptr == -1) {
+ *out_len = 0;
+ return "";
+ }
+
+ end_ptr += utf8_char_length(basetext[end_ptr]); // point to the next character
+ *out_len = end_ptr;
+ return basetext;
+}
+
+// Trims characters present in the trim text from both ends of the base text. Returns a pointer into basetext with the trimmed length in *out_len, or "" with *out_len == 0.
+FORCE_INLINE
+const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext,
+ gdv_int32 basetext_len, const char* trimtext,
+ gdv_int32 trimtext_len, int32_t* out_len) {
+ if (basetext_len == 0) {
+ *out_len = 0;
+ return "";
+ } else if (trimtext_len == 0) {  // nothing to trim: return the input unchanged
+ *out_len = basetext_len;
+ return basetext;
+ }
+
+ gdv_int32 start_ptr, end_ptr, char_len, byte_cnt = 1;  // byte_cnt: bytes seen so far for the glyph being scanned backwards
+ // scan the base text from left to right and increment the start and decrement the
+ // end pointers till there are characters which are not present in the trim text
+ for (start_ptr = 0; start_ptr < basetext_len; start_ptr += char_len) {
+ char_len = utf8_char_length(basetext[start_ptr]);
+ if (char_len == 0 || start_ptr + char_len > basetext_len) {
+ // invalid byte or incomplete glyph
+ set_error_for_invalid_utf(context, basetext[start_ptr]);
+ *out_len = 0;
+ return "";
+ }
+ if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + start_ptr, char_len)) {
+ break;  // start_ptr rests on the first glyph to keep
+ }
+ }
+ for (end_ptr = basetext_len - 1; end_ptr >= start_ptr; --end_ptr) {  // backward pass, never crossing start_ptr
+ char_len = utf8_char_length(basetext[end_ptr]);
+ if (char_len == 0) { // not a lead byte: treated as a trailing byte in a multibyte character
+ ++byte_cnt;
+ continue;
+ }
+ // this is the first byte of a character, hence check if char_len == byte_cnt
+ if (byte_cnt != char_len) { // invalid byte or incomplete glyph
+ set_error_for_invalid_utf(context, basetext[end_ptr]);
+ *out_len = 0;
+ return "";
+ }
+ byte_cnt = 1; // reset the counter
+ if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + end_ptr, char_len)) {
+ break;  // end_ptr rests on the lead byte of the last glyph to keep
+ }
+ }
+
+ // when all characters are trimmed, start_ptr has been incremented to basetext_len and
+ // end_ptr still points to basetext_len - 1, hence we need to handle this case
+ if (start_ptr > end_ptr) {
+ *out_len = 0;
+ return "";
+ }
+
+ end_ptr += utf8_char_length(basetext[end_ptr]); // point to the next character
+ *out_len = end_ptr - start_ptr;
+ return basetext + start_ptr;
+}
+
+// Case-insensitive equality check of str against base_str, which the caller must
+// supply already lower-cased: only ASCII 'A'-'Z' in str are folded to lower case.
+// Two empty strings compare as not equal (the function returns false).
+FORCE_INLINE
+gdv_boolean compare_lower_strings(const char* base_str, gdv_int32 base_str_len,
+                                  const char* str, gdv_int32 str_len) {
+  if (base_str_len != str_len) {
+    return false;
+  }
+  for (int i = 0; i < str_len; i++) {
+    char lowered = str[i];
+    // 'A' - 'Z' : 0x41 - 0x5a, 'a' - 'z' : 0x61 - 0x7a
+    const bool is_upper = (lowered >= 0x41) && (lowered <= 0x5a);
+    if (is_upper) {
+      lowered = static_cast<char>(lowered + 0x20);
+    }
+    if (lowered != base_str[i]) {
+      return false;  // first mismatch decides
+    }
+  }
+  // every character matched; an empty input never counts as a match
+  return str_len > 0;
+}
+
+// Casts a string to boolean. Accepted values are '0', '1', 'true' and 'false';
+// surrounding ' ' characters are ignored and letters are matched
+// case-insensitively. Anything else sets "Invalid value for boolean." on the
+// context and returns false.
+FORCE_INLINE
+gdv_boolean castBIT_utf8(gdv_int64 context, const char* data, gdv_int32 data_len) {
+  if (data_len <= 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid value for boolean.");
+    return false;
+  }
+
+  // locate the half-open range [first, past_last) left after trimming spaces
+  int32_t first = 0;
+  int32_t past_last = data_len;
+  while (first < past_last && data[first] == ' ') {
+    ++first;
+  }
+  while (past_last > first && data[past_last - 1] == ' ') {
+    --past_last;
+  }
+  const char* trimmed = data + first;
+  const int32_t trimmed_len = past_last - first;
+
+  // only lengths 1 ('0'/'1'), 4 ('true') and 5 ('false') can be valid
+  switch (trimmed_len) {
+    case 1:
+      if (trimmed[0] == '1') return true;
+      if (trimmed[0] == '0') return false;
+      break;
+    case 4:
+      if (compare_lower_strings("true", 4, trimmed, trimmed_len)) return true;
+      break;
+    case 5:
+      if (compare_lower_strings("false", 5, trimmed, trimmed_len)) return false;
+      break;
+    default:
+      break;
+  }
+
+  // if no 'true', 'false', '0' or '1' value is found, set an error
+  gdv_fn_context_set_error_msg(context, "Invalid value for boolean.");
+  return false;
+}
+
+// Casts a boolean to the string "true" or "false", truncated to at most out_len
+// bytes. A negative out_len sets an error on the context and yields "".
+//
+// The returned pointer always refers to a string literal. The previous code also
+// requested 5 bytes from the context arena and then immediately overwrote the
+// pointer, so that allocation was never used (and never checked); it is dropped.
+FORCE_INLINE
+const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value,
+                                   gdv_int64 out_len, gdv_int32* out_length) {
+  gdv_int32 len = static_cast<gdv_int32>(out_len);
+  if (len < 0) {
+    gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative");
+    *out_length = 0;
+    return "";
+  }
+  if (value) {
+    *out_length = (len > 4) ? 4 : len;  // strlen("true") == 4
+    return "true";
+  }
+  *out_length = (len > 5) ? 5 : len;  // strlen("false") == 5
+  return "false";
+}
+
+// Truncates the string to the given length, counted in utf8 characters.
+//
+// Generates castVARCHAR_<TYPE>_int64(context, data, data_len, out_len, out_length):
+//  - a negative out_len sets an error on the context and yields "" with
+//    *out_length == 0;
+//  - out_len == 0, or out_len >= data_len, returns the input unchanged;
+//  - otherwise data is returned with *out_length set to the number of bytes that
+//    hold the first out_len characters. Malformed utf8 met on the slow path sets
+//    an error and yields "".
+//
+// The fast path scans for bytes with the MSB set (multibyte characters). Its
+// byte-wise alignment loop is bounded by `remaining`: without that bound it could
+// scan up to 7 bytes past the requested length, reading beyond data_len and, when
+// a multibyte character sat in that overshoot, reporting more than out_len
+// characters.
+#define CAST_VARCHAR_FROM_VARLEN_TYPE(TYPE) \
+  FORCE_INLINE \
+  const char* castVARCHAR_##TYPE##_int64(gdv_int64 context, const char* data, \
+                                         gdv_int32 data_len, int64_t out_len, \
+                                         int32_t* out_length) { \
+    int32_t len = static_cast<int32_t>(out_len); \
+    \
+    if (len < 0) { \
+      gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
+      *out_length = 0; \
+      return ""; \
+    } \
+    \
+    if (len >= data_len || len == 0) { \
+      *out_length = data_len; \
+      return data; \
+    } \
+    \
+    int32_t remaining = len; \
+    int32_t index = 0; \
+    bool is_multibyte = false; \
+    do { \
+      /* In utf8, MSB of a single byte unicode char is always 0, \
+       * whereas for a multibyte character the MSB of each byte is 1. \
+       * So for a single byte char, a bitwise-and with x80 (10000000) will be 0 \
+       * and it won't be 0 for bytes of a multibyte char. \
+       */ \
+      char* data_ptr = const_cast<char*>(data); \
+      \
+      /* advance byte by byte till the 8-byte boundary (never past the requested \
+       * length), then advance 8 bytes at a time */ \
+      auto num_bytes = reinterpret_cast<uintptr_t>(data_ptr) & 0x07; \
+      num_bytes = (8 - num_bytes) & 0x07; \
+      while (num_bytes > 0 && remaining > 0) { \
+        uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index); \
+        if ((*ptr & 0x80) != 0) { \
+          is_multibyte = true; \
+          break; \
+        } \
+        index++; \
+        remaining--; \
+        num_bytes--; \
+      } \
+      if (is_multibyte) break; \
+      while (remaining >= 8) { \
+        uint64_t* ptr = reinterpret_cast<uint64_t*>(data_ptr + index); \
+        if ((*ptr & 0x8080808080808080) != 0) { \
+          is_multibyte = true; \
+          break; \
+        } \
+        index += 8; \
+        remaining -= 8; \
+      } \
+      if (is_multibyte) break; \
+      if (remaining >= 4) { \
+        uint32_t* ptr = reinterpret_cast<uint32_t*>(data_ptr + index); \
+        /* this break leaves the do/while and falls into the slow path */ \
+        if ((*ptr & 0x80808080) != 0) break; \
+        index += 4; \
+        remaining -= 4; \
+      } \
+      while (remaining > 0) { \
+        uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index); \
+        if ((*ptr & 0x80) != 0) { \
+          is_multibyte = true; \
+          break; \
+        } \
+        index++; \
+        remaining--; \
+      } \
+      if (is_multibyte) break; \
+      /* reached here; all are single byte characters */ \
+      *out_length = len; \
+      return data; \
+    } while (false); \
+    \
+    /* detected multibyte utf8 characters; slow path. The `index` bytes already \
+     * scanned were all single-byte characters, so len - index characters remain. */ \
+    int32_t byte_pos = \
+        utf8_byte_pos(context, data + index, data_len - index, len - index); \
+    if (byte_pos < 0) { \
+      *out_length = 0; \
+      return ""; \
+    } \
+    \
+    *out_length = index + byte_pos; \
+    return data; \
+  }
+
+CAST_VARCHAR_FROM_VARLEN_TYPE(utf8)
+CAST_VARCHAR_FROM_VARLEN_TYPE(binary)
+
+#undef CAST_VARCHAR_FROM_VARLEN_TYPE
+
+// castVARBINARY: generates castVARBINARY_<TYPE>_int64(context, data, data_len,
+// out_len, out_length), which truncates the input to at most out_len bytes.
+// out_len == 0 keeps the whole input; a negative out_len sets an error on the
+// context and yields "". The data is never copied.
+#define CAST_VARBINARY_FROM_STRING_AND_BINARY(TYPE) \
+  GANDIVA_EXPORT \
+  const char* castVARBINARY_##TYPE##_int64(gdv_int64 context, const char* data, \
+                                           gdv_int32 data_len, int64_t out_len, \
+                                           int32_t* out_length) { \
+    const int32_t limit = static_cast<int32_t>(out_len); \
+    if (limit < 0) { \
+      gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
+      *out_length = 0; \
+      return ""; \
+    } \
+    \
+    const bool truncate = (limit != 0) && (limit < data_len); \
+    *out_length = truncate ? limit : data_len; \
+    return data; \
+  }
+
+CAST_VARBINARY_FROM_STRING_AND_BINARY(utf8)
+CAST_VARBINARY_FROM_STRING_AND_BINARY(binary)
+
+#undef CAST_VARBINARY_FROM_STRING_AND_BINARY
+
+// Generates NAME_<TYPE>(in, len, is_valid): reports a value as null exactly when its
+// validity flag is false. The data pointer `in` and its length `len` are not inspected.
+// NOTE(review): instantiated through VAR_LEN_TYPES, which is defined outside this
+// section; presumably once per variable-length type -- confirm against its definition.
+#define IS_NULL(NAME, TYPE) \
+ FORCE_INLINE \
+ bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
+ return !is_valid; \
+ }
+
+VAR_LEN_TYPES(IS_NULL, isnull)
+
+#undef IS_NULL
+
+// Generates NAME_<TYPE>(in, len, is_valid): reports a value as non-null exactly when
+// its validity flag is true. The data pointer `in` and its length `len` are not
+// inspected.
+// NOTE(review): instantiated through VAR_LEN_TYPES, which is defined outside this
+// section; presumably once per variable-length type -- confirm against its definition.
+#define IS_NOT_NULL(NAME, TYPE) \
+ FORCE_INLINE \
+ bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
+ return is_valid; \
+ }
+
+VAR_LEN_TYPES(IS_NOT_NULL, isnotnull)
+
+#undef IS_NOT_NULL
+#undef VAR_LEN_TYPES
+
+/*
+ We follow Oracle semantics for offset:
+ - If position is positive, then the first glyph in the substring is determined by
+ counting that many glyphs forward from the beginning of the input. (i.e., for position ==
+ 1 the first glyph in the substring will be identical to the first glyph in the input)
+
+ - If position is negative, then the first glyph in the substring is determined by
+ counting that many glyphs backward from the end of the input. (i.e., for position == -1
+ the first glyph in the substring will be identical to the last glyph in the input)
+
+ - If position is 0 then it is treated as 1.
+ */
+// Returns up to `substring_length` glyphs of `input`, starting at the 1-based glyph
+// `position` (negative positions count back from the end, 0 is treated as 1).
+//
+// The result is copied into a buffer obtained from gdv_fn_context_arena_malloc and its
+// byte length is stored in `*out_data_len`. An empty string (length 0) is returned when
+// the input is null/empty, the requested length is not positive, utf8_length reports
+// zero glyphs, the start position falls outside the input, or the allocation fails (in
+// which case an error is also set on the context).
+FORCE_INLINE
+const char* substr_utf8_int64_int64(gdv_int64 context, const char* input,
+                                    gdv_int32 in_data_len, gdv_int64 position,
+                                    gdv_int64 substring_length, gdv_int32* out_data_len) {
+  if (substring_length <= 0 || input == nullptr || in_data_len <= 0) {
+    *out_data_len = 0;
+    return "";
+  }
+
+  gdv_int64 in_glyphs_count =
+      static_cast<gdv_int64>(utf8_length(context, input, in_data_len));
+
+  // in_glyphs_count is zero if input has invalid glyphs
+  if (in_glyphs_count == 0) {
+    *out_data_len = 0;
+    return "";
+  }
+
+  gdv_int64 from_glyph;  // from_glyph==0 indicates the first glyph of the input
+  if (position > 0) {
+    from_glyph = position - 1;
+  } else if (position < 0) {
+    from_glyph = in_glyphs_count + position;
+  } else {
+    from_glyph = 0;
+  }
+
+  if (from_glyph < 0 || from_glyph >= in_glyphs_count) {
+    *out_data_len = 0;
+    return "";
+  }
+
+  // Never ask for more glyphs than remain after the starting glyph.
+  gdv_int64 out_glyphs_count = substring_length;
+  if (substring_length > in_glyphs_count - from_glyph) {
+    out_glyphs_count = in_glyphs_count - from_glyph;
+  }
+
+  gdv_int64 in_data_len64 = static_cast<gdv_int64>(in_data_len);
+  gdv_int64 start_pos = 0;
+  gdv_int64 end_pos = in_data_len64;
+
+  // Translate the glyph range [from_glyph, from_glyph + out_glyphs_count) into a byte
+  // range [start_pos, end_pos) by walking the input one glyph at a time.
+  gdv_int64 current_glyph = 0;
+  gdv_int64 pos = 0;
+  while (pos < in_data_len64) {
+    if (current_glyph == from_glyph) {
+      start_pos = pos;
+    }
+    pos += static_cast<gdv_int64>(utf8_char_length(input[pos]));
+    if (current_glyph - from_glyph + 1 == out_glyphs_count) {
+      end_pos = pos;
+      // Both ends are known now and neither can change on later iterations, so
+      // there is no need to scan the rest of the input.
+      break;
+    }
+    current_glyph++;
+  }
+
+  // A trailing glyph whose declared width overruns the buffer must not extend the copy.
+  if (end_pos > in_data_len64 || end_pos > INT_MAX) {
+    end_pos = in_data_len64;
+  }
+
+  *out_data_len = static_cast<gdv_int32>(end_pos - start_pos);
+  char* ret =
+      reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_data_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_data_len = 0;
+    return "";
+  }
+  memcpy(ret, input + start_pos, *out_data_len);
+  return ret;
+}
+
+// Two-argument substr: everything from glyph `offset64` to the end of the input.
+FORCE_INLINE
+const char* substr_utf8_int64(gdv_int64 context, const char* input, gdv_int32 in_len,
+                              gdv_int64 offset64, gdv_int32* out_len) {
+  // The byte length is passed as the glyph limit; an input never has more glyphs
+  // than bytes, so this always reaches the end of the string.
+  const gdv_int64 glyph_limit = in_len;
+  return substr_utf8_int64_int64(context, input, in_len, offset64, glyph_limit, out_len);
+}
+
+// Returns `in` repeated `repeat_number` times, in a buffer obtained from
+// gdv_fn_context_arena_malloc. `*out_len` receives the byte length of the result.
+//
+// Returns an empty string (length 0) when the repeat count is zero or the input is
+// empty. Sets an error on the context and returns an empty string when the repeat count
+// is negative, when the result would not fit in a gdv_int32, or when allocation fails.
+FORCE_INLINE
+const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len,
+                              gdv_int32 repeat_number, gdv_int32* out_len) {
+  // if the repeat number is zero, then return empty string
+  if (repeat_number == 0 || in_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  // if the repeat number is a negative number, an error is set on context
+  if (repeat_number < 0) {
+    gdv_fn_context_set_error_msg(context, "Repeat number can't be negative");
+    *out_len = 0;
+    return "";
+  }
+  // Multiply in 64 bits: a 32-bit product can overflow, which would size the buffer
+  // too small for the copies made below.
+  const gdv_int64 total_len =
+      static_cast<gdv_int64>(repeat_number) * static_cast<gdv_int64>(in_len);
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  char* cursor = ret;
+  for (gdv_int32 i = 0; i < repeat_number; ++i) {
+    memcpy(cursor, in, in_len);
+    cursor += in_len;
+  }
+  return ret;
+}
+
+// Null-tolerant concat of two strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to concatOperator_utf8_utf8.
+FORCE_INLINE
+const char* concat_utf8_utf8(gdv_int64 context, const char* left, gdv_int32 left_len,
+                             bool left_validity, const char* right, gdv_int32 right_len,
+                             bool right_validity, gdv_int32* out_len) {
+  const gdv_int32 left_bytes = left_validity ? left_len : 0;
+  const gdv_int32 right_bytes = right_validity ? right_len : 0;
+  return concatOperator_utf8_utf8(context, left, left_bytes, right, right_bytes, out_len);
+}
+
+// Concatenates two byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8(gdv_int64 context, const char* left,
+                                     gdv_int32 left_len, const char* right,
+                                     gdv_int32 right_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour and would
+  // otherwise be indistinguishable from an empty result.
+  const gdv_int64 total_len = static_cast<gdv_int64>(left_len) + right_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  memcpy(ret, left, left_len);
+  memcpy(ret + left_len, right, right_len);
+  return ret;
+}
+
+// Null-tolerant concat of three strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to concatOperator_utf8_utf8_utf8.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8(gdv_int64 context, const char* in1, gdv_int32 in1_len,
+                                  bool in1_validity, const char* in2, gdv_int32 in2_len,
+                                  bool in2_validity, const char* in3, gdv_int32 in3_len,
+                                  bool in3_validity, gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  return concatOperator_utf8_utf8_utf8(context, in1, n1, in2, n2, in3, n3, out_len);
+}
+
+// Concatenates three byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8(gdv_int64 context, const char* in1,
+                                          gdv_int32 in1_len, const char* in2,
+                                          gdv_int32 in2_len, const char* in3,
+                                          gdv_int32 in3_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len = static_cast<gdv_int64>(in1_len) + in2_len + in3_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  return ret;
+}
+
+// Null-tolerant concat of four strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to the matching concatOperator.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1,
+                                       gdv_int32 in1_len, bool in1_validity,
+                                       const char* in2, gdv_int32 in2_len,
+                                       bool in2_validity, const char* in3,
+                                       gdv_int32 in3_len, bool in3_validity,
+                                       const char* in4, gdv_int32 in4_len,
+                                       bool in4_validity, gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  const gdv_int32 n4 = in4_validity ? in4_len : 0;
+  return concatOperator_utf8_utf8_utf8_utf8(context, in1, n1, in2, n2, in3, n3, in4, n4,
+                                            out_len);
+}
+
+// Concatenates four byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1,
+                                               gdv_int32 in1_len, const char* in2,
+                                               gdv_int32 in2_len, const char* in3,
+                                               gdv_int32 in3_len, const char* in4,
+                                               gdv_int32 in4_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len =
+      static_cast<gdv_int64>(in1_len) + in2_len + in3_len + in4_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  cursor += in3_len;
+  memcpy(cursor, in4, in4_len);
+  return ret;
+}
+
+// Null-tolerant concat of five strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to the matching concatOperator.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+    const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+    gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+    bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+    gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  const gdv_int32 n4 = in4_validity ? in4_len : 0;
+  const gdv_int32 n5 = in5_validity ? in5_len : 0;
+  return concatOperator_utf8_utf8_utf8_utf8_utf8(context, in1, n1, in2, n2, in3, n3, in4,
+                                                 n4, in5, n5, out_len);
+}
+
+// Concatenates five byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+    gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+    gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len =
+      static_cast<gdv_int64>(in1_len) + in2_len + in3_len + in4_len + in5_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  cursor += in3_len;
+  memcpy(cursor, in4, in4_len);
+  cursor += in4_len;
+  memcpy(cursor, in5, in5_len);
+  return ret;
+}
+
+// Null-tolerant concat of six strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to the matching concatOperator.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+    const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+    gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+    bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+    const char* in6, gdv_int32 in6_len, bool in6_validity, gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  const gdv_int32 n4 = in4_validity ? in4_len : 0;
+  const gdv_int32 n5 = in5_validity ? in5_len : 0;
+  const gdv_int32 n6 = in6_validity ? in6_len : 0;
+  return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(context, in1, n1, in2, n2, in3, n3,
+                                                      in4, n4, in5, n5, in6, n6, out_len);
+}
+
+// Concatenates six byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+    gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+    gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+    gdv_int32 in6_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len =
+      static_cast<gdv_int64>(in1_len) + in2_len + in3_len + in4_len + in5_len + in6_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  cursor += in3_len;
+  memcpy(cursor, in4, in4_len);
+  cursor += in4_len;
+  memcpy(cursor, in5, in5_len);
+  cursor += in5_len;
+  memcpy(cursor, in6, in6_len);
+  return ret;
+}
+
+// Null-tolerant concat of seven strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to the matching concatOperator.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+    const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+    gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+    bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+    const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+    gdv_int32 in7_len, bool in7_validity, gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  const gdv_int32 n4 = in4_validity ? in4_len : 0;
+  const gdv_int32 n5 = in5_validity ? in5_len : 0;
+  const gdv_int32 n6 = in6_validity ? in6_len : 0;
+  const gdv_int32 n7 = in7_validity ? in7_len : 0;
+  return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+      context, in1, n1, in2, n2, in3, n3, in4, n4, in5, n5, in6, n6, in7, n7, out_len);
+}
+
+// Concatenates seven byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+    gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+    gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+    gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len = static_cast<gdv_int64>(in1_len) + in2_len + in3_len +
+                              in4_len + in5_len + in6_len + in7_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  cursor += in3_len;
+  memcpy(cursor, in4, in4_len);
+  cursor += in4_len;
+  memcpy(cursor, in5, in5_len);
+  cursor += in5_len;
+  memcpy(cursor, in6, in6_len);
+  cursor += in6_len;
+  memcpy(cursor, in7, in7_len);
+  return ret;
+}
+
+// Null-tolerant concat of eight strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to the matching concatOperator.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+    const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+    gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+    bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+    const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+    gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
+    bool in8_validity, gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  const gdv_int32 n4 = in4_validity ? in4_len : 0;
+  const gdv_int32 n5 = in5_validity ? in5_len : 0;
+  const gdv_int32 n6 = in6_validity ? in6_len : 0;
+  const gdv_int32 n7 = in7_validity ? in7_len : 0;
+  const gdv_int32 n8 = in8_validity ? in8_len : 0;
+  return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+      context, in1, n1, in2, n2, in3, n3, in4, n4, in5, n5, in6, n6, in7, n7, in8, n8,
+      out_len);
+}
+
+// Concatenates eight byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+    gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+    gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+    gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
+    gdv_int32 in8_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len = static_cast<gdv_int64>(in1_len) + in2_len + in3_len +
+                              in4_len + in5_len + in6_len + in7_len + in8_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  cursor += in3_len;
+  memcpy(cursor, in4, in4_len);
+  cursor += in4_len;
+  memcpy(cursor, in5, in5_len);
+  cursor += in5_len;
+  memcpy(cursor, in6, in6_len);
+  cursor += in6_len;
+  memcpy(cursor, in7, in7_len);
+  cursor += in7_len;
+  memcpy(cursor, in8, in8_len);
+  return ret;
+}
+
+// Null-tolerant concat of nine strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to the matching concatOperator.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+    const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+    gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+    bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+    const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+    gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
+    bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity,
+    gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  const gdv_int32 n4 = in4_validity ? in4_len : 0;
+  const gdv_int32 n5 = in5_validity ? in5_len : 0;
+  const gdv_int32 n6 = in6_validity ? in6_len : 0;
+  const gdv_int32 n7 = in7_validity ? in7_len : 0;
+  const gdv_int32 n8 = in8_validity ? in8_len : 0;
+  const gdv_int32 n9 = in9_validity ? in9_len : 0;
+  return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+      context, in1, n1, in2, n2, in3, n3, in4, n4, in5, n5, in6, n6, in7, n7, in8, n8,
+      in9, n9, out_len);
+}
+
+// Concatenates nine byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+    gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+    gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+    gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
+    gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len = static_cast<gdv_int64>(in1_len) + in2_len + in3_len +
+                              in4_len + in5_len + in6_len + in7_len + in8_len + in9_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  cursor += in3_len;
+  memcpy(cursor, in4, in4_len);
+  cursor += in4_len;
+  memcpy(cursor, in5, in5_len);
+  cursor += in5_len;
+  memcpy(cursor, in6, in6_len);
+  cursor += in6_len;
+  memcpy(cursor, in7, in7_len);
+  cursor += in7_len;
+  memcpy(cursor, in8, in8_len);
+  cursor += in8_len;
+  memcpy(cursor, in9, in9_len);
+  return ret;
+}
+
+// Null-tolerant concat of ten strings: an argument whose validity flag is false
+// contributes no bytes, then the work is delegated to the matching concatOperator.
+FORCE_INLINE
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+    const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+    gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+    bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+    const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+    gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
+    bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity,
+    const char* in10, gdv_int32 in10_len, bool in10_validity, gdv_int32* out_len) {
+  const gdv_int32 n1 = in1_validity ? in1_len : 0;
+  const gdv_int32 n2 = in2_validity ? in2_len : 0;
+  const gdv_int32 n3 = in3_validity ? in3_len : 0;
+  const gdv_int32 n4 = in4_validity ? in4_len : 0;
+  const gdv_int32 n5 = in5_validity ? in5_len : 0;
+  const gdv_int32 n6 = in6_validity ? in6_len : 0;
+  const gdv_int32 n7 = in7_validity ? in7_len : 0;
+  const gdv_int32 n8 = in8_validity ? in8_len : 0;
+  const gdv_int32 n9 = in9_validity ? in9_len : 0;
+  const gdv_int32 n10 = in10_validity ? in10_len : 0;
+  return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+      context, in1, n1, in2, n2, in3, n3, in4, n4, in5, n5, in6, n6, in7, n7, in8, n8,
+      in9, n9, in10, n10, out_len);
+}
+
+// Concatenates ten byte strings into a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the combined length.
+//
+// Returns an empty string (length 0) when the combined length is not positive. Sets an
+// error on the context and returns an empty string when the combined length does not
+// fit in a gdv_int32 or when allocation fails.
+FORCE_INLINE
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+    gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+    gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+    gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+    gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
+    gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, const char* in10,
+    gdv_int32 in10_len, gdv_int32* out_len) {
+  // Add in 64 bits: overflowing a signed 32-bit sum is undefined behaviour.
+  const gdv_int64 total_len = static_cast<gdv_int64>(in1_len) + in2_len + in3_len +
+                              in4_len + in5_len + in6_len + in7_len + in8_len + in9_len +
+                              in10_len;
+  if (total_len > INT_MAX) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
+    *out_len = 0;
+    return "";
+  }
+  if (total_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = static_cast<gdv_int32>(total_len);
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  // Append each piece at a running write position.
+  char* cursor = ret;
+  memcpy(cursor, in1, in1_len);
+  cursor += in1_len;
+  memcpy(cursor, in2, in2_len);
+  cursor += in2_len;
+  memcpy(cursor, in3, in3_len);
+  cursor += in3_len;
+  memcpy(cursor, in4, in4_len);
+  cursor += in4_len;
+  memcpy(cursor, in5, in5_len);
+  cursor += in5_len;
+  memcpy(cursor, in6, in6_len);
+  cursor += in6_len;
+  memcpy(cursor, in7, in7_len);
+  cursor += in7_len;
+  memcpy(cursor, in8, in8_len);
+  cursor += in8_len;
+  memcpy(cursor, in9, in9_len);
+  cursor += in9_len;
+  memcpy(cursor, in10, in10_len);
+  return ret;
+}
+
+// Returns the numeric value of the first byte of `data`, or 0 when there is no data.
+//
+// The byte is converted through plain `char`, so on platforms where `char` is signed
+// a first byte >= 0x80 yields a negative value.
+GANDIVA_EXPORT
+gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len) {
+  // A non-positive length means there is no first byte to read.
+  if (data_len <= 0) {
+    return 0;
+  }
+  return static_cast<gdv_int32>(data[0]);
+}
+
+// Returns a copy of the `len` input bytes in a buffer obtained from
+// gdv_fn_context_arena_malloc; `*out_len` receives the number of bytes copied.
+//
+// An empty string (length 0) is returned for a non-positive `len`, and also when the
+// allocation fails (in which case an error is set on the context).
+FORCE_INLINE
+const char* convert_fromUTF8_binary(gdv_int64 context, const char* bin_in, gdv_int32 len,
+                                    gdv_int32* out_len) {
+  // Nothing to copy: skip the allocator and never hand memcpy a negative size.
+  if (len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  *out_len = len;
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  memcpy(ret, bin_in, *out_len);
+  return ret;
+}
+
+// Copies `text_in`, substituting every byte that does not start a well-formed UTF-8
+// sequence with the single byte `char_to_replace[0]`, or dropping it when the
+// replacement is empty.
+//
+//  - A replacement longer than one byte is rejected with an error on the context.
+//  - When no invalid byte is found the original `text_in` pointer is returned.
+//  - Otherwise the result lives in a buffer obtained from gdv_fn_context_arena_malloc.
+//  - An empty string (length 0) is returned for a non-positive `text_len` and on
+//    allocation failure (the latter also sets an error on the context).
+FORCE_INLINE
+const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char* text_in,
+                                                    int32_t text_len,
+                                                    const char* char_to_replace,
+                                                    int32_t char_to_replace_len,
+                                                    int32_t* out_len) {
+  if (char_to_replace_len > 1) {
+    gdv_fn_context_set_error_msg(context, "Replacement of multiple bytes not supported");
+    *out_len = 0;
+    return "";
+  }
+  // Nothing to scan: skip the allocator entirely for empty (or negative) input.
+  if (text_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+  // actually the convert_replace function replaces invalid chars with an ASCII
+  // character so the output length will be the same as the input length
+  *out_len = text_len;
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  int32_t valid_bytes_to_cpy = 0;  // length of the pending run of valid bytes
+  int32_t out_byte_counter = 0;    // bytes written to `ret` so far
+  int32_t in_byte_counter = 0;     // input bytes already flushed or skipped
+  int32_t char_len;
+  // scan the base text from left to right, accumulating runs of valid bytes and
+  // flushing each run when an invalid byte is met
+  for (int text_index = 0; text_index < text_len; text_index += char_len) {
+    char_len = utf8_char_length(text_in[text_index]);
+    // only memory copy the bytes when detect invalid char
+    if (char_len == 0 || text_index + char_len > text_len ||
+        !validate_utf8_following_bytes(text_in, char_len, text_index)) {
+      // define char_len = 1 to increase text_index by 1 (as ASCII char fits in 1 byte)
+      char_len = 1;
+      // first copy the valid bytes until now and then replace the invalid character
+      memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy);
+      // if the replacement char is empty, the invalid char should be ignored
+      // (a negative length is treated as empty rather than dereferenced)
+      if (char_to_replace_len <= 0) {
+        out_byte_counter += valid_bytes_to_cpy;
+      } else {
+        ret[out_byte_counter + valid_bytes_to_cpy] = char_to_replace[0];
+        out_byte_counter += valid_bytes_to_cpy + char_len;
+      }
+      in_byte_counter += valid_bytes_to_cpy + char_len;
+      valid_bytes_to_cpy = 0;
+      continue;
+    }
+    valid_bytes_to_cpy += char_len;
+  }
+  // if invalid chars were not found, return the original string
+  if (out_byte_counter == 0 && in_byte_counter == 0) return text_in;
+  // if there are still valid bytes to copy, do it
+  if (valid_bytes_to_cpy != 0) {
+    memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy);
+  }
+  // the out length will be the out bytes copied + the missing end bytes copied
+  *out_len = valid_bytes_to_cpy + out_byte_counter;
+  return ret;
+}
+
+// Reverses the first `len` bytes of `buf` in place. Lengths below 2 leave the buffer
+// untouched.
+static inline void reverse_char_buf(char* buf, int32_t len) {
+  const int32_t half = len / 2;
+  // Exchange each byte of the lower half with its mirror image in the upper half.
+  for (int32_t lo = 0; lo < half; ++lo) {
+    const int32_t hi = len - 1 - lo;
+    const char held = buf[lo];
+    buf[lo] = buf[hi];
+    buf[hi] = held;
+  }
+}
+
+// Serializes a double as its raw in-memory bytes (host byte order) into a buffer
+// obtained from gdv_fn_context_arena_malloc. On allocation failure an error is set on
+// the context and an empty string with length 0 is returned.
+FORCE_INLINE
+const char* convert_toDOUBLE(int64_t context, double value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* bytes = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (bytes != nullptr) {
+    memcpy(bytes, &value, *out_len);
+    return bytes;
+  }
+
+  gdv_fn_context_set_error_msg(context,
+                               "Could not allocate memory for the output string");
+  *out_len = 0;
+  return "";
+}
+
+// Big-endian flavour of convert_toDOUBLE: same bytes, most significant byte first.
+FORCE_INLINE
+const char* convert_toDOUBLE_be(int64_t context, double value, int32_t* out_len) {
+  const char* native_bytes = convert_toDOUBLE(context, value, out_len);
+  char* bytes = const_cast<char*>(native_bytes);
+
+  // On little-endian hosts the native layout is least significant byte first, so the
+  // buffer has to be flipped.
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(bytes, *out_len);
+#endif
+
+  return bytes;
+}
+
+// Serializes a float as its raw in-memory bytes (host byte order) into a buffer
+// obtained from gdv_fn_context_arena_malloc. On allocation failure an error is set on
+// the context and an empty string with length 0 is returned.
+FORCE_INLINE
+const char* convert_toFLOAT(int64_t context, float value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* bytes = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (bytes != nullptr) {
+    memcpy(bytes, &value, *out_len);
+    return bytes;
+  }
+
+  gdv_fn_context_set_error_msg(context,
+                               "Could not allocate memory for the output string");
+  *out_len = 0;
+  return "";
+}
+
+// Big-endian flavour of convert_toFLOAT: same bytes, most significant byte first.
+FORCE_INLINE
+const char* convert_toFLOAT_be(int64_t context, float value, int32_t* out_len) {
+  const char* native_bytes = convert_toFLOAT(context, value, out_len);
+  char* bytes = const_cast<char*>(native_bytes);
+
+  // On little-endian hosts the native layout is least significant byte first, so the
+  // buffer has to be flipped.
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(bytes, *out_len);
+#endif
+
+  return bytes;
+}
+
+// Serializes a 64-bit integer as its raw in-memory bytes (host byte order) into a
+// buffer obtained from gdv_fn_context_arena_malloc. On allocation failure an error is
+// set on the context and an empty string with length 0 is returned.
+FORCE_INLINE
+const char* convert_toBIGINT(int64_t context, int64_t value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* bytes = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (bytes != nullptr) {
+    memcpy(bytes, &value, *out_len);
+    return bytes;
+  }
+
+  gdv_fn_context_set_error_msg(context,
+                               "Could not allocate memory for the output string");
+  *out_len = 0;
+  return "";
+}
+
+// Big-endian flavour of convert_toBIGINT: same bytes, most significant byte first.
+FORCE_INLINE
+const char* convert_toBIGINT_be(int64_t context, int64_t value, int32_t* out_len) {
+  const char* native_bytes = convert_toBIGINT(context, value, out_len);
+  char* bytes = const_cast<char*>(native_bytes);
+
+  // On little-endian hosts the native layout is least significant byte first, so the
+  // buffer has to be flipped.
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(bytes, *out_len);
+#endif
+
+  return bytes;
+}
+
+// Serializes a 32-bit integer as its raw in-memory bytes (host byte order) into a
+// buffer obtained from gdv_fn_context_arena_malloc. On allocation failure an error is
+// set on the context and an empty string with length 0 is returned.
+FORCE_INLINE
+const char* convert_toINT(int64_t context, int32_t value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* bytes = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (bytes != nullptr) {
+    memcpy(bytes, &value, *out_len);
+    return bytes;
+  }
+
+  gdv_fn_context_set_error_msg(context,
+                               "Could not allocate memory for the output string");
+  *out_len = 0;
+  return "";
+}
+
+// Big-endian flavour of convert_toINT: same bytes, most significant byte first.
+FORCE_INLINE
+const char* convert_toINT_be(int64_t context, int32_t value, int32_t* out_len) {
+  const char* native_bytes = convert_toINT(context, value, out_len);
+  char* bytes = const_cast<char*>(native_bytes);
+
+  // On little-endian hosts the native layout is least significant byte first, so the
+  // buffer has to be flipped.
+#if ARROW_LITTLE_ENDIAN
+  reverse_char_buf(bytes, *out_len);
+#endif
+
+  return bytes;
+}
+
+// Serializes a bool as its raw in-memory bytes (sizeof(bool) of them) into a buffer
+// obtained from gdv_fn_context_arena_malloc. On allocation failure an error is set on
+// the context and an empty string with length 0 is returned.
+FORCE_INLINE
+const char* convert_toBOOLEAN(int64_t context, bool value, int32_t* out_len) {
+  *out_len = sizeof(value);
+  char* bytes = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+
+  if (bytes != nullptr) {
+    memcpy(bytes, &value, *out_len);
+    return bytes;
+  }
+
+  gdv_fn_context_set_error_msg(context,
+                               "Could not allocate memory for the output string");
+  *out_len = 0;
+  return "";
+}
+
+// Encodes a 32-bit time value as binary; the encoding is exactly that of convert_toINT.
+FORCE_INLINE
+const char* convert_toTIME_EPOCH(int64_t context, int32_t value, int32_t* out_len) {
+  const char* encoded = convert_toINT(context, value, out_len);
+  return encoded;
+}
+
+// Big-endian counterpart of convert_toTIME_EPOCH; the encoding is exactly that of
+// convert_toINT_be.
+FORCE_INLINE
+const char* convert_toTIME_EPOCH_be(int64_t context, int32_t value, int32_t* out_len) {
+  const char* encoded = convert_toINT_be(context, value, out_len);
+  return encoded;
+}
+
+// Encodes a 64-bit timestamp value as binary; the encoding is exactly that of
+// convert_toBIGINT.
+FORCE_INLINE
+const char* convert_toTIMESTAMP_EPOCH(int64_t context, int64_t timestamp,
+                                      int32_t* out_len) {
+  const char* encoded = convert_toBIGINT(context, timestamp, out_len);
+  return encoded;
+}
+
+// Big-endian counterpart of convert_toTIMESTAMP_EPOCH; the encoding is exactly that of
+// convert_toBIGINT_be.
+FORCE_INLINE
+const char* convert_toTIMESTAMP_EPOCH_be(int64_t context, int64_t timestamp,
+                                         int32_t* out_len) {
+  const char* encoded = convert_toBIGINT_be(context, timestamp, out_len);
+  return encoded;
+}
+
+// Encodes a 64-bit date value as binary; the encoding is exactly that of
+// convert_toBIGINT.
+FORCE_INLINE
+const char* convert_toDATE_EPOCH(int64_t context, int64_t date, int32_t* out_len) {
+  const char* encoded = convert_toBIGINT(context, date, out_len);
+  return encoded;
+}
+
+// Big-endian counterpart of convert_toDATE_EPOCH; the encoding is exactly that of
+// convert_toBIGINT_be.
+FORCE_INLINE
+const char* convert_toDATE_EPOCH_be(int64_t context, int64_t date, int32_t* out_len) {
+  const char* encoded = convert_toBIGINT_be(context, date, out_len);
+  return encoded;
+}
+
+// "Converts" a string to binary. The bytes need no transformation, so the input
+// pointer and length are handed back unchanged; no copy is made and `context` is unused.
+FORCE_INLINE
+const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len,
+                           int32_t* out_len) {
+  const char* passthrough = value;
+  *out_len = value_len;
+  return passthrough;
+}
+
+// Finds `sub_str` inside `str`, searching from the first character.
+// Equivalent to locate(sub_str, str) with the two string arguments swapped.
+FORCE_INLINE
+gdv_int32 strpos_utf8_utf8(gdv_int64 context, const char* str, gdv_int32 str_len,
+                           const char* sub_str, gdv_int32 sub_str_len) {
+  const gdv_int32 search_from = 1;  // positions are 1-indexed
+  return locate_utf8_utf8_int32(context, sub_str, sub_str_len, str, str_len,
+                                search_from);
+}
+
+// Finds `sub_str` inside `str`, searching from the first character.
+FORCE_INLINE
+gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
+                           const char* str, gdv_int32 str_len) {
+  const gdv_int32 search_from = 1;  // positions are 1-indexed
+  return locate_utf8_utf8_int32(context, sub_str, sub_str_len, str, str_len,
+                                search_from);
+}
+
+// Search for a string within another string starting at position start-pos (1-indexed)
+//
+// Returns the 1-indexed character position of the first occurrence of `sub_str` in
+// `str` at or after character `start_pos`, or 0 when there is none. Also returns 0
+// when either string is empty or when `start_pos` lies beyond the end of `str`.
+// A `start_pos` below 1 sets an error on the context and returns 0.
+FORCE_INLINE
+gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str,
+                                 gdv_int32 sub_str_len, const char* str,
+                                 gdv_int32 str_len, gdv_int32 start_pos) {
+  if (start_pos < 1) {
+    gdv_fn_context_set_error_msg(context, "Start position must be greater than 0");
+    return 0;
+  }
+
+  // Treat non-positive lengths as empty so that a negative length can never be
+  // handed to memcmp as a (huge) unsigned size.
+  if (str_len <= 0 || sub_str_len <= 0) {
+    return 0;
+  }
+
+  // Convert the character-based start position into a byte offset.
+  gdv_int32 byte_pos = utf8_byte_pos(context, str, str_len, start_pos - 1);
+  if (byte_pos < 0 || byte_pos >= str_len) {
+    return 0;
+  }
+  for (gdv_int32 i = byte_pos; i <= str_len - sub_str_len; ++i) {
+    if (memcmp(str + i, sub_str, sub_str_len) == 0) {
+      // Report the match as a character position: characters before byte i, plus one.
+      return utf8_length(context, str, i) + 1;
+    }
+  }
+  return 0;
+}
+
+// Replaces every occurrence of from_str inside text by to_str. The result may use at
+// most max_length bytes; needing more is reported as an error on the context and an
+// empty string is returned. When nothing matches, text itself is returned.
+FORCE_INLINE
+const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text,
+                                                gdv_int32 text_len, const char* from_str,
+                                                gdv_int32 from_str_len,
+                                                const char* to_str, gdv_int32 to_str_len,
+                                                gdv_int32 max_length,
+                                                gdv_int32* out_len) {
+  // an empty pattern, or one longer than the text, can never match
+  if (from_str_len <= 0 || from_str_len > text_len) {
+    *out_len = text_len;
+    return text;
+  }
+
+  char* result = nullptr;      // allocated lazily, on the first match
+  gdv_int32 written = 0;       // bytes already placed in result
+  gdv_int32 pending_from = 0;  // start of the text not yet copied into result
+  gdv_int32 scan = 0;          // current candidate position in text
+  const gdv_int32 last_start = text_len - from_str_len;
+
+  while (scan <= last_start) {
+    if (memcmp(text + scan, from_str, from_str_len) != 0) {
+      ++scan;
+      continue;
+    }
+
+    // the pending text plus the replacement must still fit in the output
+    if (written + scan - pending_from + to_str_len > max_length) {
+      gdv_fn_context_set_error_msg(context, "Buffer overflow for output string");
+      *out_len = 0;
+      return "";
+    }
+
+    if (result == nullptr) {
+      result = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, max_length));
+      if (result == nullptr) {
+        gdv_fn_context_set_error_msg(context,
+                                     "Could not allocate memory for output string");
+        *out_len = 0;
+        return "";
+      }
+    }
+
+    // flush the text that was skipped since the previous match ...
+    const gdv_int32 pending_len = scan - pending_from;
+    memcpy(result + written, text + pending_from, pending_len);
+    written += pending_len;
+    // ... followed by the replacement itself
+    memcpy(result + written, to_str, to_str_len);
+    written += to_str_len;
+
+    scan += from_str_len;
+    pending_from = scan;
+  }
+
+  // no match at all: the input is the answer
+  if (result == nullptr) {
+    *out_len = text_len;
+    return text;
+  }
+
+  // append whatever follows the last match
+  if (written + text_len - pending_from > max_length) {
+    gdv_fn_context_set_error_msg(context, "Buffer overflow for output string");
+    *out_len = 0;
+    return "";
+  }
+  const gdv_int32 tail_len = text_len - pending_from;
+  memcpy(result + written, text + pending_from, tail_len);
+  written += tail_len;
+  *out_len = written;
+  return result;
+}
+
+// Replaces every occurrence of from_str inside text by to_str, with the output capped
+// at 65535 bytes.
+FORCE_INLINE
+const char* replace_utf8_utf8_utf8(gdv_int64 context, const char* text,
+                                   gdv_int32 text_len, const char* from_str,
+                                   gdv_int32 from_str_len, const char* to_str,
+                                   gdv_int32 to_str_len, gdv_int32* out_len) {
+  const gdv_int32 output_cap = 65535;
+  return replace_with_max_len_utf8_utf8_utf8(context, text, text_len, from_str,
+                                             from_str_len, to_str, to_str_len,
+                                             output_cap, out_len);
+}
+
+// Left-pads text with fill_text (repeated cyclically) until it is return_length
+// characters long; a text longer than return_length is truncated instead.
+//
+//   context                  execution context, used for allocation and error reports
+//   text, text_len           input string and its size in bytes
+//   return_length            requested size of the result in characters (not bytes)
+//   fill_text, fill_text_len padding string and its size in bytes
+//   out_len                  receives the size of the result in bytes
+//
+// Returns the padded string (arena allocated), the input pointer when only a prefix
+// of it or all of it is the answer, or "" with *out_len == 0 on empty input,
+// non-positive return_length or failure.
+FORCE_INLINE
+const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+                                 gdv_int32 return_length, const char* fill_text,
+                                 gdv_int32 fill_text_len, gdv_int32* out_len) {
+  // if the text length or the defined return length (number of characters to return)
+  // is <=0, then return an empty string.
+  if (text_len == 0 || return_length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // count the number of utf8 characters on text, ignoring invalid bytes
+  int text_char_count = utf8_length_ignore_invalid(text, text_len);
+
+  if (return_length == text_char_count ||
+      (return_length > text_char_count && fill_text_len <= 0)) {
+    // the text already has the requested length, or padding is needed but there is
+    // nothing to pad with (a non-positive fill length would otherwise never make
+    // progress): return text directly.
+    *out_len = text_len;
+    return text;
+  }
+
+  if (return_length < text_char_count) {
+    // case where it truncates the result on return length.
+    const gdv_int32 kept_bytes = utf8_byte_pos(context, text, text_len, return_length);
+    if (kept_bytes < 0) {
+      // other callers in this file treat a negative utf8_byte_pos result as a failure;
+      // never publish a negative length
+      *out_len = 0;
+      return "";
+    }
+    *out_len = kept_bytes;
+    return text;
+  }
+
+  // case (return_length > text_char_count): pad_chars characters of fill_text have to
+  // be placed in front of text.
+  const int32_t pad_chars = return_length - text_char_count;
+
+  // First measure the padding in BYTES. return_length counts characters, and both the
+  // text and the fill may hold multi-byte characters, so the buffer cannot be sized
+  // from return_length alone.
+  int64_t pad_bytes = 0;
+  int32_t fill_index = 0;
+  for (int32_t pad_char = 0; pad_char < pad_chars; ++pad_char) {
+    if (fill_index >= fill_text_len) fill_index = 0;  // start over on the fill text
+    int32_t char_len = utf8_char_length(fill_text[fill_index]);
+    // ignore invalid char on the fill text, considering it as size 1
+    if (char_len == 0) char_len = 1;
+    // a character cut short by the end of fill_text only spans the bytes that exist
+    if (char_len > fill_text_len - fill_index) char_len = fill_text_len - fill_index;
+    fill_index += char_len;
+    pad_bytes += char_len;
+  }
+
+  const int64_t total_bytes = pad_bytes + text_len;
+  char* ret = nullptr;
+  if (total_bytes <= 0x7fffffff) {  // the allocator and out_len are 32-bit
+    ret = reinterpret_cast<gdv_binary>(
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(total_bytes)));
+  }
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context,
+                                 "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  // The padding is a cyclic repetition of fill_text that stops on a character
+  // boundary after pad_bytes bytes, so it can be copied in whole chunks.
+  const int32_t pad_total = static_cast<int32_t>(pad_bytes);
+  int32_t written = 0;
+  while (written < pad_total) {
+    int32_t chunk = pad_total - written;
+    if (chunk > fill_text_len) chunk = fill_text_len;
+    memcpy(ret + written, fill_text, chunk);
+    written += chunk;
+  }
+  // after fulfilling the padding, copy the main string
+  memcpy(ret + pad_total, text, text_len);
+  *out_len = static_cast<gdv_int32>(total_bytes);
+  return ret;
+}
+
+// Right-pads text with fill_text (repeated cyclically) until it is return_length
+// characters long; a text longer than return_length is truncated instead.
+//
+//   context                  execution context, used for allocation and error reports
+//   text, text_len           input string and its size in bytes
+//   return_length            requested size of the result in characters (not bytes)
+//   fill_text, fill_text_len padding string and its size in bytes
+//   out_len                  receives the size of the result in bytes
+//
+// Returns the padded string (arena allocated), the input pointer when only a prefix
+// of it or all of it is the answer, or "" with *out_len == 0 on empty input,
+// non-positive return_length or failure.
+FORCE_INLINE
+const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+                                 gdv_int32 return_length, const char* fill_text,
+                                 gdv_int32 fill_text_len, gdv_int32* out_len) {
+  // if the text length or the defined return length (number of characters to return)
+  // is <=0, then return an empty string.
+  if (text_len == 0 || return_length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // count the number of utf8 characters on text, ignoring invalid bytes
+  int text_char_count = utf8_length_ignore_invalid(text, text_len);
+
+  if (return_length == text_char_count ||
+      (return_length > text_char_count && fill_text_len <= 0)) {
+    // the text already has the requested length, or padding is needed but there is
+    // nothing to pad with (a non-positive fill length would otherwise never make
+    // progress): return text directly.
+    *out_len = text_len;
+    return text;
+  }
+
+  if (return_length < text_char_count) {
+    // case where it truncates the result on return length.
+    const gdv_int32 kept_bytes = utf8_byte_pos(context, text, text_len, return_length);
+    if (kept_bytes < 0) {
+      // other callers in this file treat a negative utf8_byte_pos result as a failure;
+      // never publish a negative length
+      *out_len = 0;
+      return "";
+    }
+    *out_len = kept_bytes;
+    return text;
+  }
+
+  // case (return_length > text_char_count): pad_chars characters of fill_text have to
+  // be appended to text.
+  const int32_t pad_chars = return_length - text_char_count;
+
+  // First measure the padding in BYTES. return_length counts characters, and both the
+  // text and the fill may hold multi-byte characters, so the buffer cannot be sized
+  // from return_length alone.
+  int64_t pad_bytes = 0;
+  int32_t fill_index = 0;
+  for (int32_t pad_char = 0; pad_char < pad_chars; ++pad_char) {
+    if (fill_index >= fill_text_len) fill_index = 0;  // start over on the fill text
+    int32_t char_len = utf8_char_length(fill_text[fill_index]);
+    // ignore invalid char on the fill text, considering it as size 1
+    if (char_len == 0) char_len = 1;
+    // a character cut short by the end of fill_text only spans the bytes that exist
+    if (char_len > fill_text_len - fill_index) char_len = fill_text_len - fill_index;
+    fill_index += char_len;
+    pad_bytes += char_len;
+  }
+
+  const int64_t total_bytes = pad_bytes + text_len;
+  char* ret = nullptr;
+  if (total_bytes <= 0x7fffffff) {  // the allocator and out_len are 32-bit
+    ret = reinterpret_cast<gdv_binary>(
+        gdv_fn_context_arena_malloc(context, static_cast<int32_t>(total_bytes)));
+  }
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context,
+                                 "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  // the result starts with the main input string
+  memcpy(ret, text, text_len);
+
+  // The padding is a cyclic repetition of fill_text that stops on a character
+  // boundary after pad_bytes bytes, so it can be copied in whole chunks.
+  const int32_t pad_total = static_cast<int32_t>(pad_bytes);
+  int32_t written = 0;
+  while (written < pad_total) {
+    int32_t chunk = pad_total - written;
+    if (chunk > fill_text_len) chunk = fill_text_len;
+    memcpy(ret + text_len + written, fill_text, chunk);
+    written += chunk;
+  }
+  *out_len = static_cast<gdv_int32>(total_bytes);
+  return ret;
+}
+
+// Left-pads text up to return_length characters, using a single blank as fill text.
+FORCE_INLINE
+const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 return_length, gdv_int32* out_len) {
+  const char* blank = " ";
+  const gdv_int32 blank_len = 1;
+  return lpad_utf8_int32_utf8(context, text, text_len, return_length, blank, blank_len,
+                              out_len);
+}
+
+// Right-pads text up to return_length characters, using a single blank as fill text.
+FORCE_INLINE
+const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 return_length, gdv_int32* out_len) {
+  const char* blank = " ";
+  const gdv_int32 blank_len = 1;
+  return rpad_utf8_int32_utf8(context, text, text_len, return_length, blank, blank_len,
+                              out_len);
+}
+
+// Splits text on delimiter and returns a copy of the field number 'index' (1-based).
+// An index below 1 is reported as an error on the context; a field that does not
+// exist yields "" with *out_len == 0. With an empty delimiter or an empty text the
+// text itself is returned.
+FORCE_INLINE
+const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
+                       const char* delimiter, gdv_int32 delim_len, gdv_int32 index,
+                       gdv_int32* out_len) {
+  *out_len = 0;
+  if (index < 1) {
+    char error_message[100];
+    snprintf(error_message, sizeof(error_message),
+             "Index in split_part must be positive, value provided was %d", index);
+    gdv_fn_context_set_error_msg(context, error_message);
+    return "";
+  }
+
+  if (delim_len == 0 || text_len == 0) {
+    // without a delimiter (or without text) there is nothing to split
+    *out_len = text_len;
+    return text;
+  }
+
+  for (int field_start = 0, field_no = 1; field_start < text_len; ++field_no) {
+    // position right after the next delimiter, or -1 when no delimiter is left
+    const int after_delim =
+        match_string(text, text_len, field_start, delimiter, delim_len);
+
+    if (field_no != index) {
+      if (after_delim == -1) {
+        // the text ran out before the requested field was reached
+        return "";
+      }
+      // skip this field and continue right behind its delimiter
+      field_start = after_delim;
+      continue;
+    }
+
+    // this is the requested field: it ends where its delimiter starts, or at the end
+    // of the text when it is the last field
+    const int field_end = (after_delim == -1) ? text_len : after_delim - delim_len;
+
+    *out_len = field_end - field_start;
+    char* field =
+        reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+    if (field == nullptr) {
+      gdv_fn_context_set_error_msg(context,
+                                   "Could not allocate memory for output string");
+      *out_len = 0;
+      return "";
+    }
+    memcpy(field, text + field_start, *out_len);
+    return field;
+  }
+
+  return "";
+}
+
+// Returns the x leftmost characters of a given string. A negative x means "everything
+// except the last |x| characters". Cases:
+// LEFT("TestString", 10) => "TestString"
+// LEFT("TestString", 3) => "Tes"
+// LEFT("TestString", -3) => "TestStr"
+// LEFT("TestString", -10) => ""
+//
+// The result is a prefix of text, so the input pointer is returned and only *out_len
+// (in bytes) changes. The characters that are walked are validated as UTF-8; invalid
+// input is reported on the context and yields "" with *out_len == 0.
+FORCE_INLINE
+const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                            gdv_int32 number, gdv_int32* out_len) {
+  // returns the 'number' left most characters of a given text
+  if (text_len == 0 || number == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // For a negative 'number': how many trailing characters have to be dropped. Negated
+  // in 64 bits so that the most negative int32 cannot overflow.
+  const int64_t drop_count = (number < 0) ? -static_cast<int64_t>(number) : 0;
+
+  // iterate over the utf8 string validating each character
+  int char_len;
+  int char_count = 0;  // characters validated so far
+  int byte_index = 0;  // size in bytes of the prefix that will be returned
+  for (int i = 0; i < text_len; i += char_len) {
+    char_len = utf8_char_length(text[i]);
+    if (char_len == 0 || i + char_len > text_len) {  // invalid byte or incomplete glyph
+      set_error_for_invalid_utf(context, text[i]);
+      *out_len = 0;
+      return "";
+    }
+    for (int j = 1; j < char_len; ++j) {
+      if ((text[i + j] & 0xC0) != 0x80) {  // bytes following head-byte of glyph
+        set_error_for_invalid_utf(context, text[i + j]);
+        *out_len = 0;
+        return "";
+      }
+    }
+    ++char_count;
+
+    if (number > 0) {
+      // case where left('abc', 5) -> 'abc': keep every character seen until
+      // 'number' of them were collected
+      byte_index += char_len;
+      if (char_count == number) break;
+    } else if (char_count > drop_count) {
+      // Negative 'number': the prefix end trails the scan by drop_count CHARACTERS
+      // (not bytes), so when the scan reaches the end of the text the prefix stops
+      // exactly drop_count characters short of it. It never moves when the text has
+      // no more than drop_count characters: left('abc', -5) ==> ''.
+      // The character under byte_index was already validated by this loop.
+      byte_index += utf8_char_length(text[byte_index]);
+    }
+  }
+
+  *out_len = byte_index;
+  return text;
+}
+
+// Returns the x rightmost characters of a given string. A negative x means
+// "everything except the first |x| characters". Cases:
+// RIGHT("TestString", 10) => "TestString"
+// RIGHT("TestString", 3) => "ing"
+// RIGHT("TestString", -3) => "tString"
+// RIGHT("TestString", -10) => ""
+//
+// The result is an arena allocated copy; *out_len receives its size in bytes. Empty
+// input, number == 0, invalid UTF-8 or an allocation failure yield "" with
+// *out_len == 0.
+FORCE_INLINE
+const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+                             gdv_int32 number, gdv_int32* out_len) {
+  // returns the 'number' right most characters of a given text
+  if (text_len == 0 || number == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // initially counts the number of utf8 characters in the defined text
+  const int32_t char_count = utf8_length(context, text, text_len);
+  // char_count is zero if input has invalid utf8 char
+  if (char_count == 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // number of leading characters that are NOT part of the result
+  int32_t skip_chars;
+  if (number > 0) {
+    // case where right('abc', 5) ==> 'abc': nothing is skipped
+    skip_chars = (char_count > number) ? char_count - number : 0;
+  } else {
+    // negate in 64 bits so that the most negative int32 cannot overflow
+    const int64_t drop_count = -static_cast<int64_t>(number);
+    if (drop_count >= char_count) {
+      // every character is dropped
+      *out_len = 0;
+      return "";
+    }
+    skip_chars = static_cast<int32_t>(drop_count);
+  }
+
+  // The result is the tail of the text: it starts at the byte position of the first
+  // kept character and runs to the end. Its byte size therefore has to be measured
+  // from that position, not from the start of the text, because characters may have
+  // different widths.
+  const int32_t start_byte_pos = utf8_byte_pos(context, text, text_len, skip_chars);
+  if (start_byte_pos < 0 || start_byte_pos > text_len) {
+    // other callers in this file treat a negative utf8_byte_pos result as a failure
+    *out_len = 0;
+    return "";
+  }
+  const int32_t result_len = text_len - start_byte_pos;
+
+  // try to allocate memory for the response
+  char* ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, result_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  memcpy(ret, text + start_byte_pos, result_len);
+  *out_len = result_len;
+  return ret;
+}
+
+// Converts a string that may contain hex escapes of the form \xHH (or \XHH) into raw
+// bytes: each well-formed escape becomes the single byte it encodes, every other
+// byte is copied as is. The result is arena allocated and never longer than the
+// input; *out_len receives its size in bytes. Empty input or an allocation failure
+// yield "" with *out_len == 0.
+FORCE_INLINE
+const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len,
+                          gdv_int32* out_len) {
+  // Answer for empty input before touching the arena, so that a zero-sized
+  // allocation can never be mistaken for an allocation failure.
+  if (text_len <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  gdv_binary ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, text_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  // converting hex encoded string to normal string
+  int j = 0;
+  for (int i = 0; i < text_len; i++, j++) {
+    if (text[i] == '\\' && i + 3 < text_len &&
+        (text[i + 1] == 'x' || text[i + 1] == 'X')) {
+      char hd1 = text[i + 2];
+      char hd2 = text[i + 3];
+      // <cctype> classifiers are only defined for unsigned char values (and EOF), so
+      // bytes >= 0x80 must not be passed as a negative plain char
+      if (isxdigit(static_cast<unsigned char>(hd1)) &&
+          isxdigit(static_cast<unsigned char>(hd2))) {
+        // [a-fA-F0-9]
+        ret[j] = to_binary_from_hex(hd1) * 16 + to_binary_from_hex(hd2);
+        i += 3;  // the escape consumed four input bytes in total
+      } else {
+        ret[j] = text[i];
+      }
+    } else {
+      ret[j] = text[i];
+    }
+  }
+  *out_len = j;
+  return ret;
+}
+
+// Defines cast<TYPE_NAME>_varbinary, which parses a hexadecimal string with an optional
+// leading '-' (for example "1F" or "-7fff") into the signed integer type OUT_TYPE.
+//
+// Failures are reported on the context and signalled by returning -1:
+//   - empty input, or a lone minus sign
+//   - a character that is not a hexadecimal digit
+//   - a value that does not fit in OUT_TYPE ("Integer overflow.")
+//
+// The magnitude is accumulated in an unsigned 64-bit integer and checked against the
+// largest magnitude OUT_TYPE can hold BEFORE every step. Detecting overflow after the
+// fact on the signed type relies on undefined behaviour and misses inputs whose
+// product wraps back into range (e.g. "-180000000" for a 32-bit result).
+#define CAST_INT_BIGINT_VARBINARY(OUT_TYPE, TYPE_NAME) \
+  FORCE_INLINE \
+  OUT_TYPE \
+  cast##TYPE_NAME##_varbinary(gdv_int64 context, const char* in, int32_t in_len) { \
+    if (in_len == 0) { \
+      gdv_fn_context_set_error_msg(context, "Can't cast an empty string."); \
+      return -1; \
+    } \
+    char sign = in[0]; \
+    \
+    bool negative = false; \
+    if (sign == '-') { \
+      negative = true; \
+      /* Ignores the sign char in the hexadecimal string */ \
+      in++; \
+      in_len--; \
+    } \
+    \
+    if (negative && in_len == 0) { \
+      gdv_fn_context_set_error_msg(context, \
+                                   "Can't cast hexadecimal with only a minus sign."); \
+      return -1; \
+    } \
+    \
+    /* Largest magnitude that fits: 2^(bits-1) - 1, plus one for negative values */ \
+    const uint64_t max_positive = \
+        (static_cast<uint64_t>(1) << (sizeof(OUT_TYPE) * 8 - 1)) - 1; \
+    const uint64_t limit = negative ? max_positive + 1 : max_positive; \
+    \
+    uint64_t magnitude = 0; \
+    for (int read_index = 0; read_index < in_len; ++read_index) { \
+      char c1 = in[read_index]; \
+      /* <cctype> classifiers are only defined for unsigned char values */ \
+      if (!isxdigit(static_cast<unsigned char>(c1))) { \
+        gdv_fn_context_set_error_msg(context, \
+                                     "The hexadecimal given has invalid characters."); \
+        return -1; \
+      } \
+      int digit = to_binary_from_hex(c1); \
+      \
+      /* magnitude * 16 + digit <= limit, tested without ever overflowing */ \
+      if (magnitude > (limit - static_cast<uint64_t>(digit)) / 16) { \
+        gdv_fn_context_set_error_msg(context, "Integer overflow."); \
+        return -1; \
+      } \
+      magnitude = magnitude * 16 + static_cast<uint64_t>(digit); \
+    } \
+    \
+    if (!negative) { \
+      return static_cast<OUT_TYPE>(magnitude); \
+    } \
+    if (magnitude == 0) { \
+      return 0; \
+    } \
+    /* -(magnitude - 1) - 1 also reaches the minimum value without overflowing */ \
+    return static_cast<OUT_TYPE>(-static_cast<OUT_TYPE>(magnitude - 1) - 1); \
+  }
+
+CAST_INT_BIGINT_VARBINARY(int32_t, INT)
+CAST_INT_BIGINT_VARBINARY(int64_t, BIGINT)
+
+#undef CAST_INT_BIGINT_VARBINARY
+
+// Produces the binary representation of a string y characters long derived by starting
+// at offset 'x' and considering the defined length 'y'. Notice that the offset index
+// may be a negative number (starting from the end of the string), or a positive number
+// starting on index 1. Cases:
+// BYTE_SUBSTR("TestString", 1, 10) => "TestString"
+// BYTE_SUBSTR("TestString", 5, 10) => "String"
+// BYTE_SUBSTR("TestString", -6, 10) => "String"
+// BYTE_SUBSTR("TestString", -600, 10) => "TestString"
+// BYTE_SUBSTR("TestString", 11, 10) => ""
+//
+// Offsets and lengths are counted in bytes. The result is an arena allocated copy and
+// *out_len receives its size; empty input, offset == 0, a non-positive length, an
+// offset beyond the end of the text or an allocation failure yield "" with
+// *out_len == 0.
+FORCE_INLINE
+const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
+                                           gdv_int32 text_len, gdv_int32 offset,
+                                           gdv_int32 length, gdv_int32* out_len) {
+  // the first offset position for a string is 1, so not consider offset == 0
+  // also, the length should be always a positive number
+  if (text_len <= 0 || offset == 0 || length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  // Zero-based start position. A negative offset that reaches before the beginning
+  // of the text falls back to the beginning.
+  int32_t start_pos = 0;
+  if (offset > 0) {
+    start_pos = offset - 1;
+  } else if (text_len + offset >= 0) {
+    start_pos = text_len + offset;
+  }
+
+  // An offset past the end selects nothing. Without this check the computed length
+  // would be negative and be handed to memcpy as a huge unsigned size.
+  if (start_pos >= text_len) {
+    *out_len = 0;
+    return "";
+  }
+
+  // truncate the requested length to what is available; comparing against the
+  // remaining bytes avoids the overflow of start_pos + length
+  const int32_t available = text_len - start_pos;
+  const int32_t copy_len = (length < available) ? length : available;
+
+  char* ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, copy_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, text + start_pos, copy_len);
+  *out_len = copy_len;
+  return ret;
+}
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/string_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/string_ops_test.cc
new file mode 100644
index 000000000..6221dffb3
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -0,0 +1,1758 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <limits>
+
+#include "gandiva/execution_context.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+TEST(TestStringOps, TestCompare) {
+  // both inputs share the prefix "abcd" and differ from the fifth byte on
+  const char* lhs = "abcd789";
+  const char* rhs = "abcd123";
+
+  // identical bytes and identical lengths compare as equal
+  EXPECT_EQ(mem_compare(lhs, 4, rhs, 4), 0);
+
+  // with a common prefix the longer side wins
+  EXPECT_GT(mem_compare(lhs, 5, rhs, 4), 0);
+  EXPECT_LT(mem_compare(lhs, 4, rhs, 5), 0);
+
+  // once a byte differs, that byte decides regardless of the lengths
+  EXPECT_GT(mem_compare(lhs, 5, rhs, 5), 0);
+  EXPECT_GT(mem_compare(lhs, 5, rhs, 7), 0);
+  EXPECT_GT(mem_compare(lhs, 7, rhs, 5), 0);
+}
+
+TEST(TestStringOps, TestAscii) {
+  // each case: input bytes, byte count, expected result of ascii_utf8
+  struct AsciiCase {
+    const char* text;
+    int len;
+    int expected;
+  };
+  const AsciiCase cases[] = {
+      {"ABC", 3, 65},         {"abc", 3, 97}, {"Hello World!", 12, 72},
+      {"This is us", 10, 84}, {"", 0, 0},     {"123", 3, 49},
+      {"999", 3, 57},
+  };
+
+  for (const auto& c : cases) {
+    EXPECT_EQ(ascii_utf8(c.text, c.len), c.expected);
+  }
+}
+
+TEST(TestStringOps, TestBeginsEnds) {
+  // each case: text, its length, the affix looked for, its length, expected answer
+  struct AffixCase {
+    const char* text;
+    int text_len;
+    const char* affix;
+    int affix_len;
+    bool expected;
+  };
+
+  // starts_with
+  const AffixCase prefix_cases[] = {
+      {"hello sir", 9, "hello", 5, true}, {"hellos", 6, "hello", 5, true},
+      {"hello", 5, "hello", 5, true},     {"hell", 4, "hello", 5, false},
+      {"world hello", 11, "hello", 5, false},
+  };
+  for (const auto& c : prefix_cases) {
+    EXPECT_EQ(starts_with_utf8_utf8(c.text, c.text_len, c.affix, c.affix_len),
+              c.expected);
+  }
+
+  // ends_with
+  const AffixCase suffix_cases[] = {
+      {"hello sir", 9, "sir", 3, true}, {"ssir", 4, "sir", 3, true},
+      {"sir", 3, "sir", 3, true},       {"ir", 2, "sir", 3, false},
+      {"hello", 5, "sir", 3, false},
+  };
+  for (const auto& c : suffix_cases) {
+    EXPECT_EQ(ends_with_utf8_utf8(c.text, c.text_len, c.affix, c.affix_len),
+              c.expected);
+  }
+}
+
+TEST(TestStringOps, TestSpace) {
+  // Space - returns a string with 'n' spaces.
+  // The expected values are built with std::string(n, ' ') so that the number of
+  // blanks is explicit and always agrees with the count passed to the function.
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  int32_t out_len = 0;
+
+  auto out = space_int32(ctx_ptr, 1, &out_len);
+  EXPECT_EQ(std::string(out, out_len), std::string(1, ' '));
+  out = space_int32(ctx_ptr, 10, &out_len);
+  EXPECT_EQ(std::string(out, out_len), std::string(10, ' '));
+  out = space_int32(ctx_ptr, 5, &out_len);
+  EXPECT_EQ(std::string(out, out_len), std::string(5, ' '));
+  // a negative count produces the empty string
+  out = space_int32(ctx_ptr, -5, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+
+  out = space_int64(ctx_ptr, 2, &out_len);
+  EXPECT_EQ(std::string(out, out_len), std::string(2, ' '));
+  out = space_int64(ctx_ptr, 9, &out_len);
+  EXPECT_EQ(std::string(out, out_len), std::string(9, ' '));
+  out = space_int64(ctx_ptr, 4, &out_len);
+  EXPECT_EQ(std::string(out, out_len), std::string(4, ' '));
+  // a negative count produces the empty string
+  out = space_int64(ctx_ptr, -5, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+}
+
+TEST(TestStringOps, TestIsSubstr) {
+  // each case: text, its length, candidate substring, its length, expected answer
+  struct SubstrCase {
+    const char* text;
+    int text_len;
+    const char* candidate;
+    int candidate_len;
+    bool expected;
+  };
+  const SubstrCase cases[] = {
+      {"hello world", 11, "world", 5, true},  {"hello world", 11, "lo wo", 5, true},
+      {"hello world", 11, "adsed", 5, false}, {"hel", 3, "hello", 5, false},
+      {"hello", 5, "hello", 5, true},         {"hello world", 11, "", 0, true},
+  };
+
+  for (const auto& c : cases) {
+    EXPECT_EQ(is_substr_utf8_utf8(c.text, c.text_len, c.candidate, c.candidate_len),
+              c.expected);
+  }
+}
+
+TEST(TestStringOps, TestCharLength) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+  // number of characters utf8_length reports for the bytes of 's'
+  auto char_count = [&](const std::string& s) {
+    return utf8_length(ctx_ptr, s.data(), static_cast<int>(s.length()));
+  };
+  // invalid input must count as 0 characters and leave 'message' in the context error
+  auto expect_rejected = [&](const std::string& s, const std::string& message) {
+    EXPECT_EQ(char_count(s), 0);
+    EXPECT_TRUE(ctx.get_error().find(message) != std::string::npos) << ctx.get_error();
+    ctx.Reset();
+  };
+
+  // valid inputs: plain ASCII and multi-byte characters
+  EXPECT_EQ(utf8_length(ctx_ptr, "hello sir", 9), 9);
+  EXPECT_EQ(char_count("âpple"), 5);
+  EXPECT_EQ(char_count("मदन"), 3);
+
+  // invalid utf8
+  expect_rejected("\xf8\x28",
+                  "unexpected byte \\f8 encountered while decoding utf8 string");
+  expect_rejected("aa\xc3",
+                  "unexpected byte \\c3 encountered while decoding utf8 string");
+  expect_rejected(
+      "a\xc3"
+      "a",
+      "unexpected byte \\61 encountered while decoding utf8 string");
+  expect_rejected(
+      "a\xc3\xe3"
+      "a",
+      "unexpected byte \\e3 encountered while decoding utf8 string");
+}
+
+TEST(TestStringOps, TestConvertReplaceInvalidUtf8Char) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+  // Runs the conversion on 'input' with the given replacement. As in a direct call,
+  // one variable carries the input length in and the output length out.
+  auto replace_invalid = [&](const std::string& input, const char* replacement,
+                             int replacement_len) {
+    auto in_out_len = static_cast<int>(input.length());
+    const char* converted = convert_replace_invalid_fromUTF8_binary(
+        ctx_ptr, input.data(), in_out_len, replacement, replacement_len, &in_out_len);
+    return std::string(converted, in_out_len);
+  };
+
+  // invalid utf8 (xf8 is invalid but x28 is not - x28 = '(')
+  EXPECT_EQ(replace_invalid("ok-\xf8\x28"
+                            "-a",
+                            "a", 1),
+            "ok-a(-a");
+  EXPECT_FALSE(ctx.has_error());
+
+  // invalid utf8 (xa0 and xa1 are invalid)
+  EXPECT_EQ(replace_invalid("ok-\xa0\xa1-valid", "b", 1), "ok-bb-valid");
+  EXPECT_FALSE(ctx.has_error());
+
+  // full valid utf8
+  EXPECT_EQ(replace_invalid("all-valid", "c", 1), "all-valid");
+  EXPECT_FALSE(ctx.has_error());
+
+  // valid utf8 containing multibyte characters
+  EXPECT_EQ(replace_invalid("ok-महसुस-valid-new", "d", 1), "ok-महसुस-valid-new");
+  EXPECT_FALSE(ctx.has_error());
+
+  // full valid utf8, but invalid replacement char length
+  EXPECT_EQ(replace_invalid("all-valid", "ee", 2), "");
+  EXPECT_TRUE(ctx.has_error());
+  ctx.Reset();
+
+  // invalid utf8 (xa0 and xa1 are invalid) with empty replacement char length
+  EXPECT_EQ(replace_invalid("ok-\xa0\xa1-valid", "", 0), "ok--valid");
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  // invalid bytes at the start, in the middle and at the end, empty replacement
+  EXPECT_EQ(replace_invalid("\xa0\xa1-ok-\xa0\xa1-valid-\xa0\xa1", "", 0),
+            "-ok--valid-");
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  // invalid bytes only at the start, empty replacement
+  EXPECT_EQ(replace_invalid("\xa0\xa1-valid", "", 0), "-valid");
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  EXPECT_EQ(replace_invalid("\xa0\xa1-valid-\xa0\xa1-valid-\xa0\xa1", "", 0),
+            "-valid--valid-");
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+}
+
+TEST(TestStringOps, TestRepeat) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  // repeats 'text' the given number of times; out_len is shared between the calls
+  auto repeated = [&](const char* text, gdv_int32 text_len, gdv_int32 times) {
+    const char* produced = repeat_utf8_int32(ctx_ptr, text, text_len, times, &out_len);
+    return std::string(produced, out_len);
+  };
+
+  EXPECT_EQ(repeated("abc", 3, 2), "abcabc");
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(repeated("a", 1, 5), "aaaaa");
+  EXPECT_FALSE(ctx.has_error());
+
+  // an empty input, or a negative input length, gives an empty result without error
+  EXPECT_EQ(repeated("", 0, 10), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(repeated("", -20, 10), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // a negative repeat count is an error
+  EXPECT_EQ(repeated("a", 1, -10), "");
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Repeat number can't be negative"));
+  ctx.Reset();
+}
+
+TEST(TestStringOps, TestCastBoolToVarchar) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  // casts 'value' into a varchar of at most 'max_len'; out_len is shared between calls
+  auto as_varchar = [&](bool value, int64_t max_len) {
+    const char* produced = castVARCHAR_bool_int64(ctx_ptr, value, max_len, &out_len);
+    return std::string(produced, out_len);
+  };
+
+  // "true" is cut to the requested length ...
+  EXPECT_EQ(as_varchar(true, 2), "tr");
+  EXPECT_FALSE(ctx.has_error());
+
+  // ... and returned whole when the length allows it
+  EXPECT_EQ(as_varchar(true, 7), "true");
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(as_varchar(false, 4), "fals");
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(as_varchar(false, 5), "false");
+  EXPECT_FALSE(ctx.has_error());
+
+  // a negative length is an error; only the reported message is inspected
+  castVARCHAR_bool_int64(ctx_ptr, true, -3, &out_len);
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Output buffer length can't be negative"));
+  ctx.Reset();
+}
+
+TEST(TestStringOps, TestCastVarcharToBool) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+  // Casts the bytes of 's', always passing the real size of the input, so that a
+  // length can never exceed the data it describes.
+  auto to_bool = [&](const std::string& s) {
+    return castBIT_utf8(ctx_ptr, s.data(), static_cast<int>(s.length()));
+  };
+  // 'n' blanks, spelled out so that the amount of padding is explicit
+  auto blanks = [](int n) { return std::string(n, ' '); };
+
+  EXPECT_EQ(to_bool("true"), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  // surrounding blanks are ignored (14 bytes in total)
+  // NOTE(review): the padding is assumed to be 5 blanks on each side - confirm
+  EXPECT_EQ(to_bool(blanks(5) + "true" + blanks(5)), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  // trailing blanks only (9 bytes)
+  EXPECT_EQ(to_bool("true" + blanks(5)), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  // leading blanks only (9 bytes)
+  EXPECT_EQ(to_bool(blanks(5) + "true"), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  // the match is case insensitive
+  EXPECT_EQ(to_bool("TRUE"), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(to_bool("TrUe"), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(to_bool("1"), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  // leading blanks (3 bytes)
+  EXPECT_EQ(to_bool(blanks(2) + "1"), true);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(to_bool("false"), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  // trailing blanks (10 bytes)
+  EXPECT_EQ(to_bool("false" + blanks(5)), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  // leading blanks (10 bytes)
+  EXPECT_EQ(to_bool(blanks(5) + "false"), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(to_bool("0"), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  // trailing blanks (4 bytes)
+  EXPECT_EQ(to_bool("0" + blanks(3)), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(to_bool("FALSE"), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  EXPECT_EQ(to_bool("FaLsE"), false);
+  EXPECT_FALSE(ctx.has_error());
+
+  // anything else is an error and evaluates to false
+  EXPECT_EQ(to_bool("test"), false);
+  EXPECT_TRUE(ctx.has_error());
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Invalid value for boolean"));
+  ctx.Reset();
+}
+
+TEST(TestStringOps, TestCastVarchar) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+
+ // BINARY TESTS
+ const char* out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 1, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "a");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 6, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asd");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 5, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ // do not truncate if output length is 0
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "", 0, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 5, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 6, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "abc", 3, -1, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_THAT(ctx.get_error(),
+ ::testing::HasSubstr("Output buffer length can't be negative"));
+ ctx.Reset();
+
+ std::string z("aa\xc3");
+ out_str = castVARCHAR_binary_int64(ctx_ptr, z.data(), static_cast<int>(z.length()), 2,
+ &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "aa");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234567812341234");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 15, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234123");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 12, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 8, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "12345678");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 7, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234567");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812çåå†123456", 25, 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234567812çåå†12");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "123456781234çåå†1234", 25, 15, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "12çåå†34567812123456", 25, 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "12çåå†3456781212");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†1234567812123456", 25, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†1234567812123456", 25, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_binary_int64(ctx_ptr, "123456781234çåå†", 21, 40, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ std::string f("123456781234çåå\xc3");
+ out_str = castVARCHAR_binary_int64(ctx_ptr, f.data(), static_cast<int32_t>(f.length()),
+ 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_THAT(ctx.get_error(),
+ ::testing::HasSubstr(
+ "unexpected byte \\c3 encountered while decoding utf8 string"));
+ ctx.Reset();
+
+ // UTF8 TESTS
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 1, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "a");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 6, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asd");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 5, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ // do not truncate if output length is 0
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "", 0, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 5, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 6, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "abc", 3, -1, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_THAT(ctx.get_error(),
+ ::testing::HasSubstr("Output buffer length can't be negative"));
+ ctx.Reset();
+
+ std::string d("aa\xc3");
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, d.data(), static_cast<int>(d.length()), 2,
+ &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "aa");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234567812341234");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 15, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234123");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 12, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 8, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "12345678");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 7, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234567");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812çåå†123456", 25, 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "1234567812çåå†12");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "123456781234çåå†1234", 25, 15, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "12çåå†34567812123456", 25, 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "12çåå†3456781212");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†1234567812123456", 25, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†1234567812123456", 25, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "çåå");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, "123456781234çåå†", 21, 40, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå†");
+ EXPECT_FALSE(ctx.has_error());
+
+ std::string y("123456781234çåå\xc3");
+ out_str = castVARCHAR_utf8_int64(ctx_ptr, y.data(), static_cast<int32_t>(y.length()),
+ 16, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_THAT(ctx.get_error(),
+ ::testing::HasSubstr(
+ "unexpected byte \\c3 encountered while decoding utf8 string"));
+ ctx.Reset();
+}
+
+// Covers substr_utf8_int64_int64 (offset and length) and substr_utf8_int64
+// (offset only) on ASCII and multi-byte UTF-8 text. As the expectations show,
+// offsets and lengths are counted in characters, offset 0 is treated like
+// offset 1, a negative offset counts back from the end of the string, and an
+// out-of-range offset or a non-positive length produces an empty result.
+TEST(TestStringOps, TestSubstring) {
+  gandiva::ExecutionContext ctx;
+  // Keep the handle in the same signed type that the cast produces.
+  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  // Zero length -> empty result.
+  const char* out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 1, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 1, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "as");
+  EXPECT_FALSE(ctx.has_error());
+
+  // A length running past the end is clamped to the end of the string.
+  out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 1, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Offset 0 gives the same result as offset 1.
+  out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 0, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "asdf");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Negative offsets count back from the end.
+  out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, -2, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "df");
+  EXPECT_FALSE(ctx.has_error());
+
+  // A negative offset larger than the string yields an empty result.
+  out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, -5, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Multi-byte input: the byte length is 25, offsets/lengths are in characters.
+  out_str = substr_utf8_int64_int64(ctx_ptr, "अपाचे एरो", 25, 1, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "अपाचे");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = substr_utf8_int64_int64(ctx_ptr, "अपाचे एरो", 25, 7, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "एरो");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 4, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "†");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 2, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "åå");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 0, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "çå");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Negative length -> empty result. The input length is 3 so that the
+  // literal's NUL terminator is not handed to the function as data.
+  out_str = substr_utf8_int64_int64(ctx_ptr, "afg", 3, 0, -5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Empty input.
+  out_str = substr_utf8_int64_int64(ctx_ptr, "", 0, 5, 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Offset-only overload: returns everything from the offset to the end.
+  out_str = substr_utf8_int64(ctx_ptr, "abcd", 4, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "bcd");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = substr_utf8_int64(ctx_ptr, "abcd", 4, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "abcd");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = substr_utf8_int64(ctx_ptr, "çåå†", 9, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "åå†");
+  EXPECT_FALSE(ctx.has_error());
+}
+
+// Feeds substr_utf8_int64_int64 byte sequences that the expectations below
+// treat as undecodable (a stray 0xA7 or 0xC3 at the start, middle or end of
+// the buffer) and checks that each one yields "" with an error recorded on the
+// context. The final case uses an offset above the 32-bit range on valid text
+// and expects "" without any error.
+TEST(TestStringOps, TestSubstringInvalidInputs) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  // Shared check for the malformed buffers: empty output, error set, then the
+  // context is reset so the next case starts clean.
+  auto expect_rejected = [&](const char* data, gdv_int32 data_len, gdv_int64 offset,
+                             gdv_int64 length) {
+    const char* piece =
+        substr_utf8_int64_int64(ctx_ptr, data, data_len, offset, length, &out_len);
+    EXPECT_EQ(std::string(piece, out_len), "");
+    EXPECT_TRUE(ctx.has_error());
+    ctx.Reset();
+  };
+
+  const char bad_at_start[] = {'\xA7', 'a'};
+  expect_rejected(bad_at_start, 2, 1, 1);
+
+  const char bad_in_middle[] = {'c', '\xA7', 'a'};
+  expect_rejected(bad_in_middle, 3, 1, 1);
+
+  const char lead_without_tail[] = {'\xC3', 'a', 'a'};
+  expect_rejected(lead_without_tail, 3, 1, 2);
+
+  const char bad_at_end[] = {'a', 'a', '\xA7'};
+  expect_rejected(bad_at_end, 3, 1, 1);
+
+  const char lead_at_end[] = {'a', 'a', '\xC3'};
+  expect_rejected(lead_at_end, 3, 1, 3);
+
+  // Valid text, but the offset does not fit in 32 bits: empty result, no error.
+  const char* beyond_range =
+      substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 2147483656, 2, &out_len);
+  EXPECT_EQ(std::string(beyond_range, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+}
+
+// Checks castVARBINARY_utf8_int64: a target length of 0, or one at least as
+// long as the input, returns the input unchanged; a shorter positive length
+// truncates; a negative length yields "" and records an error on the context.
+TEST(TestGdvFnStubs, TestCastVarbinaryUtf8) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+  const char* input = "abc";
+  const char* out;
+
+  // Length 0 means "do not truncate".
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 0, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+  EXPECT_FALSE(ctx.has_error());
+
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 1, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "a");
+  EXPECT_FALSE(ctx.has_error());
+
+  // A target longer than the input leaves it untouched.
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 500, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+  EXPECT_FALSE(ctx.has_error());
+
+  // Negative target length is the only error path exercised here.
+  out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, -10, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Output buffer length can't be negative"));
+  ctx.Reset();
+}
+
+// Checks castVARBINARY_binary_int64 with the same matrix as the utf8 variant:
+// target length 0 or >= input length returns the input unchanged, and a
+// negative target length yields "" and records an error on the context.
+// The input is the 12-byte text \x41\x42\x43 (backslashes are literal).
+TEST(TestGdvFnStubs, TestCastVarbinaryBinary) {
+  gandiva::ExecutionContext ctx;
+
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+  const char* input = "\\x41\\x42\\x43";
+  const char* out;
+
+  // Length 0 means "do not truncate".
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 12, 0, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+  EXPECT_FALSE(ctx.has_error());
+
+  // Only the first 8 bytes are supplied, and the target length equals that.
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 8, 8, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "\\x41\\x42");
+  EXPECT_FALSE(ctx.has_error());
+
+  // A target longer than the input leaves it untouched.
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 12, 500, &out_len);
+  EXPECT_EQ(std::string(out, out_len), input);
+  EXPECT_FALSE(ctx.has_error());
+
+  // Negative target length is the only error path exercised here.
+  out = castVARBINARY_binary_int64(ctx_ptr, input, 12, -10, &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Output buffer length can't be negative"));
+  ctx.Reset();
+}
+
+TEST(TestStringOps, TestConcat) {  // Walks the concat_* / concatOperator_* families from 2 up to 10 string arguments.
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ // concat_* variants: each (data, length) pair is followed by a bool flag; in every expectation below, arguments flagged false are absent from the output.
+ const char* out_str =
+ concat_utf8_utf8(ctx_ptr, "abcd", 4, true, "\npq", 3, false, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "abcd");
+ EXPECT_FALSE(ctx.has_error());
+ // concatOperator_* variants take no flags; the expected output is every argument joined in order, with empty strings contributing nothing.
+ out_str = concatOperator_utf8_utf8(ctx_ptr, "asdf", 4, "jkl", 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdfjkl");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8(ctx_ptr, "asdf", 4, "", 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "asdf");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8(ctx_ptr, "", 0, "jkl", 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "jkl");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8(ctx_ptr, "", 0, "", 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8(ctx_ptr, "abcd\n", 5, "a", 1, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "abcd\na");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3,
+ true, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqard");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str =
+ concatOperator_utf8_utf8_utf8(ctx_ptr, "abcd\n", 5, "a", 1, "bcd", 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "abcd\nabcd");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8(ctx_ptr, "abcd", 4, "a", 1, "", 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "abcda");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8(ctx_ptr, "", 0, "a", 1, "pqrs", 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "apqrs");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard",
+ 3, true, "uvw", 3, false, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqard");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8_utf8(ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4, "y",
+ 1, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true,
+ "ard", 3, true, "uvw", 3, false, "abc\n", 4,
+ true, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\n");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8_utf8_utf8(ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4,
+ "y", 1, "", 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
+ "abc\n", 4, true, "sdfgs", 5, true, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4, "y", 1, "", 0, "\nbcd", 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy\nbcd");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
+ "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3,
+ &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjkl");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
+ "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3, "", 0,
+ &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjkl");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
+ "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, "qwert|n", 7,
+ true, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgsqwert|n");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3, "", 0,
+ "sfl\n", 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjklsfl\n");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
+ "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, "qwert|n", 7,
+ true, "ewfwe", 5, false, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgsqwert|n");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "", 0, "jkl", 3,
+ "sfl\n", 4, "", 0, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjklsfl\n");
+ EXPECT_FALSE(ctx.has_error());
+}
+
+// reverse_utf8 is expected to reverse character by character (multi-byte
+// characters stay intact) and to report an error when the input ends with a
+// lone 0xC3 byte.
+TEST(TestStringOps, TestReverse) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 reversed_len = 0;
+
+  // Plain ASCII input.
+  const char* reversed = reverse_utf8(ctx_ptr, "TestString", 10, &reversed_len);
+  EXPECT_EQ(std::string(reversed, reversed_len), "gnirtStseT");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Empty input stays empty.
+  reversed = reverse_utf8(ctx_ptr, "", 0, &reversed_len);
+  EXPECT_EQ(std::string(reversed, reversed_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Multi-byte characters are moved as whole units.
+  reversed = reverse_utf8(ctx_ptr, "çåå†", 9, &reversed_len);
+  EXPECT_EQ(std::string(reversed, reversed_len), "†ååç");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Trailing 0xC3 with nothing after it: empty output plus a decode error.
+  const std::string dangling_byte("aa\xc3");
+  reversed = reverse_utf8(ctx_ptr, dangling_byte.data(),
+                          static_cast<int>(dangling_byte.length()), &reversed_len);
+  EXPECT_EQ(std::string(reversed, reversed_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr(
+                  "unexpected byte \\c3 encountered while decoding utf8 string"));
+  ctx.Reset();
+}
+
+// Covers ltrim_utf8 (strips leading spaces) and ltrim_utf8_utf8 (strips any
+// leading characters that occur in the trim set), including multi-byte trim
+// sets and input whose malformed bytes sit after the trimmed prefix.
+TEST(TestStringOps, TestLtrim) {
+  gandiva::ExecutionContext ctx;
+  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  // Runs of spaces are built with std::string(n, ' ') and every length is
+  // taken from the buffer itself, so a call can never read past its input.
+  // 10 characters + 2 trailing spaces = 12 bytes; nothing to trim on the left.
+  const std::string trailing_only = "TestString" + std::string(2, ' ');
+  out_str = ltrim_utf8(ctx_ptr, trailing_only.data(),
+                       static_cast<gdv_int32>(trailing_only.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), trailing_only);
+  EXPECT_FALSE(ctx.has_error());
+
+  // 6 leading + 10 + 2 trailing = 18 bytes; only the leading run is removed.
+  const std::string both_sides = std::string(6, ' ') + trailing_only;
+  out_str = ltrim_utf8(ctx_ptr, both_sides.data(),
+                       static_cast<gdv_int32>(both_sides.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), trailing_only);
+  EXPECT_FALSE(ctx.has_error());
+
+  // 2 leading spaces + 16 bytes of mixed ASCII / multi-byte text = 18 bytes.
+  const std::string utf8_padded = std::string(2, ' ') + "Test çåå†bD";
+  out_str = ltrim_utf8(ctx_ptr, utf8_padded.data(),
+                       static_cast<gdv_int32>(utf8_padded.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test çåå†bD");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = ltrim_utf8(ctx_ptr, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Input made only of spaces trims down to nothing.
+  const std::string only_spaces(6, ' ');
+  out_str = ltrim_utf8(ctx_ptr, only_spaces.data(),
+                       static_cast<gdv_int32>(only_spaces.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Explicit trim set: empty text, then empty trim set.
+  out_str = ltrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = ltrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // The trim argument is a set of characters, not a prefix; repeats in the
+  // set make no difference.
+  out_str = ltrim_utf8_utf8(ctx_ptr, "abcbbaccabbcdef", 15, "abc", 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "def");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = ltrim_utf8_utf8(ctx_ptr, "abcbbaccabbcdef", 15, "ababbac", 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "def");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Multi-byte trim set: trimming stops at the first character not in it.
+  out_str = ltrim_utf8_utf8(ctx_ptr, "ååçåå†eç†Dd", 21, "çåå†", 9, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "eç†Dd");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = ltrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // A stray 0xC3 after the trimmed prefix is returned untouched and is not
+  // reported as an error. The literal is split so "\xc3" is not read as a
+  // longer hex escape.
+  std::string d(
+      "aa\xc3"
+      "bcd");
+  out_str =
+      ltrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "\xc3"
+            "bcd");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Same idea with a truncated three-byte sequence after a multi-byte prefix.
+  std::string e(
+      "åå\xe0\xa0"
+      "bcd");
+  out_str =
+      ltrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "\xE0\xa0"
+            "bcd");
+  EXPECT_FALSE(ctx.has_error());
+
+  // No leading character is in the trim set: input comes back unchanged.
+  out_str = ltrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Every character is in the trim set: nothing is left.
+  out_str = ltrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+}
+
+// Covers lpad_utf8_int32_utf8 (explicit fill text) and lpad_utf8_int32 (no
+// fill text; the expectations show padding with spaces). The target length is
+// in characters: shorter targets truncate the text, longer ones prepend the
+// fill (repeated and cut as needed), and empty text, an empty fill or a
+// non-positive target length add no padding.
+TEST(TestStringOps, TestLpadString) {
+  gandiva::ExecutionContext ctx;
+  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  // LPAD function tests - with defined fill pad text
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  // Empty text (text length 0) yields an empty result.
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  // Empty fill text: nothing can be prepended, the text is returned as is.
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "FillFillTestString");
+
+  // The fill is cut mid-repeat when the gap is not a multiple of its length.
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "FillFTestString");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "FillFillFiTestString");
+
+  // Multi-byte text and fill: 5 characters in 10 bytes, padded to 7 characters.
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ддабвгд");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");
+
+  out_str = lpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "дhello");
+
+  // LPAD function tests - with NO pad text
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  // The pad width is spelled out with std::string(n, ' ') so the expected
+  // length (target - text length) is explicit: 18 - 10 = 8 spaces.
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), std::string(8, ' ') + "TestString");
+
+  // 15 - 10 = 5 spaces.
+  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), std::string(5, ' ') + "TestString");
+
+  // 7 - 5 characters = 2 spaces in front of the multi-byte text.
+  out_str = lpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), std::string(2, ' ') + "абвгд");
+}
+
+// Covers rpad_utf8_int32_utf8 (explicit fill text) and rpad_utf8_int32 (no
+// fill text; the expectations show padding with spaces). The target length is
+// in characters: shorter targets truncate the text, longer ones append the
+// fill (repeated and cut as needed), and empty text, an empty fill or a
+// non-positive target length add no padding.
+TEST(TestStringOps, TestRpadString) {
+  gandiva::ExecutionContext ctx;
+  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  // RPAD function tests - with defined fill pad text
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  // Empty text (text length 0) yields an empty result.
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  // Empty fill text: nothing can be appended, the text is returned as is.
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFill");
+
+  // The fill is cut mid-repeat when the gap is not a multiple of its length.
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillF");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFillFi");
+
+  // Multi-byte text and fill: 5 characters in 10 bytes, padded to 7 characters.
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгддд");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");
+
+  out_str = rpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "helloд");
+
+  // RPAD function tests - with NO pad text
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  // The pad width is spelled out with std::string(n, ' ') so the expected
+  // length (target - text length) is explicit: 18 - 10 = 8 spaces.
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString" + std::string(8, ' '));
+
+  // 15 - 10 = 5 spaces.
+  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString" + std::string(5, ' '));
+
+  // 7 - 5 characters = 2 spaces after the multi-byte text.
+  out_str = rpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "абвгд" + std::string(2, ' '));
+}
+
+// Covers rtrim_utf8 (strips trailing spaces) and rtrim_utf8_utf8 (strips any
+// trailing characters that occur in the trim set), including multi-byte trim
+// sets. Unlike the left-trim cases, malformed bytes that are reached after the
+// valid suffix has been trimmed produce "" and an error.
+TEST(TestStringOps, TestRtrim) {
+  gandiva::ExecutionContext ctx;
+  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  // Runs of spaces are built with std::string(n, ' ') and every length is
+  // taken from the buffer itself, so a call can never read past its input.
+  // 2 leading spaces + 10 characters = 12 bytes; nothing to trim on the right.
+  const std::string leading_only = std::string(2, ' ') + "TestString";
+  out_str = rtrim_utf8(ctx_ptr, leading_only.data(),
+                       static_cast<gdv_int32>(leading_only.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), leading_only);
+  EXPECT_FALSE(ctx.has_error());
+
+  // 6 leading + 10 + 2 trailing = 18 bytes; only the trailing run is removed.
+  const std::string kept_prefix = std::string(6, ' ') + "TestString";
+  const std::string both_sides = kept_prefix + std::string(2, ' ');
+  out_str = rtrim_utf8(ctx_ptr, both_sides.data(),
+                       static_cast<gdv_int32>(both_sides.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), kept_prefix);
+  EXPECT_FALSE(ctx.has_error());
+
+  // 16 bytes of mixed ASCII / multi-byte text + 4 trailing spaces = 20 bytes.
+  const std::string utf8_padded = "Test çåå†bD" + std::string(4, ' ');
+  out_str = rtrim_utf8(ctx_ptr, utf8_padded.data(),
+                       static_cast<gdv_int32>(utf8_padded.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test çåå†bD");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = rtrim_utf8(ctx_ptr, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Input made only of spaces trims down to nothing.
+  const std::string only_spaces(6, ' ');
+  out_str = rtrim_utf8(ctx_ptr, only_spaces.data(),
+                       static_cast<gdv_int32>(only_spaces.size()), &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Explicit trim set: empty text, then empty trim set.
+  out_str = rtrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // The trim argument is a set of characters, not a suffix.
+  out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "ring", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestSt");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = rtrim_utf8_utf8(ctx_ptr, "defabcbbaccabbc", 15, "abc", 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "def");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = rtrim_utf8_utf8(ctx_ptr, "defabcbbaccabbc", 15, "ababbac", 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "def");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Multi-byte trim set: trimming stops at the first character not in it.
+  out_str = rtrim_utf8_utf8(ctx_ptr, "eDdç†ååçåå†", 21, "çåå†", 9, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "eDd");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = rtrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // A stray 0xC3 in front of a fully trimmable suffix: the expectations are
+  // "" and an error. The literal is split so "\xc3" is not read as a longer
+  // hex escape.
+  std::string d(
+      "\xc3"
+      "aaa");
+  out_str =
+      rtrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_TRUE(ctx.has_error());
+  ctx.Reset();
+
+  // Same idea with a truncated three-byte sequence before a multi-byte suffix.
+  std::string e(
+      "\xe0\xa0"
+      "åå");
+  out_str =
+      rtrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_TRUE(ctx.has_error());
+  ctx.Reset();
+
+  out_str = rtrim_utf8_utf8(ctx_ptr, "åeçå", 7, "çå", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "åe");
+  EXPECT_FALSE(ctx.has_error());
+
+  // No trailing character is in the trim set: input comes back unchanged.
+  out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Every character is in the trim set: nothing is left.
+  out_str = rtrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+}
+
+// Exercises btrim_utf8 (strips surrounding spaces) and btrim_utf8_utf8 (strips,
+// from both ends, every leading/trailing character that occurs in the second
+// string), including multi-byte UTF-8 text and malformed byte sequences.
+//
+// Every length argument is a byte count and must match the literal it goes with.
+TEST(TestStringOps, TestBtrim) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  out_str = btrim_utf8(ctx_ptr, "TestString", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // 4 spaces + 10 bytes + 4 spaces = 18 bytes
+  out_str = btrim_utf8(ctx_ptr, "    TestString    ", 18, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // 2 spaces + "Test çåå†bD" (16 bytes) + 3 spaces = 21 bytes
+  out_str = btrim_utf8(ctx_ptr, "  Test çåå†bD   ", 21, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test çåå†bD");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8(ctx_ptr, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // six spaces: everything is trimmed away
+  out_str = btrim_utf8(ctx_ptr, "      ", 6, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "Test", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "String");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Tes");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "abcbbadefccabbc", 15, "abc", 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "def");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "abcbbadefccabbc", 15, "ababbac", 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "def");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "ååçåå†Ddeç†", 21, "çåå†", 9, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Dde");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  // Malformed UTF-8 (a lone \xc3 lead byte) next to trimmable characters:
+  // an error and an empty result are expected.
+  std::string d(
+      "acd\xc3"
+      "aaa");
+  out_str =
+      btrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_TRUE(ctx.has_error());
+  ctx.Reset();
+
+  std::string e(
+      "åbc\xe0\xa0"
+      "åå");
+  out_str =
+      btrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_TRUE(ctx.has_error());
+  ctx.Reset();
+
+  // In the next two cases the malformed bytes end up at the start of the
+  // result; they are returned untouched and no error is expected.
+  std::string f(
+      "aa\xc3"
+      "bcd");
+  out_str =
+      btrim_utf8_utf8(ctx_ptr, f.data(), static_cast<int>(f.length()), "a", 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "\xc3"
+            "bcd");
+  EXPECT_FALSE(ctx.has_error());
+
+  std::string g(
+      "åå\xe0\xa0"
+      "bcå");
+  out_str =
+      btrim_utf8_utf8(ctx_ptr, g.data(), static_cast<int>(g.length()), "å", 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len),
+            "\xe0\xa0"
+            "bc");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "åe†çå", 10, "çå", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "e†");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = btrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+}
+
+// Checks locate_utf8_utf8 and locate_utf8_utf8_int32. Arguments are passed as
+// (substring, substring byte length, text, text byte length[, start position]).
+// The expected values below show a 1-based character (not byte) position and 0
+// when nothing is found.
+TEST(TestStringOps, TestLocate) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+ int pos;
+
+ pos = locate_utf8_utf8(ctx_ptr, "String", 6, "TestString", 10);
+ EXPECT_EQ(pos, 5);
+ EXPECT_FALSE(ctx.has_error());
+
+ pos = locate_utf8_utf8_int32(ctx_ptr, "String", 6, "TestString", 10, 1);
+ EXPECT_EQ(pos, 5);
+ EXPECT_FALSE(ctx.has_error());
+
+ // searching from position 2 skips the match at position 1
+ pos = locate_utf8_utf8_int32(ctx_ptr, "abc", 3, "abcabc", 6, 2);
+ EXPECT_EQ(pos, 4);
+ EXPECT_FALSE(ctx.has_error());
+
+ // multi-byte text: the result counts characters, not bytes
+ pos = locate_utf8_utf8(ctx_ptr, "çåå", 6, "s†å†emçåå†d", 21);
+ EXPECT_EQ(pos, 7);
+ EXPECT_FALSE(ctx.has_error());
+
+ pos = locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "†barbar", 9, 3);
+ EXPECT_EQ(pos, 5);
+ EXPECT_FALSE(ctx.has_error());
+
+ // empty text or empty substring: 0, no error
+ pos = locate_utf8_utf8_int32(ctx_ptr, "sub", 3, "", 0, 1);
+ EXPECT_EQ(pos, 0);
+ EXPECT_FALSE(ctx.has_error());
+
+ pos = locate_utf8_utf8_int32(ctx_ptr, "", 0, "str", 3, 1);
+ EXPECT_EQ(pos, 0);
+ EXPECT_FALSE(ctx.has_error());
+
+ // a start position of 0 is rejected with an error
+ pos = locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "barbar", 6, 0);
+ EXPECT_EQ(pos, 0);
+ EXPECT_THAT(ctx.get_error(),
+ ::testing::HasSubstr("Start position must be greater than 0"));
+ ctx.Reset();
+
+ // a start position past the end of the text yields 0 without an error
+ pos = locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "barbar", 6, 7);
+ EXPECT_EQ(pos, 0);
+ EXPECT_FALSE(ctx.has_error());
+
+ // text containing the invalid UTF-8 byte \xff: 0 plus a decoding error
+ std::string d(
+ "a\xff"
+ "c");
+ pos =
+ locate_utf8_utf8_int32(ctx_ptr, "c", 1, d.data(), static_cast<int>(d.length()), 3);
+ EXPECT_EQ(pos, 0);
+ EXPECT_THAT(ctx.get_error(),
+ ::testing::HasSubstr(
+ "unexpected byte \\ff encountered while decoding utf8 string"));
+ ctx.Reset();
+}
+
+// Checks byte_substr_binary_int32_int32(ctx, text, text_len, offset, length, &out_len).
+// As the expectations below show, the offset is 1-based, a negative offset counts
+// from the end of the text, and an offset of 0 yields an empty result.
+TEST(TestStringOps, TestByteSubstr) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+
+ const char* out_str;
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "String");
+ EXPECT_FALSE(ctx.has_error());
+
+ // negative offset: start 6 bytes before the end
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, -6, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "String");
+ EXPECT_FALSE(ctx.has_error());
+
+ // offset 0 gives an empty result regardless of the requested length
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 0, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 0, -500, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "TestString");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 4, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "Test");
+ EXPECT_FALSE(ctx.has_error());
+
+ // a length larger than the remaining text is clamped to the end of the text
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 1000, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "TestString");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 3, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "Str");
+ EXPECT_FALSE(ctx.has_error());
+
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "String");
+ EXPECT_FALSE(ctx.has_error());
+
+ // a negative offset reaching before the start returns the text from its beginning
+ out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, -100, 10, &out_len);
+ EXPECT_EQ(std::string(out_str, out_len), "TestString");
+ EXPECT_FALSE(ctx.has_error());
+}
+
+// Checks strpos_utf8_utf8(ctx, text, text_len, substring, substring_len). Note
+// that the text comes first here, the reverse of the locate_* argument order.
+// The expected values show a 1-based character position, or 0 when not found.
+TEST(TestStringOps, TestStrPos) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+ int pos;
+
+ pos = strpos_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6);
+ EXPECT_EQ(pos, 5);
+ EXPECT_FALSE(ctx.has_error());
+
+ pos = strpos_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6);
+ EXPECT_EQ(pos, 5);
+ EXPECT_FALSE(ctx.has_error());
+
+ // the first of several matches is reported
+ pos = strpos_utf8_utf8(ctx_ptr, "abcabc", 6, "abc", 3);
+ EXPECT_EQ(pos, 1);
+ EXPECT_FALSE(ctx.has_error());
+
+ // multi-byte text: the result counts characters, not bytes
+ pos = strpos_utf8_utf8(ctx_ptr, "s†å†emçåå†d", 21, "çåå", 6);
+ EXPECT_EQ(pos, 7);
+ EXPECT_FALSE(ctx.has_error());
+
+ pos = strpos_utf8_utf8(ctx_ptr, "†barbar", 9, "bar", 3);
+ EXPECT_EQ(pos, 2);
+ EXPECT_FALSE(ctx.has_error());
+
+ // empty text or empty substring: 0, no error
+ pos = strpos_utf8_utf8(ctx_ptr, "", 0, "sub", 3);
+ EXPECT_EQ(pos, 0);
+ EXPECT_FALSE(ctx.has_error());
+
+ pos = strpos_utf8_utf8(ctx_ptr, "str", 3, "", 0);
+ EXPECT_EQ(pos, 0);
+ EXPECT_FALSE(ctx.has_error());
+
+ // invalid UTF-8 byte \xff in the text: only the error message is checked here,
+ // the returned position is not asserted
+ std::string d(
+ "a\xff"
+ "c");
+ pos = strpos_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "c", 1);
+ EXPECT_THAT(ctx.get_error(),
+ ::testing::HasSubstr(
+ "unexpected byte \\ff encountered while decoding utf8 string"));
+ ctx.Reset();
+}
+
+// Checks replace_utf8_utf8_utf8(ctx, text, len, from, len, to, len, &out_len),
+// which substitutes every occurrence of `from` with `to`, and the bounded
+// variant replace_with_max_len_utf8_utf8_utf8, which reports an error when the
+// result does not fit into the given maximum length.
+//
+// Every length argument is a byte count and must match the literal it goes with.
+TEST(TestStringOps, TestReplace) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+
+  const char* out_str;
+  out_str = replace_utf8_utf8_utf8(ctx_ptr, "TestString1String2", 18, "String", 6,
+                                   "Replace", 7, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestReplace1Replace2");
+  EXPECT_FALSE(ctx.has_error());
+
+  // replacing with an empty string deletes the matches
+  out_str =
+      replace_utf8_utf8_utf8(ctx_ptr, "TestString1", 11, "String", 6, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test1");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = replace_utf8_utf8_utf8(ctx_ptr, "", 0, "test", 4, "rep", 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  // Multi-byte text: Ç(2) †(3) †(3) ç(2) å(2) å(2) †(3) = 17 bytes; each of the
+  // three daggers becomes a single 't'.
+  out_str = replace_utf8_utf8_utf8(ctx_ptr, "Ç††çåå†", 17, "†", 3, "t", 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Çttçååt");
+  EXPECT_FALSE(ctx.has_error());
+
+  // an empty search string leaves the text unchanged
+  out_str = replace_utf8_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, "rep", 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // a search string longer than the text cannot match
+  out_str =
+      replace_utf8_utf8_utf8(ctx_ptr, "Test", 4, "TestString", 10, "rep", 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "Test");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str = replace_utf8_utf8_utf8(ctx_ptr, "Test", 4, "Test", 4, "", 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_FALSE(ctx.has_error());
+
+  out_str =
+      replace_utf8_utf8_utf8(ctx_ptr, "TestString", 10, "abc", 3, "xyz", 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "TestString");
+  EXPECT_FALSE(ctx.has_error());
+
+  // "Hollow" needs 6 bytes but only 5 are allowed
+  replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "Hell", 4, "ell", 3, "ollow", 5, 5,
+                                      &out_len);
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string"));
+  ctx.Reset();
+
+  // four replacements of 4 bytes each need 16 bytes but only 14 are allowed
+  replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "eeee", 4, "e", 1, "aaaa", 4, 14,
+                                      &out_len);
+  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string"));
+  ctx.Reset();
+}
+
+// Checks left_utf8_int32(ctx, text, text_len, n, &out_len). As the expectations
+// show, a positive n keeps the first n characters and a negative n drops the last
+// |n| characters; n is counted in characters, not bytes.
+TEST(TestStringOps, TestLeftString) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ const char* out_str;
+
+ out_str = left_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+ std::string output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "TestString");
+
+ out_str = left_utf8_int32(ctx_ptr, "", 0, 0, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "");
+
+ out_str = left_utf8_int32(ctx_ptr, "", 0, 500, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "");
+
+ out_str = left_utf8_int32(ctx_ptr, "TestString", 10, 3, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "Tes");
+
+ // negative count: everything except the last 3 characters
+ out_str = left_utf8_int32(ctx_ptr, "TestString", 10, -3, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "TestStr");
+
+ // the text length for this string is 10 (each utf8 char is represented by two bytes)
+ out_str = left_utf8_int32(ctx_ptr, "абвгд", 10, 3, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "абв");
+}
+
+// Checks right_utf8_int32(ctx, text, text_len, n, &out_len). As the expectations
+// show, a positive n keeps the last n characters and a negative n drops the first
+// |n| characters; n is counted in characters, not bytes.
+TEST(TestStringOps, TestRightString) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ const char* out_str;
+
+ out_str = right_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
+ std::string output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "TestString");
+
+ out_str = right_utf8_int32(ctx_ptr, "", 0, 0, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "");
+
+ out_str = right_utf8_int32(ctx_ptr, "", 0, 500, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "");
+
+ out_str = right_utf8_int32(ctx_ptr, "TestString", 10, 3, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "ing");
+
+ // negative count: everything except the first 3 characters
+ out_str = right_utf8_int32(ctx_ptr, "TestString", 10, -3, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "tString");
+
+ // the text length for this string is 10 (each utf8 char is represented by two bytes)
+ out_str = right_utf8_int32(ctx_ptr, "абвгд", 10, 3, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "вгд");
+}
+
+// Checks binary_string(ctx, text, text_len, &out_len). Plain text is returned
+// as-is, while a literal backslash-x escape followed by two hex digits (upper or
+// lower case) is turned into the corresponding single byte.
+TEST(TestStringOps, TestBinaryString) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ const char* out_str;
+
+ out_str = binary_string(ctx_ptr, "TestString", 10, &out_len);
+ std::string output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "TestString");
+
+ out_str = binary_string(ctx_ptr, "", 0, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "");
+
+ out_str = binary_string(ctx_ptr, "T", 1, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "T");
+
+ // each 4-character escape (backslash, 'x', two hex digits) becomes one byte
+ out_str = binary_string(ctx_ptr, "\\x41\\x42\\x43", 12, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "ABC");
+
+ out_str = binary_string(ctx_ptr, "\\x41", 4, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "A");
+
+ // hex digits are accepted in either case
+ out_str = binary_string(ctx_ptr, "\\x6d\\x6D", 8, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "mm");
+
+ out_str = binary_string(ctx_ptr, "\\x6f\\x6d", 8, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "om");
+
+ out_str = binary_string(ctx_ptr, "\\x4f\\x4D", 8, &out_len);
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "OM");
+}
+
+// Checks split_part(ctx, text, text_len, delimiter, delimiter_len, index, &out_len),
+// which returns the index-th (1-based) piece of `text` split on `delimiter`.
+//
+// Every length argument is a byte count and must match the literal it goes with.
+TEST(TestStringOps, TestSplitPart) {
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  // index 0 is rejected
+  out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 0, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(
+      ctx.get_error(),
+      ::testing::HasSubstr("Index in split_part must be positive, value provided was 0"));
+  // do not let the expected error leak into the following cases
+  ctx.Reset();
+
+  out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "A");
+
+  out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "B");
+
+  out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "C");
+
+  // multi-character delimiter
+  out_str = split_part(ctx_ptr, "abc~@~def~@~ghi", 15, "~@~", 3, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "abc");
+
+  out_str = split_part(ctx_ptr, "abc~@~def~@~ghi", 15, "~@~", 3, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "def");
+
+  out_str = split_part(ctx_ptr, "abc~@~def~@~ghi", 15, "~@~", 3, 3, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ghi");
+
+  // Result must be empty when the index is > no of elements
+  out_str = split_part(ctx_ptr, "123|456|789", 11, "|", 1, 4, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = split_part(ctx_ptr, "123|", 4, "|", 1, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "123");
+
+  // a leading delimiter produces an empty first piece
+  out_str = split_part(ctx_ptr, "|123", 4, "|", 1, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+
+  out_str = split_part(ctx_ptr, "ç†ååçåå†", 18, "å", 2, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ç†");
+
+  // delimiter is †(3 bytes) å(2) å(2) = 7 bytes
+  out_str = split_part(ctx_ptr, "ç†ååçåå†", 18, "†åå", 7, 1, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ç");
+
+  out_str = split_part(ctx_ptr, "ç†ååçåå†", 18, "†", 3, 2, &out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "ååçåå");
+}
+
+// Checks the convert_to* functions: each is expected to return a buffer whose
+// length equals the size of the input value and whose bytes compare equal
+// (memcmp) to the in-memory representation of that value.
+TEST(TestStringOps, TestConvertTo) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ const char* out_str;
+
+ // memcmp() returns 0 when all compared bytes are equal
+ const int32_t ALL_BYTES_MATCH = 0;
+
+ int32_t integer_value = std::numeric_limits<int32_t>::max();
+ out_str = convert_toINT(ctx_ptr, integer_value, &out_len);
+ EXPECT_EQ(out_len, sizeof(integer_value));
+ EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &integer_value, out_len));
+
+ int64_t big_integer_value = std::numeric_limits<int64_t>::max();
+ out_str = convert_toBIGINT(ctx_ptr, big_integer_value, &out_len);
+ EXPECT_EQ(out_len, sizeof(big_integer_value));
+ EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &big_integer_value, out_len));
+
+ float float_value = std::numeric_limits<float>::max();
+ out_str = convert_toFLOAT(ctx_ptr, float_value, &out_len);
+ EXPECT_EQ(out_len, sizeof(float_value));
+ EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &float_value, out_len));
+
+ double double_value = std::numeric_limits<double>::max();
+ out_str = convert_toDOUBLE(ctx_ptr, double_value, &out_len);
+ EXPECT_EQ(out_len, sizeof(double_value));
+ EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &double_value, out_len));
+
+ // UTF-8 input is expected to come back byte-for-byte
+ const char* test_string = "test string";
+ int32_t str_len = 11;
+ out_str = convert_toUTF8(ctx_ptr, test_string, str_len, &out_len);
+ EXPECT_EQ(out_len, str_len);
+ EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, test_string, out_len));
+}
+
+// Compares the output of the *_be (big-endian) converters with the plain
+// converters for the same value: on a little-endian build the two buffers must be
+// byte-reversed copies of each other, otherwise they must be identical.
+TEST(TestStringOps, TestConvertToBigEndian) {
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ gdv_int32 out_len_big_endian = 0;
+ const char* out_str;
+ const char* out_str_big_endian;
+
+ int64_t big_integer_value = std::numeric_limits<int64_t>::max();
+ out_str = convert_toBIGINT(ctx_ptr, big_integer_value, &out_len);
+ out_str_big_endian =
+ convert_toBIGINT_be(ctx_ptr, big_integer_value, &out_len_big_endian);
+ EXPECT_EQ(out_len_big_endian, sizeof(big_integer_value));
+ EXPECT_EQ(out_len_big_endian, out_len);
+
+#if ARROW_LITTLE_ENDIAN
+ // Checks that bytes are in reverse order
+ for (auto i = 0; i < out_len; i++) {
+ EXPECT_EQ(out_str[i], out_str_big_endian[out_len - (i + 1)]);
+ }
+#else
+ // Big-endian build: both converters must produce the same byte order
+ for (auto i = 0; i < out_len; i++) {
+ EXPECT_EQ(out_str[i], out_str_big_endian[i]);
+ }
+#endif
+
+ double double_value = std::numeric_limits<double>::max();
+ out_str = convert_toDOUBLE(ctx_ptr, double_value, &out_len);
+ out_str_big_endian = convert_toDOUBLE_be(ctx_ptr, double_value, &out_len_big_endian);
+ EXPECT_EQ(out_len_big_endian, sizeof(double_value));
+ EXPECT_EQ(out_len_big_endian, out_len);
+
+#if ARROW_LITTLE_ENDIAN
+ // Checks that bytes are in reverse order
+ for (auto i = 0; i < out_len; i++) {
+ EXPECT_EQ(out_str[i], out_str_big_endian[out_len - (i + 1)]);
+ }
+#else
+ // Big-endian build: both converters must produce the same byte order
+ for (auto i = 0; i < out_len; i++) {
+ EXPECT_EQ(out_str[i], out_str_big_endian[i]);
+ }
+#endif
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/testing.h b/src/arrow/cpp/src/gandiva/precompiled/testing.h
new file mode 100644
index 000000000..c41bc5471
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/testing.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <ctime>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "arrow/util/logging.h"
+#include "arrow/util/value_parsing.h"
+
+#include "gandiva/date_utils.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Test helper: parses a "YYYY-MM-DD HH:MM:SS" string (strptime format
+// "%Y-%m-%d %H:%M:%S", no trailing characters allowed) at second resolution and
+// returns the value scaled by 1000. A parse failure is only caught by DCHECK.
+static inline gdv_timestamp StringToTimestamp(const std::string& s) {
+  int64_t epoch_seconds = 0;
+  const bool parsed_ok = ::arrow::internal::ParseTimestampStrptime(
+      s.c_str(), s.length(), "%Y-%m-%d %H:%M:%S", /*ignore_time_in_day=*/false,
+      /*allow_trailing_chars=*/false, ::arrow::TimeUnit::SECOND, &epoch_seconds);
+  DCHECK(parsed_ok);
+  // keeps builds where DCHECK compiles to nothing free of unused-variable warnings
+  ARROW_UNUSED(parsed_ok);
+  return epoch_seconds * 1000;
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/time.cc b/src/arrow/cpp/src/gandiva/precompiled/time.cc
new file mode 100644
index 000000000..336f69226
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/time.cc
@@ -0,0 +1,894 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "./epoch_time_point.h"
+
+extern "C" {
+
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "./time_constants.h"
+#include "./time_fields.h"
+#include "./types.h"
+
+// Time-unit conversion factors. Compound values are fully parenthesized so that
+// they expand safely inside larger expressions (e.g. `x % SECONDS_IN_HOUR`).
+#define MINS_IN_HOUR 60
+#define SECONDS_IN_MINUTE 60
+#define SECONDS_IN_HOUR ((SECONDS_IN_MINUTE) * (MINS_IN_HOUR))
+
+#define HOURS_IN_DAY 24
+
+// X-macro list: invokes INNER once for each date-like type name (date64,
+// timestamp). The function-generating macros below use the name both as a
+// suffix of the generated function and to form the gdv_<type> parameter type.
+#define DATE_TYPES(INNER) \
+ INNER(date64) \
+ INNER(timestamp)
+
+// X-macro list: invokes INNER once for each base numeric type name (signed and
+// unsigned integers of 8/16/32/64 bits, plus float32 and float64).
+#define NUMERIC_TYPES(INNER) \
+ INNER(int8) \
+ INNER(int16) \
+ INNER(int32) \
+ INNER(int64) \
+ INNER(uint8) \
+ INNER(uint16) \
+ INNER(uint32) \
+ INNER(uint64) \
+ INNER(float32) \
+ INNER(float64)
+
+// Calendar-field extractors. Each macro generates extract<Field>_<TYPE>(millis),
+// which builds an EpochTimePoint from `millis` and derives the field from its
+// broken-down time. TmYear() is an offset from 1900; TmMon() and TmYday() are
+// zero-based.
+
+// Millennium: calendar years 1-1000 map to 1, 1001-2000 to 2, ...
+#define EXTRACT_MILLENNIUM(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractMillennium##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    auto calendar_year = 1900 + point.TmYear(); \
+    return (calendar_year - 1) / 1000 + 1; \
+  }
+
+DATE_TYPES(EXTRACT_MILLENNIUM)
+
+// Century: calendar years 1-100 map to 1, 101-200 to 2, ...
+#define EXTRACT_CENTURY(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractCentury##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    auto calendar_year = 1900 + point.TmYear(); \
+    return (calendar_year - 1) / 100 + 1; \
+  }
+
+DATE_TYPES(EXTRACT_CENTURY)
+
+// Decade: the calendar year divided by 10
+#define EXTRACT_DECADE(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractDecade##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    auto calendar_year = 1900 + point.TmYear(); \
+    return calendar_year / 10; \
+  }
+
+DATE_TYPES(EXTRACT_DECADE)
+
+// Calendar year
+#define EXTRACT_YEAR(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractYear##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 calendar_year = 1900 + point.TmYear(); \
+    return calendar_year; \
+  }
+
+DATE_TYPES(EXTRACT_YEAR)
+
+// Day of year, 1-based (Jan 1 -> 1)
+#define EXTRACT_DOY(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractDoy##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 day_of_year = 1 + point.TmYday(); \
+    return day_of_year; \
+  }
+
+DATE_TYPES(EXTRACT_DOY)
+
+// Quarter of the year, 1-4
+#define EXTRACT_QUARTER(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractQuarter##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 quarter = point.TmMon() / 3 + 1; \
+    return quarter; \
+  }
+
+DATE_TYPES(EXTRACT_QUARTER)
+
+// Month of the year, 1-based (January -> 1)
+#define EXTRACT_MONTH(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractMonth##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 month = 1 + point.TmMon(); \
+    return month; \
+  }
+
+DATE_TYPES(EXTRACT_MONTH)
+
+// Weekday (0 = Sunday ... 6 = Saturday) of Jan 1 in the year of `tp`, derived by
+// stepping back from tp's own weekday by its zero-based day-of-year. The argument
+// is parenthesized so that any expression can be passed safely.
+#define JAN1_WDAY(tp) (((tp).TmWday() - ((tp).TmYday() % 7) + 7) % 7)
+
+// Gregorian leap-year rule: a year is a leap year when it is divisible by 4,
+// except for century years, which must also be divisible by 400.
+bool IsLeapYear(int yy) {
+  const bool divisible_by_4 = (yy % 4) == 0;
+  const bool divisible_by_100 = (yy % 100) == 0;
+  const bool divisible_by_400 = (yy % 400) == 0;
+  return divisible_by_4 && (divisible_by_400 || !divisible_by_100);
+}
+
+// Week number for a day that belongs to a week of its own calendar year.
+//
+// TmYday() is zero-based (Jan 1 == 0). The week is obtained by shifting the
+// day-of-year by an offset that depends on the weekday of Jan 1, then dividing
+// by 7:
+//   Jan 1 on Mon/Tue/Wed/Thu: shift forward by 0/1/2/3 days
+//     (Mon: Jan 1 -> WK1, Jan 8 -> WK2; Tue: Jan 1 -> WK1, Jan 7 -> WK2; ...)
+//   Jan 1 on Fri/Sat/Sun: the first few days belong to the previous year, so
+//     shift back by 3/2/1 days
+//     (Fri: Jan 4 -> WK1; Sat: Jan 3 -> WK1; Sun: Jan 2 -> WK1)
+int weekOfCurrentYear(const EpochTimePoint& tp) {
+  const int jan1_wday = JAN1_WDAY(tp);
+
+  int shift;
+  if (jan1_wday >= 1 && jan1_wday <= 4) {
+    // Monday .. Thursday
+    shift = jan1_wday - 1;
+  } else if (jan1_wday == 5 || jan1_wday == 6) {
+    // Friday, Saturday
+    shift = jan1_wday - 8;
+  } else if (jan1_wday == 0) {
+    // Sunday
+    shift = -1;
+  } else {
+    // not expected to be reached; keeps the compiler happy
+    return 0;
+  }
+
+  return (tp.TmYday() + shift) / 7 + 1;
+}
+
+// Week number for Jan 1-3, decided by the weekday of Jan 1:
+//   Mon..Thu: the days are in week 1 of the current year.
+//   Fri:      Jan 1-3 (Fri, Sat, Sun) share the week of the previous Dec 31.
+//             The previous year started on a Thu (or on a Wed if it was a leap
+//             year), and either way it has a 53rd week.
+//   Sun:      Jan 1 closes the last week of the previous year, which has no
+//             extra week (52); Jan 2 and 3 are in week 1.
+//   Sat:      Jan 1-2 are in the last week of the previous year; Jan 3 is in
+//             week 1. The previous year started on a Thu if it was a leap year
+//             (53 weeks), otherwise on a Fri (52 weeks).
+int getJanWeekOfYear(const EpochTimePoint& tp) {
+  const int jan1_wday = JAN1_WDAY(tp);
+
+  switch (jan1_wday) {
+    case 1:
+    case 2:
+    case 3:
+    case 4:
+      // Monday .. Thursday
+      return 1;
+    case 5:
+      // Friday
+      return 53;
+    case 0:
+      // Sunday
+      return (tp.TmMday() > 1) ? 1 : 52;
+    default:
+      // Saturday, handled below
+      break;
+  }
+
+  if (tp.TmMday() == 3) {
+    // Jan 3 is the Monday that opens week 1
+    return 1;
+  }
+
+  const int previous_year = 1900 + tp.TmYear() - 1;
+  return IsLeapYear(previous_year) ? 53 : 52;
+}
+
+// Week number for Dec 29-31, decided by the weekday of the following Jan 1:
+//   Thu: Dec 29-31 all belong to week 1 of the next year.
+//   Wed: Dec 30-31 belong to week 1 of the next year, Dec 29 to the current year.
+//   Tue: Dec 31 belongs to week 1 of the next year, Dec 29-30 to the current year.
+//   Fri/Sat/Sun/Mon: no day of this year belongs to week 1 of the next year.
+int getDecWeekOfYear(const EpochTimePoint& tp) {
+  const int days_until_next_jan1 = (31 - tp.TmMday()) + 1;
+  const int next_jan1_wday = (tp.TmWday() + days_until_next_jan1) % 7;
+
+  switch (next_jan1_wday) {
+    case 4:
+      // Thursday
+      return 1;
+    case 3:
+      // Wednesday
+      if (tp.TmMday() != 29) {
+        return 1;
+      }
+      break;
+    case 2:
+      // Tuesday
+      if (tp.TmMday() == 31) {
+        return 1;
+      }
+      break;
+    default:
+      break;
+  }
+
+  // the day stays in the current year
+  return weekOfCurrentYear(tp);
+}
+
+// Week of year is determined by ISO 8601 standard
+// Take a look at: https://en.wikipedia.org/wiki/ISO_week_date
+//
+// Important points to note:
+// Week starts with a Monday and ends with a Sunday
+// A week can have some days in this year and some days in the previous/next year
+// This is true for the first and last weeks
+//
+// The first week of the year should have at-least 4 days in the current year
+// The last week of the year should have at-least 4 days in the current year
+//
+// A given day might belong to the first week of the next year - e.g Dec 29, 30 and 31
+// A given day might belong to the last week of the previous year - e.g. Jan 1, 2 and 3
+//
+// Algorithm:
+// If day belongs to week in current year, weekOfCurrentYear
+//
+// If day is Jan 1-3, see getJanWeekOfYear
+// If day is Dec 29-31, see getDecWeekOfYear
+//
+gdv_int64 weekOfYear(const EpochTimePoint& tp) {
+  // Jan 1-3 may still belong to the last week of the previous year
+  const bool is_jan_1_to_3 = tp.TmYday() < 3;
+  if (is_jan_1_to_3) {
+    return getJanWeekOfYear(tp);
+  }
+
+  // Dec 29-31 may already belong to the first week of the next year
+  const bool is_dec_29_to_31 = (tp.TmMon() == 11) && (tp.TmMday() >= 29);
+  return is_dec_29_to_31 ? getDecWeekOfYear(tp) : weekOfCurrentYear(tp);
+}
+
+// Week of year as computed by weekOfYear()
+#define EXTRACT_WEEK(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractWeek##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 week = weekOfYear(point); \
+    return week; \
+  }
+
+DATE_TYPES(EXTRACT_WEEK)
+
+// Day of week, 1-based: TmWday() + 1 (TmWday() == 0 is Sunday)
+#define EXTRACT_DOW(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractDow##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 day_of_week = 1 + point.TmWday(); \
+    return day_of_week; \
+  }
+
+DATE_TYPES(EXTRACT_DOW)
+
+// Day of month, as reported by TmMday()
+#define EXTRACT_DAY(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractDay##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 day_of_month = point.TmMday(); \
+    return day_of_month; \
+  }
+
+DATE_TYPES(EXTRACT_DAY)
+
+// Hour of day, as reported by TmHour()
+#define EXTRACT_HOUR(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractHour##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 hour = point.TmHour(); \
+    return hour; \
+  }
+
+DATE_TYPES(EXTRACT_HOUR)
+
+// Minute of hour, as reported by TmMin()
+#define EXTRACT_MINUTE(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractMinute##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 minute = point.TmMin(); \
+    return minute; \
+  }
+
+DATE_TYPES(EXTRACT_MINUTE)
+
+// Second of minute, as reported by TmSec()
+#define EXTRACT_SECOND(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractSecond##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    gdv_int64 second = point.TmSec(); \
+    return second; \
+  }
+
+DATE_TYPES(EXTRACT_SECOND)
+
+// The input converted from milliseconds to seconds via MILLIS_TO_SEC
+#define EXTRACT_EPOCH(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractEpoch##_##TYPE(gdv_##TYPE millis) { \
+    gdv_int64 epoch_seconds = MILLIS_TO_SEC(millis); \
+    return epoch_seconds; \
+  }
+
+DATE_TYPES(EXTRACT_EPOCH)
+
+// Field extractors for values holding milliseconds within a day (time32).
+
+// Second of minute: whole seconds of the input, modulo 60
+#define EXTRACT_SECOND_TIME(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractSecond##_##TYPE(gdv_##TYPE millis) { \
+    gdv_int64 whole_seconds = MILLIS_TO_SEC(millis); \
+    return whole_seconds % SECONDS_IN_MINUTE; \
+  }
+
+EXTRACT_SECOND_TIME(time32)
+
+// Minute of hour: whole minutes of the input, modulo 60
+#define EXTRACT_MINUTE_TIME(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractMinute##_##TYPE(gdv_##TYPE millis) { \
+    gdv_##TYPE whole_minutes = MILLIS_TO_MINS(millis); \
+    return whole_minutes % (MINS_IN_HOUR); \
+  }
+
+EXTRACT_MINUTE_TIME(time32)
+
+// Hour: the input converted from milliseconds to hours via MILLIS_TO_HOUR
+#define EXTRACT_HOUR_TIME(TYPE) \
+  FORCE_INLINE \
+  gdv_int64 extractHour##_##TYPE(gdv_##TYPE millis) { \
+    gdv_int64 whole_hours = MILLIS_TO_HOUR(millis); \
+    return whole_hours; \
+  }
+
+EXTRACT_HOUR_TIME(time32)
+
+// Generates NAME_<TYPE>(millis): truncates an epoch-millisecond value down to a
+// multiple of NMILLIS_IN_UNIT (second, minute, hour, day, ...).
+//
+// The result is floored (rounded toward negative infinity). Plain integer
+// division rounds toward zero, which would move values before the epoch
+// (negative millis) forward to the *next* unit boundary instead of back to the
+// start of their own unit. Non-negative inputs give the same result as
+// (millis / unit) * unit.
+#define DATE_TRUNC_FIXED_UNIT(NAME, TYPE, NMILLIS_IN_UNIT) \
+  FORCE_INLINE \
+  gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \
+    const gdv_##TYPE unit_millis = (NMILLIS_IN_UNIT); \
+    gdv_##TYPE into_unit = millis % unit_millis; \
+    if (into_unit < 0) { \
+      /* '%' keeps the sign of the dividend; move into [0, unit_millis) */ \
+      into_unit += unit_millis; \
+    } \
+    return millis - into_unit; \
+  }
+
+// Generates date_trunc_Week_<TYPE>(millis): moves back to the Monday of the
+// week containing `millis` and clears the time of day. TmWday() == 0 is Sunday,
+// which is the last day of a Monday-based week and therefore 6 days after it.
+#define DATE_TRUNC_WEEK(TYPE) \
+  FORCE_INLINE \
+  gdv_##TYPE date_trunc_Week_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    const bool is_sunday = (point.TmWday() == 0); \
+    int days_since_monday = is_sunday ? 6 : point.TmWday() - 1; \
+    return point.AddDays(-days_since_monday).ClearTimeOfDay().MillisSinceEpoch(); \
+  }
+
+// Generates NAME_<TYPE>(millis): truncates to the first day (time of day
+// cleared) of the enclosing block of NMONTHS_IN_UNIT months, where blocks are
+// aligned to January (1 = month, 3 = quarter, 12 = year).
+#define DATE_TRUNC_MONTH_UNITS(NAME, TYPE, NMONTHS_IN_UNIT) \
+  FORCE_INLINE \
+  gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    /* days past the 1st of the month */ \
+    int days_back = point.TmMday() - 1; \
+    /* months past the first month of the block (TmMon() is zero-based) */ \
+    int months_back = point.TmMon() % (NMONTHS_IN_UNIT); \
+    return point.AddDays(-days_back) \
+        .AddMonths(-months_back) \
+        .ClearTimeOfDay() \
+        .MillisSinceEpoch(); \
+  }
+
+// Generates NAME_<TYPE>(millis): truncates to Jan 1 (time of day cleared) of the
+// first year of the enclosing block of NYEARS_IN_UNIT years. OFF_BY is the
+// calendar year, modulo NYEARS_IN_UNIT, on which a block starts: 0 for decades
+// (..., 1990, 2000), 1 for centuries and millennia (..., 1901, 2001).
+#define DATE_TRUNC_YEAR_UNITS(NAME, TYPE, NYEARS_IN_UNIT, OFF_BY) \
+  FORCE_INLINE \
+  gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \
+    EpochTimePoint point(millis); \
+    int days_back = point.TmMday() - 1; \
+    int months_back = point.TmMon(); \
+    /* TmYear() is an offset from 1900 */ \
+    int calendar_year = 1900 + point.TmYear(); \
+    /* years elapsed since the first year of the enclosing block */ \
+    int years_back = (calendar_year - (OFF_BY)) % (NYEARS_IN_UNIT); \
+    return point.AddDays(-days_back) \
+        .AddMonths(-months_back) \
+        .AddYears(-years_back) \
+        .ClearTimeOfDay() \
+        .MillisSinceEpoch(); \
+  }
+
+// Instantiates the whole date_trunc_* family for one value type:
+//  - Second/Minute/Hour/Day use the fixed-length-unit generator,
+//  - Week uses the dedicated week generator,
+//  - Month/Quarter/Year use the month-unit generator with 1, 3 and 12 months,
+//  - Decade/Century/Millennium use the year-unit generator; the last argument is
+//    the OFF_BY year offset (0 for decades, 1 for centuries and millennia).
+#define DATE_TRUNC_FUNCTIONS(TYPE) \
+ DATE_TRUNC_FIXED_UNIT(date_trunc_Second, TYPE, MILLIS_IN_SEC) \
+ DATE_TRUNC_FIXED_UNIT(date_trunc_Minute, TYPE, MILLIS_IN_MIN) \
+ DATE_TRUNC_FIXED_UNIT(date_trunc_Hour, TYPE, MILLIS_IN_HOUR) \
+ DATE_TRUNC_FIXED_UNIT(date_trunc_Day, TYPE, MILLIS_IN_DAY) \
+ DATE_TRUNC_WEEK(TYPE) \
+ DATE_TRUNC_MONTH_UNITS(date_trunc_Month, TYPE, 1) \
+ DATE_TRUNC_MONTH_UNITS(date_trunc_Quarter, TYPE, 3) \
+ DATE_TRUNC_MONTH_UNITS(date_trunc_Year, TYPE, 12) \
+ DATE_TRUNC_YEAR_UNITS(date_trunc_Decade, TYPE, 10, 0) \
+ DATE_TRUNC_YEAR_UNITS(date_trunc_Century, TYPE, 100, 1) \
+ DATE_TRUNC_YEAR_UNITS(date_trunc_Millennium, TYPE, 1000, 1)
+
+// Emit the truncation functions for both date64 and timestamp values.
+DATE_TRUNC_FUNCTIONS(date64)
+DATE_TRUNC_FUNCTIONS(timestamp)
+
+// Generates last_day_from_<TYPE>: returns midnight of the last day of the month
+// that contains the input. It rewinds to the first of the month, jumps one
+// month ahead and steps back a single day.
+#define LAST_DAY_FUNC(TYPE) \
+  FORCE_INLINE \
+  gdv_date64 last_day_from_##TYPE(gdv_date64 millis) { \
+    EpochTimePoint input_point(millis); \
+    auto midnight = input_point.ClearTimeOfDay(); \
+    auto month_start = midnight.AddDays(1 - midnight.TmMday()); \
+    auto next_month_start = month_start.AddMonths(1); \
+    return next_month_start.AddDays(-1).MillisSinceEpoch(); \
+  }
+
+DATE_TYPES(LAST_DAY_FUNC)
+
+// Reinterprets a 64-bit integer as a date64 value; the number is passed through
+// unchanged.
+FORCE_INLINE
+gdv_date64 castDATE_int64(gdv_int64 in) {
+  const gdv_date64 as_date = in;
+  return as_date;
+}
+
+// Reinterprets a 32-bit integer as a date32 value; the number is passed through
+// unchanged.
+FORCE_INLINE
+gdv_date32 castDATE_int32(gdv_int32 in) {
+  const gdv_date32 as_date = in;
+  return as_date;
+}
+
+// Converts a date32 day count into a date64 millisecond count. The day-length
+// constant is widened to gdv_date64 before multiplying so the product is
+// computed in the wide type.
+FORCE_INLINE
+gdv_date64 castDATE_date32(gdv_date32 days) {
+  const gdv_date64 millis_per_day = static_cast<gdv_date64>(MILLIS_IN_DAY);
+  return days * millis_per_day;
+}
+
+// Number of days in each month of a non-leap year, indexed by zero-based month.
+static int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+// Returns true when `tp` falls on the final day of its month.
+//
+// Months other than February are looked up in days_in_month. For February the
+// 29th is always the last day, and the 28th is the last day only when the year
+// is not a leap year.
+bool IsLastDayOfMonth(const EpochTimePoint& tp) {
+  if (tp.TmMon() != 1) {
+    // not February. Don't worry about leap year
+    return (tp.TmMday() == days_in_month[tp.TmMon()]);
+  }
+
+  // this is February, check if the day is 28 or 29
+  if (tp.TmMday() < 28) {
+    return false;
+  }
+
+  if (tp.TmMday() == 29) {
+    // Feb 29th
+    return true;
+  }
+
+  // Feb 28th: the last day only in a non-leap year. TmYear() is an offset from
+  // 1900, so convert it to the calendar year first; otherwise the
+  // divisible-by-400 rule is applied to the wrong number (e.g. 2000 -> 100).
+  return !IsLeapYear(1900 + tp.TmYear());
+}
+
+// Checks that the three components form a valid wall-clock time:
+// hours in [0, 24), minutes in [0, 60) and seconds in [0, 60).
+FORCE_INLINE
+bool is_valid_time(const int hours, const int minutes, const int seconds) {
+  if (hours < 0 || hours >= 24) {
+    return false;
+  }
+  if (minutes < 0 || minutes >= 60) {
+    return false;
+  }
+  return seconds >= 0 && seconds < 60;
+}
+
+// MONTHS_BETWEEN returns the number of months between the dates date1 (end) and
+// date2 (start).
+//  - If date1 is later than date2 the result is positive; if it is earlier the
+//    result is negative.
+//  - If both dates fall on the same day of the month, or both are the last day
+//    of their months, the result is a whole number of months.
+//  - Otherwise a fractional part is added, computed on the basis of a 31-day
+//    month and taking the difference in the time-of-day components of date1 and
+//    date2 into account.
+#define MONTHS_BETWEEN(TYPE) \
+  FORCE_INLINE \
+  double months_between##_##TYPE##_##TYPE(uint64_t endEpoch, uint64_t startEpoch) { \
+    EpochTimePoint endTime(endEpoch); \
+    EpochTimePoint startTime(startEpoch); \
+    const int yearDelta = endTime.TmYear() - startTime.TmYear(); \
+    const int monthDelta = endTime.TmMon() - startTime.TmMon(); \
+    const int wholeMonths = yearDelta * 12 + monthDelta; \
+    const bool sameDayOfMonth = (endTime.TmMday() == startTime.TmMday()); \
+    if (sameDayOfMonth || \
+        (IsLastDayOfMonth(endTime) && IsLastDayOfMonth(startTime))) { \
+      return static_cast<double>(wholeMonths); \
+    } \
+    const double dayFraction = \
+        static_cast<double>(endTime.TmMday() - startTime.TmMday()) / \
+        static_cast<double>(31); \
+    const double hourDelta = \
+        static_cast<double>(endTime.TmHour() - startTime.TmHour()) + \
+        static_cast<double>(endTime.TmMin() - startTime.TmMin()) / \
+            static_cast<double>(MINS_IN_HOUR) + \
+        static_cast<double>(endTime.TmSec() - startTime.TmSec()) / \
+            static_cast<double>(SECONDS_IN_HOUR); \
+    return static_cast<double>(wholeMonths) + dayFraction + \
+           hourDelta / static_cast<double>(HOURS_IN_DAY * 31); \
+  }
+
+DATE_TYPES(MONTHS_BETWEEN)
+
+FORCE_INLINE
+void set_error_for_date(gdv_int32 length, const char* input, const char* msg,
+ int64_t execution_context) {
+ int size = length + static_cast<int>(strlen(msg)) + 1;
+ char* error = reinterpret_cast<char*>(malloc(size));
+ snprintf(error, size, "%s%s", msg, input);
+ gdv_fn_context_set_error_msg(execution_context, error);
+ free(error);
+}
+
+// Parses a "<year><sep><month><sep><day>" string into milliseconds since the
+// epoch at midnight of that day.
+//
+// Any non-digit character acts as a field separator, and parsing stops once
+// three fields have been read, so trailing text after the day is ignored.
+// Years written with fewer than four digits and a value below 100 are mapped to
+// 1970-1999 (70-99) or 2000-2069 (00-69). On failure an error message is set on
+// the context and 0 is returned.
+gdv_date64 castDATE_utf8(int64_t context, const char* input, gdv_int32 length) {
+  using arrow_vendored::date::day;
+  using arrow_vendored::date::month;
+  using arrow_vendored::date::sys_days;
+  using arrow_vendored::date::year;
+  using arrow_vendored::date::year_month_day;
+  using gandiva::TimeFields;
+  // format : 0 is year, 1 is month and 2 is day.
+  int dateFields[3];
+  int dateIndex = 0, index = 0, value = 0;
+  int year_str_len = 0;
+  while (dateIndex < 3 && index < length) {
+    // isdigit() has undefined behaviour for negative values, which a plain char
+    // holding a non-ASCII UTF-8 byte can be; go through unsigned char.
+    const unsigned char ch = static_cast<unsigned char>(input[index]);
+    if (!isdigit(ch)) {
+      dateFields[dateIndex++] = value;
+      value = 0;
+    } else {
+      value = (value * 10) + (ch - '0');
+      if (dateIndex == TimeFields::kYear) {
+        year_str_len++;
+      }
+    }
+    index++;
+  }
+
+  if (dateIndex < 3) {
+    // If we reached the end of input, we would have not encountered a separator
+    // store the last value
+    dateFields[dateIndex++] = value;
+  }
+  const char* msg = "Not a valid date value ";
+  if (dateIndex != 3) {
+    set_error_for_date(length, input, msg, context);
+    return 0;
+  }
+
+  /* Handle two digit years
+   * If range of two digits is between 70 - 99 then year = 1970 - 1999
+   * Else if two digits is between 00 - 69 = 2000 - 2069
+   */
+  if (dateFields[TimeFields::kYear] < 100 && year_str_len < 4) {
+    if (dateFields[TimeFields::kYear] < 70) {
+      dateFields[TimeFields::kYear] += 2000;
+    } else {
+      dateFields[TimeFields::kYear] += 1900;
+    }
+  }
+  year_month_day date = year(dateFields[TimeFields::kYear]) /
+                        month(dateFields[TimeFields::kMonth]) /
+                        day(dateFields[TimeFields::kDay]);
+  if (!date.ok()) {
+    set_error_for_date(length, input, msg, context);
+    return 0;
+  }
+  return std::chrono::time_point_cast<std::chrono::milliseconds>(sys_days(date))
+      .time_since_epoch()
+      .count();
+}
+
+/*
+ * Parses a timestamp string into milliseconds since the epoch.
+ *
+ * Input consists of mandatory and optional fields.
+ * Mandatory fields are year, month and day.
+ * Optional fields are time, displacement and zone.
+ * Format is <year-month-day>[ hours:minutes:seconds][.millis][ displacement|zone]
+ *
+ * Years written with fewer than four digits and a value below 100 are mapped to
+ * 1970-1999 (70-99) or 2000-2069 (00-69). At most three sub-second digits are
+ * accepted. When an unrecognised character is met, the rest of the input is
+ * handed to gdv_fn_time_with_zone as a zone name. On failure an error message
+ * is set on the context and 0 is returned.
+ */
+gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 length) {
+  using arrow_vendored::date::day;
+  using arrow_vendored::date::month;
+  using arrow_vendored::date::sys_days;
+  using arrow_vendored::date::year;
+  using arrow_vendored::date::year_month_day;
+  using gandiva::TimeFields;
+  using std::chrono::hours;
+  using std::chrono::milliseconds;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+
+  int ts_fields[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+  gdv_boolean add_displacement = true;
+  gdv_boolean encountered_zone = false;
+  int year_str_len = 0, sub_seconds_len = 0;
+  int ts_field_index = TimeFields::kYear, index = 0, value = 0;
+  while (ts_field_index < TimeFields::kMax && index < length) {
+    // isdigit() has undefined behaviour for negative values, which a plain char
+    // holding a non-ASCII UTF-8 byte can be; go through unsigned char.
+    const unsigned char ch = static_cast<unsigned char>(input[index]);
+    if (isdigit(ch)) {
+      value = (value * 10) + (ch - '0');
+      if (ts_field_index == TimeFields::kYear) {
+        year_str_len++;
+      }
+      if (ts_field_index == TimeFields::kSubSeconds) {
+        sub_seconds_len++;
+      }
+    } else {
+      ts_fields[ts_field_index] = value;
+      value = 0;
+
+      switch (input[index]) {
+        case '.':
+        case ':':
+        case ' ':
+          ts_field_index++;
+          break;
+        case '+':
+          // +08:00, means time zone is 8 hours ahead. Need to subtract.
+          add_displacement = false;
+          ts_field_index = TimeFields::kDisplacementHours;
+          break;
+        case '-':
+          // Overloaded as date separator and negative displacement.
+          ts_field_index = (ts_field_index < 3) ? (ts_field_index + 1)
+                                                : TimeFields::kDisplacementHours;
+          break;
+        default:
+          encountered_zone = true;
+          break;
+      }
+    }
+    if (encountered_zone) {
+      // Leave `index` on the first character of the zone name.
+      break;
+    }
+    index++;
+  }
+
+  // Store the last value
+  if (ts_field_index < TimeFields::kMax) {
+    ts_fields[ts_field_index++] = value;
+  }
+
+  // adjust the year
+  if (ts_fields[TimeFields::kYear] < 100 && year_str_len < 4) {
+    if (ts_fields[TimeFields::kYear] < 70) {
+      ts_fields[TimeFields::kYear] += 2000;
+    } else {
+      ts_fields[TimeFields::kYear] += 1900;
+    }
+  }
+
+  // adjust the milliseconds
+  if (sub_seconds_len > 0) {
+    if (sub_seconds_len > 3) {
+      const char* msg = "Invalid millis for timestamp value ";
+      set_error_for_date(length, input, msg, context);
+      return 0;
+    }
+    // Scale ".1" / ".12" up to milliseconds (100 / 120).
+    while (sub_seconds_len < 3) {
+      ts_fields[TimeFields::kSubSeconds] *= 10;
+      sub_seconds_len++;
+    }
+  }
+  // handle timezone
+  if (encountered_zone) {
+    int err = 0;
+    gdv_timestamp ret_time = 0;
+    err = gdv_fn_time_with_zone(&ts_fields[0], (input + index), (length - index),
+                                &ret_time);
+    if (err) {
+      const char* msg = "Invalid timestamp or unknown zone for timestamp value ";
+      set_error_for_date(length, input, msg, context);
+      return 0;
+    }
+    return ret_time;
+  }
+
+  year_month_day date = year(ts_fields[TimeFields::kYear]) /
+                        month(ts_fields[TimeFields::kMonth]) /
+                        day(ts_fields[TimeFields::kDay]);
+  if (!date.ok()) {
+    const char* msg = "Not a valid day for timestamp value ";
+    set_error_for_date(length, input, msg, context);
+    return 0;
+  }
+
+  if (!is_valid_time(ts_fields[TimeFields::kHours], ts_fields[TimeFields::kMinutes],
+                     ts_fields[TimeFields::kSeconds])) {
+    const char* msg = "Not a valid time for timestamp value ";
+    set_error_for_date(length, input, msg, context);
+    return 0;
+  }
+
+  auto date_time = sys_days(date) + hours(ts_fields[TimeFields::kHours]) +
+                   minutes(ts_fields[TimeFields::kMinutes]) +
+                   seconds(ts_fields[TimeFields::kSeconds]) +
+                   milliseconds(ts_fields[TimeFields::kSubSeconds]);
+  if (ts_fields[TimeFields::kDisplacementHours] ||
+      ts_fields[TimeFields::kDisplacementMinutes]) {
+    auto displacement_time = hours(ts_fields[TimeFields::kDisplacementHours]) +
+                             minutes(ts_fields[TimeFields::kDisplacementMinutes]);
+    date_time = (add_displacement) ? (date_time + displacement_time)
+                                   : (date_time - displacement_time);
+  }
+  return std::chrono::time_point_cast<milliseconds>(date_time).time_since_epoch().count();
+}
+
+// Converts a date64 value to a timestamp; the millisecond count is passed
+// through unchanged.
+gdv_timestamp castTIMESTAMP_date64(gdv_date64 date_in_millis) {
+  const gdv_timestamp as_timestamp = date_in_millis;
+  return as_timestamp;
+}
+
+// Reinterprets a 64-bit integer as a timestamp; the number is passed through
+// unchanged.
+gdv_timestamp castTIMESTAMP_int64(gdv_int64 in) {
+  const gdv_timestamp as_timestamp = in;
+  return as_timestamp;
+}
+
+// Converts a timestamp to a date by dropping its time-of-day component.
+gdv_date64 castDATE_timestamp(gdv_timestamp timestamp_in_millis) {
+  return EpochTimePoint(timestamp_in_millis).ClearTimeOfDay().MillisSinceEpoch();
+}
+
+// Returns the time-of-day part of a timestamp, i.e. the number of milliseconds
+// elapsed since midnight of the same day.
+gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis) {
+  EpochTimePoint point(timestamp_in_millis);
+  const int64_t midnight_millis = point.ClearTimeOfDay().MillisSinceEpoch();
+  const int64_t elapsed_today = point.MillisSinceEpoch() - midnight_millis;
+  return static_cast<int32_t>(elapsed_today);
+}
+
+// Formats a timestamp as "yyyy-MM-dd hh:mm:ss.sss" and returns at most `length`
+// characters of it in memory obtained from the context arena.
+//
+// `*out_len` receives the number of characters returned, which is
+// min(length, 23). A zero length yields an empty string; a negative length
+// additionally sets an error on the context. An error is also set when
+// formatting or the arena allocation fails. The result is not NUL-terminated.
+const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in,
+                                        gdv_int64 length, gdv_int32* out_len) {
+  gdv_int64 year = extractYear_timestamp(in);
+  gdv_int64 month = extractMonth_timestamp(in);
+  gdv_int64 day = extractDay_timestamp(in);
+  gdv_int64 hour = extractHour_timestamp(in);
+  gdv_int64 minute = extractMinute_timestamp(in);
+  gdv_int64 second = extractSecond_timestamp(in);
+  gdv_int64 millis = in % MILLIS_IN_SEC;
+  if (millis < 0) {
+    // '%' keeps the sign of the dividend, so timestamps before the epoch would
+    // otherwise print a negative fraction such as ".-01". Shift into [0, 1000).
+    millis += MILLIS_IN_SEC;
+  }
+
+  static const int kTimeStampStringLen = 23;
+  const int char_buffer_length = kTimeStampStringLen + 1;  // snprintf adds \0
+  char char_buffer[char_buffer_length];
+
+  // yyyy-MM-dd hh:mm:ss.sss
+  int res = snprintf(char_buffer, char_buffer_length,
+                     "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64
+                     ":%02" PRId64 ".%03" PRId64,
+                     year, month, day, hour, minute, second, millis);
+  if (res < 0) {
+    gdv_fn_context_set_error_msg(context, "Could not format the timestamp");
+    return "";
+  }
+
+  // Clamp the requested length to the size of the formatted text.
+  *out_len = static_cast<gdv_int32>(length);
+  if (*out_len > kTimeStampStringLen) {
+    *out_len = kTimeStampStringLen;
+  }
+
+  if (*out_len <= 0) {
+    if (*out_len < 0) {
+      gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
+    }
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+
+  memcpy(ret, char_buffer, *out_len);
+  return ret;
+}
+
+// Extracts the day count of a day-time interval, which is stored in the low
+// 32 bits of the packed value and interpreted as a signed 32-bit integer.
+FORCE_INLINE
+gdv_int64 extractDay_daytimeinterval(gdv_day_time_interval in) {
+  const auto low_word = in & 0x00000000FFFFFFFF;
+  const gdv_int32 day_count = static_cast<gdv_int32>(low_word);
+  return static_cast<gdv_int64>(day_count);
+}
+
+// Extracts the millisecond count of a day-time interval, which is stored in the
+// high 32 bits of the packed value and interpreted as a signed 32-bit integer.
+FORCE_INLINE
+gdv_int64 extractMillis_daytimeinterval(gdv_day_time_interval in) {
+  const auto high_word = (in & 0xFFFFFFFF00000000) >> 32;
+  const gdv_int32 millis_count = static_cast<gdv_int32>(high_word);
+  return static_cast<gdv_int64>(millis_count);
+}
+
+// Collapses a day-time interval into a single millisecond total:
+// its millisecond part plus its day part scaled by the length of a day.
+FORCE_INLINE
+gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
+  const gdv_int64 millis_part = extractMillis_daytimeinterval(in);
+  const gdv_int64 day_part = extractDay_daytimeinterval(in);
+  return millis_part + day_part * MILLIS_IN_DAY;
+}
+
+// Convert the seconds since epoch argument to timestamp (milliseconds).
+//
+// The scale factor is widened to gdv_int64 before multiplying: with a plain
+// int constant the int32 instantiation would compute the product in 32 bits and
+// overflow for present-day epoch seconds. Floating-point instantiations still
+// multiply in their own floating-point type, so fractional seconds are kept.
+#define TO_TIMESTAMP(TYPE) \
+  FORCE_INLINE \
+  gdv_timestamp to_timestamp##_##TYPE(gdv_##TYPE seconds) { \
+    return static_cast<gdv_timestamp>(seconds * \
+                                      static_cast<gdv_int64>(MILLIS_IN_SEC)); \
+  }
+
+NUMERIC_TYPES(TO_TIMESTAMP)
+
+// Convert the seconds since epoch argument to time (milliseconds within the
+// day).
+//
+// The scale factor is widened to int64_t before multiplying: with a plain int
+// constant the int32 instantiation would compute the product in 32 bits and
+// overflow for present-day epoch seconds. Floating-point instantiations still
+// multiply in their own floating-point type.
+#define TO_TIME(TYPE) \
+  FORCE_INLINE \
+  gdv_time32 to_time##_##TYPE(gdv_##TYPE seconds) { \
+    EpochTimePoint tp( \
+        static_cast<int64_t>(seconds * static_cast<int64_t>(MILLIS_IN_SEC))); \
+    return static_cast<gdv_time32>(tp.TimeOfDay().to_duration().count()); \
+  }
+
+NUMERIC_TYPES(TO_TIME)
+
+// Generates <TYPE>_year_interval: converts a month interval into a number of
+// years by dividing by 12 in floating point and converting the quotient to the
+// requested integer output type.
+#define CAST_INT_YEAR_INTERVAL(TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE TYPE##_year_interval(gdv_month_interval in) { \
+    const auto years_as_real = in / 12.0; \
+    return static_cast<gdv_##OUT_TYPE>(years_as_real); \
+  }
+
+CAST_INT_YEAR_INTERVAL(castBIGINT, int64)
+CAST_INT_YEAR_INTERVAL(castINT, int32)
+
+// Generates castNULLABLEINTERVALDAY_<TYPE>: converts an integer value to the
+// day-time interval representation with a plain static_cast.
+#define CAST_NULLABLE_INTERVAL_DAY(TYPE) \
+  FORCE_INLINE \
+  gdv_day_time_interval castNULLABLEINTERVALDAY_##TYPE(gdv_##TYPE in) { \
+    const gdv_day_time_interval as_interval = \
+        static_cast<gdv_day_time_interval>(in); \
+    return as_interval; \
+  }
+
+CAST_NULLABLE_INTERVAL_DAY(int32)
+CAST_NULLABLE_INTERVAL_DAY(int64)
+
+// Generates castNULLABLEINTERVALYEAR_<TYPE>: converts an integer value to the
+// month interval representation. If the converted value does not compare equal
+// to the input, "Integer overflow" is reported on the context; the converted
+// value is returned in either case.
+#define CAST_NULLABLE_INTERVAL_YEAR(TYPE) \
+  FORCE_INLINE \
+  gdv_month_interval castNULLABLEINTERVALYEAR_##TYPE(int64_t context, gdv_##TYPE in) { \
+    const gdv_month_interval narrowed = static_cast<gdv_month_interval>(in); \
+    const bool round_trips = (narrowed == in); \
+    if (!round_trips) { \
+      gdv_fn_context_set_error_msg(context, "Integer overflow"); \
+    } \
+    return narrowed; \
+  }
+
+CAST_NULLABLE_INTERVAL_YEAR(int32)
+CAST_NULLABLE_INTERVAL_YEAR(int64)
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/time_constants.h b/src/arrow/cpp/src/gandiva/precompiled/time_constants.h
new file mode 100644
index 000000000..015ef4bf9
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/time_constants.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+// Number of milliseconds in each fixed-length time unit. Every constant is
+// built from the previous one and is fully parenthesised so it can be used
+// safely inside larger expressions.
+#define MILLIS_IN_SEC (1000)
+#define MILLIS_IN_MIN (60 * MILLIS_IN_SEC)
+#define MILLIS_IN_HOUR (60 * MILLIS_IN_MIN)
+#define MILLIS_IN_DAY (24 * MILLIS_IN_HOUR)
+#define MILLIS_IN_WEEK (7 * MILLIS_IN_DAY)
+
+// Conversions from a millisecond count to a count of larger units. Each is a
+// plain division of the argument by the matching MILLIS_IN_* constant, so the
+// result type and rounding follow the argument's type.
+#define MILLIS_TO_SEC(millis) ((millis) / MILLIS_IN_SEC)
+#define MILLIS_TO_MINS(millis) ((millis) / MILLIS_IN_MIN)
+#define MILLIS_TO_HOUR(millis) ((millis) / MILLIS_IN_HOUR)
+#define MILLIS_TO_DAY(millis) ((millis) / MILLIS_IN_DAY)
+#define MILLIS_TO_WEEK(millis) ((millis) / MILLIS_IN_WEEK)
diff --git a/src/arrow/cpp/src/gandiva/precompiled/time_fields.h b/src/arrow/cpp/src/gandiva/precompiled/time_fields.h
new file mode 100644
index 000000000..d5277e743
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/time_fields.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+namespace gandiva {
+
+// Positions of the individual components of a parsed date/timestamp. The
+// enumerators are consecutive starting at 0, so they can be used directly as
+// array indices; kMax is the number of fields.
+enum TimeFields {
+ kYear,
+ kMonth,
+ kDay,
+ kHours,
+ kMinutes,
+ kSeconds,
+ kSubSeconds,
+ kDisplacementHours,
+ kDisplacementMinutes,
+ kMax  // number of fields, not a field itself
+};
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/time_test.cc b/src/arrow/cpp/src/gandiva/precompiled/time_test.cc
new file mode 100644
index 000000000..332ffa332
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/time_test.cc
@@ -0,0 +1,953 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include "../execution_context.h"
+#include "gandiva/precompiled/testing.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Exercises castDATE_utf8 with four-digit years, shortened years, malformed
+// input and trailing garbage, plus the date32 -> date64 conversion.
+TEST(TestTime, TestCastDate) {
+  ExecutionContext context;
+  int64_t context_ptr = reinterpret_cast<int64_t>(&context);
+
+  struct DateCase {
+    const char* text;
+    gdv_int32 length;
+    int64_t expected_millis;
+  };
+
+  // Well-formed dates, including the two-digit-year window (00-69 -> 20xx,
+  // 70-99 -> 19xx) and years padded to four or more digits.
+  const DateCase valid_dates[] = {
+      {"1967-12-1", 9, -65836800000},    {"2067-12-1", 9, 3089923200000},
+      {"7-12-1", 6, 1196467200000},      {"67-12-1", 7, 3089923200000},
+      {"067-12-1", 8, 3089923200000},    {"0067-12-1", 9, -60023980800000},
+      {"00067-12-1", 10, -60023980800000}, {"167-12-1", 8, -56868307200000},
+      {"1972-12-1", 9, 92016000000},     {"72-12-1", 7, 92016000000},
+  };
+  for (const auto& date_case : valid_dates) {
+    EXPECT_EQ(castDATE_utf8(context_ptr, date_case.text, date_case.length),
+              date_case.expected_millis)
+        << date_case.text;
+  }
+
+  // A single run of digits does not contain three fields.
+  EXPECT_EQ(castDATE_utf8(context_ptr, "1972222222", 10), 0);
+  EXPECT_EQ(context.get_error(), "Not a valid date value 1972222222");
+  context.Reset();
+
+  // Mixed bag of invalid inputs (result 0) and further valid inputs.
+  const DateCase remaining_dates[] = {
+      {"blahblah", 8, 0},
+      {"1967-12-1bb", 11, -65836800000},
+      {"67-12-1", 7, 3089923200000},
+      {"67-1-1", 6, 3061065600000},
+      {"71-1-1", 6, 31536000000},
+      {"71-45-1", 7, 0},
+      {"71-12-XX", 8, 0},
+  };
+  for (const auto& date_case : remaining_dates) {
+    EXPECT_EQ(castDATE_utf8(context_ptr, date_case.text, date_case.length),
+              date_case.expected_millis)
+        << date_case.text;
+  }
+
+  EXPECT_EQ(castDATE_date32(1), 86400000);
+}
+
+// Exercises castTIMESTAMP_utf8 with date-only input, time of day, sub-second
+// digits, numeric displacements and a set of invalid inputs.
+TEST(TestTime, TestCastTimestamp) {
+  ExecutionContext context;
+  int64_t context_ptr = reinterpret_cast<int64_t>(&context);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1967-12-1", 9), -65836800000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2067-12-1", 9), 3089923200000);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "7-12-1", 6), 1196467200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-12-1", 7), 3089923200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "067-12-1", 8), 3089923200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "0067-12-1", 9), -60023980800000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "00067-12-1", 10), -60023980800000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "167-12-1", 8), -56868307200000);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1972-12-1", 9), 92016000000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "72-12-1", 7), 92016000000);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1972-12-1", 9), 92016000000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-12-1", 7), 3089923200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-1-1", 6), 3061065600000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "71-1-1", 6), 31536000000);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18), 969702330000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920", 22), 969702330920);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 +08:00", 29),
+            969673530920);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 -11:45", 29),
+            969744630920);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "65-03-04 00:20:40.920 +00:30", 28),
+            3003349840920);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1932-05-18 11:30:00.920 +11:30", 30),
+            -1187308799080);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1857-02-11 20:31:40.920 -05:30", 30),
+            -3562264699080);
+  EXPECT_EQ(castTIMESTAMP_date64(
+                castDATE_utf8(context_ptr, "2000-09-23 9:45:30.920 +08:00", 29)),
+            castTIMESTAMP_utf8(context_ptr, "2000-09-23 0:00:00.000 +00:00", 29));
+
+  // One, two and three sub-second digits must all mean 100 milliseconds. The
+  // length passed has to cover the whole literal (20, 21 and 22 characters);
+  // otherwise the trailing digits are never parsed and every case degenerates
+  // into the ".1" case.
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.1", 20),
+            castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18) + 100);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.10", 21),
+            castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18) + 100);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.100", 22),
+            castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18) + 100);
+
+  // error cases
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 24:00:00", 19), 0);
+  EXPECT_EQ(context.get_error(),
+            "Not a valid time for timestamp value 2000-01-01 24:00:00");
+  context.Reset();
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:60:00", 19), 0);
+  EXPECT_EQ(context.get_error(),
+            "Not a valid time for timestamp value 2000-01-01 00:60:00");
+  context.Reset();
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:100", 20), 0);
+  EXPECT_EQ(context.get_error(),
+            "Not a valid time for timestamp value 2000-01-01 00:00:100");
+  context.Reset();
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), 0);
+  EXPECT_EQ(context.get_error(),
+            "Invalid millis for timestamp value 2000-01-01 00:00:00.0001");
+  context.Reset();
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), 0);
+  EXPECT_EQ(context.get_error(),
+            "Invalid millis for timestamp value 2000-01-01 00:00:00.1000");
+  context.Reset();
+}
+
+#ifndef _WIN32
+
+// TODO(wesm): ARROW-4495. Need to address TZ database issues on Windows
+
+// Checks timestamps that carry a named time zone instead of a numeric
+// displacement.
+TEST(TestTime, TestCastTimestampWithTZ) {
+  ExecutionContext context;
+  int64_t context_ptr = reinterpret_cast<int64_t>(&context);
+
+  struct ZonedCase {
+    const char* text;
+    gdv_int32 length;
+    int64_t expected_millis;
+  };
+  const ZonedCase zoned_cases[] = {
+      {"2000-09-23 9:45:30.920 Canada/Pacific", 37, 969727530920},
+      {"2012-02-28 23:30:59 Asia/Kolkata", 32, 1330452059000},
+      {"1923-10-07 03:03:03 America/New_York", 36, -1459094217000},
+  };
+
+  for (const auto& zoned : zoned_cases) {
+    EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, zoned.text, zoned.length),
+              zoned.expected_millis)
+        << zoned.text;
+  }
+}
+
+TEST(TestTime, TestCastTimestampErrors) {
+ ExecutionContext context;
+ int64_t context_ptr = reinterpret_cast<int64_t>(&context);
+
+ // error cases
+ EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "20000923", 8), 0);
+ EXPECT_EQ(context.get_error(), "Not a valid day for timestamp value 20000923");
+ context.Reset();
+
+ EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-2b", 10), 0);
+ EXPECT_EQ(context.get_error(),
+ "Invalid timestamp or unknown zone for timestamp value 2000-09-2b");
+ context.Reset();
+
+ EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 Unknown/Zone", 35),
+ 0);
+ EXPECT_EQ(context.get_error(),
+ "Invalid timestamp or unknown zone for timestamp value 2000-09-23 "
+ "9:45:30.920 Unknown/Zone");
+ context.Reset();
+}
+
+#endif
+
+// Splits a time32 value (milliseconds since midnight) into hour, minute and
+// second.
+TEST(TestTime, TestExtractTime) {
+  // 10:20:33 expressed as milliseconds since midnight
+  const gdv_int32 ten_twenty_thirty_three = 37233000;
+
+  EXPECT_EQ(extractSecond_time32(ten_twenty_thirty_three), 33);
+  EXPECT_EQ(extractMinute_time32(ten_twenty_thirty_three), 20);
+  EXPECT_EQ(extractHour_time32(ten_twenty_thirty_three), 10);
+}
+
+// Checks the month difference between 2019-06-30 and a range of other dates,
+// including month-end to month-end comparisons.
+TEST(TestTime, TestTimestampDiffMonth) {
+  struct DiffCase {
+    const char* first;
+    const char* second;
+    int expected_months;
+  };
+  const DiffCase diff_cases[] = {
+      {"2019-06-30 00:00:00", "2019-05-31 00:00:00", -1},
+      {"2019-06-30 00:00:00", "2019-02-28 00:00:00", -4},
+      {"2019-06-30 00:00:00", "2019-03-31 00:00:00", -3},
+      {"2019-06-30 00:00:00", "2019-06-30 00:00:00", 0},
+      {"2019-06-30 00:00:00", "2019-07-31 00:00:00", 1},
+      {"2019-06-30 00:00:00", "2019-07-30 00:00:00", 1},
+      {"2019-06-30 00:00:00", "2019-07-29 00:00:00", 0},
+  };
+
+  for (const auto& diff_case : diff_cases) {
+    const gdv_timestamp ts1 = StringToTimestamp(diff_case.first);
+    const gdv_timestamp ts2 = StringToTimestamp(diff_case.second);
+    EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2),
+              diff_case.expected_months)
+        << diff_case.first << " vs " << diff_case.second;
+  }
+}
+
+// Extracts every supported component from 1970-05-02 10:20:33, checked from the
+// finest unit up to the coarsest.
+TEST(TestTime, TestExtractTimestamp) {
+  const gdv_timestamp sample = StringToTimestamp("1970-05-02 10:20:33");
+
+  // time of day
+  EXPECT_EQ(extractSecond_timestamp(sample), 33);
+  EXPECT_EQ(extractMinute_timestamp(sample), 20);
+  EXPECT_EQ(extractHour_timestamp(sample), 10);
+
+  // position within week / month / year
+  EXPECT_EQ(extractDay_timestamp(sample), 2);
+  EXPECT_EQ(extractDow_timestamp(sample), 7);
+  EXPECT_EQ(extractMonth_timestamp(sample), 5);
+  EXPECT_EQ(extractDoy_timestamp(sample), 122);
+
+  // year and the larger units derived from it
+  EXPECT_EQ(extractYear_timestamp(sample), 1970);
+  EXPECT_EQ(extractDecade_timestamp(sample), 197);
+  EXPECT_EQ(extractCentury_timestamp(sample), 20);
+  EXPECT_EQ(extractMillennium_timestamp(sample), 2);
+}
+
+// Checks every date_trunc_* unit on date64 values, then week truncation on
+// timestamps across a year boundary and across the end of a leap-year February.
+TEST(TestTime, TimeStampTrunc) {
+  const gdv_timestamp sample = StringToTimestamp("2015-05-05 10:20:34");
+  const gdv_timestamp far_sample = StringToTimestamp("2115-05-05 10:20:34");
+
+  EXPECT_EQ(date_trunc_Second_date64(sample), StringToTimestamp("2015-05-05 10:20:34"));
+  EXPECT_EQ(date_trunc_Minute_date64(sample), StringToTimestamp("2015-05-05 10:20:00"));
+  EXPECT_EQ(date_trunc_Hour_date64(sample), StringToTimestamp("2015-05-05 10:00:00"));
+  EXPECT_EQ(date_trunc_Day_date64(sample), StringToTimestamp("2015-05-05 00:00:00"));
+  EXPECT_EQ(date_trunc_Month_date64(sample), StringToTimestamp("2015-05-01 00:00:00"));
+  EXPECT_EQ(date_trunc_Quarter_date64(sample), StringToTimestamp("2015-04-01 00:00:00"));
+  EXPECT_EQ(date_trunc_Year_date64(sample), StringToTimestamp("2015-01-01 00:00:00"));
+  EXPECT_EQ(date_trunc_Decade_date64(sample), StringToTimestamp("2010-01-01 00:00:00"));
+  EXPECT_EQ(date_trunc_Century_date64(far_sample),
+            StringToTimestamp("2101-01-01 00:00:00"));
+  EXPECT_EQ(date_trunc_Millennium_date64(far_sample),
+            StringToTimestamp("2001-01-01 00:00:00"));
+
+  struct WeekCase {
+    const char* input;
+    const char* week_start;
+  };
+  const WeekCase week_cases[] = {
+      // truncate week going to previous year
+      {"2011-01-01 10:10:10", "2010-12-27 00:00:00"},
+      {"2011-01-02 10:10:10", "2010-12-27 00:00:00"},
+      {"2011-01-03 10:10:10", "2011-01-03 00:00:00"},
+      {"2011-01-04 10:10:10", "2011-01-03 00:00:00"},
+      {"2011-01-05 10:10:10", "2011-01-03 00:00:00"},
+      {"2011-01-06 10:10:10", "2011-01-03 00:00:00"},
+      {"2011-01-07 10:10:10", "2011-01-03 00:00:00"},
+      {"2011-01-08 10:10:10", "2011-01-03 00:00:00"},
+      {"2011-01-09 10:10:10", "2011-01-03 00:00:00"},
+      // truncate week for Feb in a leap year
+      {"2000-02-28 10:10:10", "2000-02-28 00:00:00"},
+      {"2000-02-29 10:10:10", "2000-02-28 00:00:00"},
+      {"2000-03-01 10:10:10", "2000-02-28 00:00:00"},
+      {"2000-03-02 10:10:10", "2000-02-28 00:00:00"},
+      {"2000-03-03 10:10:10", "2000-02-28 00:00:00"},
+      {"2000-03-04 10:10:10", "2000-02-28 00:00:00"},
+      {"2000-03-05 10:10:10", "2000-02-28 00:00:00"},
+      {"2000-03-06 10:10:10", "2000-03-06 00:00:00"},
+  };
+  for (const auto& week_case : week_cases) {
+    EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp(week_case.input)),
+              StringToTimestamp(week_case.week_start))
+        << week_case.input;
+  }
+}
+
+// Exercises the timestampadd<Unit> family plus the date_add / add / date_sub /
+// subtract / date_diff day-arithmetic helpers.
+TEST(TestTime, TimeStampAdd) {
+  // Fixed-length units (second .. week), in both (count, timestamp) and
+  // (timestamp, count) argument orders, with positive and negative counts.
+  EXPECT_EQ(
+      timestampaddSecond_int32_timestamp(30, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("2000-05-01 10:21:04"));
+
+  EXPECT_EQ(
+      timestampaddSecond_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 30),
+      StringToTimestamp("2000-05-01 10:21:04"));
+
+  EXPECT_EQ(
+      timestampaddMinute_int64_timestamp(-30, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("2000-05-01 09:50:34"));
+
+  EXPECT_EQ(
+      timestampaddMinute_timestamp_int64(StringToTimestamp("2000-05-01 10:20:34"), -30),
+      StringToTimestamp("2000-05-01 09:50:34"));
+
+  EXPECT_EQ(
+      timestampaddHour_int32_timestamp(20, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("2000-05-02 06:20:34"));
+
+  EXPECT_EQ(
+      timestampaddHour_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 20),
+      StringToTimestamp("2000-05-02 06:20:34"));
+
+  EXPECT_EQ(
+      timestampaddDay_int64_timestamp(-35, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("2000-03-27 10:20:34"));
+
+  EXPECT_EQ(
+      timestampaddDay_timestamp_int64(StringToTimestamp("2000-05-01 10:20:34"), -35),
+      StringToTimestamp("2000-03-27 10:20:34"));
+
+  EXPECT_EQ(timestampaddWeek_int32_timestamp(4, StringToTimestamp("2000-05-01 10:20:34")),
+            StringToTimestamp("2000-05-29 10:20:34"));
+
+  // (An identical copy of the following assertion used to be repeated right after
+  // it; the duplicate added no coverage and was dropped.)
+  EXPECT_EQ(timestampaddWeek_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 4),
+            StringToTimestamp("2000-05-29 10:20:34"));
+
+  // Calendar units (month, quarter, year).
+  EXPECT_EQ(
+      timestampaddMonth_int64_timestamp(10, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("2001-03-01 10:20:34"));
+
+  // Day 31 has no counterpart in the target month: the expected result is the
+  // last day of that month (Feb 29 in leap year 2000, Feb 28 in 2002, Apr 30).
+  EXPECT_EQ(
+      timestampaddMonth_int64_timestamp(1, StringToTimestamp("2000-01-31 10:20:34")),
+      StringToTimestamp("2000-2-29 10:20:34"));
+  EXPECT_EQ(
+      timestampaddMonth_int64_timestamp(13, StringToTimestamp("2001-01-31 10:20:34")),
+      StringToTimestamp("2002-02-28 10:20:34"));
+
+  EXPECT_EQ(
+      timestampaddMonth_int64_timestamp(11, StringToTimestamp("2000-05-31 10:20:34")),
+      StringToTimestamp("2001-04-30 10:20:34"));
+
+  EXPECT_EQ(
+      timestampaddMonth_timestamp_int64(StringToTimestamp("2000-05-31 10:20:34"), 11),
+      StringToTimestamp("2001-04-30 10:20:34"));
+
+  EXPECT_EQ(
+      timestampaddQuarter_int32_timestamp(-2, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("1999-11-01 10:20:34"));
+
+  EXPECT_EQ(timestampaddYear_int64_timestamp(2, StringToTimestamp("2000-05-01 10:20:34")),
+            StringToTimestamp("2002-05-01 10:20:34"));
+
+  EXPECT_EQ(
+      timestampaddQuarter_int32_timestamp(-5, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("1999-02-01 10:20:34"));
+  EXPECT_EQ(
+      timestampaddQuarter_int32_timestamp(-6, StringToTimestamp("2000-05-01 10:20:34")),
+      StringToTimestamp("1998-11-01 10:20:34"));
+
+  // date_add / add: the integer argument is a number of days.
+  EXPECT_EQ(date_add_int32_timestamp(7, StringToTimestamp("2000-05-01 00:00:00")),
+            StringToTimestamp("2000-05-08 00:00:00"));
+
+  EXPECT_EQ(add_int32_timestamp(4, StringToTimestamp("2000-05-01 00:00:00")),
+            StringToTimestamp("2000-05-05 00:00:00"));
+
+  EXPECT_EQ(add_int64_timestamp(7, StringToTimestamp("2000-05-01 00:00:00")),
+            StringToTimestamp("2000-05-08 00:00:00"));
+
+  EXPECT_EQ(date_add_int64_timestamp(4, StringToTimestamp("2000-05-01 00:00:00")),
+            StringToTimestamp("2000-05-05 00:00:00"));
+
+  // Crossing Feb 29 of leap year 2000.
+  EXPECT_EQ(date_add_int64_timestamp(4, StringToTimestamp("2000-02-27 00:00:00")),
+            StringToTimestamp("2000-03-02 00:00:00"));
+
+  EXPECT_EQ(add_date64_int64(StringToTimestamp("2000-02-27 00:00:00"), 4),
+            StringToTimestamp("2000-03-02 00:00:00"));
+
+  // date_sub / subtract / date_diff: the integer argument is a number of days
+  // to go back (a negative count therefore moves forward).
+  EXPECT_EQ(date_sub_timestamp_int32(StringToTimestamp("2000-05-01 00:00:00"), 7),
+            StringToTimestamp("2000-04-24 00:00:00"));
+
+  EXPECT_EQ(subtract_timestamp_int32(StringToTimestamp("2000-05-01 00:00:00"), -7),
+            StringToTimestamp("2000-05-08 00:00:00"));
+
+  EXPECT_EQ(date_diff_timestamp_int64(StringToTimestamp("2000-05-01 00:00:00"), 365),
+            StringToTimestamp("1999-05-02 00:00:00"));
+
+  EXPECT_EQ(date_diff_timestamp_int64(StringToTimestamp("2000-03-01 00:00:00"), 1),
+            StringToTimestamp("2000-02-29 00:00:00"));
+
+  EXPECT_EQ(date_diff_timestamp_int64(StringToTimestamp("2000-02-29 00:00:00"), 365),
+            StringToTimestamp("1999-03-01 00:00:00"));
+}
+
+// Week-number expectations taken from
+// http://www.staff.science.uu.nl/~gent0113/calendar/isocalendar.htm
+//
+// The table is a flat list of (timestamp, expected week) string pairs. Each
+// year type (labelled as on that page) contributes three rows:
+//   row 1: Jan 1, 2 and 3
+//   row 2: a Monday and the following Sunday in the middle of the year
+//   row 3: Dec 29, 30 and 31
+TEST(TestTime, TestExtractWeek) {
+  const std::vector<std::string> week_cases = {
+      // A type
+      "2006-01-01 10:10:10", "52", "2006-01-02 10:10:10", "1", "2006-01-03 10:10:10", "1",
+      "2006-04-24 10:10:10", "17", "2006-04-30 10:10:10", "17",
+      "2006-12-29 10:10:10", "52", "2006-12-30 10:10:10", "52", "2006-12-31 10:10:10", "52",
+      // B(C) type
+      "2011-01-01 10:10:10", "52", "2011-01-02 10:10:10", "52", "2011-01-03 10:10:10", "1",
+      "2011-07-18 10:10:10", "29", "2011-07-24 10:10:10", "29",
+      "2011-12-29 10:10:10", "52", "2011-12-30 10:10:10", "52", "2011-12-31 10:10:10", "52",
+      // B(DC) type
+      "2005-01-01 10:10:10", "53", "2005-01-02 10:10:10", "53", "2005-01-03 10:10:10", "1",
+      "2005-11-07 10:10:10", "45", "2005-11-13 10:10:10", "45",
+      "2005-12-29 10:10:10", "52", "2005-12-30 10:10:10", "52", "2005-12-31 10:10:10", "52",
+      // C type
+      "2010-01-01 10:10:10", "53", "2010-01-02 10:10:10", "53", "2010-01-03 10:10:10", "53",
+      "2010-09-13 10:10:10", "37", "2010-09-19 10:10:10", "37",
+      "2010-12-29 10:10:10", "52", "2010-12-30 10:10:10", "52", "2010-12-31 10:10:10", "52",
+      // D type
+      "2037-01-01 10:10:10", "1", "2037-01-02 10:10:10", "1", "2037-01-03 10:10:10", "1",
+      "2037-08-17 10:10:10", "34", "2037-08-23 10:10:10", "34",
+      "2037-12-29 10:10:10", "53", "2037-12-30 10:10:10", "53", "2037-12-31 10:10:10", "53",
+      // E type
+      "2014-01-01 10:10:10", "1", "2014-01-02 10:10:10", "1", "2014-01-03 10:10:10", "1",
+      "2014-01-13 10:10:10", "3", "2014-01-19 10:10:10", "3",
+      "2014-12-29 10:10:10", "1", "2014-12-30 10:10:10", "1", "2014-12-31 10:10:10", "1",
+      // F type
+      "2019-01-01 10:10:10", "1", "2019-01-02 10:10:10", "1", "2019-01-03 10:10:10", "1",
+      "2019-02-11 10:10:10", "7", "2019-02-17 10:10:10", "7",
+      "2019-12-29 10:10:10", "52", "2019-12-30 10:10:10", "1", "2019-12-31 10:10:10", "1",
+      // G type
+      "2001-01-01 10:10:10", "1", "2001-01-02 10:10:10", "1", "2001-01-03 10:10:10", "1",
+      "2001-03-19 10:10:10", "12", "2001-03-25 10:10:10", "12",
+      "2001-12-29 10:10:10", "52", "2001-12-30 10:10:10", "52", "2001-12-31 10:10:10", "1",
+      // AG type
+      "2012-01-01 10:10:10", "52", "2012-01-02 10:10:10", "1", "2012-01-03 10:10:10", "1",
+      "2012-04-02 10:10:10", "14", "2012-04-08 10:10:10", "14",
+      "2012-12-29 10:10:10", "52", "2012-12-30 10:10:10", "52", "2012-12-31 10:10:10", "1",
+      // BA type
+      "2000-01-01 10:10:10", "52", "2000-01-02 10:10:10", "52", "2000-01-03 10:10:10", "1",
+      "2000-05-22 10:10:10", "21", "2000-05-28 10:10:10", "21",
+      "2000-12-29 10:10:10", "52", "2000-12-30 10:10:10", "52", "2000-12-31 10:10:10", "52",
+      // CB type
+      "2016-01-01 10:10:10", "53", "2016-01-02 10:10:10", "53", "2016-01-03 10:10:10", "53",
+      "2016-06-20 10:10:10", "25", "2016-06-26 10:10:10", "25",
+      "2016-12-29 10:10:10", "52", "2016-12-30 10:10:10", "52", "2016-12-31 10:10:10", "52",
+      // DC type
+      "2004-01-01 10:10:10", "1", "2004-01-02 10:10:10", "1", "2004-01-03 10:10:10", "1",
+      "2004-07-19 10:10:10", "30", "2004-07-25 10:10:10", "30",
+      "2004-12-29 10:10:10", "53", "2004-12-30 10:10:10", "53", "2004-12-31 10:10:10", "53",
+      // ED type
+      "2020-01-01 10:10:10", "1", "2020-01-02 10:10:10", "1", "2020-01-03 10:10:10", "1",
+      "2020-08-17 10:10:10", "34", "2020-08-23 10:10:10", "34",
+      "2020-12-29 10:10:10", "53", "2020-12-30 10:10:10", "53", "2020-12-31 10:10:10", "53",
+      // FE type
+      "2008-01-01 10:10:10", "1", "2008-01-02 10:10:10", "1", "2008-01-03 10:10:10", "1",
+      "2008-09-15 10:10:10", "38", "2008-09-21 10:10:10", "38",
+      "2008-12-29 10:10:10", "1", "2008-12-30 10:10:10", "1", "2008-12-31 10:10:10", "1",
+      // GF type
+      "2024-01-01 10:10:10", "1", "2024-01-02 10:10:10", "1", "2024-01-03 10:10:10", "1",
+      "2024-10-07 10:10:10", "41", "2024-10-13 10:10:10", "41",
+      "2024-12-29 10:10:10", "52", "2024-12-30 10:10:10", "1", "2024-12-31 10:10:10", "1"};
+
+  // Walk the table one (timestamp, week) pair at a time.
+  for (uint32_t pos = 0; pos < week_cases.size(); pos += 2) {
+    const std::string& when = week_cases.at(pos);
+    const std::string& week_text = week_cases.at(pos + 1);
+    gdv_timestamp parsed = StringToTimestamp(when.c_str());
+    gdv_int64 expected_week = atol(week_text.c_str());
+    EXPECT_EQ(extractWeek_timestamp(parsed), expected_week);
+  }
+}
+
+// Checks months_between_timestamp_timestamp(end, start) against literal
+// expectations, allowing a small absolute error for the fractional results.
+TEST(TestTime, TestMonthsBetween) {
+  // Flat list of (end timestamp, start timestamp, expected months) triples.
+  std::vector<std::string> testStrings = {
+      "1995-03-02 00:00:00", "1995-02-02 00:00:00", "1.0",
+      "1995-02-02 00:00:00", "1995-03-02 00:00:00", "-1.0",
+      "1995-03-31 00:00:00", "1995-02-28 00:00:00", "1.0",
+      "1996-03-31 00:00:00", "1996-02-28 00:00:00", "1.09677418",
+      "1996-03-31 00:00:00", "1996-02-29 00:00:00", "1.0",
+      "1996-05-31 00:00:00", "1996-04-30 00:00:00", "1.0",
+      "1996-05-31 00:00:00", "1996-03-31 00:00:00", "2.0",
+      "1996-05-31 00:00:00", "1996-03-30 00:00:00", "2.03225806",
+      "1996-03-15 00:00:00", "1996-02-14 00:00:00", "1.03225806",
+      "1995-02-02 00:00:00", "1995-01-01 00:00:00", "1.03225806",
+      "1995-02-02 10:00:00", "1995-01-01 11:00:00", "1.03091397"};
+
+  // A malformed table (not a whole number of triples) should fail loudly rather
+  // than index past the end of the vector.
+  ASSERT_EQ(testStrings.size() % 3, 0u);
+
+  const double kTolerance = 0.001;
+
+  for (size_t i = 0; i + 2 < testStrings.size(); i += 3) {
+    gdv_timestamp endTs = StringToTimestamp(testStrings[i].c_str());
+    gdv_timestamp startTs = StringToTimestamp(testStrings[i + 1].c_str());
+
+    double expectedResult = atof(testStrings[i + 2].c_str());
+    double actualResult = months_between_timestamp_timestamp(endTs, startTs);
+
+    // EXPECT_NEAR prints both values and the tolerance on failure; the extra
+    // message identifies which row of the table was being checked.
+    EXPECT_NEAR(actualResult, expectedResult, kTolerance)
+        << "end=" << testStrings[i] << " start=" << testStrings[i + 1];
+  }
+}
+
+// Checks the text produced by castVARCHAR_timestamp_int64 for several length
+// arguments: larger than the full text, exactly 19, zero, and a year that needs
+// zero padding.
+TEST(TestTime, castVarcharTimestamp) {
+  ExecutionContext context;
+  const int64_t ctx = reinterpret_cast<int64_t>(&context);
+
+  // Runs the cast and packages the (pointer, length) result as a std::string.
+  auto to_varchar = [ctx](gdv_timestamp value, int64_t length_arg) {
+    gdv_int32 written;
+    const char* text = castVARCHAR_timestamp_int64(ctx, value, length_arg, &written);
+    return std::string(text, written);
+  };
+
+  gdv_timestamp when = StringToTimestamp("2000-05-01 10:20:34");
+  EXPECT_EQ(to_varchar(when, 30L), "2000-05-01 10:20:34.000");
+  EXPECT_EQ(to_varchar(when, 19L), "2000-05-01 10:20:34");
+  EXPECT_EQ(to_varchar(when, 0L), "");
+
+  when = StringToTimestamp("2-5-1 00:00:04");
+  EXPECT_EQ(to_varchar(when, 24L), "0002-05-01 00:00:04.000");
+}
+
+// castDATE_timestamp is expected to return midnight of the input's day.
+TEST(TestTime, TestCastTimestampToDate) {
+  const gdv_timestamp mid_morning = StringToTimestamp("2000-05-01 10:20:34");
+  const auto as_date = castDATE_timestamp(mid_morning);
+  EXPECT_EQ(StringToTimestamp("2000-05-01 00:00:00"), as_date);
+}
+
+// castTIME_timestamp is expected to return the distance between the input and
+// midnight of the same day, in the units StringToTimestamp produces.
+TEST(TestTime, TestCastTimestampToTime) {
+  // Mid-morning value.
+  gdv_timestamp input = StringToTimestamp("2000-05-01 10:20:34");
+  auto since_midnight =
+      static_cast<int32_t>(input - StringToTimestamp("2000-05-01 00:00:00"));
+  auto actual = castTIME_timestamp(input);
+  EXPECT_EQ(since_midnight, actual);
+
+  // Exactly midnight: the result must be 0.
+  input = StringToTimestamp("1998-12-01 00:00:00");
+  since_midnight = 0;
+  actual = castTIME_timestamp(input);
+  EXPECT_EQ(since_midnight, actual);
+
+  // Last second of the day.
+  input = StringToTimestamp("2015-09-16 23:59:59");
+  since_midnight = static_cast<int32_t>(input - StringToTimestamp("2015-09-16 00:00:00"));
+  actual = castTIME_timestamp(input);
+  EXPECT_EQ(since_midnight, actual);
+}
+
+// last_day_from_timestamp is expected to return midnight of the last day of the
+// input's month.
+TEST(TestTime, TestLastDay) {
+  struct LastDayCase {
+    const char* input;
+    const char* month_end;
+  };
+  const LastDayCase cases[] = {
+      // leap year: February from mid-month and from its own last second
+      {"2016-02-11 03:20:34", "2016-02-29 00:00:00"},
+      {"2016-02-29 23:59:59", "2016-02-29 00:00:00"},
+      // leap year: January
+      {"2016-01-30 23:59:00", "2016-01-31 00:00:00"},
+      // normal year
+      {"2017-02-03 23:59:59", "2017-02-28 00:00:00"},
+      // december
+      {"2015-12-03 03:12:59", "2015-12-31 00:00:00"},
+  };
+
+  for (const LastDayCase& c : cases) {
+    const gdv_timestamp input = StringToTimestamp(c.input);
+    const auto actual = last_day_from_timestamp(input);
+    EXPECT_EQ(StringToTimestamp(c.month_end), actual);
+  }
+}
+
+// to_timestamp_<type>(seconds since epoch) must match the timestamp parsed from
+// the equivalent wall-clock string, for every numeric input type.
+TEST(TestTime, TestToTimestamp) {
+  // Whole-second inputs, checked through all four overloads.
+  struct WholeSecondCase {
+    const char* text;
+    int seconds;
+  };
+  const WholeSecondCase whole_seconds[] = {
+      {"1970-01-01 00:00:00", 0},     {"1970-01-01 00:00:01", 1},
+      {"1970-01-01 00:01:00", 60},    {"1970-01-01 01:00:00", 3600},
+      {"1970-01-02 00:00:00", 86400},
+  };
+  for (const WholeSecondCase& c : whole_seconds) {
+    const auto want = StringToTimestamp(c.text);
+    EXPECT_EQ(want, to_timestamp_int32(c.seconds));
+    EXPECT_EQ(want, to_timestamp_int64(c.seconds));
+    EXPECT_EQ(want, to_timestamp_float32(c.seconds));
+    EXPECT_EQ(want, to_timestamp_float64(c.seconds));
+  }
+
+  // tests with fractional part: the fraction is added to the parsed timestamp
+  // as a raw offset (500, 600, 400).
+  auto want = StringToTimestamp("1970-01-01 00:00:01") + 500;
+  EXPECT_EQ(want, to_timestamp_float32(1.500f));
+  EXPECT_EQ(want, to_timestamp_float64(1.500));
+
+  want = StringToTimestamp("1970-01-01 00:01:01") + 600;
+  EXPECT_EQ(want, to_timestamp_float32(61.600f));
+  EXPECT_EQ(want, to_timestamp_float64(61.600));
+
+  want = StringToTimestamp("1970-01-01 01:00:01") + 400;
+  EXPECT_EQ(want, to_timestamp_float32(3601.400f));
+  EXPECT_EQ(want, to_timestamp_float64(3601.400));
+}
+
+// to_time_<type>(seconds since epoch) is expected to return the time of day in
+// milliseconds.
+TEST(TestTime, TestToTimeNumeric) {
+  // Whole seconds on 1970-01-01, checked through all four overloads.
+  struct WholeSecondCase {
+    int seconds;
+    int64_t expected_millis;
+  };
+  const WholeSecondCase whole_seconds[] = {
+      {0, 0},             // 1970-01-01 00:00:00 -> 0 milliseconds
+      {1, 1000},          // 1970-01-01 00:00:01 -> 1 second
+      {3600, 3600000},    // 1970-01-01 01:00:00 -> 3600 seconds
+      {86399, 86399000},  // 1970-01-01 23:59:59 -> 86399 seconds
+  };
+  for (const WholeSecondCase& c : whole_seconds) {
+    EXPECT_EQ(c.expected_millis, to_time_int32(c.seconds));
+    EXPECT_EQ(c.expected_millis, to_time_int64(c.seconds));
+    EXPECT_EQ(c.expected_millis, to_time_float32(static_cast<float>(c.seconds)));
+    EXPECT_EQ(c.expected_millis, to_time_float64(static_cast<double>(c.seconds)));
+  }
+
+  // 2020-01-01 00:00:01: the date part must not contribute, only the 1 second.
+  int64_t expected_millis = 1000;
+  EXPECT_EQ(expected_millis, to_time_int64(1577836801));
+  EXPECT_EQ(expected_millis, to_time_float64(1577836801.000));
+
+  // tests with fractional part (all three inputs are exactly representable
+  // as float, so one table serves both floating-point overloads)
+  struct FractionalCase {
+    double seconds;
+    int64_t expected_millis;
+  };
+  const FractionalCase fractional[] = {
+      {1.500, 1500},        // 1970-01-01 00:00:01.500
+      {61.500, 61500},      // 1970-01-01 00:01:01.500
+      {3601.500, 3601500},  // 1970-01-01 01:00:01.500
+  };
+  for (const FractionalCase& c : fractional) {
+    EXPECT_EQ(c.expected_millis, to_time_float32(static_cast<float>(c.seconds)));
+    EXPECT_EQ(c.expected_millis, to_time_float64(c.seconds));
+  }
+}
+
+// Spot checks for castBIGINT_daytimeinterval.
+// NOTE(review): the -100 expectation is not simply -100 * 86400000; presumably
+// the argument is decoded as a packed interval value rather than a plain day
+// count - confirm against the implementation before changing these numbers.
+TEST(TestTime, TestCastIntDayInterval) {
+  const gdv_int64 from_ten = castBIGINT_daytimeinterval(10);
+  const gdv_int64 from_minus_hundred = castBIGINT_daytimeinterval(-100);
+  const gdv_int64 from_zero = castBIGINT_daytimeinterval(-0);
+
+  EXPECT_EQ(from_ten, 864000000);
+  EXPECT_EQ(from_minus_hundred, -8640000001);
+  EXPECT_EQ(from_zero, 0);
+}
+
+// castINT_year_interval and castBIGINT_year_interval must agree on every input.
+// The expectations correspond to dividing the input by 12 and truncating
+// toward zero (-23 -> -1).
+TEST(TestTime, TestCastIntYearInterval) {
+  struct YearIntervalCase {
+    int input;
+    int expected;
+  };
+  const YearIntervalCase cases[] = {{24, 2}, {-24, -2}, {-23, -1}};
+
+  for (const YearIntervalCase& c : cases) {
+    EXPECT_EQ(castINT_year_interval(c.input), c.expected);
+  }
+
+  for (const YearIntervalCase& c : cases) {
+    EXPECT_EQ(castBIGINT_year_interval(c.input), c.expected);
+  }
+}
+
+// The castNULLABLEINTERVAL* helpers must return in-range inputs unchanged; the
+// int64 year variant must record an error in the context on overflow.
+TEST(TestTime, TestCastNullableInterval) {
+  ExecutionContext context;
+  auto ctx = reinterpret_cast<int64_t>(&context);
+
+  // castNULLABLEINTERVALDAY for int and bigint
+  const int day_inputs[] = {1, 12, -55, -1201};
+  for (int days : day_inputs) {
+    EXPECT_EQ(castNULLABLEINTERVALDAY_int32(days), days);
+  }
+  for (int days : day_inputs) {
+    EXPECT_EQ(castNULLABLEINTERVALDAY_int64(days), days);
+  }
+
+  // castNULLABLEINTERVALYEAR for int and bigint
+  const int year_inputs[] = {1, 12, 55, 1201};
+  for (int years : year_inputs) {
+    EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(ctx, years), years);
+  }
+  for (int years : year_inputs) {
+    EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(ctx, years), years);
+  }
+
+  // An out-of-range bigint must leave "Integer overflow" in the context; the
+  // return value of the call is deliberately not inspected.
+  castNULLABLEINTERVALYEAR_int64(ctx, INT64_MAX);
+  EXPECT_EQ(context.get_error(), "Integer overflow");
+  context.Reset();
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc b/src/arrow/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
new file mode 100644
index 000000000..695605b3c
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
@@ -0,0 +1,283 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "./epoch_time_point.h"
+
+// Month lengths indexed as [is_leap][zero_based_month]: row 0 is for non-leap
+// years, row 1 for leap years (only February differs). The table is only ever
+// read, so it is declared const to rule out accidental modification.
+static const int days_in_a_month[2][12] = {
+    {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
+    {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}};
+
+// Gregorian leap-year rule: a year is a leap year when it is divisible by 4,
+// unless it is a multiple of 100 that is not also a multiple of 400.
+// `yy` must be the full calendar year (e.g. 2000), not an offset from 1900.
+bool is_leap_year(int yy) {
+  const bool divisible_by_4 = (yy % 4) == 0;
+  const bool divisible_by_100 = (yy % 100) == 0;
+  const bool divisible_by_400 = (yy % 400) == 0;
+
+  return divisible_by_4 && (divisible_by_400 || !divisible_by_100);
+}
+
+// Returns true when the day-of-month of `tp` equals the length of its month.
+// TmMon() is used directly as the column index, i.e. it is treated as 0-based.
+// NOTE(review): TmYear() is handed to is_leap_year() as is. If TmYear() follows
+// the struct tm convention (years since 1900) the leap-year test is wrong for
+// century years such as 2000 - confirm against epoch_time_point.h.
+bool is_last_day_of_month(const EpochTimePoint& tp) {
+  const int* month_lengths = days_in_a_month[is_leap_year(tp.TmYear()) ? 1 : 0];
+
+  return month_lengths[tp.TmMon()] == tp.TmMday();
+}
+
+// Returns true when the day field of `ymd` is larger than the number of days
+// its (year, month) actually has, e.g. a day of 31 in April.
+bool did_days_overflow(arrow_vendored::date::year_month_day ymd) {
+  const int calendar_year = static_cast<int>(ymd.year());
+  // month() is 1-based here, hence the "- 1" when indexing the table.
+  const int month_of_year = static_cast<unsigned int>(ymd.month());
+  const int day_of_month = static_cast<unsigned int>(ymd.day());
+
+  const int* month_lengths = days_in_a_month[is_leap_year(calendar_year) ? 1 : 0];
+  return month_lengths[month_of_year - 1] < day_of_month;
+}
+
+// Returns the number of days in the given month. `month` is 1-based (1 indexes
+// the first table column); `year` is passed to is_leap_year() to pick the row.
+int last_possible_day_in_month(int year, int month) {
+  if (is_leap_year(year)) {
+    return days_in_a_month[1][month - 1];
+  }
+  return days_in_a_month[0][month - 1];
+}
+
+extern "C" {
+
+#include <time.h>
+
+#include "./time_constants.h"
+#include "./types.h"
+
+// Defines NAME_TYPE_TYPE(start_millis, end_millis): the elapsed time from start
+// to end (negative when end precedes start), converted with FROM_MILLIS and
+// narrowed to 32 bits.
+#define TIMESTAMP_DIFF_FIXED_UNITS(TYPE, NAME, FROM_MILLIS)                          \
+  FORCE_INLINE                                                                       \
+  gdv_int32 NAME##_##TYPE##_##TYPE(gdv_##TYPE start_millis, gdv_##TYPE end_millis) { \
+    const gdv_##TYPE elapsed_millis = end_millis - start_millis;                     \
+    return static_cast<int32_t>(FROM_MILLIS(elapsed_millis));                        \
+  }
+
+// Negates `diff` when the operands had to be swapped (is_positive == false).
+#define SIGN_ADJUST_DIFF(is_positive, diff) ((is_positive) ? (diff) : -(diff))
+// Converts a month count into units of `num_months` months (integer division).
+// The whole expansion is parenthesised so it can be embedded in a larger
+// expression without changing how that expression groups.
+#define MONTHS_TO_TIMEUNIT(diff, num_months) ((diff) / (num_months))
+
+// Defines NAME_TYPE_TYPE(start_millis, end_millis): the number of whole
+// N_MONTHS-month units between the two instants.
+//
+// If end_millis <= start_millis the operands are swapped and the result is
+// negated at the end. With end_millis > start_millis the month difference is:
+//   diff_in_months = year_diff * 12 + month_diff
+// This is approximately correct, except when the last month has not fully elapsed:
+//
+// a) If end_day > start_day, return diff_in_months     e.g. diff(2015-09-10, 2017-03-31)
+// b) If end_day < start_day, return diff_in_months - 1 e.g. diff(2015-09-30, 2017-03-10)
+//    unless end is the last day of its month, in which case the month counts
+//    as complete and diff_in_months is returned.
+// c) If end_day = start_day, compare the time of day   e.g. diff(2015-03-10, 2017-03-10)
+//    c1) If end_millis_in_day >= start_millis_in_day, return diff_in_months
+//    c2) else return diff_in_months - 1
+// The time of day is built from the hour, minute and second accessors only, so
+// the comparison in (c) has one-second resolution.
+//
+// The month count is finally divided by N_MONTHS (integer division).
+#define TIMESTAMP_DIFF_MONTH_UNITS(TYPE, NAME, N_MONTHS)                             \
+  FORCE_INLINE                                                                       \
+  gdv_int32 NAME##_##TYPE##_##TYPE(gdv_##TYPE start_millis, gdv_##TYPE end_millis) { \
+    gdv_int32 diff;                                                                  \
+    bool is_positive = (end_millis > start_millis);                                  \
+    if (!is_positive) {                                                              \
+      /* if end_millis < start_millis, swap and multiply by -1 at the end */         \
+      gdv_##TYPE tmp = start_millis;                                                 \
+      start_millis = end_millis;                                                     \
+      end_millis = tmp;                                                              \
+    }                                                                                \
+    EpochTimePoint start_tm(start_millis);                                           \
+    EpochTimePoint end_tm(end_millis);                                               \
+    gdv_int32 months_diff;                                                           \
+    months_diff = static_cast<gdv_int32>(12 * (end_tm.TmYear() - start_tm.TmYear()) + \
+                                         (end_tm.TmMon() - start_tm.TmMon()));       \
+    if (end_tm.TmMday() > start_tm.TmMday()) {                                       \
+      /* case a */                                                                   \
+      diff = MONTHS_TO_TIMEUNIT(months_diff, N_MONTHS);                              \
+      return SIGN_ADJUST_DIFF(is_positive, diff);                                    \
+    }                                                                                \
+    if (end_tm.TmMday() < start_tm.TmMday()) {                                       \
+      /* case b: a month ending on its last day is treated as fully elapsed */       \
+      months_diff += (is_last_day_of_month(end_tm) ? 1 : 0);                         \
+      diff = MONTHS_TO_TIMEUNIT(months_diff - 1, N_MONTHS);                          \
+      return SIGN_ADJUST_DIFF(is_positive, diff);                                    \
+    }                                                                                \
+    /* Seconds are scaled to milliseconds like the other two terms; previously */    \
+    /* they were added unscaled, mixing units within one sum. */                     \
+    gdv_int32 end_day_millis = static_cast<gdv_int32>(                               \
+        end_tm.TmHour() * MILLIS_IN_HOUR + end_tm.TmMin() * MILLIS_IN_MIN +          \
+        end_tm.TmSec() * MILLIS_IN_SEC);                                             \
+    gdv_int32 start_day_millis = static_cast<gdv_int32>(                             \
+        start_tm.TmHour() * MILLIS_IN_HOUR + start_tm.TmMin() * MILLIS_IN_MIN +      \
+        start_tm.TmSec() * MILLIS_IN_SEC);                                           \
+    if (end_day_millis >= start_day_millis) {                                        \
+      /* case c1 */                                                                  \
+      diff = MONTHS_TO_TIMEUNIT(months_diff, N_MONTHS);                              \
+      return SIGN_ADJUST_DIFF(is_positive, diff);                                    \
+    }                                                                                \
+    /* case c2 */                                                                    \
+    diff = MONTHS_TO_TIMEUNIT(months_diff - 1, N_MONTHS);                            \
+    return SIGN_ADJUST_DIFF(is_positive, diff);                                      \
+  }
+// Expands to the whole timestampdiff<Unit>_TYPE_TYPE family: second..week use the fixed-unit macro, month/quarter/year use the month-unit macro with 1, 3 and 12 months per unit.
+#define TIMESTAMP_DIFF(TYPE) \
+ TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffSecond, MILLIS_TO_SEC) \
+ TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffMinute, MILLIS_TO_MINS) \
+ TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffHour, MILLIS_TO_HOUR) \
+ TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffDay, MILLIS_TO_DAY) \
+ TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffWeek, MILLIS_TO_WEEK) \
+ TIMESTAMP_DIFF_MONTH_UNITS(TYPE, timestampdiffMonth, 1) \
+ TIMESTAMP_DIFF_MONTH_UNITS(TYPE, timestampdiffQuarter, 3) \
+ TIMESTAMP_DIFF_MONTH_UNITS(TYPE, timestampdiffYear, 12)
+// Instantiate the family for gdv_timestamp only (e.g. timestampdiffMonth_timestamp_timestamp).
+TIMESTAMP_DIFF(timestamp)
+
+// Defines NAME_int32_TYPE(count, millis): `millis` shifted by `count` units of
+// TO_MILLIS milliseconds each. The count is widened to gdv_TYPE before the
+// multiplication.
+#define ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS)                \
+  FORCE_INLINE                                                                   \
+  gdv_##TYPE NAME##_int32_##TYPE(gdv_int32 count, gdv_##TYPE millis) {           \
+    const gdv_##TYPE shift_millis = TO_MILLIS * static_cast<gdv_##TYPE>(count);  \
+    return millis + shift_millis;                                                \
+  }
+
+// Defines NAME_int32_TYPE(count, millis): `millis` shifted by count * N_MONTHS
+// calendar months.
+//
+// The calendar arithmetic is delegated to EpochTimePoint::AddMonths(), which
+// works on milliseconds since the epoch; this macro calls none of mktime(),
+// gmtime_r() or timegm() itself. How month/year roll-over and short months
+// are handled is therefore decided by AddMonths().
+#define ADD_INT32_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, N_MONTHS)         \
+  FORCE_INLINE                                                           \
+  gdv_##TYPE NAME##_int32_##TYPE(gdv_int32 count, gdv_##TYPE millis) {   \
+    const int month_shift = static_cast<int>(count * N_MONTHS);          \
+    EpochTimePoint origin(millis);                                       \
+    return origin.AddMonths(month_shift).MillisSinceEpoch();             \
+  }
+
+// Defines NAME_int64_TYPE(count, millis): `millis` shifted by `count` units of
+// TO_MILLIS milliseconds each.
+// TODO: Handle overflow while converting gdv_int64 to millis
+#define ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS)                \
+  FORCE_INLINE                                                                   \
+  gdv_##TYPE NAME##_int64_##TYPE(gdv_int64 count, gdv_##TYPE millis) {           \
+    const gdv_##TYPE shift_millis = TO_MILLIS * static_cast<gdv_##TYPE>(count);  \
+    return millis + shift_millis;                                                \
+  }
+
+// Defines NAME_int64_TYPE(count, millis): `millis` shifted by count * N_MONTHS
+// calendar months via EpochTimePoint::AddMonths().
+// The 64-bit product is narrowed to int before the call, so counts outside the
+// int range are not preserved.
+#define ADD_INT64_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, N_MONTHS)         \
+  FORCE_INLINE                                                           \
+  gdv_##TYPE NAME##_int64_##TYPE(gdv_int64 count, gdv_##TYPE millis) {   \
+    const int month_shift = static_cast<int>(count * N_MONTHS);          \
+    EpochTimePoint origin(millis);                                       \
+    return origin.AddMonths(month_shift).MillisSinceEpoch();             \
+  }
+
+// Same computation as ADD_INT32_TO_TIMESTAMP_FIXED_UNITS with the arguments in
+// the opposite order: defines NAME_TYPE_int32(millis, count). Instantiated with
+// a negative TO_MILLIS it subtracts instead of adds.
+#define ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(TYPE, NAME, TO_MILLIS)                \
+  FORCE_INLINE                                                                   \
+  gdv_##TYPE NAME##_##TYPE##_int32(gdv_##TYPE millis, gdv_int32 count) {         \
+    const gdv_##TYPE shift_millis = TO_MILLIS * static_cast<gdv_##TYPE>(count);  \
+    return millis + shift_millis;                                                \
+  }
+
+// Same computation as ADD_INT64_TO_TIMESTAMP_FIXED_UNITS with the arguments in
+// the opposite order: defines NAME_TYPE_int64(millis, count). Instantiated with
+// a negative TO_MILLIS it subtracts instead of adds.
+#define ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(TYPE, NAME, TO_MILLIS)                \
+  FORCE_INLINE                                                                   \
+  gdv_##TYPE NAME##_##TYPE##_int64(gdv_##TYPE millis, gdv_int64 count) {         \
+    const gdv_##TYPE shift_millis = TO_MILLIS * static_cast<gdv_##TYPE>(count);  \
+    return millis + shift_millis;                                                \
+  }
+
+// Defines NAME_TYPE_int32(millis, count): `millis` shifted by count * N_MONTHS
+// calendar months via EpochTimePoint::AddMonths() (timestamp-first order).
+#define ADD_TIMESTAMP_TO_INT32_MONTH_UNITS(TYPE, NAME, N_MONTHS)         \
+  FORCE_INLINE                                                           \
+  gdv_##TYPE NAME##_##TYPE##_int32(gdv_##TYPE millis, gdv_int32 count) { \
+    const int month_shift = static_cast<int>(count * N_MONTHS);          \
+    EpochTimePoint origin(millis);                                       \
+    return origin.AddMonths(month_shift).MillisSinceEpoch();             \
+  }
+
+// Defines NAME_TYPE_int64(millis, count): `millis` shifted by count * N_MONTHS
+// calendar months via EpochTimePoint::AddMonths() (timestamp-first order).
+// The 64-bit product is narrowed to int before the call.
+#define ADD_TIMESTAMP_TO_INT64_MONTH_UNITS(TYPE, NAME, N_MONTHS)         \
+  FORCE_INLINE                                                           \
+  gdv_##TYPE NAME##_##TYPE##_int64(gdv_##TYPE millis, gdv_int64 count) { \
+    const int month_shift = static_cast<int>(count * N_MONTHS);          \
+    EpochTimePoint origin(millis);                                       \
+    return origin.AddMonths(month_shift).MillisSinceEpoch();             \
+  }
+// Emits both argument orders, (count, value) and (value, count), of a fixed-unit add taking a gdv_int32 count.
+#define ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, NAME, TO_MILLIS) \
+ ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \
+ ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(TYPE, NAME, TO_MILLIS)
+// Emits both argument orders of a month-unit add taking a gdv_int32 count.
+#define ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, NAME, N_MONTHS) \
+ ADD_INT32_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, N_MONTHS) \
+ ADD_TIMESTAMP_TO_INT32_MONTH_UNITS(TYPE, NAME, N_MONTHS)
+// Emits the complete timestampadd<Unit> family with a gdv_int32 count; quarter and year are 3 and 12 months.
+#define TIMESTAMP_ADD_INT32(TYPE) \
+ ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddSecond, MILLIS_IN_SEC) \
+ ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddMinute, MILLIS_IN_MIN) \
+ ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddHour, MILLIS_IN_HOUR) \
+ ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddDay, MILLIS_IN_DAY) \
+ ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddWeek, MILLIS_IN_WEEK) \
+ ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, timestampaddMonth, 1) \
+ ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, timestampaddQuarter, 3) \
+ ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, timestampaddYear, 12)
+// Emits both argument orders of a fixed-unit add taking a gdv_int64 count.
+#define ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, NAME, TO_MILLIS) \
+ ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \
+ ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(TYPE, NAME, TO_MILLIS)
+// Emits both argument orders of a month-unit add taking a gdv_int64 count.
+#define ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, NAME, N_MONTHS) \
+ ADD_INT64_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, N_MONTHS) \
+ ADD_TIMESTAMP_TO_INT64_MONTH_UNITS(TYPE, NAME, N_MONTHS)
+// Emits the complete timestampadd<Unit> family with a gdv_int64 count.
+#define TIMESTAMP_ADD_INT64(TYPE) \
+ ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddSecond, MILLIS_IN_SEC) \
+ ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddMinute, MILLIS_IN_MIN) \
+ ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddHour, MILLIS_IN_HOUR) \
+ ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddDay, MILLIS_IN_DAY) \
+ ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddWeek, MILLIS_IN_WEEK) \
+ ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, timestampaddMonth, 1) \
+ ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, timestampaddQuarter, 3) \
+ ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, timestampaddYear, 12)
+// Emits the int32 and the int64 families together.
+#define TIMESTAMP_ADD_INT(TYPE) \
+ TIMESTAMP_ADD_INT32(TYPE) \
+ TIMESTAMP_ADD_INT64(TYPE)
+// Instantiate every timestampadd<Unit> overload for date64 and for timestamp values.
+TIMESTAMP_ADD_INT(date64)
+TIMESTAMP_ADD_INT(timestamp)
+
+// (count, value) order with a gdv_int32 count of days: date_add_int32_<type> and add_int32_<type> return value + count days
+ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY)
+ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(date64, add, MILLIS_IN_DAY)
+ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY)
+ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY)
+
+// (count, value) order with a gdv_int64 count of days: date_add_int64_<type> and add_int64_<type>
+ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY)
+ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(date64, add, MILLIS_IN_DAY)
+ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY)
+ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY)
+
+// date_sub, subtract, date_diff with a gdv_int32 count: all three return value - count days (the unit is negated, so the shared add macro subtracts)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, date_sub, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, subtract, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, date_diff, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, date_sub, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, subtract, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, date_diff, -1 * MILLIS_IN_DAY)
+
+// date_sub, subtract, date_diff with a gdv_int64 count: same as above with a 64-bit count
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, date_sub, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, subtract, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, date_diff, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, date_sub, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, subtract, -1 * MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, date_diff, -1 * MILLIS_IN_DAY)
+
+// (value, count) order with a gdv_int32 count of days: date_add_<type>_int32 and add_<type>_int32
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, add, MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY)
+
+// (value, count) order with a gdv_int64 count of days: date_add_<type>_int64 and add_<type>_int64
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, add, MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY)
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY)
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/types.h b/src/arrow/cpp/src/gandiva/precompiled/types.h
new file mode 100644
index 000000000..987ee2c6d
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/types.h
@@ -0,0 +1,592 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "gandiva/gdv_function_stubs.h"
+
+// Aliases mirror the arrow data type names, which makes pre-processor macros easy to write.
+using gdv_boolean = bool;
+using gdv_int8 = int8_t;
+using gdv_int16 = int16_t;
+using gdv_int32 = int32_t;
+using gdv_int64 = int64_t;
+using gdv_uint8 = uint8_t;
+using gdv_uint16 = uint16_t;
+using gdv_uint32 = uint32_t;
+using gdv_uint64 = uint64_t;
+using gdv_float32 = float;
+using gdv_float64 = double;
+using gdv_date64 = int64_t;  // same underlying type as gdv_timestamp
+using gdv_date32 = int32_t;
+using gdv_time32 = int32_t;  // presumably millis within a day (cf. extractHour_time32) - confirm
+using gdv_timestamp = int64_t;  // named `millis` in the extract*_timestamp declarations
+using gdv_utf8 = char*;  // same underlying type as gdv_binary
+using gdv_binary = char*;
+using gdv_day_time_interval = int64_t;
+
+#ifdef GANDIVA_UNIT_TEST
+// Unit tests may be built without -O2, where inlining may not happen: expand to nothing.
+#define FORCE_INLINE
+#else
+#define FORCE_INLINE __attribute__((always_inline))  // GCC/Clang attribute syntax
+#endif
+
+extern "C" {  // everything up to the matching brace is declared with C linkage
+
+bool bitMapGetBit(const unsigned char* bmap, int64_t position);  // bmap is const here
+void bitMapSetBit(unsigned char* bmap, int64_t position, bool value);  // mutable bmap
+void bitMapClearBitIfFalse(unsigned char* bmap, int64_t position, bool value);
+
+gdv_int64 extractMillennium_timestamp(gdv_timestamp millis);  // all extract* return gdv_int64
+gdv_int64 extractCentury_timestamp(gdv_timestamp millis);
+gdv_int64 extractDecade_timestamp(gdv_timestamp millis);
+gdv_int64 extractYear_timestamp(gdv_timestamp millis);
+gdv_int64 extractDoy_timestamp(gdv_timestamp millis);
+gdv_int64 extractQuarter_timestamp(gdv_timestamp millis);
+gdv_int64 extractMonth_timestamp(gdv_timestamp millis);
+gdv_int64 extractWeek_timestamp(gdv_timestamp millis);
+gdv_int64 extractDow_timestamp(gdv_timestamp millis);
+gdv_int64 extractDay_timestamp(gdv_timestamp millis);
+gdv_int64 extractHour_timestamp(gdv_timestamp millis);
+gdv_int64 extractMinute_timestamp(gdv_timestamp millis);
+gdv_int64 extractSecond_timestamp(gdv_timestamp millis);
+gdv_int64 extractHour_time32(gdv_int32 millis_in_day);  // time32 forms take gdv_int32
+gdv_int64 extractMinute_time32(gdv_int32 millis_in_day);
+gdv_int64 extractSecond_time32(gdv_int32 millis_in_day);
+
+gdv_int32 hash32(double val, gdv_int32 seed);  // scalar form: a double plus a seed
+gdv_int32 hash32_buf(const gdv_uint8* buf, int len, gdv_int32 seed);  // buffer form
+gdv_int64 hash64(double val, gdv_int64 seed);  // 64-bit forms use a gdv_int64 seed
+gdv_int64 hash64_buf(const gdv_uint8* buf, int len, gdv_int64 seed);
+
+gdv_int32 timestampdiffMonth_timestamp_timestamp(gdv_timestamp, gdv_timestamp);  // gdv_int32 result
+
+gdv_int64 timestampaddSecond_int32_timestamp(gdv_int32, gdv_timestamp);  // int32 first
+gdv_int64 timestampaddMinute_int32_timestamp(gdv_int32, gdv_timestamp);
+gdv_int64 timestampaddHour_int32_timestamp(gdv_int32, gdv_timestamp);
+gdv_int64 timestampaddDay_int32_timestamp(gdv_int32, gdv_timestamp);
+gdv_int64 timestampaddWeek_int32_timestamp(gdv_int32, gdv_timestamp);
+gdv_int64 timestampaddMonth_int32_timestamp(gdv_int32, gdv_timestamp);
+gdv_int64 timestampaddQuarter_int32_timestamp(gdv_int32, gdv_timestamp);
+gdv_int64 timestampaddYear_int32_timestamp(gdv_int32, gdv_timestamp);
+
+gdv_int64 timestampaddSecond_timestamp_int32(gdv_timestamp, gdv_int32);  // timestamp first
+gdv_int64 timestampaddMinute_timestamp_int32(gdv_timestamp, gdv_int32);
+gdv_int64 timestampaddHour_timestamp_int32(gdv_timestamp, gdv_int32);
+gdv_int64 timestampaddDay_timestamp_int32(gdv_timestamp, gdv_int32);
+gdv_int64 timestampaddWeek_timestamp_int32(gdv_timestamp, gdv_int32);
+gdv_int64 timestampaddMonth_timestamp_int32(gdv_timestamp, gdv_int32);
+gdv_int64 timestampaddQuarter_timestamp_int32(gdv_timestamp, gdv_int32);
+gdv_int64 timestampaddYear_timestamp_int32(gdv_timestamp, gdv_int32);
+
+gdv_int64 timestampaddSecond_int64_timestamp(gdv_int64, gdv_timestamp);  // int64 first
+gdv_int64 timestampaddMinute_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_int64 timestampaddHour_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_int64 timestampaddDay_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_int64 timestampaddWeek_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_int64 timestampaddMonth_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_int64 timestampaddQuarter_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_int64 timestampaddYear_int64_timestamp(gdv_int64, gdv_timestamp);
+
+gdv_int64 timestampaddSecond_timestamp_int64(gdv_timestamp, gdv_int64);  // timestamp first
+gdv_int64 timestampaddMinute_timestamp_int64(gdv_timestamp, gdv_int64);
+gdv_int64 timestampaddHour_timestamp_int64(gdv_timestamp, gdv_int64);
+gdv_int64 timestampaddDay_timestamp_int64(gdv_timestamp, gdv_int64);
+gdv_int64 timestampaddWeek_timestamp_int64(gdv_timestamp, gdv_int64);
+gdv_int64 timestampaddMonth_timestamp_int64(gdv_timestamp, gdv_int64);
+gdv_int64 timestampaddQuarter_timestamp_int64(gdv_timestamp, gdv_int64);
+gdv_int64 timestampaddYear_timestamp_int64(gdv_timestamp, gdv_int64);
+
+gdv_int64 date_add_int32_timestamp(gdv_int32, gdv_timestamp);  // integer operand first
+gdv_int64 add_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_int64 add_int32_timestamp(gdv_int32, gdv_timestamp);
+gdv_int64 date_add_int64_timestamp(gdv_int64, gdv_timestamp);
+gdv_timestamp add_date64_int64(gdv_date64, gdv_int64);  // date64 first; gdv_timestamp result
+
+gdv_timestamp to_timestamp_int32(gdv_int32);  // one function per numeric input type
+gdv_timestamp to_timestamp_int64(gdv_int64);
+gdv_timestamp to_timestamp_float32(gdv_float32);
+gdv_timestamp to_timestamp_float64(gdv_float64);
+
+gdv_time32 to_time_int32(gdv_int32);  // same four input types, gdv_time32 result
+gdv_time32 to_time_int64(gdv_int64);
+gdv_time32 to_time_float32(gdv_float32);
+gdv_time32 to_time_float64(gdv_float64);
+
+gdv_int64 date_sub_timestamp_int32(gdv_timestamp, gdv_int32);  // timestamp operand first
+gdv_int64 subtract_timestamp_int32(gdv_timestamp, gdv_int32);
+gdv_int64 date_diff_timestamp_int64(gdv_timestamp, gdv_int64);  // int64 operand, unlike the two above
+
+gdv_boolean castBIT_utf8(gdv_int64 context, const char* data, gdv_int32 data_len);  // (pointer, length) input
+
+bool is_distinct_from_timestamp_timestamp(gdv_int64, bool, gdv_int64, bool);  // bools: presumably validity - confirm
+bool is_not_distinct_from_int32_int32(gdv_int32, bool, gdv_int32, bool);
+
+gdv_int64 date_trunc_Second_date64(gdv_date64);  // date_trunc_*_date64: gdv_date64 in, gdv_int64 out
+gdv_int64 date_trunc_Minute_date64(gdv_date64);
+gdv_int64 date_trunc_Hour_date64(gdv_date64);
+gdv_int64 date_trunc_Day_date64(gdv_date64);
+gdv_int64 date_trunc_Month_date64(gdv_date64);
+gdv_int64 date_trunc_Quarter_date64(gdv_date64);
+gdv_int64 date_trunc_Year_date64(gdv_date64);
+gdv_int64 date_trunc_Decade_date64(gdv_date64);
+gdv_int64 date_trunc_Century_date64(gdv_date64);
+gdv_int64 date_trunc_Millennium_date64(gdv_date64);
+
+gdv_int64 date_trunc_Week_timestamp(gdv_timestamp);  // only the Week form is declared on gdv_timestamp
+double months_between_timestamp_timestamp(gdv_uint64, gdv_uint64);  // NOTE(review): gdv_uint64, not gdv_timestamp - confirm
+
+gdv_int32 mem_compare(const char* left, gdv_int32 left_len, const char* right,
+ gdv_int32 right_len);  // each operand is a (pointer, length) pair
+
+gdv_int32 mod_int64_int32(gdv_int64 left, gdv_int32 right);  // gdv_int32 result; no context parameter
+gdv_float64 mod_float64_float64(gdv_int64 context, gdv_float64 left, gdv_float64 right);
+
+gdv_int64 divide_int64_int64(gdv_int64 context, gdv_int64 in1, gdv_int64 in2);
+
+gdv_int64 div_int64_int64(gdv_int64 context, gdv_int64 in1, gdv_int64 in2);  // same signature as divide_int64_int64
+gdv_float32 div_float32_float32(gdv_int64 context, gdv_float32 in1, gdv_float32 in2);
+gdv_float64 div_float64_float64(gdv_int64 context, gdv_float64 in1, gdv_float64 in2);
+
+gdv_float32 round_float32(gdv_float32);  // round_*: result type matches the input type
+gdv_float64 round_float64(gdv_float64);
+gdv_float32 round_float32_int32(gdv_float32 number, gdv_int32 out_scale);  // float forms take out_scale
+gdv_float64 round_float64_int32(gdv_float64 number, gdv_int32 out_scale);
+gdv_float64 get_scale_multiplier(gdv_int32);
+gdv_int32 round_int32_int32(gdv_int32 number, gdv_int32 precision);  // integer forms take precision
+gdv_int64 round_int64_int32(gdv_int64 number, gdv_int32 precision);
+gdv_int32 round_int32(gdv_int32);
+gdv_int64 round_int64(gdv_int64);
+gdv_int64 get_power_of_10(gdv_int32);
+
+const char* bin_int32(int64_t context, gdv_int32 value, int32_t* out_len);  // out_len: presumably the result length - confirm
+const char* bin_int64(int64_t context, gdv_int64 value, int32_t* out_len);
+
+gdv_float64 cbrt_int32(gdv_int32);  // each family: int32/int64/float32/float64 in, gdv_float64 out
+gdv_float64 cbrt_int64(gdv_int64);
+gdv_float64 cbrt_float32(gdv_float32);
+gdv_float64 cbrt_float64(gdv_float64);
+
+gdv_float64 exp_int32(gdv_int32);
+gdv_float64 exp_int64(gdv_int64);
+gdv_float64 exp_float32(gdv_float32);
+gdv_float64 exp_float64(gdv_float64);
+
+gdv_float64 log_int32(gdv_int32);  // one-argument log_*; no base parameter
+gdv_float64 log_int64(gdv_int64);
+gdv_float64 log_float32(gdv_float32);
+gdv_float64 log_float64(gdv_float64);
+
+gdv_float64 log10_int32(gdv_int32);
+gdv_float64 log10_int64(gdv_int64);
+gdv_float64 log10_float32(gdv_float32);
+gdv_float64 log10_float64(gdv_float64);
+
+gdv_float64 sin_int32(gdv_int32);  // every function in this run returns gdv_float64
+gdv_float64 sin_int64(gdv_int64);
+gdv_float64 sin_float32(gdv_float32);
+gdv_float64 sin_float64(gdv_float64);
+gdv_float64 cos_int32(gdv_int32);
+gdv_float64 cos_int64(gdv_int64);
+gdv_float64 cos_float32(gdv_float32);
+gdv_float64 cos_float64(gdv_float64);
+gdv_float64 asin_int32(gdv_int32);
+gdv_float64 asin_int64(gdv_int64);
+gdv_float64 asin_float32(gdv_float32);
+gdv_float64 asin_float64(gdv_float64);
+gdv_float64 acos_int32(gdv_int32);
+gdv_float64 acos_int64(gdv_int64);
+gdv_float64 acos_float32(gdv_float32);
+gdv_float64 acos_float64(gdv_float64);
+gdv_float64 tan_int32(gdv_int32);
+gdv_float64 tan_int64(gdv_int64);
+gdv_float64 tan_float32(gdv_float32);
+gdv_float64 tan_float64(gdv_float64);
+gdv_float64 atan_int32(gdv_int32);
+gdv_float64 atan_int64(gdv_int64);
+gdv_float64 atan_float32(gdv_float32);
+gdv_float64 atan_float64(gdv_float64);
+gdv_float64 sinh_int32(gdv_int32);
+gdv_float64 sinh_int64(gdv_int64);
+gdv_float64 sinh_float32(gdv_float32);
+gdv_float64 sinh_float64(gdv_float64);
+gdv_float64 cosh_int32(gdv_int32);
+gdv_float64 cosh_int64(gdv_int64);
+gdv_float64 cosh_float32(gdv_float32);
+gdv_float64 cosh_float64(gdv_float64);
+gdv_float64 tanh_int32(gdv_int32);
+gdv_float64 tanh_int64(gdv_int64);
+gdv_float64 tanh_float32(gdv_float32);
+gdv_float64 tanh_float64(gdv_float64);
+gdv_float64 atan2_int32_int32(gdv_int32 in1, gdv_int32 in2);  // two arguments of one shared type
+gdv_float64 atan2_int64_int64(gdv_int64 in1, gdv_int64 in2);
+gdv_float64 atan2_float32_float32(gdv_float32 in1, gdv_float32 in2);
+gdv_float64 atan2_float64_float64(gdv_float64 in1, gdv_float64 in2);
+gdv_float64 cot_float32(gdv_float32);  // cot_* is declared for the float types only
+gdv_float64 cot_float64(gdv_float64);
+gdv_float64 radians_int32(gdv_int32);
+gdv_float64 radians_int64(gdv_int64);
+gdv_float64 radians_float32(gdv_float32);
+gdv_float64 radians_float64(gdv_float64);
+gdv_float64 degrees_int32(gdv_int32);
+gdv_float64 degrees_int64(gdv_int64);
+gdv_float64 degrees_float32(gdv_float32);
+gdv_float64 degrees_float64(gdv_float64);
+
+gdv_int32 bitwise_and_int32_int32(gdv_int32 in1, gdv_int32 in2);  // result width matches the operands
+gdv_int64 bitwise_and_int64_int64(gdv_int64 in1, gdv_int64 in2);
+gdv_int32 bitwise_or_int32_int32(gdv_int32 in1, gdv_int32 in2);
+gdv_int64 bitwise_or_int64_int64(gdv_int64 in1, gdv_int64 in2);
+gdv_int32 bitwise_xor_int32_int32(gdv_int32 in1, gdv_int32 in2);
+gdv_int64 bitwise_xor_int64_int64(gdv_int64 in1, gdv_int64 in2);
+gdv_int32 bitwise_not_int32(gdv_int32);
+gdv_int64 bitwise_not_int64(gdv_int64);
+
+gdv_float64 power_float64_float64(gdv_float64, gdv_float64);
+
+gdv_float64 log_int32_int32(gdv_int64 context, gdv_int32 base, gdv_int32 value);  // base comes before value
+
+bool starts_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* prefix,
+ gdv_int32 prefix_len);  // predicates here take no context parameter
+bool ends_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* suffix,
+ gdv_int32 suffix_len);
+bool is_substr_utf8_utf8(const char* data, gdv_int32 data_len, const char* substr,
+ gdv_int32 substr_len);
+
+gdv_int32 utf8_length(gdv_int64 context, const char* data, gdv_int32 data_len);  // takes context
+
+gdv_int32 utf8_last_char_pos(gdv_int64 context, const char* data, gdv_int32 data_len);
+
+gdv_date64 castDATE_utf8(int64_t execution_context, const char* input, gdv_int32 length);  // string input as (pointer, length)
+
+gdv_date64 castDATE_int64(gdv_int64 date);
+
+gdv_date64 castDATE_date32(gdv_date32 date);
+
+gdv_date32 castDATE_int32(gdv_int32 date);  // the only castDATE_* returning gdv_date32
+
+gdv_timestamp castTIMESTAMP_utf8(int64_t execution_context, const char* input,
+ gdv_int32 length);
+gdv_timestamp castTIMESTAMP_date64(gdv_date64);
+gdv_timestamp castTIMESTAMP_int64(gdv_int64);
+gdv_date64 castDATE_timestamp(gdv_timestamp);
+gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis);
+const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64, gdv_int32*);  // parameters are unnamed
+gdv_date64 last_day_from_timestamp(gdv_date64 millis);  // parameter type is gdv_date64 despite the name
+
+gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale);
+
+const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len,
+ gdv_int32 repeat_times, gdv_int32* out_len);
+
+const char* substr_utf8_int64_int64(gdv_int64 context, const char* input,
+ gdv_int32 in_len, gdv_int64 offset64,
+ gdv_int64 length, gdv_int32* out_len);  // offset and length are gdv_int64
+const char* substr_utf8_int64(gdv_int64 context, const char* input, gdv_int32 in_len,
+ gdv_int64 offset64, gdv_int32* out_len);  // same, without the length parameter
+
+const char* concat_utf8_utf8(gdv_int64 context, const char* left, gdv_int32 left_len,
+ bool left_validity, const char* right, gdv_int32 right_len,
+ bool right_validity, gdv_int32* out_len);  // each input: (pointer, length, validity)
+const char* concat_utf8_utf8_utf8(gdv_int64 context, const char* in1, gdv_int32 in1_len,
+ bool in1_validity, const char* in2, gdv_int32 in2_len,
+ bool in2_validity, const char* in3, gdv_int32 in3_len,
+ bool in3_validity, gdv_int32* out_len);
+const char* concat_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1,
+ gdv_int32 in1_len, bool in1_validity,
+ const char* in2, gdv_int32 in2_len,
+ bool in2_validity, const char* in3,
+ gdv_int32 in3_len, bool in3_validity,
+ const char* in4, gdv_int32 in4_len,
+ bool in4_validity, gdv_int32* out_len);
+const char* space_int32(gdv_int64 ctx, gdv_int32 n, int32_t* out_len);  // n is gdv_int32 here
+const char* space_int64(gdv_int64 ctx, gdv_int64 n, int32_t* out_len);  // n is gdv_int64 here
+const char* concat_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+ const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+ gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+ bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+ gdv_int32* out_len);
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+ const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+ gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+ bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+ const char* in6, gdv_int32 in6_len, bool in6_validity, gdv_int32* out_len);
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+ const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+ gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+ bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+ const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+ gdv_int32 in7_len, bool in7_validity, gdv_int32* out_len);
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+ const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+ gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+ bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+ const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+ gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
+ bool in8_validity, gdv_int32* out_len);
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+ const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+ gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+ bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+ const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+ gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
+ bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity,
+ gdv_int32* out_len);
+const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
+ const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
+ gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
+ bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
+ const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
+ gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
+ bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity,
+ const char* in10, gdv_int32 in10_len, bool in10_validity, gdv_int32* out_len);  // widest form: 10 inputs
+
+const char* concatOperator_utf8_utf8(gdv_int64 context, const char* left,
+ gdv_int32 left_len, const char* right,
+ gdv_int32 right_len, gdv_int32* out_len);  // inputs are (pointer, length); no validity flags
+const char* concatOperator_utf8_utf8_utf8(gdv_int64 context, const char* in1,
+ gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3,
+ gdv_int32 in3_len, gdv_int32* out_len);
+const char* concatOperator_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1,
+ gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3,
+ gdv_int32 in3_len, const char* in4,
+ gdv_int32 in4_len, gdv_int32* out_len);
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+ gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, gdv_int32* out_len);
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+ gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+ gdv_int32 in6_len, gdv_int32* out_len);
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+ gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+ gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, gdv_int32* out_len);
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+ gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+ gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
+ gdv_int32 in8_len, gdv_int32* out_len);
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+ gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+ gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
+ gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, gdv_int32* out_len);
+const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
+ gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
+ gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
+ gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
+ gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
+ gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, const char* in10,
+ gdv_int32 in10_len, gdv_int32* out_len);  // widest form: 10 inputs
+
+const char* castVARCHAR_binary_int64(gdv_int64 context, const char* data,
+ gdv_int32 data_len, int64_t out_len,
+ int32_t* out_length);  // out_len is an int64_t input; out_length is the int32_t* output
+
+const char* castVARCHAR_utf8_int64(gdv_int64 context, const char* data,
+ gdv_int32 data_len, int64_t out_len,
+ int32_t* out_length);
+
+const char* castVARBINARY_utf8_int64(gdv_int64 context, const char* data,
+ gdv_int32 data_len, int64_t out_len,
+ int32_t* out_length);
+
+const char* castVARBINARY_binary_int64(gdv_int64 context, const char* data,
+ gdv_int32 data_len, int64_t out_len,
+ int32_t* out_length);  // all four declarations share one parameter list
+
+const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+ int32_t* out_len);
+
+const char* ltrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+ int32_t* out_len);  // single-input trims: only the text is passed
+
+const char* rtrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+ int32_t* out_len);
+
+const char* btrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
+ int32_t* out_len);
+
+const char* ltrim_utf8_utf8(gdv_int64 context, const char* basetext,
+ gdv_int32 basetext_len, const char* trimtext,
+ gdv_int32 trimtext_len, int32_t* out_len);  // two-input trims add (trimtext, trimtext_len)
+
+const char* rtrim_utf8_utf8(gdv_int64 context, const char* basetext,
+ gdv_int32 basetext_len, const char* trimtext,
+ gdv_int32 trimtext_len, int32_t* out_len);
+
+const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext,
+ gdv_int32 basetext_len, const char* trimtext,
+ gdv_int32 trimtext_len, int32_t* out_len);
+
+gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len);  // no context parameter
+
+gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
+ const char* str, gdv_int32 str_len);  // sub_str comes first
+
+gdv_int32 strpos_utf8_utf8(gdv_int64 context, const char* str, gdv_int32 str_len,
+ const char* sub_str, gdv_int32 sub_str_len);  // str comes first (reverse of locate)
+
+gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str,
+ gdv_int32 sub_str_len, const char* str,
+ gdv_int32 str_len, gdv_int32 start_pos);  // locate with an extra start_pos
+
+const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+ gdv_int32 return_length, const char* fill_text,
+ gdv_int32 fill_text_len, gdv_int32* out_len);  // explicit fill_text
+
+const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
+ gdv_int32 return_length, const char* fill_text,
+ gdv_int32 fill_text_len, gdv_int32* out_len);
+
+const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+ gdv_int32 return_length, gdv_int32* out_len);  // no fill_text parameter
+
+const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+ gdv_int32 return_length, gdv_int32* out_len);
+
+const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text,
+ gdv_int32 text_len, const char* from_str,
+ gdv_int32 from_str_len,
+ const char* to_str, gdv_int32 to_str_len,
+ gdv_int32 max_length, gdv_int32* out_len);  // adds max_length to the form below
+
+const char* replace_utf8_utf8_utf8(gdv_int64 context, const char* text,
+ gdv_int32 text_len, const char* from_str,
+ gdv_int32 from_str_len, const char* to_str,
+ gdv_int32 to_str_len, gdv_int32* out_len);
+
+const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char* text_in,
+ int32_t text_len,
+ const char* char_to_replace,
+ int32_t char_to_replace_len,
+ int32_t* out_len);  // replacement is passed as (pointer, length)
+
+const char* convert_toDOUBLE(int64_t context, double value, int32_t* out_len);  // convert_to*: value in, const char* out
+
+const char* convert_toDOUBLE_be(int64_t context, double value, int32_t* out_len);  // `_be`: presumably big-endian - confirm
+
+const char* convert_toFLOAT(int64_t context, float value, int32_t* out_len);
+
+const char* convert_toFLOAT_be(int64_t context, float value, int32_t* out_len);
+
+const char* convert_toBIGINT(int64_t context, int64_t value, int32_t* out_len);
+
+const char* convert_toBIGINT_be(int64_t context, int64_t value, int32_t* out_len);
+
+const char* convert_toINT(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toINT_be(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toBOOLEAN(int64_t context, bool value, int32_t* out_len);  // no `_be` form is declared
+
+const char* convert_toTIME_EPOCH(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toTIME_EPOCH_be(int64_t context, int32_t value, int32_t* out_len);
+
+const char* convert_toTIMESTAMP_EPOCH(int64_t context, int64_t timestamp,
+ int32_t* out_len);
+const char* convert_toTIMESTAMP_EPOCH_be(int64_t context, int64_t timestamp,
+ int32_t* out_len);
+
+const char* convert_toDATE_EPOCH(int64_t context, int64_t date, int32_t* out_len);
+
+const char* convert_toDATE_EPOCH_be(int64_t context, int64_t date, int32_t* out_len);
+
+const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len,
+ int32_t* out_len);  // input is itself a (pointer, length) string; no `_be` form
+
+const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
+ const char* splitter, gdv_int32 split_len, gdv_int32 index,
+ gdv_int32* out_len);  // note: the splitter length parameter is named split_len
+
+const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
+ gdv_int32 text_len, gdv_int32 offset,
+ gdv_int32 length, gdv_int32* out_len);
+
+const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value,
+ gdv_int64 out_len, gdv_int32* out_length);  // out_len is an input; out_length is the output pointer
+
+const char* castVARCHAR_int32_int64(int64_t context, int32_t value, int64_t len,
+ int32_t* out_len);  // here `len` is the input and out_len the output pointer
+
+const char* castVARCHAR_int64_int64(int64_t context, int64_t value, int64_t len,
+ int32_t* out_len);
+
+const char* castVARCHAR_float32_int64(int64_t context, float value, int64_t len,
+ int32_t* out_len);
+
+const char* castVARCHAR_float64_int64(int64_t context, double value, int64_t len,
+ int32_t* out_len);
+
+const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+ gdv_int32 number, gdv_int32* out_len);
+
+const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
+ gdv_int32 number, gdv_int32* out_len);
+
+const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len,
+ gdv_int32* out_len);
+
+int32_t castINT_utf8(int64_t context, const char* data, int32_t len);  // string forms take context
+
+int64_t castBIGINT_utf8(int64_t context, const char* data, int32_t len);
+
+float castFLOAT4_utf8(int64_t context, const char* data, int32_t len);
+
+double castFLOAT8_utf8(int64_t context, const char* data, int32_t len);
+
+int32_t castINT_float32(gdv_float32 value);  // float-input forms take no context
+
+int32_t castINT_float64(gdv_float64 value);
+
+int64_t castBIGINT_float32(gdv_float32 value);
+
+int64_t castBIGINT_float64(gdv_float64 value);
+
+int64_t castBIGINT_daytimeinterval(gdv_day_time_interval in);
+
+int32_t castINT_year_interval(gdv_month_interval in);  // NOTE(review): gdv_month_interval is not aliased in this header - confirm its source
+
+int64_t castBIGINT_year_interval(gdv_month_interval in);
+
+gdv_day_time_interval castNULLABLEINTERVALDAY_int32(gdv_int32 in);  // DAY forms take no context
+
+gdv_day_time_interval castNULLABLEINTERVALDAY_int64(gdv_int64 in);
+
+gdv_month_interval castNULLABLEINTERVALYEAR_int32(int64_t context, gdv_int32 in);  // YEAR forms take context
+
+gdv_month_interval castNULLABLEINTERVALYEAR_int64(int64_t context, gdv_int64 in);
+
+} // extern "C"