diff options
Diffstat (limited to 'src/arrow/cpp/src/gandiva/precompiled')
25 files changed, 11382 insertions, 0 deletions
diff --git a/src/arrow/cpp/src/gandiva/precompiled/CMakeLists.txt b/src/arrow/cpp/src/gandiva/precompiled/CMakeLists.txt
new file mode 100644
index 000000000..650b80f6b
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -0,0 +1,142 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+project(gandiva)
+
+# Sources that are compiled to LLVM bitcode by the loop below.
+set(PRECOMPILED_SRCS
+    arithmetic_ops.cc
+    bitmap.cc
+    decimal_ops.cc
+    decimal_wrapper.cc
+    extended_math_ops.cc
+    hash.cc
+    print.cc
+    string_ops.cc
+    time.cc
+    timestamp_arithmetic.cc
+    ../../arrow/util/basic_decimal.cc)
+
+if(MSVC)
+  # clang pretends to be a particular version of MSVC. 191[0-9] is
+  # Visual Studio 2017, and the standard library uses C++14 features,
+  # so we have to use that -std version to get the IR compilation to work
+  if(MSVC_VERSION MATCHES "^191[0-9]$")
+    set(FMS_COMPATIBILITY 19.10)
+  else()
+    message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}")
+  endif()
+  set(PLATFORM_CLANG_OPTIONS -std=c++14 -fms-compatibility
+      -fms-compatibility-version=${FMS_COMPATIBILITY})
+else()
+  set(PLATFORM_CLANG_OPTIONS -std=c++11)
+endif()
+
+# Create bitcode for each of the source files.
+# Start from an empty list so that a BC_FILES value inherited from an
+# enclosing scope cannot leak extra inputs into the link step below.
+set(BC_FILES)
+foreach(SRC_FILE ${PRECOMPILED_SRCS})
+  get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE)
+  get_filename_component(ABSOLUTE_SRC ${SRC_FILE} ABSOLUTE)
+  set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SRC_BASE}.bc)
+  set(PRECOMPILE_COMMAND)
+  if(CMAKE_OSX_SYSROOT)
+    # Run clang through "cmake -E env" so that SDKROOT is set for it.
+    list(APPEND
+         PRECOMPILE_COMMAND
+         ${CMAKE_COMMAND}
+         -E
+         env
+         SDKROOT=${CMAKE_OSX_SYSROOT})
+  endif()
+  list(APPEND
+       PRECOMPILE_COMMAND
+       ${CLANG_EXECUTABLE}
+       ${PLATFORM_CLANG_OPTIONS}
+       -DGANDIVA_IR
+       -DNDEBUG # DCHECK macros not implemented in precompiled code
+       -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols
+       -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols
+       -fno-use-cxa-atexit # Workaround for unresolved __dso_handle
+       -emit-llvm
+       -O3
+       -c
+       ${ABSOLUTE_SRC}
+       -o
+       ${BC_FILE}
+       ${ARROW_GANDIVA_PC_CXX_FLAGS}
+       -I${CMAKE_SOURCE_DIR}/src
+       -I${ARROW_BINARY_DIR}/src)
+
+  if(NOT ARROW_USE_NATIVE_INT128)
+    list(APPEND PRECOMPILE_COMMAND -I${Boost_INCLUDE_DIR})
+  endif()
+  # Depend on the same absolute path that is handed to clang, so the
+  # dependency does not rely on how a relative path gets resolved.
+  add_custom_command(OUTPUT ${BC_FILE}
+                     COMMAND ${PRECOMPILE_COMMAND}
+                     DEPENDS ${ABSOLUTE_SRC})
+  list(APPEND BC_FILES ${BC_FILE})
+endforeach()
+
+# link all of the bitcode files into a single bitcode file.
+add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_BC_PATH}
+                   COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_PRECOMPILED_BC_PATH}
+                           ${BC_FILES}
+                   DEPENDS ${BC_FILES}
+                   VERBATIM)
+
+# turn the bitcode file into a C++ static data variable.
+# The generator script is listed in DEPENDS so that editing it regenerates
+# the output.
+add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_CC_PATH}
+                   COMMAND ${PYTHON_EXECUTABLE}
+                           "${CMAKE_CURRENT_SOURCE_DIR}/../make_precompiled_bitcode.py"
+                           ${GANDIVA_PRECOMPILED_CC_IN_PATH}
+                           ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH}
+                   DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../make_precompiled_bitcode.py"
+                           ${GANDIVA_PRECOMPILED_CC_IN_PATH}
+                           ${GANDIVA_PRECOMPILED_BC_PATH}
+                   VERBATIM)
+
+add_custom_target(precompiled ALL DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH}
+                                          ${GANDIVA_PRECOMPILED_CC_PATH})
+
+# testing
+# The precompiled sources are built directly into a native test executable
+# (with GANDIVA_UNIT_TEST defined) together with their *_test.cc files.
+if(ARROW_BUILD_TESTS)
+  add_executable(gandiva-precompiled-test
+                 ../context_helper.cc
+                 bitmap_test.cc
+                 bitmap.cc
+                 epoch_time_point_test.cc
+                 time_test.cc
+                 time.cc
+                 timestamp_arithmetic.cc
+                 ../cast_time.cc
+                 ../../arrow/vendored/datetime/tz.cpp
+                 hash_test.cc
+                 hash.cc
+                 string_ops_test.cc
+                 string_ops.cc
+                 arithmetic_ops_test.cc
+                 arithmetic_ops.cc
+                 extended_math_ops_test.cc
+                 extended_math_ops.cc
+                 decimal_ops_test.cc
+                 decimal_ops.cc
+                 ../decimal_type_util.cc
+                 ../decimal_xlarge.cc)
+  target_include_directories(gandiva-precompiled-test PRIVATE ${CMAKE_SOURCE_DIR}/src)
+  target_link_libraries(gandiva-precompiled-test PRIVATE ${ARROW_TEST_LINK_LIBS})
+  target_compile_definitions(gandiva-precompiled-test PRIVATE GANDIVA_UNIT_TEST=1
+                             ARROW_STATIC GANDIVA_STATIC)
+  set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/gandiva-precompiled-test")
+  add_test(gandiva-precompiled-test ${TEST_PATH})
+  set_property(TEST gandiva-precompiled-test
+               APPEND
+               PROPERTY LABELS "unittest;gandiva-tests")
+  add_dependencies(gandiva-tests gandiva-precompiled-test)
+endif()
diff --git a/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops.cc
new file mode 100644
index 000000000..c736c38d3
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops.cc
@@ -0,0 +1,274 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern "C" {
+
+#include <math.h>
+#include "./types.h"
+
+// Expand inner macro for all numeric types.
+// INNER is invoked once per type as INNER(NAME, <type>, OP).
+#define NUMERIC_TYPES(INNER, NAME, OP) \
+  INNER(NAME, int8, OP) \
+  INNER(NAME, int16, OP) \
+  INNER(NAME, int32, OP) \
+  INNER(NAME, int64, OP) \
+  INNER(NAME, uint8, OP) \
+  INNER(NAME, uint16, OP) \
+  INNER(NAME, uint32, OP) \
+  INNER(NAME, uint64, OP) \
+  INNER(NAME, float32, OP) \
+  INNER(NAME, float64, OP)
+
+// Expand inner macros for all date/time types.
+#define DATE_TYPES(INNER, NAME, OP) \
+  INNER(NAME, date64, OP) \
+  INNER(NAME, date32, OP) \
+  INNER(NAME, timestamp, OP) \
+  INNER(NAME, time32, OP)
+
+// Numeric types followed by the date/time types.
+#define NUMERIC_DATE_TYPES(INNER, NAME, OP) \
+  NUMERIC_TYPES(INNER, NAME, OP) \
+  DATE_TYPES(INNER, NAME, OP)
+
+// Numeric types, the date/time types, and boolean.
+#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME, OP) \
+  NUMERIC_TYPES(INNER, NAME, OP) \
+  DATE_TYPES(INNER, NAME, OP) \
+  INNER(NAME, boolean, OP)
+
+// Integer modulo: defines NAME_<IN_TYPE1>_<IN_TYPE2>(left, right).
+// Two divisors are handled before '%' is evaluated:
+//  - right == 0  : 'left' is returned (cast to the output type).
+//  - right == -1 : 0 is returned. The remainder is 0 for every 'left', and
+//                  evaluating '%' for the most negative 'left' would overflow,
+//                  which is undefined behaviour.
+#define MOD_OP(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE NAME##_##IN_TYPE1##_##IN_TYPE2(gdv_##IN_TYPE1 left, \
+                                                gdv_##IN_TYPE2 right) { \
+    if (right == 0) { \
+      return static_cast<gdv_##OUT_TYPE>(left); \
+    } \
+    if (right == -1) { \
+      return static_cast<gdv_##OUT_TYPE>(0); \
+    } \
+    return static_cast<gdv_##OUT_TYPE>(left % right); \
+  }
+
+// Symmetric binary fns : left, right params and return type are same.
+// Defines NAME_<TYPE>_<TYPE>(left, right) returning (left OP right) cast back
+// to gdv_<TYPE>.
+#define BINARY_SYMMETRIC(NAME, TYPE, OP) \
+  FORCE_INLINE \
+  gdv_##TYPE NAME##_##TYPE##_##TYPE(gdv_##TYPE left, gdv_##TYPE right) { \
+    return static_cast<gdv_##TYPE>(left OP right); \
+  }
+
+// add/subtract/multiply for every numeric type; bitwise ops for int32/int64.
+NUMERIC_TYPES(BINARY_SYMMETRIC, add, +)
+NUMERIC_TYPES(BINARY_SYMMETRIC, subtract, -)
+NUMERIC_TYPES(BINARY_SYMMETRIC, multiply, *)
+BINARY_SYMMETRIC(bitwise_and, int32, &)
+BINARY_SYMMETRIC(bitwise_and, int64, &)
+BINARY_SYMMETRIC(bitwise_or, int32, |)
+BINARY_SYMMETRIC(bitwise_or, int64, |)
+BINARY_SYMMETRIC(bitwise_xor, int32, ^)
+BINARY_SYMMETRIC(bitwise_xor, int64, ^)
+
+#undef BINARY_SYMMETRIC
+
+// Instantiates mod_int64_int32 (int32 result) and mod_int64_int64 (int64 result).
+MOD_OP(mod, int64, int32, int32)
+MOD_OP(mod, int64, int64, int64)
+
+#undef MOD_OP
+
+// Floating point modulo via fmod(). When y is zero, the error message
+// "divide by zero error" is recorded on the context and 0.0 is returned.
+gdv_float64 mod_float64_float64(int64_t context, gdv_float64 x, gdv_float64 y) {
+  if (y == 0.0) {
+    char const* err_msg = "divide by zero error";
+    gdv_fn_context_set_error_msg(context, err_msg);
+    return 0.0;
+  }
+  return fmod(x, y);
+}
+
+// Relational binary fns : left, right params are same, return is bool.
+#define BINARY_RELATIONAL(NAME, TYPE, OP) \
+  FORCE_INLINE \
+  bool NAME##_##TYPE##_##TYPE(gdv_##TYPE left, gdv_##TYPE right) { return left OP right; }
+
+// equal/not_equal also cover boolean; the ordering comparisons do not.
+NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL, equal, ==)
+NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL, not_equal, !=)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, less_than, <)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, less_than_or_equal_to, <=)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, greater_than, >)
+NUMERIC_DATE_TYPES(BINARY_RELATIONAL, greater_than_or_equal_to, >=)
+
+#undef BINARY_RELATIONAL
+
+// cast fns : takes one param type, returns another type.
+// Defines NAME_<IN_TYPE>(in) as a plain static_cast to gdv_<OUT_TYPE>.
+#define CAST_UNARY(NAME, IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE NAME##_##IN_TYPE(gdv_##IN_TYPE in) { \
+    return static_cast<gdv_##OUT_TYPE>(in); \
+  }
+
+CAST_UNARY(castBIGINT, int32, int64)
+CAST_UNARY(castINT, int64, int32)
+CAST_UNARY(castFLOAT4, int32, float32)
+CAST_UNARY(castFLOAT4, int64, float32)
+CAST_UNARY(castFLOAT8, int32, float64)
+CAST_UNARY(castFLOAT8, int64, float64)
+CAST_UNARY(castFLOAT8, float32, float64)
+CAST_UNARY(castFLOAT4, float64, float32)
+
+#undef CAST_UNARY
+
+// cast float types to int types.
+// The input is passed through round() before the cast, so the result is the
+// rounded value rather than a truncation.
+#define CAST_INT_FLOAT(NAME, IN_TYPE, OUT_TYPE) \
+  FORCE_INLINE \
+  gdv_##OUT_TYPE NAME##_##IN_TYPE(gdv_##IN_TYPE in) { \
+    gdv_##OUT_TYPE out = static_cast<gdv_##OUT_TYPE>(round(in)); \
+    return out; \
+  }
+
+CAST_INT_FLOAT(castBIGINT, float32, int64)
+CAST_INT_FLOAT(castBIGINT, float64, int64)
+CAST_INT_FLOAT(castINT, float32, int32)
+CAST_INT_FLOAT(castINT, float64, int32)
+
+#undef CAST_INT_FLOAT
+
+// simple nullable functions, result value = fn(input validity)
+// The value argument 'in' is ignored; OP is applied to 'is_valid' only.
+#define VALIDITY_OP(NAME, TYPE, OP) \
+  FORCE_INLINE \
+  bool NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { return OP is_valid; }
+
+NUMERIC_BOOL_DATE_TYPES(VALIDITY_OP, isnull, !)
+// Unary '+' leaves 'is_valid' unchanged: isnotnull/isnumeric return the validity.
+NUMERIC_BOOL_DATE_TYPES(VALIDITY_OP, isnotnull, +)
+NUMERIC_TYPES(VALIDITY_OP, isnumeric, +)
+
+#undef VALIDITY_OP
+
+// Single-argument variants of the type lists: INNER is invoked as INNER(<type>).
+#define NUMERIC_FUNCTION(INNER) \
+  INNER(int8) \
+  INNER(int16) \
+  INNER(int32) \
+  INNER(int64) \
+  INNER(uint8) \
+  INNER(uint16) \
+  INNER(uint32) \
+  INNER(uint64) \
+  INNER(float32) \
+  INNER(float64)
+
+#define DATE_FUNCTION(INNER) \
+  INNER(date32) \
+  INNER(date64) \
+  INNER(timestamp) \
+  INNER(time32)
+
+#define NUMERIC_BOOL_DATE_FUNCTION(INNER) \
+  NUMERIC_FUNCTION(INNER) \
+  DATE_FUNCTION(INNER) \
+  INNER(boolean)
+
+// Logical negation of a boolean value.
+FORCE_INLINE
+gdv_boolean not_boolean(gdv_boolean in) { return !in; }
+
+// is_distinct_from
+// Null-aware inequality: differing validity -> true, both invalid -> false,
+// otherwise the values are compared with '!='.
+#define IS_DISTINCT_FROM(TYPE) \
+  FORCE_INLINE \
+  bool is_distinct_from_##TYPE##_##TYPE(gdv_##TYPE in1, gdv_boolean is_valid1, \
+                                        gdv_##TYPE in2, gdv_boolean is_valid2) { \
+    if (is_valid1 != is_valid2) { \
+      return true; \
+    } \
+    if (!is_valid1) { \
+      return false; \
+    } \
+    return in1 != in2; \
+  }
+
+// is_not_distinct_from
+// Null-aware equality: differing validity -> false, both invalid -> true,
+// otherwise the values are compared with '=='.
+#define IS_NOT_DISTINCT_FROM(TYPE) \
+  FORCE_INLINE \
+  bool is_not_distinct_from_##TYPE##_##TYPE(gdv_##TYPE in1, gdv_boolean is_valid1, \
+                                            gdv_##TYPE in2, gdv_boolean is_valid2) { \
+    if (is_valid1 != is_valid2) { \
+      return false; \
+    } \
+    if (!is_valid1) { \
+      return true; \
+    } \
+    return in1 == in2; \
+  }
+
+NUMERIC_BOOL_DATE_FUNCTION(IS_DISTINCT_FROM)
+NUMERIC_BOOL_DATE_FUNCTION(IS_NOT_DISTINCT_FROM)
+
+#undef IS_DISTINCT_FROM
+#undef IS_NOT_DISTINCT_FROM
+
+// divide_<TYPE>_<TYPE>: in1 / in2 for every numeric type. A zero divisor
+// records "divide by zero error" on the context and returns 0.
+#define DIVIDE(TYPE) \
+  FORCE_INLINE \
+  gdv_##TYPE divide_##TYPE##_##TYPE(gdv_int64 context, gdv_##TYPE in1, gdv_##TYPE in2) { \
+    if (in2 == 0) { \
+      char const* err_msg = "divide by zero error"; \
+      gdv_fn_context_set_error_msg(context, err_msg); \
+      return 0; \
+    } \
+    return static_cast<gdv_##TYPE>(in1 / in2); \
+  }
+
+NUMERIC_FUNCTION(DIVIDE)
+
+#undef DIVIDE
+
+// div_<TYPE>_<TYPE> for the integer types: same body as DIVIDE above, under
+// the 'div' name.
+#define DIV(TYPE) \
+  FORCE_INLINE \
+  gdv_##TYPE div_##TYPE##_##TYPE(gdv_int64 context, gdv_##TYPE in1, gdv_##TYPE in2) { \
+    if (in2 == 0) { \
+      char const* err_msg = "divide by zero error"; \
+      gdv_fn_context_set_error_msg(context, err_msg); \
+      return 0; \
+    } \
+    return static_cast<gdv_##TYPE>(in1 / in2); \
+  }
+
+DIV(int32)
+DIV(int64)
+
+#undef DIV
+
+// div_<TYPE>_<TYPE> for the floating point types: the quotient is passed
+// through trunc(), i.e. its fractional part is dropped.
+#define DIV_FLOAT(TYPE) \
+  FORCE_INLINE \
+  gdv_##TYPE div_##TYPE##_##TYPE(gdv_int64 context, gdv_##TYPE in1, gdv_##TYPE in2) { \
+    if (in2 == 0) { \
+      char const* err_msg = "divide by zero error"; \
+      gdv_fn_context_set_error_msg(context, err_msg); \
+      return 0; \
+    } \
+    return static_cast<gdv_##TYPE>(::trunc(in1 / in2)); \
+  }
+
+DIV_FLOAT(float32)
+DIV_FLOAT(float64)
+
+#undef DIV_FLOAT
+
+// bitwise_not_<TYPE>: one's complement of the input.
+#define BITWISE_NOT(TYPE) \
+  FORCE_INLINE \
+  gdv_##TYPE bitwise_not_##TYPE(gdv_##TYPE in) { return static_cast<gdv_##TYPE>(~in); }
+
+BITWISE_NOT(int32)
+BITWISE_NOT(int64)
+
+#undef BITWISE_NOT
+
+// Drop the type-list helper macros now that all functions are instantiated.
+#undef DATE_FUNCTION
+#undef DATE_TYPES
+#undef NUMERIC_BOOL_DATE_TYPES
+#undef NUMERIC_DATE_TYPES
+#undef NUMERIC_FUNCTION
+#undef NUMERIC_TYPES
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc
new file mode 100644
index 000000000..36b50bcfd
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "../execution_context.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Null-aware (in)equality: the outcome is driven by the validity flags when
+// they differ or are both false, and by the values otherwise.
+TEST(TestArithmeticOps, TestIsDistinctFrom) {
+  EXPECT_EQ(is_distinct_from_timestamp_timestamp(1000, true, 1000, false), true);
+  EXPECT_EQ(is_distinct_from_timestamp_timestamp(1000, false, 1000, true), true);
+  EXPECT_EQ(is_distinct_from_timestamp_timestamp(1000, false, 1000, false), false);
+  EXPECT_EQ(is_distinct_from_timestamp_timestamp(1000, true, 1000, true), false);
+
+  EXPECT_EQ(is_not_distinct_from_int32_int32(1000, true, 1000, false), false);
+  EXPECT_EQ(is_not_distinct_from_int32_int32(1000, false, 1000, true), false);
+  EXPECT_EQ(is_not_distinct_from_int32_int32(1000, false, 1000, false), true);
+  EXPECT_EQ(is_not_distinct_from_int32_int32(1000, true, 1000, true), true);
+}
+
+// Integer mod by zero returns the dividend; float mod by zero reports an error.
+TEST(TestArithmeticOps, TestMod) {
+  gandiva::ExecutionContext context;
+  EXPECT_EQ(mod_int64_int32(10, 0), 10);
+
+  const double acceptable_abs_error = 0.00000000001;  // 1e-11
+
+  EXPECT_DOUBLE_EQ(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 2.5, 0.0),
+                   0.0);
+  EXPECT_TRUE(context.has_error());
+  EXPECT_EQ(context.get_error(), "divide by zero error");
+
+  context.Reset();
+  EXPECT_NEAR(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 2.5, 1.2), 0.1,
+              acceptable_abs_error);
+  EXPECT_FALSE(context.has_error());
+
+  context.Reset();
+  EXPECT_DOUBLE_EQ(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 2.5, 2.5),
+                   0.0);
+  EXPECT_FALSE(context.has_error());
+
+  context.Reset();
+  EXPECT_NEAR(mod_float64_float64(reinterpret_cast<gdv_int64>(&context), 9.2, 3.7), 1.8,
+              acceptable_abs_error);
+  EXPECT_FALSE(context.has_error());
+}
+
+// divide: a zero divisor yields 0 and sets the context error.
+TEST(TestArithmeticOps, TestDivide) {
+  gandiva::ExecutionContext context;
+  EXPECT_EQ(divide_int64_int64(reinterpret_cast<gdv_int64>(&context), 10, 0), 0);
+  EXPECT_EQ(context.has_error(), true);
+  EXPECT_EQ(context.get_error(), "divide by zero error");
+
+  context.Reset();
+  EXPECT_EQ(divide_int64_int64(reinterpret_cast<gdv_int64>(&context), 10, 2), 5);
+  EXPECT_EQ(context.has_error(), false);
+}
+
+// div: integer and floating point variants, including the zero-divisor error.
+TEST(TestArithmeticOps, TestDiv) {
+  gandiva::ExecutionContext context;
+  EXPECT_EQ(div_int64_int64(reinterpret_cast<gdv_int64>(&context), 101, 0), 0);
+  EXPECT_EQ(context.has_error(), true);
+  EXPECT_EQ(context.get_error(), "divide by zero error");
+  context.Reset();
+
+  EXPECT_EQ(div_int64_int64(reinterpret_cast<gdv_int64>(&context), 101, 111), 0);
+  EXPECT_EQ(context.has_error(), false);
+  context.Reset();
+
+  EXPECT_EQ(div_float64_float64(reinterpret_cast<gdv_int64>(&context), 1010.1010, 2.1),
+            481.0);
+  EXPECT_EQ(context.has_error(), false);
+  context.Reset();
+
+  EXPECT_EQ(
+      div_float64_float64(reinterpret_cast<gdv_int64>(&context), 1010.1010, 0.00000),
+      0.0);
+  EXPECT_EQ(context.has_error(), true);
+  EXPECT_EQ(context.get_error(), "divide by zero error");
+  context.Reset();
+
+  EXPECT_EQ(div_float32_float32(reinterpret_cast<gdv_int64>(&context), 1010.1010f, 2.1f),
+            481.0f);
+  EXPECT_EQ(context.has_error(), false);
+  context.Reset();
+}
+
+TEST(TestArithmeticOps, TestBitwiseOps) {
+  // bitwise AND
+  EXPECT_EQ(bitwise_and_int32_int32(0x0147D, 0x17159), 0x01059);
+  EXPECT_EQ(bitwise_and_int32_int32(0xFFFFFFCC, 0x00000297), 0x00000284);
+  EXPECT_EQ(bitwise_and_int32_int32(0x000, 0x285), 0x000);
+  EXPECT_EQ(bitwise_and_int64_int64(0x563672F83, 0x0D9FCF85B), 0x041642803);
+  EXPECT_EQ(bitwise_and_int64_int64(0xFFFFFFFFFFDA8F6A, 0xFFFFFFFFFFFF791C),
+            0xFFFFFFFFFFDA0908);
+  EXPECT_EQ(bitwise_and_int64_int64(0x6A5B1, 0x00000), 0x00000);
+
+  // bitwise OR
+  EXPECT_EQ(bitwise_or_int32_int32(0x0147D, 0x17159), 0x1757D);
+  EXPECT_EQ(bitwise_or_int32_int32(0xFFFFFFCC, 0x00000297), 0xFFFFFFDF);
+  EXPECT_EQ(bitwise_or_int32_int32(0x000, 0x285), 0x285);
+  EXPECT_EQ(bitwise_or_int64_int64(0x563672F83, 0x0D9FCF85B), 0x5FBFFFFDB);
+  EXPECT_EQ(bitwise_or_int64_int64(0xFFFFFFFFFFDA8F6A, 0xFFFFFFFFFFFF791C),
+            0xFFFFFFFFFFFFFF7E);
+  EXPECT_EQ(bitwise_or_int64_int64(0x6A5B1, 0x00000), 0x6A5B1);
+
+  // bitwise XOR
+  EXPECT_EQ(bitwise_xor_int32_int32(0x0147D, 0x17159), 0x16524);
+  EXPECT_EQ(bitwise_xor_int32_int32(0xFFFFFFCC, 0x00000297), 0XFFFFFD5B);
+  EXPECT_EQ(bitwise_xor_int32_int32(0x000, 0x285), 0x285);
+  EXPECT_EQ(bitwise_xor_int64_int64(0x563672F83, 0x0D9FCF85B), 0x5BA9BD7D8);
+  EXPECT_EQ(bitwise_xor_int64_int64(0xFFFFFFFFFFDA8F6A, 0xFFFFFFFFFFFF791C), 0X25F676);
+  EXPECT_EQ(bitwise_xor_int64_int64(0x6A5B1, 0x00000), 0x6A5B1);
+  EXPECT_EQ(bitwise_xor_int64_int64(0x6A5B1, 0x6A5B1), 0x00000);
+
+  // bitwise NOT
+  EXPECT_EQ(bitwise_not_int32(0x00017159), 0xFFFE8EA6);
+  EXPECT_EQ(bitwise_not_int32(0xFFFFF226), 0x00000DD9);
+  EXPECT_EQ(bitwise_not_int64(0x000000008BCAE9B4), 0xFFFFFFFF7435164B);
+  EXPECT_EQ(bitwise_not_int64(0xFFFFFF966C8D7997), 0x0000006993728668);
+  EXPECT_EQ(bitwise_not_int64(0x0000000000000000), 0xFFFFFFFFFFFFFFFF);
+}
+
+TEST(TestArithmeticOps, TestIntCastFloatDouble) {
+  // castINT from floats
+  EXPECT_EQ(castINT_float32(6.6f), 7);
+  EXPECT_EQ(castINT_float32(-6.6f), -7);
+  EXPECT_EQ(castINT_float32(-6.3f), -6);
+  EXPECT_EQ(castINT_float32(0.0f), 0);
+  // Floating point literal: the integer literal -0 is just 0 and would not
+  // exercise negative zero.
+  EXPECT_EQ(castINT_float32(-0.0f), 0);
+
+  // castINT from doubles
+  EXPECT_EQ(castINT_float64(6.6), 7);
+  EXPECT_EQ(castINT_float64(-6.6), -7);
+  EXPECT_EQ(castINT_float64(-6.3), -6);
+  EXPECT_EQ(castINT_float64(0.0), 0);
+  EXPECT_EQ(castINT_float64(-0.0), 0);
+  EXPECT_EQ(castINT_float64(999999.99999999999999999999999), 1000000);
+  EXPECT_EQ(castINT_float64(-999999.99999999999999999999999), -1000000);
+  EXPECT_EQ(castINT_float64(INT32_MAX), 2147483647);
+  EXPECT_EQ(castINT_float64(-2147483647), -2147483647);
+}
+
+TEST(TestArithmeticOps, TestBigIntCastFloatDouble) {
+  // castBIGINT from floats
+  EXPECT_EQ(castBIGINT_float32(6.6f), 7);
+  EXPECT_EQ(castBIGINT_float32(-6.6f), -7);
+  EXPECT_EQ(castBIGINT_float32(-6.3f), -6);
+  EXPECT_EQ(castBIGINT_float32(0.0f), 0);
+  // Floating point literal so that negative zero is really passed in.
+  EXPECT_EQ(castBIGINT_float32(-0.0f), 0);
+
+  // castBIGINT from doubles
+  EXPECT_EQ(castBIGINT_float64(6.6), 7);
+  EXPECT_EQ(castBIGINT_float64(-6.6), -7);
+  EXPECT_EQ(castBIGINT_float64(-6.3), -6);
+  EXPECT_EQ(castBIGINT_float64(0.0), 0);
+  EXPECT_EQ(castBIGINT_float64(-0.0), 0);
+  EXPECT_EQ(castBIGINT_float64(999999.99999999999999999999999), 1000000);
+  EXPECT_EQ(castBIGINT_float64(-999999.99999999999999999999999), -1000000);
+  EXPECT_EQ(castBIGINT_float64(INT32_MAX), 2147483647);
+  EXPECT_EQ(castBIGINT_float64(-2147483647), -2147483647);
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/bitmap.cc b/src/arrow/cpp/src/gandiva/precompiled/bitmap.cc
new file mode 100644
index 000000000..332f08dbe
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/bitmap.cc
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// BitMap functions
+
+#include "arrow/util/bit_util.h"
+
+extern "C" {
+
+#include "./types.h"
+
+// Size/index helpers. Every use of the argument is parenthesised so that an
+// expression argument such as POS_TO_BYTE_INDEX(a + b) groups as intended.
+#define BITS_TO_BYTES(x) (((x) + 7) / 8)
+#define BITS_TO_WORDS(x) (((x) + 63) / 64)
+
+#define POS_TO_BYTE_INDEX(p) ((p) / 8)
+#define POS_TO_BIT_INDEX(p) ((p) % 8)
+
+// Returns the bit at 'position' in 'bmap'.
+FORCE_INLINE
+bool bitMapGetBit(const uint8_t* bmap, int64_t position) {
+  return arrow::BitUtil::GetBit(bmap, position);
+}
+
+// Like bitMapGetBit, but a null bitmap pointer is treated as "all valid".
+FORCE_INLINE
+bool bitMapValidityGetBit(const uint8_t* bmap, int64_t position) {
+  if (bmap == nullptr) {
+    // if validity bitmap is null, all entries are valid.
+    return true;
+  } else {
+    return bitMapGetBit(bmap, position);
+  }
+}
+
+// Sets the bit at 'position' in 'bmap' to 'value'.
+FORCE_INLINE
+void bitMapSetBit(uint8_t* bmap, int64_t position, bool value) {
+  arrow::BitUtil::SetBitTo(bmap, position, value);
+}
+
+// Clear the bit if value = false. Does nothing if value = true.
+FORCE_INLINE
+void bitMapClearBitIfFalse(uint8_t* bmap, int64_t position, bool value) {
+  if (!value) {
+    arrow::BitUtil::ClearBit(bmap, position);
+  }
+}
+
+} // extern "C"
diff --git a/src/arrow/cpp/src/gandiva/precompiled/bitmap_test.cc b/src/arrow/cpp/src/gandiva/precompiled/bitmap_test.cc
new file mode 100644
index 000000000..ac3084ade
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/bitmap_test.cc
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+// Set and clear a single bit and read it back.
+TEST(TestBitMap, TestSimple) {
+  static const int kNumBytes = 16;
+  // Value-initialise to all zeros; this avoids calling memset without
+  // including the header that declares it.
+  uint8_t bit_map[kNumBytes] = {};
+
+  EXPECT_EQ(bitMapGetBit(bit_map, 100), false);
+
+  // set 100th bit and verify
+  bitMapSetBit(bit_map, 100, true);
+  EXPECT_EQ(bitMapGetBit(bit_map, 100), true);
+
+  // clear 100th bit and verify
+  bitMapSetBit(bit_map, 100, false);
+  EXPECT_EQ(bitMapGetBit(bit_map, 100), false);
+}
+
+// bitMapClearBitIfFalse only ever clears: value=true must leave the bit as-is.
+TEST(TestBitMap, TestClearIfFalse) {
+  static const int kNumBytes = 32;
+  // Value-initialise to all zeros (see TestSimple).
+  uint8_t bit_map[kNumBytes] = {};
+
+  bitMapSetBit(bit_map, 24, true);
+
+  // bit should remain unchanged.
+  bitMapClearBitIfFalse(bit_map, 24, true);
+  EXPECT_EQ(bitMapGetBit(bit_map, 24), true);
+
+  // bit should be cleared.
+  bitMapClearBitIfFalse(bit_map, 24, false);
+  EXPECT_EQ(bitMapGetBit(bit_map, 24), false);
+
+  // this function should have no impact if the bit is already clear.
+  bitMapClearBitIfFalse(bit_map, 24, true);
+  EXPECT_EQ(bitMapGetBit(bit_map, 24), false);
+
+  bitMapClearBitIfFalse(bit_map, 24, false);
+  EXPECT_EQ(bitMapGetBit(bit_map, 24), false);
+}
+
+} // namespace gandiva
diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc
new file mode 100644
index 000000000..61cac6062
--- /dev/null
+++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc
@@ -0,0 +1,723 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Algorithms adapted from Apache Impala
+
+#include "gandiva/precompiled/decimal_ops.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "arrow/util/logging.h"
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/decimal_xlarge.h"
+#include "gandiva/gdv_function_stubs.h"
+
+// Several operations (multiply, divide, mod, ..) require converting to 256-bit, and we
+// use the boost library for doing 256-bit operations. To avoid references to boost from
+// the precompiled-to-ir code (this causes issues with symbol resolution at runtime), we
+// use a wrapper exported from the CPP code. The wrapper functions are named gdv_xlarge_xx
+
+namespace gandiva {
+namespace decimalops {
+
+using arrow::BasicDecimal128;
+
+/// Scale 'in' up by 'delta'; a delta <= 0 returns 'in' unchanged.
+static BasicDecimal128 CheckAndIncreaseScale(const BasicDecimal128& in, int32_t delta) {
+  return (delta <= 0) ? in : in.IncreaseScaleBy(delta);
+}
+
+/// Scale 'in' down by 'delta'; a delta <= 0 returns 'in' unchanged.
+static BasicDecimal128 CheckAndReduceScale(const BasicDecimal128& in, int32_t delta) {
+  return (delta <= 0) ? in : in.ReduceScaleBy(delta);
+}
+
+/// Adjust x and y to the same scale, and add them.
+/// The sum is left at the higher of the two input scales; 'out_scale' is not
+/// read by this function.
+static BasicDecimal128 AddFastPath(const BasicDecimalScalar128& x,
+                                   const BasicDecimalScalar128& y, int32_t out_scale) {
+  auto higher_scale = std::max(x.scale(), y.scale());
+
+  auto x_scaled = CheckAndIncreaseScale(x.value(), higher_scale - x.scale());
+  auto y_scaled = CheckAndIncreaseScale(y.value(), higher_scale - y.scale());
+  return x_scaled + y_scaled;
+}
+
+/// Add x and y, caller has ensured there can be no overflow.
+/// The fast-path sum (at the higher input scale) is reduced to 'out_scale'.
+static BasicDecimal128 AddNoOverflow(const BasicDecimalScalar128& x,
+                                     const BasicDecimalScalar128& y, int32_t out_scale) {
+  auto higher_scale = std::max(x.scale(), y.scale());
+  auto sum = AddFastPath(x, y, out_scale);
+  return CheckAndReduceScale(sum, higher_scale - out_scale);
+}
+
+/// Both x_value and y_value must be >= 0
+/// Adds the whole and fractional parts separately and recombines them at
+/// 'out_scale'.
+static BasicDecimal128 AddLargePositive(const BasicDecimalScalar128& x,
+                                        const BasicDecimalScalar128& y,
+                                        int32_t out_scale) {
+  DCHECK_GE(x.value(), 0);
+  DCHECK_GE(y.value(), 0);
+
+  // separate out whole/fractions.
+  BasicDecimal128 x_left, x_right, y_left, y_right;
+  x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right);
+  y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right);
+
+  // Adjust fractional parts to higher scale.
+  auto higher_scale = std::max(x.scale(), y.scale());
+  auto x_right_scaled = CheckAndIncreaseScale(x_right, higher_scale - x.scale());
+  auto y_right_scaled = CheckAndIncreaseScale(y_right, higher_scale - y.scale());
+
+  BasicDecimal128 right;
+  BasicDecimal128 carry_to_left;
+  auto multiplier = BasicDecimal128::GetScaleMultiplier(higher_scale);
+  // The comparison is arranged as x >= multiplier - y so that the carry test
+  // does not need to form x + y first.
+  if (x_right_scaled >= multiplier - y_right_scaled) {
+    right = x_right_scaled - (multiplier - y_right_scaled);
+    carry_to_left = 1;
+  } else {
+    right = x_right_scaled + y_right_scaled;
+    carry_to_left = 0;
+  }
+  right = CheckAndReduceScale(right, higher_scale - out_scale);
+
+  auto left = x_left + y_left + carry_to_left;
+  return (left * BasicDecimal128::GetScaleMultiplier(out_scale)) + right;
+}
+
+/// x_value and y_value cannot be 0, and one must be positive and the other negative.
+static BasicDecimal128 AddLargeNegative(const BasicDecimalScalar128& x,
+                                        const BasicDecimalScalar128& y,
+                                        int32_t out_scale) {
+  DCHECK_NE(x.value(), 0);
+  DCHECK_NE(y.value(), 0);
+  DCHECK((x.value() < 0 && y.value() > 0) || (x.value() > 0 && y.value() < 0));
+
+  // separate out whole/fractions.
+  BasicDecimal128 x_left, x_right, y_left, y_right;
+  x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right);
+  y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right);
+
+  // Adjust fractional parts to higher scale.
+  auto higher_scale = std::max(x.scale(), y.scale());
+  x_right = CheckAndIncreaseScale(x_right, higher_scale - x.scale());
+  y_right = CheckAndIncreaseScale(y_right, higher_scale - y.scale());
+
+  // Overflow not possible because one is +ve and the other is -ve.
+  auto left = x_left + y_left;
+  auto right = x_right + y_right;
+
+  // If the whole and fractional parts have different signs, then we need to make the
+  // fractional part have the same sign as the whole part. If either left or right is
+  // zero, then nothing needs to be done.
+  if (left < 0 && right > 0) {
+    left += 1;
+    right -= BasicDecimal128::GetScaleMultiplier(higher_scale);
+  } else if (left > 0 && right < 0) {
+    left -= 1;
+    right += BasicDecimal128::GetScaleMultiplier(higher_scale);
+  }
+  right = CheckAndReduceScale(right, higher_scale - out_scale);
+  return (left * BasicDecimal128::GetScaleMultiplier(out_scale)) + right;
+}
+
+/// Dispatches on the signs of x and y to the whole/fraction based adders above.
+static BasicDecimal128 AddLarge(const BasicDecimalScalar128& x,
+                                const BasicDecimalScalar128& y, int32_t out_scale) {
+  if (x.value() >= 0 && y.value() >= 0) {
+    // both positive or 0
+    return AddLargePositive(x, y, out_scale);
+  } else if (x.value() <= 0 && y.value() <= 0) {
+    // both negative or 0
+    // Negate both operands, add them as non-negative values, negate the result.
+    BasicDecimalScalar128 x_neg(-x.value(), x.precision(), x.scale());
+    BasicDecimalScalar128 y_neg(-y.value(), y.precision(), y.scale());
+    return -AddLargePositive(x_neg, y_neg, out_scale);
+  } else {
+    // one positive and the other negative
+    return AddLargeNegative(x, y, out_scale);
+  }
+}
+
+// Suppose we have a number that requires x bits to be represented and we scale it up by
+// 10^scale_by. Let's say now y bits are required to represent it. This function returns
+// the maximum possible y - x for a given 'scale_by'.
+inline int32_t MaxBitsRequiredIncreaseAfterScaling(int32_t scale_by) {
+  // We rely on the following formula:
+  // bits_required(x * 10^y) <= bits_required(x) + floor(log2(10^y)) + 1
+  // We precompute floor(log2(10^x)) + 1 for x = 0, 1, 2...75, 76
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  // Lookup table indexed directly by 'scale_by'.
+  static const int32_t floor_log2_plus_one[] = {
+      0,   4,   7,   10,  14,  17,  20,  24,  27,  30,  34,  37,  40,  44,  47,  50,
+      54,  57,  60,  64,  67,  70,  74,  77,  80,  84,  87,  90,  94,  97,  100, 103,
+      107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157,
+      160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210,
+      213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 246, 250, 253};
+  return floor_log2_plus_one[scale_by];
+}
+
+// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_by,
+// this function returns the minimum number of leading zeros the result can have.
+// The result is num_lz minus the table value above, so it can be negative.
+inline int32_t MinLeadingZerosAfterScaling(int32_t num_lz, int32_t scale_by) {
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  int32_t result = num_lz - MaxBitsRequiredIncreaseAfterScaling(scale_by);
+  return result;
+}
+
+// Returns the maximum possible number of bits required to represent num * 10^scale_by.
+inline int32_t MaxBitsRequiredAfterScaling(const BasicDecimalScalar128& num,
+                                           int32_t scale_by) {
+  auto value = num.value();
+  auto value_abs = value.Abs();
+
+  // Bits occupied by the absolute value: 128 minus its leading binary zeros.
+  int32_t num_occupied = 128 - value_abs.CountLeadingBinaryZeros();
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  return num_occupied + MaxBitsRequiredIncreaseAfterScaling(scale_by);
+}
+
+// Returns the minimum number of leading zeros x or y would have after one of them gets
+// scaled up to match the scale of the other one.
+inline int32_t MinLeadingZeros(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y) { + auto x_value = x.value(); + auto x_value_abs = x_value.Abs(); + + auto y_value = y.value(); + auto y_value_abs = y_value.Abs(); + + int32_t x_lz = x_value_abs.CountLeadingBinaryZeros(); + int32_t y_lz = y_value_abs.CountLeadingBinaryZeros(); + if (x.scale() < y.scale()) { + x_lz = MinLeadingZerosAfterScaling(x_lz, y.scale() - x.scale()); + } else if (x.scale() > y.scale()) { + y_lz = MinLeadingZerosAfterScaling(y_lz, x.scale() - y.scale()); + } + return std::min(x_lz, y_lz); +} + +BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, + int32_t out_precision, int32_t out_scale) { + if (out_precision < DecimalTypeUtil::kMaxPrecision) { + // fast-path add + return AddFastPath(x, y, out_scale); + } else { + int32_t min_lz = MinLeadingZeros(x, y); + if (min_lz >= 3) { + // If both numbers have at least MIN_LZ leading zeros, we can add them directly + // without the risk of overflow. + // We want the result to have at least 2 leading zeros, which ensures that it fits + // into the maximum decimal because 2^126 - 1 < 10^38 - 1. If both x and y have at + // least 3 leading zeros, then we are guaranteed that the result will have at lest 2 + // leading zeros. + return AddNoOverflow(x, y, out_scale); + } else { + // slower-version : add whole/fraction parts separately, and then, combine. + return AddLarge(x, y, out_scale); + } + } +} + +BasicDecimal128 Subtract(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, + int32_t out_precision, int32_t out_scale) { + return Add(x, {-y.value(), y.precision(), y.scale()}, out_precision, out_scale); +} + +// Multiply when the out_precision is 38, and there is no trimming of the scale i.e +// the intermediate value is the same as the final value. 
+static BasicDecimal128 MultiplyMaxPrecisionNoScaleDown(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale, + bool* overflow) { + DCHECK_EQ(x.scale() + y.scale(), out_scale); + + BasicDecimal128 result; + auto x_abs = BasicDecimal128::Abs(x.value()); + auto y_abs = BasicDecimal128::Abs(y.value()); + + if (x_abs > BasicDecimal128::GetMaxValue() / y_abs) { + *overflow = true; + } else { + // We've verified that the result will fit into 128 bits. + *overflow = false; + result = x.value() * y.value(); + } + return result; +} + +// Multiply when the out_precision is 38, and there is trimming of the scale i.e +// the intermediate value could be larger than the final value. +static BasicDecimal128 MultiplyMaxPrecisionAndScaleDown(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale, + bool* overflow) { + auto delta_scale = x.scale() + y.scale() - out_scale; + DCHECK_GT(delta_scale, 0); + + *overflow = false; + BasicDecimal128 result; + auto x_abs = BasicDecimal128::Abs(x.value()); + auto y_abs = BasicDecimal128::Abs(y.value()); + + // It's possible that the intermediate value does not fit in 128-bits, but the + // final value will (after scaling down). + bool needs_int256 = false; + int32_t total_leading_zeros = + x_abs.CountLeadingBinaryZeros() + y_abs.CountLeadingBinaryZeros(); + // This check is quick, but conservative. In some cases it will indicate that + // converting to 256 bits is necessary, when it's not actually the case. + needs_int256 = total_leading_zeros <= 128; + if (ARROW_PREDICT_FALSE(needs_int256)) { + int64_t result_high; + uint64_t result_low; + + // This requires converting to 256-bit, and we use the boost library for that. To + // avoid references to boost from the precompiled-to-ir code (this causes issues + // with symbol resolution at runtime), we use a wrapper exported from the CPP code. 
+ gdv_xlarge_multiply_and_scale_down(x.value().high_bits(), x.value().low_bits(), + y.value().high_bits(), y.value().low_bits(), + delta_scale, &result_high, &result_low, overflow); + result = BasicDecimal128(result_high, result_low); + } else { + if (ARROW_PREDICT_TRUE(delta_scale <= 38)) { + // The largest value that result can have here is (2^64 - 1) * (2^63 - 1), which is + // greater than BasicDecimal128::kMaxValue. + result = x.value() * y.value(); + // Since delta_scale is greater than zero, result can now be at most + // ((2^64 - 1) * (2^63 - 1)) / 10, which is less than BasicDecimal128::kMaxValue, so + // there cannot be any overflow. + result = result.ReduceScaleBy(delta_scale); + } else { + // We are multiplying decimal(38, 38) by decimal(38, 38). The result should be a + // decimal(38, 37), so delta scale = 38 + 38 - 37 = 39. Since we are not in the + // 256 bit intermediate value case and we are scaling down by 39, then we are + // guaranteed that the result is 0 (even if we try to round). The largest possible + // intermediate result is 38 "9"s. If we scale down by 39, the leftmost 9 is now + // two digits to the right of the rightmost "visible" one. The reason why we have + // to handle this case separately is because a scale multiplier with a delta_scale + // 39 does not fit into 128 bit. + DCHECK_EQ(delta_scale, 39); + result = 0; + } + } + return result; +} + +// Multiply when the out_precision is 38. 
+static BasicDecimal128 MultiplyMaxPrecision(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale, bool* overflow) { + auto delta_scale = x.scale() + y.scale() - out_scale; + DCHECK_GE(delta_scale, 0); + if (delta_scale == 0) { + return MultiplyMaxPrecisionNoScaleDown(x, y, out_scale, overflow); + } else { + return MultiplyMaxPrecisionAndScaleDown(x, y, out_scale, overflow); + } +} + +BasicDecimal128 Multiply(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, + int32_t out_precision, int32_t out_scale, bool* overflow) { + BasicDecimal128 result; + *overflow = false; + if (out_precision < DecimalTypeUtil::kMaxPrecision) { + // fast-path multiply + result = x.value() * y.value(); + DCHECK_EQ(x.scale() + y.scale(), out_scale); + DCHECK_LE(BasicDecimal128::Abs(result), BasicDecimal128::GetMaxValue()); + } else if (x.value() == 0 || y.value() == 0) { + // Handle this separately to avoid divide-by-zero errors. + result = BasicDecimal128(0, 0); + } else { + result = MultiplyMaxPrecision(x, y, out_scale, overflow); + } + DCHECK(*overflow || BasicDecimal128::Abs(result) <= BasicDecimal128::GetMaxValue()); + return result; +} + +BasicDecimal128 Divide(int64_t context, const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale, bool* overflow) { + if (y.value() == 0) { + char const* err_msg = "divide by zero error"; + gdv_fn_context_set_error_msg(context, err_msg); + return 0; + } + + // scale up to the output scale, and do an integer division. + int32_t delta_scale = out_scale + y.scale() - x.scale(); + DCHECK_GE(delta_scale, 0); + + BasicDecimal128 result; + auto num_bits_required_after_scaling = MaxBitsRequiredAfterScaling(x, delta_scale); + if (num_bits_required_after_scaling <= 127) { + // fast-path. The dividend fits in 128-bit after scaling too. + *overflow = false; + + // do the division. 
+ auto x_scaled = CheckAndIncreaseScale(x.value(), delta_scale); + BasicDecimal128 remainder; + auto status = x_scaled.Divide(y.value(), &result, &remainder); + DCHECK_EQ(status, arrow::DecimalStatus::kSuccess); + + // round-up + if (BasicDecimal128::Abs(2 * remainder) >= BasicDecimal128::Abs(y.value())) { + result += (x.value().Sign() ^ y.value().Sign()) + 1; + } + } else { + // convert to 256-bit and do the divide. + *overflow = delta_scale > 38 && num_bits_required_after_scaling > 255; + if (!*overflow) { + int64_t result_high; + uint64_t result_low; + + gdv_xlarge_scale_up_and_divide(x.value().high_bits(), x.value().low_bits(), + y.value().high_bits(), y.value().low_bits(), + delta_scale, &result_high, &result_low, overflow); + result = BasicDecimal128(result_high, result_low); + } + } + return result; +} + +BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale, bool* overflow) { + if (y.value() == 0) { + char const* err_msg = "divide by zero error"; + gdv_fn_context_set_error_msg(context, err_msg); + return 0; + } + + // Adsjust x and y to the same scale (higher one), and then, do a integer mod. 
+ *overflow = false; + BasicDecimal128 result; + int32_t min_lz = MinLeadingZeros(x, y); + if (min_lz >= 2) { + auto higher_scale = std::max(x.scale(), y.scale()); + auto x_scaled = CheckAndIncreaseScale(x.value(), higher_scale - x.scale()); + auto y_scaled = CheckAndIncreaseScale(y.value(), higher_scale - y.scale()); + result = x_scaled % y_scaled; + DCHECK_LE(BasicDecimal128::Abs(result), BasicDecimal128::GetMaxValue()); + } else { + int64_t result_high; + uint64_t result_low; + + gdv_xlarge_mod(x.value().high_bits(), x.value().low_bits(), x.scale(), + y.value().high_bits(), y.value().low_bits(), y.scale(), &result_high, + &result_low); + result = BasicDecimal128(result_high, result_low); + } + DCHECK(BasicDecimal128::Abs(result) <= BasicDecimal128::Abs(x.value()) || + BasicDecimal128::Abs(result) <= BasicDecimal128::Abs(y.value())); + return result; +} + +int32_t CompareSameScale(const BasicDecimal128& x, const BasicDecimal128& y) { + if (x == y) { + return 0; + } else if (x < y) { + return -1; + } else { + return 1; + } +} + +int32_t Compare(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y) { + int32_t delta_scale = x.scale() - y.scale(); + + // fast-path : both are of the same scale. + if (delta_scale == 0) { + return CompareSameScale(x.value(), y.value()); + } + + // Check if we'll need more than 256-bits after adjusting the scale. 
+ bool need256 = + (delta_scale < 0 && x.precision() - delta_scale > DecimalTypeUtil::kMaxPrecision) || + (y.precision() + delta_scale > DecimalTypeUtil::kMaxPrecision); + if (need256) { + return gdv_xlarge_compare(x.value().high_bits(), x.value().low_bits(), x.scale(), + y.value().high_bits(), y.value().low_bits(), y.scale()); + } else { + BasicDecimal128 x_scaled; + BasicDecimal128 y_scaled; + + if (delta_scale < 0) { + x_scaled = x.value().IncreaseScaleBy(-delta_scale); + y_scaled = y.value(); + } else { + x_scaled = x.value(); + y_scaled = y.value().IncreaseScaleBy(delta_scale); + } + return CompareSameScale(x_scaled, y_scaled); + } +} + +#define DECIMAL_OVERFLOW_IF(condition, overflow) \ + do { \ + if (*overflow || (condition)) { \ + *overflow = true; \ + return 0; \ + } \ + } while (0) + +static BasicDecimal128 GetMaxValue(int32_t precision) { + return BasicDecimal128::GetScaleMultiplier(precision) - 1; +} + +// Compute the double scale multipliers once. +static std::array<double, DecimalTypeUtil::kMaxPrecision + 1> kDoubleScaleMultipliers = + ([]() -> std::array<double, DecimalTypeUtil::kMaxPrecision + 1> { + std::array<double, DecimalTypeUtil::kMaxPrecision + 1> values; + values[0] = 1.0; + for (int32_t idx = 1; idx <= DecimalTypeUtil::kMaxPrecision; idx++) { + values[idx] = values[idx - 1] * 10; + } + return values; + })(); + +BasicDecimal128 FromDouble(double in, int32_t precision, int32_t scale, bool* overflow) { + // Multiply decimal with the scale + auto unscaled = in * kDoubleScaleMultipliers[scale]; + DECIMAL_OVERFLOW_IF(std::isnan(unscaled), overflow); + + unscaled = std::round(unscaled); + + // convert scaled double to int128 + int32_t sign = unscaled < 0 ? 
-1 : 1; + auto unscaled_abs = std::abs(unscaled); + + // overflow if > 2^127 - 1 + DECIMAL_OVERFLOW_IF(unscaled_abs > std::ldexp(static_cast<double>(1), 127) - 1, + overflow); + + uint64_t high_bits = static_cast<uint64_t>(std::ldexp(unscaled_abs, -64)); + uint64_t low_bits = static_cast<uint64_t>( + unscaled_abs - std::ldexp(static_cast<double>(high_bits), 64)); + + auto result = BasicDecimal128(static_cast<int64_t>(high_bits), low_bits); + + // overflow if > max value based on precision + DECIMAL_OVERFLOW_IF(result > GetMaxValue(precision), overflow); + return result * sign; +} + +double ToDouble(const BasicDecimalScalar128& in, bool* overflow) { + // convert int128 to double + int64_t sign = in.value().Sign(); + auto value_abs = BasicDecimal128::Abs(in.value()); + double unscaled = static_cast<double>(value_abs.low_bits()) + + std::ldexp(static_cast<double>(value_abs.high_bits()), 64); + + // scale double. + return (unscaled * sign) / kDoubleScaleMultipliers[in.scale()]; +} + +BasicDecimal128 FromInt64(int64_t in, int32_t precision, int32_t scale, bool* overflow) { + // check if multiplying by scale will cause an overflow. + DECIMAL_OVERFLOW_IF(std::abs(in) > GetMaxValue(precision - scale), overflow); + return in * BasicDecimal128::GetScaleMultiplier(scale); +} + +// Helper function to modify the scale and/or precision of a decimal value. +static BasicDecimal128 ModifyScaleAndPrecision(const BasicDecimalScalar128& x, + int32_t out_precision, int32_t out_scale, + bool* overflow) { + int32_t delta_scale = out_scale - x.scale(); + if (delta_scale >= 0) { + // check if multiplying by delta_scale will cause an overflow. + DECIMAL_OVERFLOW_IF( + BasicDecimal128::Abs(x.value()) > GetMaxValue(out_precision - delta_scale), + overflow); + return x.value().IncreaseScaleBy(delta_scale); + } else { + // Do not do any rounding, that is handled by the caller. 
+ auto result = x.value().ReduceScaleBy(-delta_scale, false); + DECIMAL_OVERFLOW_IF(BasicDecimal128::Abs(result) > GetMaxValue(out_precision), + overflow); + return result; + } +} + +enum RoundType { + kRoundTypeCeil, // +1 if +ve and trailing value is > 0, else no rounding. + kRoundTypeFloor, // -1 if -ve and trailing value is < 0, else no rounding. + kRoundTypeTrunc, // no rounding, truncate the trailing digits. + kRoundTypeHalfRoundUp, // if +ve and trailing value is >= half of base, +1. + // else if -ve and trailing value is >= half of base, -1. +}; + +// Compute the rounding delta for the givven rounding type. +static int32_t ComputeRoundingDelta(const BasicDecimal128& x, int32_t x_scale, + int32_t out_scale, RoundType type) { + if (type == kRoundTypeTrunc || // no rounding for this type. + out_scale >= x_scale) { // no digits dropped, so no rounding. + return 0; + } + + int32_t result = 0; + switch (type) { + case kRoundTypeHalfRoundUp: { + auto base = BasicDecimal128::GetScaleMultiplier(x_scale - out_scale); + auto trailing = x % base; + if (trailing == 0) { + result = 0; + } else if (trailing.Abs() < base / 2) { + result = 0; + } else { + result = (x < 0) ? -1 : 1; + } + break; + } + + case kRoundTypeCeil: + if (x < 0) { + // no rounding for -ve + result = 0; + } else { + auto base = BasicDecimal128::GetScaleMultiplier(x_scale - out_scale); + auto trailing = x % base; + result = (trailing == 0) ? 0 : 1; + } + break; + + case kRoundTypeFloor: + if (x > 0) { + // no rounding for +ve + result = 0; + } else { + auto base = BasicDecimal128::GetScaleMultiplier(x_scale - out_scale); + auto trailing = x % base; + result = (trailing == 0) ? 0 : -1; + } + break; + + case kRoundTypeTrunc: + break; + } + return result; +} + +// Modify the scale and round. 
+static BasicDecimal128 RoundWithPositiveScale(const BasicDecimalScalar128& x, + int32_t out_precision, int32_t out_scale, + RoundType round_type, bool* overflow) { + DCHECK_GE(out_scale, 0); + + auto scaled = ModifyScaleAndPrecision(x, out_precision, out_scale, overflow); + if (*overflow) { + return 0; + } + + auto delta = ComputeRoundingDelta(x.value(), x.scale(), out_scale, round_type); + if (delta == 0) { + return scaled; + } + + // If there is a rounding delta, the output scale must be less than the input scale. + // That means at least one digit is dropped after the decimal. The delta add can add + // utmost one digit before the decimal. So, overflow will occur only if the output + // precision has changed. + DCHECK_GT(x.scale(), out_scale); + auto result = scaled + delta; + DECIMAL_OVERFLOW_IF(out_precision < x.precision() && + BasicDecimal128::Abs(result) > GetMaxValue(out_precision), + overflow); + return result; +} + +// Modify scale to drop all digits to the right of the decimal and round. +// Then, zero out 'rounding_scale' number of digits to the left of the decimal point. +static BasicDecimal128 RoundWithNegativeScale(const BasicDecimalScalar128& x, + int32_t out_precision, + int32_t rounding_scale, + RoundType round_type, bool* overflow) { + DCHECK_LT(rounding_scale, 0); + + // get rid of the fractional part. 
+ auto scaled = ModifyScaleAndPrecision(x, out_precision, 0, overflow); + auto rounding_delta = ComputeRoundingDelta(scaled, 0, -rounding_scale, round_type); + + auto base = BasicDecimal128::GetScaleMultiplier(-rounding_scale); + auto delta = rounding_delta * base - (scaled % base); + DECIMAL_OVERFLOW_IF(BasicDecimal128::Abs(scaled) > + GetMaxValue(out_precision) - BasicDecimal128::Abs(delta), + overflow); + return scaled + delta; +} + +BasicDecimal128 Round(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, int32_t rounding_scale, bool* overflow) { + // no-op if target scale is same as arg scale + if (x.scale() == out_scale && rounding_scale >= 0) { + return x.value(); + } + + if (rounding_scale < 0) { + return RoundWithNegativeScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeHalfRoundUp, overflow); + } else { + return RoundWithPositiveScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeHalfRoundUp, overflow); + } +} + +BasicDecimal128 Truncate(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, int32_t rounding_scale, bool* overflow) { + // no-op if target scale is same as arg scale + if (x.scale() == out_scale && rounding_scale >= 0) { + return x.value(); + } + + if (rounding_scale < 0) { + return RoundWithNegativeScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeTrunc, overflow); + } else { + return RoundWithPositiveScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeTrunc, overflow); + } +} + +BasicDecimal128 Ceil(const BasicDecimalScalar128& x, bool* overflow) { + return RoundWithPositiveScale(x, x.precision(), 0, RoundType::kRoundTypeCeil, overflow); +} + +BasicDecimal128 Floor(const BasicDecimalScalar128& x, bool* overflow) { + return RoundWithPositiveScale(x, x.precision(), 0, RoundType::kRoundTypeFloor, + overflow); +} + +BasicDecimal128 Convert(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, bool* overflow) { + 
DCHECK_GE(out_scale, 0); + DCHECK_LE(out_scale, DecimalTypeUtil::kMaxScale); + DCHECK_GT(out_precision, 0); + DCHECK_LE(out_precision, DecimalTypeUtil::kMaxScale); + + return RoundWithPositiveScale(x, out_precision, out_scale, + RoundType::kRoundTypeHalfRoundUp, overflow); +} + +int64_t ToInt64(const BasicDecimalScalar128& in, bool* overflow) { + auto rounded = RoundWithPositiveScale(in, in.precision(), 0 /*scale*/, + RoundType::kRoundTypeHalfRoundUp, overflow); + DECIMAL_OVERFLOW_IF((rounded > std::numeric_limits<int64_t>::max()) || + (rounded < std::numeric_limits<int64_t>::min()), + overflow); + return static_cast<int64_t>(rounded.low_bits()); +} + +} // namespace decimalops +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.h b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.h new file mode 100644 index 000000000..292dce220 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.h @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> +#include <string> +#include "gandiva/basic_decimal_scalar.h" + +namespace gandiva { +namespace decimalops { + +/// Return the sum of 'x' and 'y'. 
+/// out_precision and out_scale are passed along for efficiency, they must match +/// the rules in DecimalTypeUtil::GetResultType. +arrow::BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, + int32_t out_precision, int32_t out_scale); + +/// Subtract 'y' from 'x', and return the result. +arrow::BasicDecimal128 Subtract(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale); + +/// Multiply 'x' by 'y', and return the result. +arrow::BasicDecimal128 Multiply(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale, bool* overflow); + +/// Divide 'x' by 'y', and return the result. +arrow::BasicDecimal128 Divide(int64_t context, const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale, bool* overflow); + +/// Divide 'x' by 'y', and return the remainder. +arrow::BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale, bool* overflow); + +/// Compare two decimals. Returns : +/// 0 if x == y +/// 1 if x > y +/// -1 if x < y +int32_t Compare(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y); + +/// Convert to decimal from double. +BasicDecimal128 FromDouble(double in, int32_t precision, int32_t scale, bool* overflow); + +/// Convert from decimal to double. +double ToDouble(const BasicDecimalScalar128& in, bool* overflow); + +/// Convert to decimal from gdv_int64. +BasicDecimal128 FromInt64(int64_t in, int32_t precision, int32_t scale, bool* overflow); + +/// Convert from decimal to gdv_int64 +int64_t ToInt64(const BasicDecimalScalar128& in, bool* overflow); + +/// Convert from one decimal scale/precision to another. +BasicDecimal128 Convert(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, bool* overflow); + +/// round decimal. 
+BasicDecimal128 Round(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, int32_t rounding_scale, bool* overflow); + +/// truncate decimal. +BasicDecimal128 Truncate(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, int32_t rounding_scale, bool* overflow); + +/// ceil decimal +BasicDecimal128 Ceil(const BasicDecimalScalar128& x, bool* overflow); + +/// floor decimal +BasicDecimal128 Floor(const BasicDecimalScalar128& x, bool* overflow); + +} // namespace decimalops +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops_test.cc new file mode 100644 index 000000000..be8a1fe8a --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops_test.cc @@ -0,0 +1,1095 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <gtest/gtest.h> +#include <algorithm> +#include <limits> +#include <memory> +#include <tuple> +#include <vector> + +#include "arrow/testing/gtest_util.h" +#include "gandiva/decimal_scalar.h" +#include "gandiva/decimal_type_util.h" +#include "gandiva/execution_context.h" +#include "gandiva/precompiled/decimal_ops.h" +#include "gandiva/precompiled/types.h" + +namespace gandiva { + +const arrow::Decimal128 kThirtyFive9s(std::string(35, '9')); +const arrow::Decimal128 kThirtySix9s(std::string(36, '9')); +const arrow::Decimal128 kThirtyEight9s(std::string(38, '9')); + +class TestDecimalSql : public ::testing::Test { + protected: + static void Verify(DecimalTypeUtil::Op op, const DecimalScalar128& x, + const DecimalScalar128& y, const DecimalScalar128& expected_result, + bool expected_overflow); + + static void VerifyAllSign(DecimalTypeUtil::Op op, const DecimalScalar128& left, + const DecimalScalar128& right, + const DecimalScalar128& expected_output, + bool expected_overflow); + + void AddAndVerify(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result) { + // TODO: overflow checks + return Verify(DecimalTypeUtil::kOpAdd, x, y, expected_result, false); + } + + void SubtractAndVerify(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result) { + // TODO: overflow checks + return Verify(DecimalTypeUtil::kOpSubtract, x, y, expected_result, false); + } + + void MultiplyAndVerify(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result, + bool expected_overflow) { + return Verify(DecimalTypeUtil::kOpMultiply, x, y, expected_result, expected_overflow); + } + + void MultiplyAndVerifyAllSign(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result, + bool expected_overflow) { + return VerifyAllSign(DecimalTypeUtil::kOpMultiply, x, y, expected_result, + expected_overflow); + } + + void DivideAndVerify(const 
DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result, bool expected_overflow) { + return Verify(DecimalTypeUtil::kOpDivide, x, y, expected_result, expected_overflow); + } + + void DivideAndVerifyAllSign(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result, + bool expected_overflow) { + return VerifyAllSign(DecimalTypeUtil::kOpDivide, x, y, expected_result, + expected_overflow); + } + + void ModAndVerify(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result, bool expected_overflow) { + return Verify(DecimalTypeUtil::kOpMod, x, y, expected_result, expected_overflow); + } + + void ModAndVerifyAllSign(const DecimalScalar128& x, const DecimalScalar128& y, + const DecimalScalar128& expected_result, + bool expected_overflow) { + return VerifyAllSign(DecimalTypeUtil::kOpMod, x, y, expected_result, + expected_overflow); + } +}; + +#define EXPECT_DECIMAL_EQ(op, x, y, expected_result, expected_overflow, actual_result, \ + actual_overflow) \ + { \ + EXPECT_TRUE(expected_overflow == actual_overflow) \ + << op << "(" << (x).ToString() << " and " << (y).ToString() << ")" \ + << " expected overflow : " << expected_overflow \ + << " actual overflow : " << actual_overflow; \ + if (!expected_overflow) { \ + EXPECT_TRUE(expected_result == actual_result) \ + << op << "(" << (x).ToString() << " and " << (y).ToString() << ")" \ + << " expected : " << expected_result.ToString() \ + << " actual : " << actual_result.ToString(); \ + } \ + } + +void TestDecimalSql::Verify(DecimalTypeUtil::Op op, const DecimalScalar128& x, + const DecimalScalar128& y, + const DecimalScalar128& expected_result, + bool expected_overflow) { + auto t1 = std::make_shared<arrow::Decimal128Type>(x.precision(), x.scale()); + auto t2 = std::make_shared<arrow::Decimal128Type>(y.precision(), y.scale()); + bool overflow = false; + int64_t context = 0; + + Decimal128TypePtr out_type; + 
ARROW_EXPECT_OK(DecimalTypeUtil::GetResultType(op, {t1, t2}, &out_type)); + + arrow::BasicDecimal128 out_value; + std::string op_name; + switch (op) { + case DecimalTypeUtil::kOpAdd: + op_name = "add"; + out_value = decimalops::Add(x, y, out_type->precision(), out_type->scale()); + break; + + case DecimalTypeUtil::kOpSubtract: + op_name = "subtract"; + out_value = decimalops::Subtract(x, y, out_type->precision(), out_type->scale()); + break; + + case DecimalTypeUtil::kOpMultiply: + op_name = "multiply"; + out_value = + decimalops::Multiply(x, y, out_type->precision(), out_type->scale(), &overflow); + break; + + case DecimalTypeUtil::kOpDivide: + op_name = "divide"; + out_value = decimalops::Divide(context, x, y, out_type->precision(), + out_type->scale(), &overflow); + break; + + case DecimalTypeUtil::kOpMod: + op_name = "mod"; + out_value = decimalops::Mod(context, x, y, out_type->precision(), out_type->scale(), + &overflow); + break; + + default: + // not implemented. + ASSERT_FALSE(true); + } + EXPECT_DECIMAL_EQ(op_name, x, y, expected_result, expected_overflow, + DecimalScalar128(out_value, out_type->precision(), out_type->scale()), + overflow); +} + +void TestDecimalSql::VerifyAllSign(DecimalTypeUtil::Op op, const DecimalScalar128& left, + const DecimalScalar128& right, + const DecimalScalar128& expected_output, + bool expected_overflow) { + // both +ve + Verify(op, left, right, expected_output, expected_overflow); + + // left -ve + Verify(op, -left, right, -expected_output, expected_overflow); + + if (op == DecimalTypeUtil::kOpMod) { + // right -ve + Verify(op, left, -right, expected_output, expected_overflow); + + // both -ve + Verify(op, -left, -right, -expected_output, expected_overflow); + } else { + ASSERT_TRUE(op == DecimalTypeUtil::kOpMultiply || op == DecimalTypeUtil::kOpDivide); + + // right -ve + Verify(op, left, -right, -expected_output, expected_overflow); + + // both -ve + Verify(op, -left, -right, expected_output, expected_overflow); + } +} + 
+TEST_F(TestDecimalSql, Add) { + // fast-path + AddAndVerify(DecimalScalar128{"201", 30, 3}, // x + DecimalScalar128{"301", 30, 3}, // y + DecimalScalar128{"502", 31, 3}); // expected + + // max precision + AddAndVerify(DecimalScalar128{"09999999999999999999999999999999000000", 38, 5}, // x + DecimalScalar128{"100", 38, 7}, // y + DecimalScalar128{"99999999999999999999999999999990000010", 38, 6}); + + // Both -ve + AddAndVerify(DecimalScalar128{"-201", 30, 3}, // x + DecimalScalar128{"-301", 30, 2}, // y + DecimalScalar128{"-3211", 32, 3}); // expected + + // -ve and max precision + AddAndVerify(DecimalScalar128{"-09999999999999999999999999999999000000", 38, 5}, // x + DecimalScalar128{"-100", 38, 7}, // y + DecimalScalar128{"-99999999999999999999999999999990000010", 38, 6}); +} + +TEST_F(TestDecimalSql, Subtract) { + // fast-path + SubtractAndVerify(DecimalScalar128{"201", 30, 3}, // x + DecimalScalar128{"301", 30, 3}, // y + DecimalScalar128{"-100", 31, 3}); // expected + + // max precision + SubtractAndVerify( + DecimalScalar128{"09999999999999999999999999999999000000", 38, 5}, // x + DecimalScalar128{"100", 38, 7}, // y + DecimalScalar128{"99999999999999999999999999999989999990", 38, 6}); + + // Both -ve + SubtractAndVerify(DecimalScalar128{"-201", 30, 3}, // x + DecimalScalar128{"-301", 30, 2}, // y + DecimalScalar128{"2809", 32, 3}); // expected + + // -ve and max precision + SubtractAndVerify( + DecimalScalar128{"-09999999999999999999999999999999000000", 38, 5}, // x + DecimalScalar128{"-100", 38, 7}, // y + DecimalScalar128{"-99999999999999999999999999999989999990", 38, 6}); +} + +TEST_F(TestDecimalSql, Multiply) { + // fast-path : out_precision < 38 + MultiplyAndVerifyAllSign(DecimalScalar128{"201", 10, 3}, // x + DecimalScalar128{"301", 10, 2}, // y + DecimalScalar128{"60501", 21, 5}, // expected + false); // overflow + + // right 0 + MultiplyAndVerify(DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{"0", 20, 2}, // y + DecimalScalar128{"0", 38, 
5}, // expected + false); // overflow + + // left 0 + MultiplyAndVerify(DecimalScalar128{"0", 20, 3}, // x + DecimalScalar128{"301", 20, 2}, // y + DecimalScalar128{"0", 38, 5}, // expected + false); // overflow + + // out_precision == 38, small input values, no trimming of scale (scale <= 6 doesn't + // get trimmed). + MultiplyAndVerify(DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{"301", 20, 2}, // y + DecimalScalar128{"60501", 38, 5}, // expected + false); // overflow + + // out_precision == 38, large values, no trimming of scale (scale <= 6 doesn't + // get trimmed). + MultiplyAndVerifyAllSign( + DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{kThirtyFive9s, 35, 2}, // y + DecimalScalar128{"20099999999999999999999999999999999799", 38, 5}, // expected + false); // overflow + + // out_precision == 38, very large values, no trimming of scale (scale <= 6 doesn't + // get trimmed). overflow expected. + MultiplyAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{kThirtySix9s, 35, 2}, // y + DecimalScalar128{"0", 38, 5}, // expected + true); // overflow + + MultiplyAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{kThirtyEight9s, 35, 2}, // y + DecimalScalar128{"0", 38, 5}, // expected + true); // overflow + + // out_precision == 38, small input values, trimming of scale. + MultiplyAndVerifyAllSign(DecimalScalar128{"201", 20, 5}, // x + DecimalScalar128{"301", 20, 5}, // y + DecimalScalar128{"61", 38, 7}, // expected + false); // overflow + + // out_precision == 38, large values, trimming of scale. + MultiplyAndVerifyAllSign( + DecimalScalar128{"201", 20, 5}, // x + DecimalScalar128{kThirtyFive9s, 35, 5}, // y + DecimalScalar128{"2010000000000000000000000000000000", 38, 6}, // expected + false); // overflow + + // out_precision == 38, very large values, trimming of scale (requires convert to 256). 
+ MultiplyAndVerifyAllSign( + DecimalScalar128{kThirtyFive9s, 38, 20}, // x + DecimalScalar128{kThirtySix9s, 38, 20}, // y + DecimalScalar128{"9999999999999999999999999999999999890", 38, 6}, // expected + false); // overflow + + // out_precision == 38, very large values, trimming of scale (requires convert to 256). + // should cause overflow. + MultiplyAndVerifyAllSign(DecimalScalar128{kThirtyFive9s, 38, 4}, // x + DecimalScalar128{kThirtySix9s, 38, 4}, // y + DecimalScalar128{"0", 38, 6}, // expected + true); // overflow + + // corner cases. + MultiplyAndVerifyAllSign( + DecimalScalar128{0, UINT64_MAX, 38, 4}, // x + DecimalScalar128{0, UINT64_MAX, 38, 4}, // y + DecimalScalar128{"3402823669209384634264811192843491082", 38, 6}, // expected + false); // overflow + + MultiplyAndVerifyAllSign( + DecimalScalar128{0, UINT64_MAX, 38, 4}, // x + DecimalScalar128{0, INT64_MAX, 38, 4}, // y + DecimalScalar128{"1701411834604692317040171876053197783", 38, 6}, // expected + false); // overflow + + MultiplyAndVerifyAllSign(DecimalScalar128{"201", 38, 38}, // x + DecimalScalar128{"301", 38, 38}, // y + DecimalScalar128{"0", 38, 37}, // expected + false); // overflow + + MultiplyAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 38}, // x + DecimalScalar128{0, UINT64_MAX, 38, 38}, // y + DecimalScalar128{"0", 38, 37}, // expected + false); // overflow + + MultiplyAndVerifyAllSign( + DecimalScalar128{kThirtyFive9s, 38, 38}, // x + DecimalScalar128{kThirtySix9s, 38, 38}, // y + DecimalScalar128{"100000000000000000000000000000000", 38, 37}, // expected + false); // overflow +} + +TEST_F(TestDecimalSql, Divide) { + DivideAndVerifyAllSign(DecimalScalar128{"201", 10, 3}, // x + DecimalScalar128{"301", 10, 2}, // y + DecimalScalar128{"6677740863787", 23, 14}, // expected + false); // overflow + + DivideAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{"301", 20, 2}, // y + DecimalScalar128{"667774086378737542", 38, 19}, // expected + false); // overflow + + 
DivideAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{kThirtyFive9s, 35, 2}, // y + DecimalScalar128{"0", 38, 19}, // expected + false); // overflow + + DivideAndVerifyAllSign( + DecimalScalar128{kThirtyFive9s, 35, 6}, // x + DecimalScalar128{"201", 20, 3}, // y + DecimalScalar128{"497512437810945273631840796019900493", 38, 6}, // expected + false); // overflow + + DivideAndVerifyAllSign(DecimalScalar128{kThirtyEight9s, 38, 20}, // x + DecimalScalar128{kThirtyFive9s, 38, 20}, // y + DecimalScalar128{"1000000000", 38, 6}, // expected + false); // overflow + + DivideAndVerifyAllSign(DecimalScalar128{"31939128063561476055", 38, 8}, // x + DecimalScalar128{"10000", 20, 0}, // y + DecimalScalar128{"3193912806356148", 38, 8}, // expected + false); + + // Corner cases + DivideAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x + DecimalScalar128{0, UINT64_MAX, 38, 4}, // y + DecimalScalar128{"1000000", 38, 6}, // expected + false); // overflow + + DivideAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x + DecimalScalar128{0, INT64_MAX, 38, 4}, // y + DecimalScalar128{"2000000", 38, 6}, // expected + false); // overflow + + DivideAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 19, 5}, // x + DecimalScalar128{0, INT64_MAX, 19, 5}, // y + DecimalScalar128{"20000000000000000001", 38, 19}, // expected + false); // overflow + + DivideAndVerifyAllSign(DecimalScalar128{kThirtyFive9s, 38, 37}, // x + DecimalScalar128{kThirtyFive9s, 38, 38}, // y + DecimalScalar128{"10000000", 38, 6}, // expected + false); // overflow + + // overflow + DivideAndVerifyAllSign(DecimalScalar128{kThirtyEight9s, 38, 6}, // x + DecimalScalar128{"201", 20, 3}, // y + DecimalScalar128{"0", 38, 6}, // expected + true); +} + +TEST_F(TestDecimalSql, Mod) { + ModAndVerifyAllSign(DecimalScalar128{"201", 10, 3}, // x + DecimalScalar128{"301", 10, 2}, // y + DecimalScalar128{"201", 10, 3}, // expected + false); // overflow + + ModAndVerify(DecimalScalar128{"201", 20, 
2}, // x + DecimalScalar128{"301", 20, 3}, // y + DecimalScalar128{"204", 20, 3}, // expected + false); // overflow + + ModAndVerifyAllSign(DecimalScalar128{"201", 20, 3}, // x + DecimalScalar128{kThirtyFive9s, 35, 2}, // y + DecimalScalar128{"201", 20, 3}, // expected + false); // overflow + + ModAndVerifyAllSign(DecimalScalar128{kThirtyFive9s, 35, 6}, // x + DecimalScalar128{"201", 20, 3}, // y + DecimalScalar128{"180999", 23, 6}, // expected + false); // overflow + + ModAndVerifyAllSign(DecimalScalar128{kThirtyEight9s, 38, 20}, // x + DecimalScalar128{kThirtyFive9s, 38, 21}, // y + DecimalScalar128{"9990", 38, 21}, // expected + false); // overflow + + ModAndVerifyAllSign(DecimalScalar128{"31939128063561476055", 38, 8}, // x + DecimalScalar128{"10000", 20, 0}, // y + DecimalScalar128{"63561476055", 28, 8}, // expected + false); + + ModAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x + DecimalScalar128{0, UINT64_MAX, 38, 4}, // y + DecimalScalar128{"0", 38, 4}, // expected + false); // overflow + + ModAndVerifyAllSign(DecimalScalar128{0, UINT64_MAX, 38, 4}, // x + DecimalScalar128{0, INT64_MAX, 38, 4}, // y + DecimalScalar128{"1", 38, 4}, // expected + false); // overflow +} + +TEST_F(TestDecimalSql, DivideByZero) { + gandiva::ExecutionContext context; + int32_t result_precision; + int32_t result_scale; + bool overflow; + + // divide-by-zero should cause an error. + context.Reset(); + result_precision = 38; + result_scale = 19; + decimalops::Divide(reinterpret_cast<gdv_int64>(&context), + DecimalScalar128{"201", 20, 3}, DecimalScalar128{"0", 20, 2}, + result_precision, result_scale, &overflow); + EXPECT_TRUE(context.has_error()); + EXPECT_EQ(context.get_error(), "divide by zero error"); + + // divide-by-nonzero should not cause an error. 
+ context.Reset(); + decimalops::Divide(reinterpret_cast<gdv_int64>(&context), + DecimalScalar128{"201", 20, 3}, DecimalScalar128{"1", 20, 2}, + result_precision, result_scale, &overflow); + EXPECT_FALSE(context.has_error()); + + // mod-by-zero should cause an error. + context.Reset(); + result_precision = 20; + result_scale = 3; + decimalops::Mod(reinterpret_cast<gdv_int64>(&context), DecimalScalar128{"201", 20, 3}, + DecimalScalar128{"0", 20, 2}, result_precision, result_scale, + &overflow); + EXPECT_TRUE(context.has_error()); + EXPECT_EQ(context.get_error(), "divide by zero error"); + + // mod-by-nonzero should not cause an error. + context.Reset(); + decimalops::Mod(reinterpret_cast<gdv_int64>(&context), DecimalScalar128{"201", 20, 3}, + DecimalScalar128{"1", 20, 2}, result_precision, result_scale, + &overflow); + EXPECT_FALSE(context.has_error()); +} + +TEST_F(TestDecimalSql, Compare) { + // x.scale == y.scale + EXPECT_EQ( + 0, decimalops::Compare(DecimalScalar128{100, 38, 6}, DecimalScalar128{100, 38, 6})); + EXPECT_EQ( + 1, decimalops::Compare(DecimalScalar128{200, 38, 6}, DecimalScalar128{100, 38, 6})); + EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{100, 38, 6}, + DecimalScalar128{200, 38, 6})); + + // x.scale == y.scale, with -ve. 
+ EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{-100, 38, 6}, + DecimalScalar128{-100, 38, 6})); + EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{-200, 38, 6}, + DecimalScalar128{-100, 38, 6})); + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{-100, 38, 6}, + DecimalScalar128{-200, 38, 6})); + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{100, 38, 6}, + DecimalScalar128{-200, 38, 6})); + + for (int32_t precision : {16, 36, 38}) { + // x_scale > y_scale + EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{10000, precision, 6}, + DecimalScalar128{100, precision, 4})); + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{20000, precision, 6}, + DecimalScalar128{100, precision, 4})); + EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{10000, precision, 6}, + DecimalScalar128{200, precision, 4})); + + // x.scale > y.scale, with -ve + EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{-10000, precision, 6}, + DecimalScalar128{-100, precision, 4})); + EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{-20000, precision, 6}, + DecimalScalar128{-100, precision, 4})); + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{-10000, precision, 6}, + DecimalScalar128{-200, precision, 4})); + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{10000, precision, 6}, + DecimalScalar128{-200, precision, 4})); + + // x.scale < y.scale + EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{100, precision, 4}, + DecimalScalar128{10000, precision, 6})); + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{200, precision, 4}, + DecimalScalar128{10000, precision, 6})); + EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{100, precision, 4}, + DecimalScalar128{20000, precision, 6})); + + // x.scale < y.scale, with -ve + EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{-100, precision, 4}, + DecimalScalar128{-10000, precision, 6})); + EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{-200, precision, 4}, + DecimalScalar128{-10000, precision, 6})); + EXPECT_EQ(1, 
decimalops::Compare(DecimalScalar128{-100, precision, 4}, + DecimalScalar128{-20000, precision, 6})); + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{100, precision, 4}, + DecimalScalar128{-200, precision, 6})); + } + + // large cases. + EXPECT_EQ(0, decimalops::Compare(DecimalScalar128{kThirtyEight9s, 38, 6}, + DecimalScalar128{kThirtyEight9s, 38, 6})); + + EXPECT_EQ(1, decimalops::Compare(DecimalScalar128{kThirtyEight9s, 38, 6}, + DecimalScalar128{kThirtySix9s, 38, 4})); + + EXPECT_EQ(-1, decimalops::Compare(DecimalScalar128{kThirtyEight9s, 38, 6}, + DecimalScalar128{kThirtyEight9s, 38, 4})); +} + +TEST_F(TestDecimalSql, Round) { + // expected, input, rounding_scale, overflow + using TupleType = std::tuple<DecimalScalar128, DecimalScalar128, int32_t, bool>; + std::vector<TupleType> test_values = { + // examples from + // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_round + std::make_tuple(DecimalScalar128{-1, 36, 0}, DecimalScalar128{-123, 38, 2}, 0, + false), + std::make_tuple(DecimalScalar128{-2, 36, 0}, DecimalScalar128{-158, 38, 2}, 0, + false), + std::make_tuple(DecimalScalar128{2, 36, 0}, DecimalScalar128{158, 38, 2}, 0, false), + std::make_tuple(DecimalScalar128{-13, 36, 1}, DecimalScalar128{-1298, 38, 3}, 1, + false), + std::make_tuple(DecimalScalar128{-1, 35, 0}, DecimalScalar128{-1298, 38, 3}, 0, + false), + std::make_tuple(DecimalScalar128{20, 35, 0}, DecimalScalar128{23298, 38, 3}, -1, + false), + std::make_tuple(DecimalScalar128{100, 38, 0}, DecimalScalar128{122, 38, 0}, -2, + false), + std::make_tuple(DecimalScalar128{3, 37, 0}, DecimalScalar128{25, 38, 1}, 0, false), + + // border cases + std::make_tuple(DecimalScalar128{INT64_MIN / 100, 36, 0}, + DecimalScalar128{INT64_MIN, 38, 2}, 0, false), + + std::make_tuple(DecimalScalar128{INT64_MIN, 38, 0}, + DecimalScalar128{INT64_MIN, 38, 0}, 0, false), + std::make_tuple(DecimalScalar128{0, 0, 36, 0}, DecimalScalar128{0, 0, 38, 2}, 0, + false), + 
std::make_tuple(DecimalScalar128{INT64_MAX, 38, 0}, + DecimalScalar128{INT64_MAX, 38, 0}, 0, false), + + std::make_tuple(DecimalScalar128{INT64_MAX / 100, 36, 0}, + DecimalScalar128{INT64_MAX, 38, 2}, 0, false), + + // large scales + std::make_tuple(DecimalScalar128{0, 0, 22, 0}, DecimalScalar128{12345, 38, 16}, 0, + false), + + std::make_tuple( + DecimalScalar128{BasicDecimal128{124}, 22, 0}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, 0, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{-124}, 22, 0}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, 0, + false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{124}, 6, 0}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, 0, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{-124}, 6, 0}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, 0, + false), + + // scale bigger than arg + std::make_tuple( + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32}, 35, + false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32}, 35, + false), + + // overflow + std::make_tuple(DecimalScalar128{0, 0, 1, 0}, DecimalScalar128{99, 2, 1}, 0, true), + }; + + for (auto iter : test_values) { + auto expected = std::get<0>(iter); + auto input = std::get<1>(iter); + auto rounding_scale = std::get<2>(iter); + auto expected_overflow = std::get<3>(iter); + bool overflow = false; + + EXPECT_EQ(expected.value(), + decimalops::Round(input, expected.precision(), expected.scale(), + rounding_scale, &overflow)) + << " failed on input " << input << " rounding scale " << rounding_scale; + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow 
not expected for input " << input; + } + } +} + +TEST_F(TestDecimalSql, Truncate) { + // expected, input, rounding_scale, overflow + using TupleType = std::tuple<DecimalScalar128, DecimalScalar128, int32_t, bool>; + std::vector<TupleType> test_values = { + // examples from + // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_truncate + std::make_tuple(DecimalScalar128{12, 36, 1}, DecimalScalar128{1223, 38, 3}, 1, + false), + std::make_tuple(DecimalScalar128{19, 36, 1}, DecimalScalar128{1999, 38, 3}, 1, + false), + std::make_tuple(DecimalScalar128{1, 35, 0}, DecimalScalar128{1999, 38, 3}, 0, + false), + std::make_tuple(DecimalScalar128{-19, 36, 1}, DecimalScalar128{-1999, 38, 3}, 1, + false), + std::make_tuple(DecimalScalar128{100, 38, 0}, DecimalScalar128{122, 38, 0}, -2, + false), + std::make_tuple(DecimalScalar128{1028, 38, 0}, DecimalScalar128{1028, 38, 0}, 0, + false), + + // border cases + std::make_tuple(DecimalScalar128{BasicDecimal128{INT64_MIN / 100}, 36, 0}, + DecimalScalar128{INT64_MIN, 38, 2}, 0, false), + + std::make_tuple(DecimalScalar128{INT64_MIN, 38, 0}, + DecimalScalar128{INT64_MIN, 38, 0}, 0, false), + std::make_tuple(DecimalScalar128{0, 0, 38, 0}, DecimalScalar128{0, 0, 38, 2}, 0, + false), + std::make_tuple(DecimalScalar128{INT64_MAX, 38, 0}, + DecimalScalar128{INT64_MAX, 38, 0}, 0, false), + + std::make_tuple(DecimalScalar128{BasicDecimal128(INT64_MAX / 100), 36, 0}, + DecimalScalar128{INT64_MAX, 38, 2}, 0, false), + + // large scales + std::make_tuple(DecimalScalar128{BasicDecimal128{0, 0}, 22, 0}, + DecimalScalar128{12345, 38, 16}, 0, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{123}, 22, 0}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, 0, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{-123}, 22, 0}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, 0, + false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{123}, 6, 0}, + 
DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, 0, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{-123}, 6, 0}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, 0, + false), + + // overflow + std::make_tuple( + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(32), 38, 32}, 35, + false), + std::make_tuple( + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(32), 38, 32}, 35, + false), + }; + + for (auto iter : test_values) { + auto expected = std::get<0>(iter); + auto input = std::get<1>(iter); + auto rounding_scale = std::get<2>(iter); + auto expected_overflow = std::get<3>(iter); + bool overflow = false; + + EXPECT_EQ(expected.value(), + decimalops::Truncate(input, expected.precision(), expected.scale(), + rounding_scale, &overflow)) + << " failed on input " << input << " rounding scale " << rounding_scale; + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } + } +} + +TEST_F(TestDecimalSql, Ceil) { + // expected, input, overflow + std::vector<std::tuple<BasicDecimal128, DecimalScalar128, bool>> test_values = { + // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_ceil + std::make_tuple(2, DecimalScalar128{123, 38, 2}, false), + std::make_tuple(-1, DecimalScalar128{-123, 38, 2}, false), + + // border cases + std::make_tuple(BasicDecimal128{INT64_MIN / 100}, + DecimalScalar128{INT64_MIN, 38, 2}, false), + + std::make_tuple(INT64_MIN, DecimalScalar128{INT64_MIN, 38, 0}, false), + std::make_tuple(BasicDecimal128{0, 0}, DecimalScalar128{0, 0, 38, 2}, false), + std::make_tuple(INT64_MAX, DecimalScalar128{INT64_MAX, 38, 0}, false), + + std::make_tuple(BasicDecimal128(INT64_MAX / 100 + 1), + 
DecimalScalar128{INT64_MAX, 38, 2}, false), + + // large scales + std::make_tuple(BasicDecimal128{0, 1}, DecimalScalar128{12345, 38, 16}, false), + std::make_tuple( + BasicDecimal128{124}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, false), + std::make_tuple( + BasicDecimal128{-123}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, false), + std::make_tuple( + BasicDecimal128{124}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, false), + std::make_tuple( + BasicDecimal128{-123}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, false), + }; + + for (auto iter : test_values) { + auto expected = std::get<0>(iter); + auto input = std::get<1>(iter); + auto expected_overflow = std::get<2>(iter); + bool overflow = false; + + EXPECT_EQ(expected, decimalops::Ceil(input, &overflow)) + << " failed on input " << input; + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } + } +} + +TEST_F(TestDecimalSql, Floor) { + // expected, input, overflow + std::vector<std::tuple<BasicDecimal128, DecimalScalar128, bool>> test_values = { + // https://dev.mysql.com/doc/refman/5.7/en/mathematical-functions.html#function_floor + std::make_tuple(1, DecimalScalar128{123, 38, 2}, false), + std::make_tuple(-2, DecimalScalar128{-123, 38, 2}, false), + + // border cases + std::make_tuple(BasicDecimal128{INT64_MIN / 100 - 1}, + DecimalScalar128{INT64_MIN, 38, 2}, false), + + std::make_tuple(INT64_MIN, DecimalScalar128{INT64_MIN, 38, 0}, false), + std::make_tuple(BasicDecimal128{0, 0}, DecimalScalar128{0, 0, 38, 2}, false), + std::make_tuple(INT64_MAX, DecimalScalar128{INT64_MAX, 38, 0}, false), + + std::make_tuple(BasicDecimal128{INT64_MAX / 100}, + DecimalScalar128{INT64_MAX, 38, 2}, false), + + // large scales + std::make_tuple(BasicDecimal128{0, 0}, 
DecimalScalar128{12345, 38, 16}, false), + std::make_tuple( + BasicDecimal128{123}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(14), 38, 16}, false), + std::make_tuple( + BasicDecimal128{-124}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(14), 38, 16}, false), + std::make_tuple( + BasicDecimal128{123}, + DecimalScalar128{BasicDecimal128{12389}.IncreaseScaleBy(30), 38, 32}, false), + std::make_tuple( + BasicDecimal128{-124}, + DecimalScalar128{BasicDecimal128{-12389}.IncreaseScaleBy(30), 38, 32}, false), + }; + + for (auto iter : test_values) { + auto expected = std::get<0>(iter); + auto input = std::get<1>(iter); + auto expected_overflow = std::get<2>(iter); + bool overflow = false; + + EXPECT_EQ(expected, decimalops::Floor(input, &overflow)) + << " failed on input " << input; + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } + } +} + +TEST_F(TestDecimalSql, Convert) { + // expected, input, overflow + std::vector<std::tuple<DecimalScalar128, DecimalScalar128, bool>> test_values = { + // simple cases + std::make_tuple(DecimalScalar128{12, 38, 1}, DecimalScalar128{123, 38, 2}, false), + std::make_tuple(DecimalScalar128{1230, 38, 3}, DecimalScalar128{123, 38, 2}, false), + std::make_tuple(DecimalScalar128{123, 38, 2}, DecimalScalar128{123, 38, 2}, false), + + std::make_tuple(DecimalScalar128{-12, 38, 1}, DecimalScalar128{-123, 38, 2}, false), + std::make_tuple(DecimalScalar128{-1230, 38, 3}, DecimalScalar128{-123, 38, 2}, + false), + std::make_tuple(DecimalScalar128{-123, 38, 2}, DecimalScalar128{-123, 38, 2}, + false), + + // border cases + std::make_tuple( + DecimalScalar128{BasicDecimal128(INT64_MIN).ReduceScaleBy(1), 38, 1}, + DecimalScalar128{INT64_MIN, 38, 2}, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128(INT64_MIN).IncreaseScaleBy(1), 38, 3}, + DecimalScalar128{INT64_MIN, 38, 2}, false), 
+ std::make_tuple(DecimalScalar128{-3, 38, 1}, DecimalScalar128{-32, 38, 2}, false), + std::make_tuple(DecimalScalar128{0, 0, 38, 1}, DecimalScalar128{0, 0, 38, 2}, + false), + std::make_tuple(DecimalScalar128{3, 38, 1}, DecimalScalar128{32, 38, 2}, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128(INT64_MAX).ReduceScaleBy(1), 38, 1}, + DecimalScalar128{INT64_MAX, 38, 2}, false), + std::make_tuple( + DecimalScalar128{BasicDecimal128(INT64_MAX).IncreaseScaleBy(1), 38, 3}, + DecimalScalar128{INT64_MAX, 38, 2}, false), + + // large scales + std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(16), 38, 18}, + DecimalScalar128{123, 38, 2}, false), + std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(16), 38, 18}, + DecimalScalar128{-123, 38, 2}, false), + std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(30), 38, 32}, + DecimalScalar128{123, 38, 2}, false), + std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(30), 38, 32}, + DecimalScalar128{-123, 38, 2}, false), + + // overflow due to scaling up. + std::make_tuple(DecimalScalar128{0, 0, 38, 36}, DecimalScalar128{12345, 38, 2}, + true), + std::make_tuple(DecimalScalar128{0, 0, 38, 36}, DecimalScalar128{-12345, 38, 2}, + true), + + // overflow due to precision. 
+ std::make_tuple(DecimalScalar128{0, 0, 5, 3}, DecimalScalar128{12345, 5, 2}, true), + }; + + for (auto iter : test_values) { + auto expected = std::get<0>(iter); + auto input = std::get<1>(iter); + auto expected_overflow = std::get<2>(iter); + bool overflow = false; + + EXPECT_EQ(expected.value(), decimalops::Convert(input, expected.precision(), + expected.scale(), &overflow)) + << " failed on input " << input; + + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } + } +} + +// double can store up to this integer value without losing precision +static const int64_t kMaxDoubleInt = 1ull << 53; + +TEST_F(TestDecimalSql, FromDouble) { + // expected, input, overflow + std::vector<std::tuple<DecimalScalar128, double, bool>> test_values = { + // simple cases + std::make_tuple(DecimalScalar128{-16285, 38, 3}, -16.285, false), + std::make_tuple(DecimalScalar128{-162850, 38, 4}, -16.285, false), + std::make_tuple(DecimalScalar128{-1629, 38, 2}, -16.285, false), + + std::make_tuple(DecimalScalar128{16285, 38, 3}, 16.285, false), + std::make_tuple(DecimalScalar128{162850, 38, 4}, 16.285, false), + std::make_tuple(DecimalScalar128{1629, 38, 2}, 16.285, false), + + // round up + std::make_tuple(DecimalScalar128{1, 18, 0}, 1.15470053838, false), + std::make_tuple(DecimalScalar128{-1, 18, 0}, -1.15470053838, false), + std::make_tuple(DecimalScalar128{2, 18, 0}, 1.55470053838, false), + std::make_tuple(DecimalScalar128{-2, 18, 0}, -1.55470053838, false), + + // border cases + std::make_tuple(DecimalScalar128{-kMaxDoubleInt, 38, 0}, + static_cast<double>(-kMaxDoubleInt), false), + std::make_tuple(DecimalScalar128{-32, 38, 0}, -32, false), + std::make_tuple(DecimalScalar128{0, 0, 38, 0}, 0, false), + std::make_tuple(DecimalScalar128{32, 38, 0}, 32, false), + std::make_tuple(DecimalScalar128{kMaxDoubleInt, 38, 0}, + static_cast<double>(kMaxDoubleInt), 
false), + + // large scales + std::make_tuple(DecimalScalar128{123, 38, 16}, 1.23E-14, false), + std::make_tuple(DecimalScalar128{123, 38, 32}, 1.23E-30, false), + std::make_tuple(DecimalScalar128{1230, 38, 33}, 1.23E-30, false), + std::make_tuple(DecimalScalar128{123, 38, 38}, 1.23E-36, false), + + // very small doubles + std::make_tuple(DecimalScalar128{0, 0, 38, 0}, std::numeric_limits<double>::min(), + false), + std::make_tuple(DecimalScalar128{0, 0, 38, 0}, -std::numeric_limits<double>::min(), + false), + + // overflow due to large -ve double + std::make_tuple(DecimalScalar128{0, 0, 38, 0}, -std::numeric_limits<double>::max(), + true), + // overflow due to large +ve double + std::make_tuple(DecimalScalar128{0, 0, 38, 0}, std::numeric_limits<double>::max(), + true), + // overflow due to scaling up. + std::make_tuple(DecimalScalar128{0, 0, 38, 36}, 123.45, true), + // overflow due to precision. + std::make_tuple(DecimalScalar128{0, 0, 4, 2}, 12345.67, true), + }; + + for (auto iter : test_values) { + auto dscalar = std::get<0>(iter); + auto input = std::get<1>(iter); + auto expected_overflow = std::get<2>(iter); + bool overflow = false; + + EXPECT_EQ(dscalar.value(), decimalops::FromDouble(input, dscalar.precision(), + dscalar.scale(), &overflow)) + << " failed on input " << input; + + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } + } +} + +#define EXPECT_FUZZY_EQ(x, y) \ + EXPECT_TRUE(x - y <= 0.00001) << "expected " << x << ", got " << y + +TEST_F(TestDecimalSql, ToDouble) { + // expected, input, overflow + std::vector<std::tuple<double, DecimalScalar128>> test_values = { + // simple ones + std::make_tuple(-16.285, DecimalScalar128{-16285, 38, 3}), + std::make_tuple(-162.85, DecimalScalar128{-16285, 38, 2}), + std::make_tuple(-1.6285, DecimalScalar128{-16285, 38, 4}), + + // large scales + std::make_tuple(1.23E-14, 
DecimalScalar128{123, 38, 16}), + std::make_tuple(1.23E-30, DecimalScalar128{123, 38, 32}), + std::make_tuple(1.23E-36, DecimalScalar128{123, 38, 38}), + + // border cases + std::make_tuple(static_cast<double>(-kMaxDoubleInt), + DecimalScalar128{-kMaxDoubleInt, 38, 0}), + std::make_tuple(-32, DecimalScalar128{-32, 38, 0}), + std::make_tuple(0, DecimalScalar128{0, 0, 38, 0}), + std::make_tuple(32, DecimalScalar128{32, 38, 0}), + std::make_tuple(static_cast<double>(kMaxDoubleInt), + DecimalScalar128{kMaxDoubleInt, 38, 0}), + }; + for (auto iter : test_values) { + auto input = std::get<1>(iter); + bool overflow = false; + + EXPECT_FUZZY_EQ(std::get<0>(iter), decimalops::ToDouble(input, &overflow)); + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } +} + +TEST_F(TestDecimalSql, FromInt64) { + // expected, input, overflow + std::vector<std::tuple<DecimalScalar128, int64_t, bool>> test_values = { + // simple cases + std::make_tuple(DecimalScalar128{-16000, 38, 3}, -16, false), + std::make_tuple(DecimalScalar128{-160000, 38, 4}, -16, false), + std::make_tuple(DecimalScalar128{-1600, 38, 2}, -16, false), + + std::make_tuple(DecimalScalar128{16000, 38, 3}, 16, false), + std::make_tuple(DecimalScalar128{160000, 38, 4}, 16, false), + std::make_tuple(DecimalScalar128{1600, 38, 2}, 16, false), + + // border cases + std::make_tuple(DecimalScalar128{INT64_MIN, 38, 0}, INT64_MIN, false), + std::make_tuple(DecimalScalar128{-32, 38, 0}, -32, false), + std::make_tuple(DecimalScalar128{0, 0, 38, 0}, 0, false), + std::make_tuple(DecimalScalar128{32, 38, 0}, 32, false), + std::make_tuple(DecimalScalar128{INT64_MAX, 38, 0}, INT64_MAX, false), + + // large scales + std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(16), 38, 16}, + 123, false), + std::make_tuple(DecimalScalar128{BasicDecimal128(123).IncreaseScaleBy(32), 38, 32}, + 123, false), + std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(16), 38, 16}, + -123, false), 
+ std::make_tuple(DecimalScalar128{BasicDecimal128(-123).IncreaseScaleBy(32), 38, 32}, + -123, false), + + // overflow due to scaling up. + std::make_tuple(DecimalScalar128{0, 0, 38, 36}, 123, true), + // overflow due to precision. + std::make_tuple(DecimalScalar128{0, 0, 4, 2}, 12345, true), + }; + + for (auto iter : test_values) { + auto dscalar = std::get<0>(iter); + auto input = std::get<1>(iter); + auto expected_overflow = std::get<2>(iter); + bool overflow = false; + + EXPECT_EQ(dscalar.value(), decimalops::FromInt64(input, dscalar.precision(), + dscalar.scale(), &overflow)) + << " failed on input " << input; + + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } + } +} + +TEST_F(TestDecimalSql, ToInt64) { + // expected, input, overflow + std::vector<std::tuple<int64_t, DecimalScalar128, bool>> test_values = { + // simple ones + std::make_tuple(-16, DecimalScalar128{-16285, 38, 3}, false), + std::make_tuple(-163, DecimalScalar128{-16285, 38, 2}, false), + std::make_tuple(-2, DecimalScalar128{-16285, 38, 4}, false), + + // border cases + std::make_tuple(INT64_MIN, DecimalScalar128{INT64_MIN, 38, 0}, false), + std::make_tuple(-32, DecimalScalar128{-32, 38, 0}, false), + std::make_tuple(0, DecimalScalar128{0, 0, 38, 0}, false), + std::make_tuple(32, DecimalScalar128{32, 38, 0}, false), + std::make_tuple(INT64_MAX, DecimalScalar128{INT64_MAX, 38, 0}, false), + + // large scales + std::make_tuple(0, DecimalScalar128{123, 38, 16}, false), + std::make_tuple(0, DecimalScalar128{123, 38, 32}, false), + std::make_tuple(0, DecimalScalar128{123, 38, 38}, false), + + // overflow test cases + // very large + std::make_tuple(0, DecimalScalar128{32768, 16, 38, 2}, true), + std::make_tuple(0, DecimalScalar128{INT64_MAX, UINT64_MAX, 38, 10}, true), + // very small + std::make_tuple(0, -DecimalScalar128{32768, 16, 38, 2}, true), + std::make_tuple(0, 
-DecimalScalar128{INT64_MAX, UINT64_MAX, 38, 10}, true), + }; + + for (auto iter : test_values) { + auto expected_value = std::get<0>(iter); + auto input = std::get<1>(iter); + auto expected_overflow = std::get<2>(iter); + bool overflow = false; + + EXPECT_EQ(expected_value, decimalops::ToInt64(input, &overflow)) + << " failed on input " << input; + if (expected_overflow) { + ASSERT_TRUE(overflow) << "overflow expected for input " << input; + } else { + ASSERT_FALSE(overflow) << "overflow not expected for input " << input; + } + } +} + +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/src/arrow/cpp/src/gandiva/precompiled/decimal_wrapper.cc new file mode 100644 index 000000000..082d5832d --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_wrapper.cc @@ -0,0 +1,433 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/precompiled/decimal_ops.h" +#include "gandiva/precompiled/types.h" + +extern "C" { + +FORCE_INLINE +void add_large_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int64_t y_high, uint64_t y_low, + int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); + gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); + + arrow::BasicDecimal128 out = gandiva::decimalops::Add(x, y, out_precision, out_scale); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void multiply_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int64_t y_high, uint64_t y_low, + int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); + gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); + bool overflow; + + // TODO ravindra: generate error on overflows (ARROW-4570). + arrow::BasicDecimal128 out = + gandiva::decimalops::Multiply(x, y, out_precision, out_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void divide_decimal128_decimal128(int64_t context, int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, int64_t y_high, + uint64_t y_low, int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); + gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); + bool overflow; + + // TODO ravindra: generate error on overflows (ARROW-4570). 
+ arrow::BasicDecimal128 out = + gandiva::decimalops::Divide(context, x, y, out_precision, out_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void mod_decimal128_decimal128(int64_t context, int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, int64_t y_high, + uint64_t y_low, int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); + gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); + bool overflow; + + // TODO ravindra: generate error on overflows (ARROW-4570). + arrow::BasicDecimal128 out = + gandiva::decimalops::Mod(context, x, y, out_precision, out_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +int32_t compare_decimal128_decimal128_internal(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + int64_t y_high, uint64_t y_low, + int32_t y_precision, int32_t y_scale) { + gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); + gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); + + return gandiva::decimalops::Compare(x, y); +} + +FORCE_INLINE +void abs_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { + gandiva::BasicDecimal128 x(x_high, x_low); + x.Abs(); + *out_high = x.high_bits(); + *out_low = x.low_bits(); +} + +FORCE_INLINE +void ceil_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + auto out = gandiva::decimalops::Ceil(x, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); 
+} + +FORCE_INLINE +void floor_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + auto out = gandiva::decimalops::Floor(x, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void round_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + auto out = gandiva::decimalops::Round(x, out_precision, 0, 0, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void round_decimal128_int32(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t rounding_scale, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + auto out = + gandiva::decimalops::Round(x, out_precision, out_scale, rounding_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void truncate_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + auto out = gandiva::decimalops::Truncate(x, out_precision, 0, 0, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void truncate_decimal128_int32(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t rounding_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, 
uint64_t* out_low) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + auto out = gandiva::decimalops::Truncate(x, out_precision, out_scale, rounding_scale, + &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +double castFLOAT8_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + return gandiva::decimalops::ToDouble(x, &overflow); +} + +FORCE_INLINE +int64_t castBIGINT_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + + bool overflow = false; + return gandiva::decimalops::ToInt64(x, &overflow); +} + +FORCE_INLINE +void castDECIMAL_int64(int64_t in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { + bool overflow = false; + auto out = gandiva::decimalops::FromInt64(in, x_precision, x_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void castDECIMAL_int32(int32_t in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { + castDECIMAL_int64(in, x_precision, x_scale, out_high, out_low); +} + +FORCE_INLINE +void castDECIMAL_float64(double in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { + bool overflow = false; + auto out = gandiva::decimalops::FromDouble(in, x_precision, x_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +void castDECIMAL_float32(float in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { + castDECIMAL_float64(in, x_precision, x_scale, out_high, out_low); +} + +FORCE_INLINE +bool castDecimal_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, 
int32_t out_scale, + int64_t* out_high, int64_t* out_low) { + gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); + bool overflow = false; + auto out = gandiva::decimalops::Convert(x, out_precision, out_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); + return overflow; +} + +FORCE_INLINE +void castDECIMAL_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, int64_t* out_low) { + castDecimal_internal(x_high, x_low, x_precision, x_scale, out_precision, out_scale, + out_high, out_low); +} + +FORCE_INLINE +void castDECIMALNullOnOverflow_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + bool x_isvalid, bool* out_valid, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, int64_t* out_low) { + *out_valid = true; + + if (!x_isvalid) { + *out_valid = false; + return; + } + + if (castDecimal_internal(x_high, x_low, x_precision, x_scale, out_precision, out_scale, + out_high, out_low)) { + *out_valid = false; + } +} + +FORCE_INLINE +int32_t hash32_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return x_isvalid + ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) + : 0; +} + +FORCE_INLINE +int32_t hash_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return hash32_decimal128(x_high, x_low, x_precision, x_scale, x_isvalid); +} + +FORCE_INLINE +int64_t hash64_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return x_isvalid + ? 
hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) + : 0; +} + +FORCE_INLINE +int32_t hash32WithSeed_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid, int32_t seed, + gdv_boolean seed_isvalid) { + if (!x_isvalid) { + return seed; + } + return hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed); +} + +FORCE_INLINE +int64_t hash64WithSeed_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid, int64_t seed, + gdv_boolean seed_isvalid) { + if (!x_isvalid) { + return seed; + } + return hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed); +} + +FORCE_INLINE +int32_t hash32AsDouble_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return x_isvalid + ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) + : 0; +} + +FORCE_INLINE +int64_t hash64AsDouble_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return x_isvalid + ? 
hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) + : 0; +} + +FORCE_INLINE +int32_t hash32AsDoubleWithSeed_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + gdv_boolean x_isvalid, int32_t seed, + gdv_boolean seed_isvalid) { + if (!x_isvalid) { + return seed; + } + return hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed); +} + +FORCE_INLINE +int64_t hash64AsDoubleWithSeed_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + gdv_boolean x_isvalid, int64_t seed, + gdv_boolean seed_isvalid) { + if (!x_isvalid) { + return seed; + } + return hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, seed); +} + +FORCE_INLINE +gdv_boolean isnull_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return !x_isvalid; +} + +FORCE_INLINE +gdv_boolean isnotnull_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return x_isvalid; +} + +FORCE_INLINE +gdv_boolean isnumeric_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, gdv_boolean x_isvalid) { + return x_isvalid; +} + +FORCE_INLINE +gdv_boolean is_not_distinct_from_decimal128_decimal128( + int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, + gdv_boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision, + int32_t y_scale, gdv_boolean y_isvalid) { + if (x_isvalid != y_isvalid) { + return false; + } + if (!x_isvalid) { + return true; + } + return 0 == compare_decimal128_decimal128_internal(x_high, x_low, x_precision, x_scale, + y_high, y_low, y_precision, y_scale); +} + +FORCE_INLINE +gdv_boolean is_distinct_from_decimal128_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + gdv_boolean x_isvalid, int64_t y_high, + uint64_t y_low, int32_t y_precision, + int32_t y_scale, + gdv_boolean 
y_isvalid) { + return !is_not_distinct_from_decimal128_decimal128(x_high, x_low, x_precision, x_scale, + x_isvalid, y_high, y_low, + y_precision, y_scale, y_isvalid); +} + +FORCE_INLINE +void castDECIMAL_utf8(int64_t context, const char* in, int32_t in_length, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { + int64_t dec_high_from_str; + uint64_t dec_low_from_str; + int32_t precision_from_str; + int32_t scale_from_str; + int32_t status = + gdv_fn_dec_from_string(context, in, in_length, &precision_from_str, &scale_from_str, + &dec_high_from_str, &dec_low_from_str); + if (status != 0) { + return; + } + + gandiva::BasicDecimalScalar128 x({dec_high_from_str, dec_low_from_str}, + precision_from_str, scale_from_str); + bool overflow = false; + auto out = gandiva::decimalops::Convert(x, out_precision, out_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +char* castVARCHAR_decimal128_int64(int64_t context, int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + int64_t out_len_param, int32_t* out_length) { + int32_t full_dec_str_len; + char* dec_str = + gdv_fn_dec_to_string(context, x_high, x_low, x_scale, &full_dec_str_len); + int32_t trunc_dec_str_len = + out_len_param < full_dec_str_len ? out_len_param : full_dec_str_len; + *out_length = trunc_dec_str_len; + return dec_str; +} + +} // extern "C" diff --git a/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point.h b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point.h new file mode 100644 index 000000000..45cfb28ca --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point.h @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +// TODO(wesm): IR compilation does not have any include directories set +#include "../../arrow/vendored/datetime/date.h" + +bool is_leap_year(int yy); +bool did_days_overflow(arrow_vendored::date::year_month_day ymd); +int last_possible_day_in_month(int month, int year); + +// A point of time measured in millis since epoch. +class EpochTimePoint { + public: + explicit EpochTimePoint(std::chrono::milliseconds millis_since_epoch) + : tp_(millis_since_epoch) {} + + explicit EpochTimePoint(int64_t millis_since_epoch) + : EpochTimePoint(std::chrono::milliseconds(millis_since_epoch)) {} + + int TmYear() const { return static_cast<int>(YearMonthDay().year()) - 1900; } + + int TmMon() const { return static_cast<unsigned int>(YearMonthDay().month()) - 1; } + + int TmYday() const { + auto to_days = arrow_vendored::date::floor<arrow_vendored::date::days>(tp_); + auto first_day_in_year = arrow_vendored::date::sys_days{ + YearMonthDay().year() / arrow_vendored::date::jan / 1}; + return (to_days - first_day_in_year).count(); + } + + int TmMday() const { return static_cast<unsigned int>(YearMonthDay().day()); } + + int TmWday() const { + auto to_days = arrow_vendored::date::floor<arrow_vendored::date::days>(tp_); + return (arrow_vendored::date::weekday{to_days} - // NOLINT + arrow_vendored::date::Sunday) + .count(); + } + + int TmHour() const { return 
static_cast<int>(TimeOfDay().hours().count()); } + + int TmMin() const { return static_cast<int>(TimeOfDay().minutes().count()); } + + int TmSec() const { + // TODO(wesm): UNIX y2k issue on int=gdv_int32 platforms + return static_cast<int>(TimeOfDay().seconds().count()); + } + + EpochTimePoint AddYears(int num_years) const { + auto ymd = YearMonthDay() + arrow_vendored::date::years(num_years); + return EpochTimePoint((arrow_vendored::date::sys_days{ymd} + // NOLINT + TimeOfDay().to_duration()) + .time_since_epoch()); + } + + EpochTimePoint AddMonths(int num_months) const { + auto ymd = YearMonthDay() + arrow_vendored::date::months(num_months); + + EpochTimePoint tp = EpochTimePoint((arrow_vendored::date::sys_days{ymd} + // NOLINT + TimeOfDay().to_duration()) + .time_since_epoch()); + + if (did_days_overflow(ymd)) { + int days_to_offset = + last_possible_day_in_month(static_cast<int>(ymd.year()), + static_cast<unsigned int>(ymd.month())) - + static_cast<unsigned int>(ymd.day()); + tp = tp.AddDays(days_to_offset); + } + return tp; + } + + EpochTimePoint AddDays(int num_days) const { + auto days_since_epoch = arrow_vendored::date::sys_days{YearMonthDay()} + // NOLINT + arrow_vendored::date::days(num_days); + return EpochTimePoint( + (days_since_epoch + TimeOfDay().to_duration()).time_since_epoch()); + } + + EpochTimePoint ClearTimeOfDay() const { + return EpochTimePoint((tp_ - TimeOfDay().to_duration()).time_since_epoch()); + } + + bool operator==(const EpochTimePoint& other) const { return tp_ == other.tp_; } + + int64_t MillisSinceEpoch() const { return tp_.time_since_epoch().count(); } + + arrow_vendored::date::time_of_day<std::chrono::milliseconds> TimeOfDay() const { + auto millis_since_midnight = + tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_); + return arrow_vendored::date::time_of_day<std::chrono::milliseconds>( + millis_since_midnight); + } + + private: + arrow_vendored::date::year_month_day YearMonthDay() const { + return 
arrow_vendored::date::year_month_day{ + arrow_vendored::date::floor<arrow_vendored::date::days>(tp_)}; // NOLINT + } + + std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds> tp_; +}; diff --git a/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point_test.cc b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point_test.cc new file mode 100644 index 000000000..9180aac07 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/epoch_time_point_test.cc @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <ctime> + +#include <gtest/gtest.h> +#include "./epoch_time_point.h" +#include "gandiva/precompiled/testing.h" +#include "gandiva/precompiled/types.h" + +#include "gandiva/date_utils.h" + +namespace gandiva { + +TEST(TestEpochTimePoint, TestTm) { + auto ts = StringToTimestamp("2015-05-07 10:20:34"); + EpochTimePoint tp(ts); + + struct tm* tm_ptr; +#if defined(_WIN32) + __time64_t tsec = ts / 1000; + tm_ptr = _gmtime64(&tsec); +#else + struct tm tm; + time_t tsec = ts / 1000; + tm_ptr = gmtime_r(&tsec, &tm); +#endif + + EXPECT_EQ(tp.TmYear(), tm_ptr->tm_year); + EXPECT_EQ(tp.TmMon(), tm_ptr->tm_mon); + EXPECT_EQ(tp.TmYday(), tm_ptr->tm_yday); + EXPECT_EQ(tp.TmMday(), tm_ptr->tm_mday); + EXPECT_EQ(tp.TmWday(), tm_ptr->tm_wday); + EXPECT_EQ(tp.TmHour(), tm_ptr->tm_hour); + EXPECT_EQ(tp.TmMin(), tm_ptr->tm_min); + EXPECT_EQ(tp.TmSec(), tm_ptr->tm_sec); +} + +TEST(TestEpochTimePoint, TestAddYears) { + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddYears(2), + EpochTimePoint(StringToTimestamp("2017-05-05 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddYears(0), + EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddYears(-1), + EpochTimePoint(StringToTimestamp("2014-05-05 10:20:34"))); +} + +TEST(TestEpochTimePoint, TestAddMonths) { + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddMonths(2), + EpochTimePoint(StringToTimestamp("2015-07-05 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddMonths(11), + EpochTimePoint(StringToTimestamp("2016-04-05 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddMonths(0), + EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddMonths(-1), + EpochTimePoint(StringToTimestamp("2015-04-05 10:20:34"))); + + 
EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddMonths(-10), + EpochTimePoint(StringToTimestamp("2014-07-05 10:20:34"))); +} + +TEST(TestEpochTimePoint, TestAddDays) { + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddDays(2), + EpochTimePoint(StringToTimestamp("2015-05-07 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddDays(11), + EpochTimePoint(StringToTimestamp("2015-05-16 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddDays(0), + EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddDays(-1), + EpochTimePoint(StringToTimestamp("2015-05-04 10:20:34"))); + + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).AddDays(-10), + EpochTimePoint(StringToTimestamp("2015-04-25 10:20:34"))); +} + +TEST(TestEpochTimePoint, TestClearTimeOfDay) { + EXPECT_EQ(EpochTimePoint(StringToTimestamp("2015-05-05 10:20:34")).ClearTimeOfDay(), + EpochTimePoint(StringToTimestamp("2015-05-05 00:00:00"))); +} + +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops.cc new file mode 100644 index 000000000..365b08a6d --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops.cc @@ -0,0 +1,410 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#include "arrow/util/logging.h" +#include "gandiva/precompiled/decimal_ops.h" + +extern "C" { + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "./types.h" + +// Expand the inner fn for types that support extended math. +#define ENUMERIC_TYPES_UNARY(INNER, OUT_TYPE) \ + INNER(int32, OUT_TYPE) \ + INNER(uint32, OUT_TYPE) \ + INNER(int64, OUT_TYPE) \ + INNER(uint64, OUT_TYPE) \ + INNER(float32, OUT_TYPE) \ + INNER(float64, OUT_TYPE) + +// Cubic root +#define CBRT(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE cbrt_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_float64>(cbrtl(static_cast<long double>(in))); \ + } + +ENUMERIC_TYPES_UNARY(CBRT, float64) + +// Exponent +#define EXP(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE exp_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_float64>(expl(static_cast<long double>(in))); \ + } + +ENUMERIC_TYPES_UNARY(EXP, float64) + +// log +#define LOG(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE log_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_float64>(logl(static_cast<long double>(in))); \ + } + +ENUMERIC_TYPES_UNARY(LOG, float64) + +// log base 10 +#define LOG10(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE log10_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_float64>(log10l(static_cast<long double>(in))); \ + } + +#define LOGL(VALUE) static_cast<gdv_float64>(logl(static_cast<long double>(VALUE))) + +ENUMERIC_TYPES_UNARY(LOG10, 
float64) + +FORCE_INLINE +void set_error_for_logbase(int64_t execution_context, double base) { + char const* prefix = "divide by zero error with log of base"; + int size = static_cast<int>(strlen(prefix)) + 64; + char* error = reinterpret_cast<char*>(malloc(size)); + snprintf(error, size, "%s %f", prefix, base); + gdv_fn_context_set_error_msg(execution_context, error); + free(static_cast<char*>(error)); +} + +// log with base +#define LOG_WITH_BASE(IN_TYPE1, IN_TYPE2, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE log_##IN_TYPE1##_##IN_TYPE2(gdv_int64 context, gdv_##IN_TYPE1 base, \ + gdv_##IN_TYPE2 value) { \ + gdv_##OUT_TYPE log_of_base = LOGL(base); \ + if (log_of_base == 0) { \ + set_error_for_logbase(context, static_cast<gdv_float64>(base)); \ + return 0; \ + } \ + return LOGL(value) / LOGL(base); \ + } + +LOG_WITH_BASE(int32, int32, float64) +LOG_WITH_BASE(uint32, uint32, float64) +LOG_WITH_BASE(int64, int64, float64) +LOG_WITH_BASE(uint64, uint64, float64) +LOG_WITH_BASE(float32, float32, float64) +LOG_WITH_BASE(float64, float64, float64) + +// Sin +#define SIN(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE sin_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(sin(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(SIN, float64) + +// Asin +#define ASIN(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE asin_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(asin(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(ASIN, float64) + +// Cos +#define COS(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE cos_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(cos(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(COS, float64) + +// Acos +#define ACOS(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE acos_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(acos(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(ACOS, float64) + +// Tan 
+#define TAN(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE tan_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(tan(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(TAN, float64) + +// Atan +#define ATAN(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE atan_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(atan(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(ATAN, float64) + +// Sinh +#define SINH(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE sinh_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(sinh(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(SINH, float64) + +// Cosh +#define COSH(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE cosh_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(cosh(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(COSH, float64) + +// Tanh +#define TANH(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE tanh_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(tanh(static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(TANH, float64) + +// Atan2 +#define ATAN2(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE atan2_##IN_TYPE##_##IN_TYPE(gdv_##IN_TYPE in1, gdv_##IN_TYPE in2) { \ + return static_cast<gdv_##OUT_TYPE>( \ + atan2(static_cast<long double>(in1), static_cast<long double>(in2))); \ + } +ENUMERIC_TYPES_UNARY(ATAN2, float64) + +// Cot +#define COT(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE cot_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(tan(M_PI / 2 - static_cast<long double>(in))); \ + } +ENUMERIC_TYPES_UNARY(COT, float64) + +// Radians +#define RADIANS(IN_TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE radians_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(static_cast<long double>(in) * M_PI / 180.0); \ + } +ENUMERIC_TYPES_UNARY(RADIANS, float64) + +// Degrees +#define DEGREES(IN_TYPE, 
OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE degrees_##IN_TYPE(gdv_##IN_TYPE in) { \ + return static_cast<gdv_##OUT_TYPE>(static_cast<long double>(in) * 180.0 / M_PI); \ + } +ENUMERIC_TYPES_UNARY(DEGREES, float64) + +// power +#define POWER(IN_TYPE1, IN_TYPE2, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE power_##IN_TYPE1##_##IN_TYPE2(gdv_##IN_TYPE1 in1, gdv_##IN_TYPE2 in2) { \ + return static_cast<gdv_float64>(powl(in1, in2)); \ + } +POWER(float64, float64, float64) + +FORCE_INLINE +gdv_int32 round_int32(gdv_int32 num) { return num; } + +FORCE_INLINE +gdv_int64 round_int64(gdv_int64 num) { return num; } + +// rounds the number to the nearest integer +#define ROUND_DECIMAL(TYPE) \ + FORCE_INLINE \ + gdv_##TYPE round_##TYPE(gdv_##TYPE num) { \ + return static_cast<gdv_##TYPE>(trunc(num + ((num >= 0) ? 0.5 : -0.5))); \ + } + +ROUND_DECIMAL(float32) +ROUND_DECIMAL(float64) + +// rounds the number to the given scale +#define ROUND_DECIMAL_TO_SCALE(TYPE) \ + FORCE_INLINE \ + gdv_##TYPE round_##TYPE##_int32(gdv_##TYPE number, gdv_int32 out_scale) { \ + gdv_float64 scale_multiplier = get_scale_multiplier(out_scale); \ + return static_cast<gdv_##TYPE>( \ + trunc(number * scale_multiplier + ((number >= 0) ? 0.5 : -0.5)) / \ + scale_multiplier); \ + } + +ROUND_DECIMAL_TO_SCALE(float32) +ROUND_DECIMAL_TO_SCALE(float64) + +FORCE_INLINE +gdv_int32 round_int32_int32(gdv_int32 number, gdv_int32 precision) { + // for integers, there is nothing following the decimal point, + // so round() always returns the same number if precision >= 0 + if (precision >= 0) { + return number; + } + gdv_int32 abs_precision = -precision; + // This is to ensure that there is no overflow while calculating 10^precision, 9 is + // the smallest N for which 10^N does not fit into 32 bits, so we can safely return 0 + if (abs_precision > 9) { + return 0; + } + gdv_int32 num_sign = (number > 0) ? 
1 : -1; + gdv_int32 abs_number = number * num_sign; + gdv_int32 power_of_10 = static_cast<gdv_int32>(get_power_of_10(abs_precision)); + gdv_int32 remainder = abs_number % power_of_10; + abs_number -= remainder; + // if the fractional part of the quotient >= 0.5, round to next higher integer + if (remainder >= power_of_10 / 2) { + abs_number += power_of_10; + } + return abs_number * num_sign; +} + +FORCE_INLINE +gdv_int64 round_int64_int32(gdv_int64 number, gdv_int32 precision) { + // for long integers, there is nothing following the decimal point, + // so round() always returns the same number if precision >= 0 + if (precision >= 0) { + return number; + } + gdv_int32 abs_precision = -precision; + // This is to ensure that there is no overflow while calculating 10^precision, 19 is + // the smallest N for which 10^N does not fit into 64 bits, so we can safely return 0 + if (abs_precision > 18) { + return 0; + } + gdv_int32 num_sign = (number > 0) ? 1 : -1; + gdv_int64 abs_number = number * num_sign; + gdv_int64 power_of_10 = get_power_of_10(abs_precision); + gdv_int64 remainder = abs_number % power_of_10; + abs_number -= remainder; + // if the fractional part of the quotient >= 0.5, round to next higher integer + if (remainder >= power_of_10 / 2) { + abs_number += power_of_10; + } + return abs_number * num_sign; +} + +FORCE_INLINE +gdv_int64 get_power_of_10(gdv_int32 exp) { + DCHECK_GE(exp, 0); + DCHECK_LE(exp, 18); + static const gdv_int64 power_of_10[] = {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000}; + return power_of_10[exp]; +} + +FORCE_INLINE +gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale) { + bool overflow = false; + arrow::BasicDecimal128 decimal = gandiva::decimalops::FromInt64(in, 38, 0, &overflow); + arrow::BasicDecimal128 
decimal_with_outscale = + gandiva::decimalops::Truncate(gandiva::BasicDecimalScalar128(decimal, 38, 0), 38, + out_scale, out_scale, &overflow); + if (out_scale < 0) { + out_scale = 0; + } + return gandiva::decimalops::ToInt64( + gandiva::BasicDecimalScalar128(decimal_with_outscale, 38, out_scale), &overflow); +} + +FORCE_INLINE +gdv_float64 get_scale_multiplier(gdv_int32 scale) { + static const gdv_float64 values[] = {1.0, + 10.0, + 100.0, + 1000.0, + 10000.0, + 100000.0, + 1000000.0, + 10000000.0, + 100000000.0, + 1000000000.0, + 10000000000.0, + 100000000000.0, + 1000000000000.0, + 10000000000000.0, + 100000000000000.0, + 1000000000000000.0, + 10000000000000000.0, + 100000000000000000.0, + 1000000000000000000.0, + 10000000000000000000.0}; + if (scale >= 0 && scale < 20) { + return values[scale]; + } + return power_float64_float64(10.0, scale); +} + +// returns the binary representation of a given integer (e.g. 928 -> 1110100000) +#define BIN_INTEGER(IN_TYPE) \ + FORCE_INLINE \ + const char* bin_##IN_TYPE(int64_t context, gdv_##IN_TYPE value, int32_t* out_len) { \ + *out_len = 0; \ + int32_t len = 8 * sizeof(value); \ + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len)); \ + if (ret == nullptr) { \ + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output"); \ + return ""; \ + } \ + /* handle case when value is zero */ \ + if (value == 0) { \ + *out_len = 1; \ + ret[0] = '0'; \ + return ret; \ + } \ + /* generate binary representation iteratively */ \ + gdv_u##IN_TYPE i; \ + int8_t count = 0; \ + bool first = false; /* flag for not printing left zeros in positive numbers */ \ + for (i = static_cast<gdv_u##IN_TYPE>(1) << (len - 1); i > 0; i = i / 2) { \ + if ((value & i) != 0) { \ + ret[count] = '1'; \ + if (!first) first = true; \ + } else { \ + if (!first) continue; \ + ret[count] = '0'; \ + } \ + count += 1; \ + } \ + *out_len = count; \ + return ret; \ + } + +BIN_INTEGER(int32) +BIN_INTEGER(int64) + +#undef 
BIN_INTEGER + +} // extern "C" diff --git a/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops_test.cc new file mode 100644 index 000000000..147b4035c --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/extended_math_ops_test.cc @@ -0,0 +1,349 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#include <gtest/gtest.h> +#include <cmath> +#include "gandiva/execution_context.h" +#include "gandiva/precompiled/types.h" + +namespace gandiva { + +static const double MAX_ERROR = 0.00005; + +void VerifyFuzzyEquals(double actual, double expected, double max_error = MAX_ERROR) { + EXPECT_TRUE(fabs(actual - expected) < max_error) << actual << " != " << expected; +} + +TEST(TestExtendedMathOps, TestCbrt) { + VerifyFuzzyEquals(cbrt_int32(27), 3); + VerifyFuzzyEquals(cbrt_int64(27), 3); + VerifyFuzzyEquals(cbrt_float32(27), 3); + VerifyFuzzyEquals(cbrt_float64(27), 3); + VerifyFuzzyEquals(cbrt_float64(-27), -3); + + VerifyFuzzyEquals(cbrt_float32(15.625), 2.5); + VerifyFuzzyEquals(cbrt_float64(15.625), 2.5); +} + +TEST(TestExtendedMathOps, TestExp) { + double val = 20.085536923187668; + + VerifyFuzzyEquals(exp_int32(3), val); + VerifyFuzzyEquals(exp_int64(3), val); + VerifyFuzzyEquals(exp_float32(3), val); + VerifyFuzzyEquals(exp_float64(3), val); +} + +TEST(TestExtendedMathOps, TestLog) { + double val = 4.1588830833596715; + + VerifyFuzzyEquals(log_int32(64), val); + VerifyFuzzyEquals(log_int64(64), val); + VerifyFuzzyEquals(log_float32(64), val); + VerifyFuzzyEquals(log_float64(64), val); + + EXPECT_EQ(log_int32(0), -std::numeric_limits<double>::infinity()); +} + +TEST(TestExtendedMathOps, TestLog10) { + VerifyFuzzyEquals(log10_int32(100), 2); + VerifyFuzzyEquals(log10_int64(100), 2); + VerifyFuzzyEquals(log10_float32(100), 2); + VerifyFuzzyEquals(log10_float64(100), 2); +} + +TEST(TestExtendedMathOps, TestPower) { + VerifyFuzzyEquals(power_float64_float64(2, 5.4), 42.22425314473263); + VerifyFuzzyEquals(power_float64_float64(5.4, 2), 29.160000000000004); +} + +TEST(TestExtendedMathOps, TestLogWithBase) { + gandiva::ExecutionContext context; + gdv_float64 out = + log_int32_int32(reinterpret_cast<gdv_int64>(&context), 1 /*base*/, 10 /*value*/); + VerifyFuzzyEquals(out, 0); + EXPECT_EQ(context.has_error(), 
true); + EXPECT_TRUE(context.get_error().find("divide by zero error") != std::string::npos) + << context.get_error(); + + gandiva::ExecutionContext context1; + out = log_int32_int32(reinterpret_cast<gdv_int64>(&context), 2 /*base*/, 64 /*value*/); + VerifyFuzzyEquals(out, 6); + EXPECT_EQ(context1.has_error(), false); +} + +TEST(TestExtendedMathOps, TestRoundDecimal) { + EXPECT_FLOAT_EQ(round_float32(1234.245f), 1234); + EXPECT_FLOAT_EQ(round_float32(-11.7892f), -12); + EXPECT_FLOAT_EQ(round_float32(1.4999999f), 1); + EXPECT_EQ(std::signbit(round_float32(0)), 0); + EXPECT_FLOAT_EQ(round_float32_int32(1234.789f, 2), 1234.79f); + EXPECT_FLOAT_EQ(round_float32_int32(1234.12345f, -3), 1000); + EXPECT_FLOAT_EQ(round_float32_int32(-1234.4567f, 3), -1234.457f); + EXPECT_FLOAT_EQ(round_float32_int32(-1234.4567f, -3), -1000); + EXPECT_FLOAT_EQ(round_float32_int32(1234.4567f, 0), 1234); + EXPECT_FLOAT_EQ(round_float32_int32(1.5499999523162842f, 1), 1.5f); + EXPECT_EQ(std::signbit(round_float32_int32(0, 5)), 0); + EXPECT_FLOAT_EQ(round_float32_int32(static_cast<float>(1.55), 1), 1.5f); + EXPECT_FLOAT_EQ(round_float32_int32(static_cast<float>(9.134123), 2), 9.13f); + EXPECT_FLOAT_EQ(round_float32_int32(static_cast<float>(-1.923), 1), -1.9f); + + VerifyFuzzyEquals(round_float64(1234.245), 1234); + VerifyFuzzyEquals(round_float64(-11.7892), -12); + VerifyFuzzyEquals(round_float64(1.4999999), 1); + EXPECT_EQ(std::signbit(round_float64(0)), 0); + VerifyFuzzyEquals(round_float64_int32(1234.789, 2), 1234.79); + VerifyFuzzyEquals(round_float64_int32(1234.12345, -3), 1000); + VerifyFuzzyEquals(round_float64_int32(-1234.4567, 3), -1234.457); + VerifyFuzzyEquals(round_float64_int32(-1234.4567, -3), -1000); + VerifyFuzzyEquals(round_float64_int32(1234.4567, 0), 1234); + EXPECT_EQ(std::signbit(round_float64_int32(0, -2)), 0); + VerifyFuzzyEquals(round_float64_int32((double)INT_MAX + 1, 0), (double)INT_MAX + 1); + VerifyFuzzyEquals(round_float64_int32((double)INT_MIN - 1, 0), 
(double)INT_MIN - 1); +} + +TEST(TestExtendedMathOps, TestRound) { + EXPECT_EQ(round_int32(21134), 21134); + EXPECT_EQ(round_int32(-132422), -132422); + EXPECT_EQ(round_int32_int32(7589, -1), 7590); + EXPECT_EQ(round_int32_int32(8532, -2), 8500); + EXPECT_EQ(round_int32_int32(-8579, -1), -8580); + EXPECT_EQ(round_int32_int32(-8612, -2), -8600); + EXPECT_EQ(round_int32_int32(758, 2), 758); + EXPECT_EQ(round_int32_int32(8612, -5), 0); + + EXPECT_EQ(round_int64(3453562312), 3453562312); + EXPECT_EQ(round_int64(-23453462343), -23453462343); + EXPECT_EQ(round_int64_int32(3453562312, -2), 3453562300); + EXPECT_EQ(round_int64_int32(3453562343, -5), 3453600000); + EXPECT_EQ(round_int64_int32(345353425343, 12), 345353425343); + EXPECT_EQ(round_int64_int32(-23453462343, -4), -23453460000); + EXPECT_EQ(round_int64_int32(-23453462343, -5), -23453500000); + EXPECT_EQ(round_int64_int32(345353425343, -12), 0); +} + +TEST(TestExtendedMathOps, TestTruncate) { + EXPECT_EQ(truncate_int64_int32(1234, 4), 1234); + EXPECT_EQ(truncate_int64_int32(-1234, 4), -1234); + EXPECT_EQ(truncate_int64_int32(1234, -4), 0); + EXPECT_EQ(truncate_int64_int32(-1234, -2), -1200); + EXPECT_EQ(truncate_int64_int32(8124674407369523212, 0), 8124674407369523212); + EXPECT_EQ(truncate_int64_int32(8124674407369523212, -2), 8124674407369523200); +} + +TEST(TestExtendedMathOps, TestTrigonometricFunctions) { + auto pi_float = static_cast<float>(M_PI); + // Sin functions + VerifyFuzzyEquals(sin_float32(0), sin(0)); + VerifyFuzzyEquals(sin_float32(0), sin(0)); + VerifyFuzzyEquals(sin_float32(pi_float / 2), sin(M_PI / 2)); + VerifyFuzzyEquals(sin_float32(pi_float), sin(M_PI)); + VerifyFuzzyEquals(sin_float32(-pi_float / 2), sin(-M_PI / 2)); + VerifyFuzzyEquals(sin_float64(0), sin(0)); + VerifyFuzzyEquals(sin_float64(M_PI / 2), sin(M_PI / 2)); + VerifyFuzzyEquals(sin_float64(M_PI), sin(M_PI)); + VerifyFuzzyEquals(sin_float64(-M_PI / 2), sin(-M_PI / 2)); + VerifyFuzzyEquals(sin_int32(0), sin(0)); + 
VerifyFuzzyEquals(sin_int64(0), sin(0)); + + // Cos functions + VerifyFuzzyEquals(cos_float32(0), cos(0)); + VerifyFuzzyEquals(cos_float32(pi_float / 2), cos(M_PI / 2)); + VerifyFuzzyEquals(cos_float32(pi_float), cos(M_PI)); + VerifyFuzzyEquals(cos_float32(-pi_float / 2), cos(-M_PI / 2)); + VerifyFuzzyEquals(cos_float64(0), cos(0)); + VerifyFuzzyEquals(cos_float64(M_PI / 2), cos(M_PI / 2)); + VerifyFuzzyEquals(cos_float64(M_PI), cos(M_PI)); + VerifyFuzzyEquals(cos_float64(-M_PI / 2), cos(-M_PI / 2)); + VerifyFuzzyEquals(cos_int32(0), cos(0)); + VerifyFuzzyEquals(cos_int64(0), cos(0)); + + // Asin functions + VerifyFuzzyEquals(asin_float32(-1.0), asin(-1.0)); + VerifyFuzzyEquals(asin_float32(1.0), asin(1.0)); + VerifyFuzzyEquals(asin_float64(-1.0), asin(-1.0)); + VerifyFuzzyEquals(asin_float64(1.0), asin(1.0)); + VerifyFuzzyEquals(asin_int32(0), asin(0)); + VerifyFuzzyEquals(asin_int64(0), asin(0)); + + // Acos functions + VerifyFuzzyEquals(acos_float32(-1.0), acos(-1.0)); + VerifyFuzzyEquals(acos_float32(1.0), acos(1.0)); + VerifyFuzzyEquals(acos_float64(-1.0), acos(-1.0)); + VerifyFuzzyEquals(acos_float64(1.0), acos(1.0)); + VerifyFuzzyEquals(acos_int32(0), acos(0)); + VerifyFuzzyEquals(acos_int64(0), acos(0)); + + // Tan + VerifyFuzzyEquals(tan_float32(pi_float), tan(M_PI)); + VerifyFuzzyEquals(tan_float32(-pi_float), tan(-M_PI)); + VerifyFuzzyEquals(tan_float64(M_PI), tan(M_PI)); + VerifyFuzzyEquals(tan_float64(-M_PI), tan(-M_PI)); + VerifyFuzzyEquals(tan_int32(0), tan(0)); + VerifyFuzzyEquals(tan_int64(0), tan(0)); + + // Atan + VerifyFuzzyEquals(atan_float32(pi_float), atan(M_PI)); + VerifyFuzzyEquals(atan_float32(-pi_float), atan(-M_PI)); + VerifyFuzzyEquals(atan_float64(M_PI), atan(M_PI)); + VerifyFuzzyEquals(atan_float64(-M_PI), atan(-M_PI)); + VerifyFuzzyEquals(atan_int32(0), atan(0)); + VerifyFuzzyEquals(atan_int64(0), atan(0)); + + // Sinh functions + VerifyFuzzyEquals(sinh_float32(0), sinh(0)); + VerifyFuzzyEquals(sinh_float32(pi_float / 2), sinh(M_PI / 
2)); + VerifyFuzzyEquals(sinh_float32(pi_float), sinh(M_PI)); + VerifyFuzzyEquals(sinh_float32(-pi_float / 2), sinh(-M_PI / 2)); + VerifyFuzzyEquals(sinh_float64(0), sinh(0)); + VerifyFuzzyEquals(sinh_float64(M_PI / 2), sinh(M_PI / 2)); + VerifyFuzzyEquals(sinh_float64(M_PI), sinh(M_PI)); + VerifyFuzzyEquals(sinh_float64(-M_PI / 2), sinh(-M_PI / 2)); + VerifyFuzzyEquals(sinh_int32(0), sinh(0)); + VerifyFuzzyEquals(sinh_int64(0), sinh(0)); + + // Cosh functions + VerifyFuzzyEquals(cosh_float32(0), cosh(0)); + VerifyFuzzyEquals(cosh_float32(pi_float / 2), cosh(M_PI / 2)); + VerifyFuzzyEquals(cosh_float32(pi_float), cosh(M_PI)); + VerifyFuzzyEquals(cosh_float32(-pi_float / 2), cosh(-M_PI / 2)); + VerifyFuzzyEquals(cosh_float64(0), cosh(0)); + VerifyFuzzyEquals(cosh_float64(M_PI / 2), cosh(M_PI / 2)); + VerifyFuzzyEquals(cosh_float64(M_PI), cosh(M_PI)); + VerifyFuzzyEquals(cosh_float64(-M_PI / 2), cosh(-M_PI / 2)); + VerifyFuzzyEquals(cosh_int32(0), cosh(0)); + VerifyFuzzyEquals(cosh_int64(0), cosh(0)); + + // Tanh + VerifyFuzzyEquals(tanh_float32(pi_float), tanh(M_PI)); + VerifyFuzzyEquals(tanh_float32(-pi_float), tanh(-M_PI)); + VerifyFuzzyEquals(tanh_float64(M_PI), tanh(M_PI)); + VerifyFuzzyEquals(tanh_float64(-M_PI), tanh(-M_PI)); + VerifyFuzzyEquals(tanh_int32(0), tanh(0)); + VerifyFuzzyEquals(tanh_int64(0), tanh(0)); + + // Atan2 + VerifyFuzzyEquals(atan2_float32_float32(1, 0), atan2(1, 0)); + VerifyFuzzyEquals(atan2_float32_float32(-1.0, 0), atan2(-1, 0)); + VerifyFuzzyEquals(atan2_float64_float64(1.0, 0.0), atan2(1, 0)); + VerifyFuzzyEquals(atan2_float64_float64(-1, 0), atan2(-1, 0)); + VerifyFuzzyEquals(atan2_int32_int32(1, 0), atan2(1, 0)); + VerifyFuzzyEquals(atan2_int64_int64(-1, 0), atan2(-1, 0)); + + // Radians + VerifyFuzzyEquals(radians_float32(0), 0); + VerifyFuzzyEquals(radians_float32(180.0), M_PI); + VerifyFuzzyEquals(radians_float32(90.0), M_PI / 2); + VerifyFuzzyEquals(radians_float64(0), 0); + VerifyFuzzyEquals(radians_float64(180.0), M_PI); + 
VerifyFuzzyEquals(radians_float64(90.0), M_PI / 2); + VerifyFuzzyEquals(radians_int32(180), M_PI); + VerifyFuzzyEquals(radians_int64(90), M_PI / 2); + + // Degrees + VerifyFuzzyEquals(degrees_float32(0), 0.0); + VerifyFuzzyEquals(degrees_float32(pi_float), 180.0); + VerifyFuzzyEquals(degrees_float32(pi_float / 2), 90.0); + VerifyFuzzyEquals(degrees_float64(0), 0.0); + VerifyFuzzyEquals(degrees_float64(M_PI), 180.0); + VerifyFuzzyEquals(degrees_float64(M_PI / 2), 90.0); + VerifyFuzzyEquals(degrees_int32(1), 57.2958); + VerifyFuzzyEquals(degrees_int64(1), 57.2958); + + // Cot + VerifyFuzzyEquals(cot_float32(pi_float / 2), tan(M_PI / 2 - M_PI / 2)); + VerifyFuzzyEquals(cot_float64(M_PI / 2), tan(M_PI / 2 - M_PI / 2)); +} + +TEST(TestExtendedMathOps, TestBinRepresentation) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const char* out_str = bin_int32(ctx_ptr, 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "111"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int32(ctx_ptr, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "0"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int32(ctx_ptr, 28550, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "110111110000110"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int32(ctx_ptr, -28550, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "11111111111111111001000001111010"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int32(ctx_ptr, 58117, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1110001100000101"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int32(ctx_ptr, -58117, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "11111111111111110001110011111011"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int32(ctx_ptr, INT32_MAX, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1111111111111111111111111111111"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int32(ctx_ptr, 
INT32_MIN, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "10000000000000000000000000000000"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "111"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "0"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, 28550, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "110111110000110"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, -28550, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "1111111111111111111111111111111111111111111111111001000001111010"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, 58117, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1110001100000101"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, -58117, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "1111111111111111111111111111111111111111111111110001110011111011"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, INT64_MAX, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "111111111111111111111111111111111111111111111111111111111111111"); + EXPECT_FALSE(ctx.has_error()); + + out_str = bin_int64(ctx_ptr, INT64_MIN, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "1000000000000000000000000000000000000000000000000000000000000000"); + EXPECT_FALSE(ctx.has_error()); +} +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/hash.cc b/src/arrow/cpp/src/gandiva/precompiled/hash.cc new file mode 100644 index 000000000..eacf36230 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/hash.cc @@ -0,0 +1,407 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern "C" { + +#include <string.h> + +#include "./types.h" + +static inline gdv_uint64 rotate_left(gdv_uint64 val, int distance) { + return (val << distance) | (val >> (64 - distance)); +} + +// +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. +// See http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp +// MurmurHash3_x64_128 +// +static inline gdv_uint64 fmix64(gdv_uint64 k) { + k ^= k >> 33; + k *= 0xff51afd7ed558ccduLL; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53uLL; + k ^= k >> 33; + return k; +} + +static inline gdv_uint64 murmur3_64(gdv_uint64 val, gdv_int32 seed) { + gdv_uint64 h1 = seed; + gdv_uint64 h2 = seed; + + gdv_uint64 c1 = 0x87c37b91114253d5ull; + gdv_uint64 c2 = 0x4cf5ad432745937full; + + int length = 8; + gdv_uint64 k1 = 0; + + k1 = val; + k1 *= c1; + k1 = rotate_left(k1, 31); + k1 *= c2; + h1 ^= k1; + + h1 ^= length; + h2 ^= length; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + + // h2 += h1; + // murmur3_128 should return 128 bit (h1,h2), now we return only 64bits, + return h1; +} + +static inline gdv_uint32 murmur3_32(gdv_uint64 val, gdv_int32 seed) { + gdv_uint64 c1 = 0xcc9e2d51ull; + gdv_uint64 c2 = 0x1b873593ull; + int length = 8; + static gdv_uint64 UINT_MASK = 0xffffffffull; + gdv_uint64 lh1 = seed & UINT_MASK; + for (int i = 0; i < 2; i++) { + gdv_uint64 lk1 = ((val >> i * 32) & 
UINT_MASK); + lk1 *= c1; + lk1 &= UINT_MASK; + + lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17); + + lk1 *= c2; + lk1 &= UINT_MASK; + + lh1 ^= lk1; + lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19); + + lh1 = lh1 * 5 + 0xe6546b64L; + lh1 = UINT_MASK & lh1; + } + lh1 ^= length; + + lh1 ^= lh1 >> 16; + lh1 *= 0x85ebca6bull; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 13; + lh1 *= 0xc2b2ae35ull; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 16; + + return static_cast<gdv_uint32>(lh1); +} + +static inline gdv_uint64 double_to_long_bits(double value) { + gdv_uint64 result; + memcpy(&result, &value, sizeof(result)); + return result; +} + +FORCE_INLINE gdv_int64 hash64(double val, gdv_int64 seed) { + return murmur3_64(double_to_long_bits(val), static_cast<gdv_int32>(seed)); +} + +FORCE_INLINE gdv_int32 hash32(double val, gdv_int32 seed) { + return murmur3_32(double_to_long_bits(val), seed); +} + +// Wrappers for all the numeric/data/time arrow types + +#define HASH64_WITH_SEED_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid, gdv_int64 seed, \ + gdv_boolean seed_isvalid) { \ + if (!is_valid) { \ + return seed; \ + } \ + return hash64(static_cast<double>(in), seed); \ + } + +#define HASH32_WITH_SEED_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid, gdv_int32 seed, \ + gdv_boolean seed_isvalid) { \ + if (!is_valid) { \ + return seed; \ + } \ + return hash32(static_cast<double>(in), seed); \ + } + +#define HASH64_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { \ + return is_valid ? hash64(static_cast<double>(in), 0) : 0; \ + } + +#define HASH32_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_boolean is_valid) { \ + return is_valid ? hash32(static_cast<double>(in), 0) : 0; \ + } + +// Expand inner macro for all numeric types. 
+#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \ + INNER(NAME, int8) \ + INNER(NAME, int16) \ + INNER(NAME, int32) \ + INNER(NAME, int64) \ + INNER(NAME, uint8) \ + INNER(NAME, uint16) \ + INNER(NAME, uint32) \ + INNER(NAME, uint64) \ + INNER(NAME, float32) \ + INNER(NAME, float64) \ + INNER(NAME, boolean) \ + INNER(NAME, date64) \ + INNER(NAME, date32) \ + INNER(NAME, time32) \ + INNER(NAME, timestamp) + +NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash) +NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32) +NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32AsDouble) +NUMERIC_BOOL_DATE_TYPES(HASH32_WITH_SEED_OP, hash32WithSeed) +NUMERIC_BOOL_DATE_TYPES(HASH32_WITH_SEED_OP, hash32AsDoubleWithSeed) + +NUMERIC_BOOL_DATE_TYPES(HASH64_OP, hash64) +NUMERIC_BOOL_DATE_TYPES(HASH64_OP, hash64AsDouble) +NUMERIC_BOOL_DATE_TYPES(HASH64_WITH_SEED_OP, hash64WithSeed) +NUMERIC_BOOL_DATE_TYPES(HASH64_WITH_SEED_OP, hash64AsDoubleWithSeed) + +#undef NUMERIC_BOOL_DATE_TYPES + +static inline gdv_uint64 murmur3_64_buf(const gdv_uint8* key, gdv_int32 len, + gdv_int32 seed) { + gdv_uint64 h1 = seed; + gdv_uint64 h2 = seed; + gdv_uint64 c1 = 0x87c37b91114253d5ull; + gdv_uint64 c2 = 0x4cf5ad432745937full; + + const gdv_uint64* blocks = reinterpret_cast<const gdv_uint64*>(key); + int nblocks = len / 16; + for (int i = 0; i < nblocks; i++) { + gdv_uint64 k1 = blocks[i * 2 + 0]; + gdv_uint64 k2 = blocks[i * 2 + 1]; + + k1 *= c1; + k1 = rotate_left(k1, 31); + k1 *= c2; + h1 ^= k1; + h1 = rotate_left(h1, 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; + k2 *= c2; + k2 = rotate_left(k2, 33); + k2 *= c1; + h2 ^= k2; + h2 = rotate_left(h2, 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; + } + + // tail + gdv_uint64 k1 = 0; + gdv_uint64 k2 = 0; + + const gdv_uint8* tail = reinterpret_cast<const gdv_uint8*>(key + nblocks * 16); + switch (len & 15) { + case 15: + k2 = static_cast<gdv_uint64>(tail[14]) << 48; + case 14: + k2 ^= static_cast<gdv_uint64>(tail[13]) << 40; + case 13: + k2 ^= static_cast<gdv_uint64>(tail[12]) << 32; + case 
12: + k2 ^= static_cast<gdv_uint64>(tail[11]) << 24; + case 11: + k2 ^= static_cast<gdv_uint64>(tail[10]) << 16; + case 10: + k2 ^= static_cast<gdv_uint64>(tail[9]) << 8; + case 9: + k2 ^= static_cast<gdv_uint64>(tail[8]); + k2 *= c2; + k2 = rotate_left(k2, 33); + k2 *= c1; + h2 ^= k2; + case 8: + k1 ^= static_cast<gdv_uint64>(tail[7]) << 56; + case 7: + k1 ^= static_cast<gdv_uint64>(tail[6]) << 48; + case 6: + k1 ^= static_cast<gdv_uint64>(tail[5]) << 40; + case 5: + k1 ^= static_cast<gdv_uint64>(tail[4]) << 32; + case 4: + k1 ^= static_cast<gdv_uint64>(tail[3]) << 24; + case 3: + k1 ^= static_cast<gdv_uint64>(tail[2]) << 16; + case 2: + k1 ^= static_cast<gdv_uint64>(tail[1]) << 8; + case 1: + k1 ^= static_cast<gdv_uint64>(tail[0]) << 0; + k1 *= c1; + k1 = rotate_left(k1, 31); + k1 *= c2; + h1 ^= k1; + } + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + // h2 += h1; + // returning 64-bits of the 128-bit hash. + return h1; +} + +static gdv_uint32 murmur3_32_buf(const gdv_uint8* key, gdv_int32 len, gdv_int32 seed) { + static const gdv_uint64 c1 = 0xcc9e2d51ull; + static const gdv_uint64 c2 = 0x1b873593ull; + static const gdv_uint64 UINT_MASK = 0xffffffffull; + gdv_uint64 lh1 = seed; + const gdv_uint32* blocks = reinterpret_cast<const gdv_uint32*>(key); + int nblocks = len / 4; + const gdv_uint8* tail = reinterpret_cast<const gdv_uint8*>(key + nblocks * 4); + for (int i = 0; i < nblocks; i++) { + gdv_uint64 lk1 = static_cast<gdv_uint64>(blocks[i]); + + // k1 *= c1; + lk1 *= c1; + lk1 &= UINT_MASK; + + lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17); + + lk1 *= c2; + lk1 = lk1 & UINT_MASK; + lh1 ^= lk1; + lh1 = ((lh1 << 13) & UINT_MASK) | (lh1 >> 19); + + lh1 = lh1 * 5 + 0xe6546b64ull; + lh1 = UINT_MASK & lh1; + } + + // tail + gdv_uint64 lk1 = 0; + + switch (len & 3) { + case 3: + lk1 = (tail[2] & 0xff) << 16; + case 2: + lk1 |= (tail[1] & 0xff) << 8; + case 1: + lk1 |= (tail[0] & 0xff); + lk1 *= c1; + lk1 = 
UINT_MASK & lk1; + lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17); + + lk1 *= c2; + lk1 = lk1 & UINT_MASK; + + lh1 ^= lk1; + } + + // finalization + lh1 ^= len; + + lh1 ^= lh1 >> 16; + lh1 *= 0x85ebca6b; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 13; + + lh1 *= 0xc2b2ae35; + lh1 = UINT_MASK & lh1; + lh1 ^= lh1 >> 16; + + return static_cast<gdv_uint32>(lh1 & UINT_MASK); +} + +FORCE_INLINE gdv_int64 hash64_buf(const gdv_uint8* buf, int len, gdv_int64 seed) { + return murmur3_64_buf(buf, len, static_cast<gdv_int32>(seed)); +} + +FORCE_INLINE gdv_int32 hash32_buf(const gdv_uint8* buf, int len, gdv_int32 seed) { + return murmur3_32_buf(buf, len, seed); +} + +// Wrappers for the varlen types + +#define HASH64_BUF_WITH_SEED_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid, \ + gdv_int64 seed, gdv_boolean seed_isvalid) { \ + if (!is_valid) { \ + return seed; \ + } \ + return hash64_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \ + } + +#define HASH32_BUF_WITH_SEED_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid, \ + gdv_int32 seed, gdv_boolean seed_isvalid) { \ + if (!is_valid) { \ + return seed; \ + } \ + return hash32_buf(reinterpret_cast<const uint8_t*>(in), len, seed); \ + } + +#define HASH64_BUF_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int64 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \ + return is_valid ? hash64_buf(reinterpret_cast<const uint8_t*>(in), len, 0) : 0; \ + } + +#define HASH32_BUF_OP(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int32 NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \ + return is_valid ? hash32_buf(reinterpret_cast<const uint8_t*>(in), len, 0) : 0; \ + } + +// Expand inner macro for all non-numeric types. 
+#define VAR_LEN_TYPES(INNER, NAME) \ + INNER(NAME, utf8) \ + INNER(NAME, binary) + +VAR_LEN_TYPES(HASH32_BUF_OP, hash) +VAR_LEN_TYPES(HASH32_BUF_OP, hash32) +VAR_LEN_TYPES(HASH32_BUF_OP, hash32AsDouble) +VAR_LEN_TYPES(HASH32_BUF_WITH_SEED_OP, hash32WithSeed) +VAR_LEN_TYPES(HASH32_BUF_WITH_SEED_OP, hash32AsDoubleWithSeed) + +VAR_LEN_TYPES(HASH64_BUF_OP, hash64) +VAR_LEN_TYPES(HASH64_BUF_OP, hash64AsDouble) +VAR_LEN_TYPES(HASH64_BUF_WITH_SEED_OP, hash64WithSeed) +VAR_LEN_TYPES(HASH64_BUF_WITH_SEED_OP, hash64AsDoubleWithSeed) + +#undef HASH32_BUF_OP +#undef HASH32_BUF_WITH_SEED_OP +#undef HASH32_OP +#undef HASH32_WITH_SEED_OP +#undef HASH64_BUF_OP +#undef HASH64_BUF_WITH_SEED_OP +#undef HASH64_OP +#undef HASH64_WITH_SEED_OP + +} // extern "C" diff --git a/src/arrow/cpp/src/gandiva/precompiled/hash_test.cc b/src/arrow/cpp/src/gandiva/precompiled/hash_test.cc new file mode 100644 index 000000000..0a51dced2 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/hash_test.cc @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <time.h> + +#include <gtest/gtest.h> +#include "gandiva/precompiled/types.h" + +namespace gandiva { + +TEST(TestHash, TestHash32) { + gdv_int8 s8 = 0; + gdv_uint8 u8 = 0; + gdv_int16 s16 = 0; + gdv_uint16 u16 = 0; + gdv_int32 s32 = 0; + gdv_uint32 u32 = 0; + gdv_int64 s64 = 0; + gdv_uint64 u64 = 0; + gdv_float32 f32 = 0; + gdv_float64 f64 = 0; + + // hash of 0 should be non-zero (zero is the hash value for nulls). + gdv_int32 zero_hash = hash32(s8, 0); + EXPECT_NE(zero_hash, 0); + + // for a given value, all numeric types must have the same hash. + EXPECT_EQ(hash32(u8, 0), zero_hash); + EXPECT_EQ(hash32(s16, 0), zero_hash); + EXPECT_EQ(hash32(u16, 0), zero_hash); + EXPECT_EQ(hash32(s32, 0), zero_hash); + EXPECT_EQ(hash32(u32, 0), zero_hash); + EXPECT_EQ(hash32(static_cast<double>(s64), 0), zero_hash); + EXPECT_EQ(hash32(static_cast<double>(u64), 0), zero_hash); + EXPECT_EQ(hash32(f32, 0), zero_hash); + EXPECT_EQ(hash32(f64, 0), zero_hash); + + // hash must change with a change in seed. + EXPECT_NE(hash32(s8, 1), zero_hash); + + // for a given value and seed, all numeric types must have the same hash. + EXPECT_EQ(hash32(s8, 1), hash32(s16, 1)); + EXPECT_EQ(hash32(s8, 1), hash32(u32, 1)); + EXPECT_EQ(hash32(s8, 1), hash32(f32, 1)); + EXPECT_EQ(hash32(s8, 1), hash32(f64, 1)); +} + +TEST(TestHash, TestHash64) { + gdv_int8 s8 = 0; + gdv_uint8 u8 = 0; + gdv_int16 s16 = 0; + gdv_uint16 u16 = 0; + gdv_int32 s32 = 0; + gdv_uint32 u32 = 0; + gdv_int64 s64 = 0; + gdv_uint64 u64 = 0; + gdv_float32 f32 = 0; + gdv_float64 f64 = 0; + + // hash of 0 should be non-zero (zero is the hash value for nulls). + gdv_int64 zero_hash = hash64(s8, 0); + EXPECT_NE(zero_hash, 0); + EXPECT_NE(hash64(u8, 0), hash32(u8, 0)); + + // for a given value, all numeric types must have the same hash. 
+ EXPECT_EQ(hash64(u8, 0), zero_hash); + EXPECT_EQ(hash64(s16, 0), zero_hash); + EXPECT_EQ(hash64(u16, 0), zero_hash); + EXPECT_EQ(hash64(s32, 0), zero_hash); + EXPECT_EQ(hash64(u32, 0), zero_hash); + EXPECT_EQ(hash64(static_cast<double>(s64), 0), zero_hash); + EXPECT_EQ(hash64(static_cast<double>(u64), 0), zero_hash); + EXPECT_EQ(hash64(f32, 0), zero_hash); + EXPECT_EQ(hash64(f64, 0), zero_hash); + + // hash must change with a change in seed. + EXPECT_NE(hash64(s8, 1), zero_hash); + + // for a given value and seed, all numeric types must have the same hash. + EXPECT_EQ(hash64(s8, 1), hash64(s16, 1)); + EXPECT_EQ(hash64(s8, 1), hash64(u32, 1)); + EXPECT_EQ(hash64(s8, 1), hash64(f32, 1)); +} + +TEST(TestHash, TestHashBuf) { + const char* buf = "hello"; + int buf_len = 5; + + // hash should be non-zero (zero is the hash value for nulls). + EXPECT_NE(hash32_buf((const gdv_uint8*)buf, buf_len, 0), 0); + EXPECT_NE(hash64_buf((const gdv_uint8*)buf, buf_len, 0), 0); + + // hash must change if the string is changed. + EXPECT_NE(hash32_buf((const gdv_uint8*)buf, buf_len, 0), + hash32_buf((const gdv_uint8*)buf, buf_len - 1, 0)); + + EXPECT_NE(hash64_buf((const gdv_uint8*)buf, buf_len, 0), + hash64_buf((const gdv_uint8*)buf, buf_len - 1, 0)); + + // hash must change if the seed is changed. + EXPECT_NE(hash32_buf((const gdv_uint8*)buf, buf_len, 0), + hash32_buf((const gdv_uint8*)buf, buf_len, 1)); + + EXPECT_NE(hash64_buf((const gdv_uint8*)buf, buf_len, 0), + hash64_buf((const gdv_uint8*)buf, buf_len, 1)); +} + +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/print.cc b/src/arrow/cpp/src/gandiva/precompiled/print.cc new file mode 100644 index 000000000..ecb90e1a3 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/print.cc @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern "C" { + +#include <stdio.h> + +#include "./types.h" + +int print_double(char* msg, double val) { return printf(msg, val); } + +int print_float(char* msg, float val) { return printf(msg, val); } + +} // extern "C" diff --git a/src/arrow/cpp/src/gandiva/precompiled/string_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/string_ops.cc new file mode 100644 index 000000000..48c24b862 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/string_ops.cc @@ -0,0 +1,2198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// String functions +#include "arrow/util/value_parsing.h" + +extern "C" { + +#include <algorithm> +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> + +#include "./types.h" + +FORCE_INLINE +gdv_int32 octet_length_utf8(const gdv_utf8 input, gdv_int32 length) { return length; } + +FORCE_INLINE +gdv_int32 bit_length_utf8(const gdv_utf8 input, gdv_int32 length) { return length * 8; } + +FORCE_INLINE +gdv_int32 octet_length_binary(const gdv_binary input, gdv_int32 length) { return length; } + +FORCE_INLINE +gdv_int32 bit_length_binary(const gdv_binary input, gdv_int32 length) { + return length * 8; +} + +FORCE_INLINE +int match_string(const char* input, gdv_int32 input_len, gdv_int32 start_pos, + const char* delim, gdv_int32 delim_len) { + for (int i = start_pos; i < input_len; i++) { + int left_chars = input_len - i; + if ((left_chars >= delim_len) && memcmp(input + i, delim, delim_len) == 0) { + return i + delim_len; + } + } + + return -1; +} + +FORCE_INLINE +gdv_int32 mem_compare(const char* left, gdv_int32 left_len, const char* right, + gdv_int32 right_len) { + int min = left_len; + if (right_len < min) { + min = right_len; + } + + int cmp_ret = memcmp(left, right, min); + if (cmp_ret != 0) { + return cmp_ret; + } else { + return left_len - right_len; + } +} + +// Expand inner macro for all varlen types. +#define VAR_LEN_OP_TYPES(INNER, NAME, OP) \ + INNER(NAME, utf8, OP) \ + INNER(NAME, binary, OP) + +// Relational binary fns : left, right params are same, return is bool. 
+#define BINARY_RELATIONAL(NAME, TYPE, OP) \ + FORCE_INLINE \ + bool NAME##_##TYPE##_##TYPE(const gdv_##TYPE left, gdv_int32 left_len, \ + const gdv_##TYPE right, gdv_int32 right_len) { \ + return mem_compare(left, left_len, right, right_len) OP 0; \ + } + +VAR_LEN_OP_TYPES(BINARY_RELATIONAL, equal, ==) +VAR_LEN_OP_TYPES(BINARY_RELATIONAL, not_equal, !=) +VAR_LEN_OP_TYPES(BINARY_RELATIONAL, less_than, <) +VAR_LEN_OP_TYPES(BINARY_RELATIONAL, less_than_or_equal_to, <=) +VAR_LEN_OP_TYPES(BINARY_RELATIONAL, greater_than, >) +VAR_LEN_OP_TYPES(BINARY_RELATIONAL, greater_than_or_equal_to, >=) + +#undef BINARY_RELATIONAL +#undef VAR_LEN_OP_TYPES + +// Expand inner macro for all varlen types. +#define VAR_LEN_TYPES(INNER, NAME) \ + INNER(NAME, utf8) \ + INNER(NAME, binary) + +FORCE_INLINE +int to_binary_from_hex(char ch) { + if (ch >= 'A' && ch <= 'F') { + return 10 + (ch - 'A'); + } else if (ch >= 'a' && ch <= 'f') { + return 10 + (ch - 'a'); + } + return ch - '0'; +} + +FORCE_INLINE +bool starts_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* prefix, + gdv_int32 prefix_len) { + return ((data_len >= prefix_len) && (memcmp(data, prefix, prefix_len) == 0)); +} + +FORCE_INLINE +bool ends_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* suffix, + gdv_int32 suffix_len) { + return ((data_len >= suffix_len) && + (memcmp(data + data_len - suffix_len, suffix, suffix_len) == 0)); +} + +FORCE_INLINE +bool is_substr_utf8_utf8(const char* data, int32_t data_len, const char* substr, + int32_t substr_len) { + for (int32_t i = 0; i <= data_len - substr_len; ++i) { + if (memcmp(data + i, substr, substr_len) == 0) { + return true; + } + } + return false; +} + +FORCE_INLINE +gdv_int32 utf8_char_length(char c) { + if ((signed char)c >= 0) { // 1-byte char (0x00 ~ 0x7F) + return 1; + } else if ((c & 0xE0) == 0xC0) { // 2-byte char + return 2; + } else if ((c & 0xF0) == 0xE0) { // 3-byte char + return 3; + } else if ((c & 0xF8) == 0xF0) { // 4-byte char + return 
4; + } + // invalid char + return 0; +} + +FORCE_INLINE +void set_error_for_invalid_utf(int64_t execution_context, char val) { + char const* fmt = "unexpected byte \\%02hhx encountered while decoding utf8 string"; + int size = static_cast<int>(strlen(fmt)) + 64; + char* error = reinterpret_cast<char*>(malloc(size)); + snprintf(error, size, fmt, (unsigned char)val); + gdv_fn_context_set_error_msg(execution_context, error); + free(error); +} + +FORCE_INLINE +bool validate_utf8_following_bytes(const char* data, int32_t data_len, + int32_t char_index) { + for (int j = 1; j < data_len; ++j) { + if ((data[char_index + j] & 0xC0) != 0x80) { // bytes following head-byte of glyph + return false; + } + } + return true; +} + +// Count the number of utf8 characters +// return 0 for invalid/incomplete input byte sequences +FORCE_INLINE +gdv_int32 utf8_length(gdv_int64 context, const char* data, gdv_int32 data_len) { + int char_len = 0; + int count = 0; + for (int i = 0; i < data_len; i += char_len) { + char_len = utf8_char_length(data[i]); + if (char_len == 0 || i + char_len > data_len) { // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, data[i]); + return 0; + } + for (int j = 1; j < char_len; ++j) { + if ((data[i + j] & 0xC0) != 0x80) { // bytes following head-byte of glyph + set_error_for_invalid_utf(context, data[i + j]); + return 0; + } + } + ++count; + } + return count; +} + +// Count the number of utf8 characters, ignoring invalid char, considering size 1 +FORCE_INLINE +gdv_int32 utf8_length_ignore_invalid(const char* data, gdv_int32 data_len) { + int char_len = 0; + int count = 0; + for (int i = 0; i < data_len; i += char_len) { + char_len = utf8_char_length(data[i]); + if (char_len == 0 || i + char_len > data_len) { // invalid byte or incomplete glyph + // if invalid byte or incomplete glyph, ignore it + char_len = 1; + } + for (int j = 1; j < char_len; ++j) { + if ((data[i + j] & 0xC0) != 0x80) { // bytes following head-byte of glyph + char_len 
+= 1; + } + } + ++count; + } + return count; +} + +// Get the byte position corresponding to a character position for a non-empty utf8 +// sequence +FORCE_INLINE +gdv_int32 utf8_byte_pos(gdv_int64 context, const char* str, gdv_int32 str_len, + gdv_int32 char_pos) { + int char_len = 0; + int byte_index = 0; + for (gdv_int32 char_index = 0; char_index < char_pos && byte_index < str_len; + char_index++) { + char_len = utf8_char_length(str[byte_index]); + if (char_len == 0 || + byte_index + char_len > str_len) { // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, str[byte_index]); + return -1; + } + byte_index += char_len; + } + return byte_index; +} + +#define UTF8_LENGTH(NAME, TYPE) \ + FORCE_INLINE \ + gdv_int32 NAME##_##TYPE(gdv_int64 context, gdv_##TYPE in, gdv_int32 in_len) { \ + return utf8_length(context, in, in_len); \ + } + +UTF8_LENGTH(char_length, utf8) +UTF8_LENGTH(length, utf8) +UTF8_LENGTH(lengthUtf8, binary) + +// Returns a string of 'n' spaces. +#define SPACE_STR(IN_TYPE) \ + GANDIVA_EXPORT \ + const char* space_##IN_TYPE(gdv_int64 ctx, gdv_##IN_TYPE n, int32_t* out_len) { \ + gdv_int32 n_times = static_cast<gdv_int32>(n); \ + if (n_times <= 0) { \ + *out_len = 0; \ + return ""; \ + } \ + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ctx, n_times)); \ + if (ret == nullptr) { \ + gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string"); \ + *out_len = 0; \ + return ""; \ + } \ + for (int i = 0; i < n_times; i++) { \ + ret[i] = ' '; \ + } \ + *out_len = n_times; \ + return ret; \ + } + +SPACE_STR(int32) +SPACE_STR(int64) + +// Reverse a utf8 sequence +FORCE_INLINE +const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len) { + if (data_len == 0) { + *out_len = 0; + return ""; + } + + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, data_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate 
memory for output string"); + *out_len = 0; + return ""; + } + + gdv_int32 char_len; + for (gdv_int32 i = 0; i < data_len; i += char_len) { + char_len = utf8_char_length(data[i]); + + if (char_len == 0 || i + char_len > data_len) { // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, data[i]); + *out_len = 0; + return ""; + } + + for (gdv_int32 j = 0; j < char_len; ++j) { + if (j > 0 && (data[i + j] & 0xC0) != 0x80) { // bytes following head-byte of glyph + set_error_for_invalid_utf(context, data[i + j]); + *out_len = 0; + return ""; + } + ret[data_len - i - char_len + j] = data[i + j]; + } + } + *out_len = data_len; + return ret; +} + +// Trims whitespaces from the left end of the input utf8 sequence +FORCE_INLINE +const char* ltrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len) { + if (data_len == 0) { + *out_len = 0; + return ""; + } + + gdv_int32 start = 0; + // start denotes the first position of non-space characters in the input string + while (start < data_len && data[start] == ' ') { + ++start; + } + + *out_len = data_len - start; + return data + start; +} + +// Trims whitespaces from the right end of the input utf8 sequence +FORCE_INLINE +const char* rtrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len) { + if (data_len == 0) { + *out_len = 0; + return ""; + } + + gdv_int32 end = data_len - 1; + // end denotes the last position of non-space characters in the input string + while (end >= 0 && data[end] == ' ') { + --end; + } + + *out_len = end + 1; + return data; +} + +// Trims whitespaces from both the ends of the input utf8 sequence +FORCE_INLINE +const char* btrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len) { + if (data_len == 0) { + *out_len = 0; + return ""; + } + + gdv_int32 start = 0, end = data_len - 1; + // start and end denote the first and last positions of non-space + // characters in the input string respectively + 
while (start <= end && data[start] == ' ') { + ++start; + } + while (end >= start && data[end] == ' ') { + --end; + } + + // string has some leading/trailing spaces and some non-space characters + *out_len = end - start + 1; + return data + start; +} + +// Trims characters present in the trim text from the left end of the base text +FORCE_INLINE +const char* ltrim_utf8_utf8(gdv_int64 context, const char* basetext, + gdv_int32 basetext_len, const char* trimtext, + gdv_int32 trimtext_len, int32_t* out_len) { + if (basetext_len == 0) { + *out_len = 0; + return ""; + } else if (trimtext_len == 0) { + *out_len = basetext_len; + return basetext; + } + + gdv_int32 start_ptr, char_len; + // scan the base text from left to right and increment the start pointer till + // there is a character which is not present in the trim text + for (start_ptr = 0; start_ptr < basetext_len; start_ptr += char_len) { + char_len = utf8_char_length(basetext[start_ptr]); + if (char_len == 0 || start_ptr + char_len > basetext_len) { + // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, basetext[start_ptr]); + *out_len = 0; + return ""; + } + if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + start_ptr, char_len)) { + break; + } + } + + *out_len = basetext_len - start_ptr; + return basetext + start_ptr; +} + +// Trims characters present in the trim text from the right end of the base text +FORCE_INLINE +const char* rtrim_utf8_utf8(gdv_int64 context, const char* basetext, + gdv_int32 basetext_len, const char* trimtext, + gdv_int32 trimtext_len, int32_t* out_len) { + if (basetext_len == 0) { + *out_len = 0; + return ""; + } else if (trimtext_len == 0) { + *out_len = basetext_len; + return basetext; + } + + gdv_int32 char_len, end_ptr, byte_cnt = 1; + // scan the base text from right to left and decrement the end pointer till + // there is a character which is not present in the trim text + for (end_ptr = basetext_len - 1; end_ptr >= 0; --end_ptr) { + char_len = 
utf8_char_length(basetext[end_ptr]); + if (char_len == 0) { // trailing bytes of multibyte character + ++byte_cnt; + continue; + } + // this is the first byte of a character, hence check if char_len = char_cnt + if (byte_cnt != char_len) { // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, basetext[end_ptr]); + *out_len = 0; + return ""; + } + byte_cnt = 1; // reset the counter*/ + if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + end_ptr, char_len)) { + break; + } + } + + // when all characters in the basetext are part of the trimtext + if (end_ptr == -1) { + *out_len = 0; + return ""; + } + + end_ptr += utf8_char_length(basetext[end_ptr]); // point to the next character + *out_len = end_ptr; + return basetext; +} + +// Trims characters present in the trim text from both ends of the base text +FORCE_INLINE +const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext, + gdv_int32 basetext_len, const char* trimtext, + gdv_int32 trimtext_len, int32_t* out_len) { + if (basetext_len == 0) { + *out_len = 0; + return ""; + } else if (trimtext_len == 0) { + *out_len = basetext_len; + return basetext; + } + + gdv_int32 start_ptr, end_ptr, char_len, byte_cnt = 1; + // scan the base text from left to right and increment the start and decrement the + // end pointers till there are characters which are not present in the trim text + for (start_ptr = 0; start_ptr < basetext_len; start_ptr += char_len) { + char_len = utf8_char_length(basetext[start_ptr]); + if (char_len == 0 || start_ptr + char_len > basetext_len) { + // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, basetext[start_ptr]); + *out_len = 0; + return ""; + } + if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + start_ptr, char_len)) { + break; + } + } + for (end_ptr = basetext_len - 1; end_ptr >= start_ptr; --end_ptr) { + char_len = utf8_char_length(basetext[end_ptr]); + if (char_len == 0) { // trailing byte in multibyte character + ++byte_cnt; + 
continue; + } + // this is the first byte of a character, hence check if char_len = char_cnt + if (byte_cnt != char_len) { // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, basetext[end_ptr]); + *out_len = 0; + return ""; + } + byte_cnt = 1; // reset the counter*/ + if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + end_ptr, char_len)) { + break; + } + } + + // when all characters are trimmed, start_ptr has been incremented to basetext_len and + // end_ptr still points to basetext_len - 1, hence we need to handle this case + if (start_ptr > end_ptr) { + *out_len = 0; + return ""; + } + + end_ptr += utf8_char_length(basetext[end_ptr]); // point to the next character + *out_len = end_ptr - start_ptr; + return basetext + start_ptr; +} + +FORCE_INLINE +gdv_boolean compare_lower_strings(const char* base_str, gdv_int32 base_str_len, + const char* str, gdv_int32 str_len) { + if (base_str_len != str_len) { + return false; + } + for (int i = 0; i < str_len; i++) { + // convert char to lower + char cur = str[i]; + // 'A' - 'Z' : 0x41 - 0x5a + // 'a' - 'z' : 0x61 - 0x7a + if (cur >= 0x41 && cur <= 0x5a) { + cur = static_cast<char>(cur + 0x20); + } + // if the character does not match, break the flow + if (cur != base_str[i]) break; + // if the character matches and it is the last iteration, return true + if (i == str_len - 1) return true; + } + return false; +} + +// Try to cast the received string ('0', '1', 'true', 'false'), ignoring leading +// and trailing spaces, also ignoring lower and upper case. 
+FORCE_INLINE +gdv_boolean castBIT_utf8(gdv_int64 context, const char* data, gdv_int32 data_len) { + if (data_len <= 0) { + gdv_fn_context_set_error_msg(context, "Invalid value for boolean."); + return false; + } + + // trim leading and trailing spaces + int32_t trimmed_len; + int32_t start = 0, end = data_len - 1; + while (start <= end && data[start] == ' ') { + ++start; + } + while (end >= start && data[end] == ' ') { + --end; + } + trimmed_len = end - start + 1; + const char* trimmed_data = data + start; + + // compare received string with the valid bool string values '1', '0', 'true', 'false' + if (trimmed_len == 1) { + // case for '0' and '1' value + if (trimmed_data[0] == '1') return true; + if (trimmed_data[0] == '0') return false; + } else if (trimmed_len == 4) { + // case for matching 'true' + if (compare_lower_strings("true", 4, trimmed_data, trimmed_len)) return true; + } else if (trimmed_len == 5) { + // case for matching 'false' + if (compare_lower_strings("false", 5, trimmed_data, trimmed_len)) return false; + } + // if no 'true', 'false', '0' or '1' value is found, set an error + gdv_fn_context_set_error_msg(context, "Invalid value for boolean."); + return false; +} + +FORCE_INLINE +const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value, + gdv_int64 out_len, gdv_int32* out_length) { + gdv_int32 len = static_cast<gdv_int32>(out_len); + if (len < 0) { + gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); + *out_length = 0; + return ""; + } + const char* out = + reinterpret_cast<const char*>(gdv_fn_context_arena_malloc(context, 5)); + out = value ? "true" : "false"; + *out_length = value ? ((len > 4) ? 4 : len) : ((len > 5) ? 
5 : len); + return out; +} + +// Truncates the string to given length +#define CAST_VARCHAR_FROM_VARLEN_TYPE(TYPE) \ + FORCE_INLINE \ + const char* castVARCHAR_##TYPE##_int64(gdv_int64 context, const char* data, \ + gdv_int32 data_len, int64_t out_len, \ + int32_t* out_length) { \ + int32_t len = static_cast<int32_t>(out_len); \ + \ + if (len < 0) { \ + gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \ + *out_length = 0; \ + return ""; \ + } \ + \ + if (len >= data_len || len == 0) { \ + *out_length = data_len; \ + return data; \ + } \ + \ + int32_t remaining = len; \ + int32_t index = 0; \ + bool is_multibyte = false; \ + do { \ + /* In utf8, MSB of a single byte unicode char is always 0, \ + * whereas for a multibyte character the MSB of each byte is 1. \ + * So for a single byte char, a bitwise-and with x80 (10000000) will be 0 \ + * and it won't be 0 for bytes of a multibyte char. \ + */ \ + char* data_ptr = const_cast<char*>(data); \ + \ + /* advance byte by byte till the 8-byte boundary then advance 8 bytes */ \ + auto num_bytes = reinterpret_cast<uintptr_t>(data_ptr) & 0x07; \ + num_bytes = (8 - num_bytes) & 0x07; \ + while (num_bytes > 0) { \ + uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index); \ + if ((*ptr & 0x80) != 0) { \ + is_multibyte = true; \ + break; \ + } \ + index++; \ + remaining--; \ + num_bytes--; \ + } \ + if (is_multibyte) break; \ + while (remaining >= 8) { \ + uint64_t* ptr = reinterpret_cast<uint64_t*>(data_ptr + index); \ + if ((*ptr & 0x8080808080808080) != 0) { \ + is_multibyte = true; \ + break; \ + } \ + index += 8; \ + remaining -= 8; \ + } \ + if (is_multibyte) break; \ + if (remaining >= 4) { \ + uint32_t* ptr = reinterpret_cast<uint32_t*>(data_ptr + index); \ + if ((*ptr & 0x80808080) != 0) break; \ + index += 4; \ + remaining -= 4; \ + } \ + while (remaining > 0) { \ + uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index); \ + if ((*ptr & 0x80) != 0) { \ + is_multibyte = true; \ + 
break; \ + } \ + index++; \ + remaining--; \ + } \ + if (is_multibyte) break; \ + /* reached here; all are single byte characters */ \ + *out_length = len; \ + return data; \ + } while (false); \ + \ + /* detected multibyte utf8 characters; slow path */ \ + int32_t byte_pos = \ + utf8_byte_pos(context, data + index, data_len - index, len - index); \ + if (byte_pos < 0) { \ + *out_length = 0; \ + return ""; \ + } \ + \ + *out_length = index + byte_pos; \ + return data; \ + } + +CAST_VARCHAR_FROM_VARLEN_TYPE(utf8) +CAST_VARCHAR_FROM_VARLEN_TYPE(binary) + +#undef CAST_VARCHAR_FROM_VARLEN_TYPE + +// Add functions for castVARBINARY +#define CAST_VARBINARY_FROM_STRING_AND_BINARY(TYPE) \ + GANDIVA_EXPORT \ + const char* castVARBINARY_##TYPE##_int64(gdv_int64 context, const char* data, \ + gdv_int32 data_len, int64_t out_len, \ + int32_t* out_length) { \ + int32_t len = static_cast<int32_t>(out_len); \ + if (len < 0) { \ + gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \ + *out_length = 0; \ + return ""; \ + } \ + \ + if (len >= data_len || len == 0) { \ + *out_length = data_len; \ + } else { \ + *out_length = len; \ + } \ + return data; \ + } + +CAST_VARBINARY_FROM_STRING_AND_BINARY(utf8) +CAST_VARBINARY_FROM_STRING_AND_BINARY(binary) + +#undef CAST_VARBINARY_FROM_STRING_AND_BINARY + +#define IS_NULL(NAME, TYPE) \ + FORCE_INLINE \ + bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \ + return !is_valid; \ + } + +VAR_LEN_TYPES(IS_NULL, isnull) + +#undef IS_NULL + +#define IS_NOT_NULL(NAME, TYPE) \ + FORCE_INLINE \ + bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \ + return is_valid; \ + } + +VAR_LEN_TYPES(IS_NOT_NULL, isnotnull) + +#undef IS_NOT_NULL +#undef VAR_LEN_TYPES + +/* + We follow Oracle semantics for offset: + - If position is positive, then the first glyph in the substring is determined by + counting that many glyphs forward from the beginning of the input. 
(i.e., for position == + 1 the first glyph in the substring will be identical to the first glyph in the input) + + - If position is negative, then the first glyph in the substring is determined by + counting that many glyphs backward from the end of the input. (i.e., for position == -1 + the first glyph in the substring will be identical to the last glyph in the input) + + - If position is 0 then it is treated as 1. + */ +FORCE_INLINE +const char* substr_utf8_int64_int64(gdv_int64 context, const char* input, + gdv_int32 in_data_len, gdv_int64 position, + gdv_int64 substring_length, gdv_int32* out_data_len) { + if (substring_length <= 0 || input == nullptr || in_data_len <= 0) { + *out_data_len = 0; + return ""; + } + + gdv_int64 in_glyphs_count = + static_cast<gdv_int64>(utf8_length(context, input, in_data_len)); + + // in_glyphs_count is zero if input has invalid glyphs + if (in_glyphs_count == 0) { + *out_data_len = 0; + return ""; + } + + gdv_int64 from_glyph; // from_glyph==0 indicates the first glyph of the input + if (position > 0) { + from_glyph = position - 1; + } else if (position < 0) { + from_glyph = in_glyphs_count + position; + } else { + from_glyph = 0; + } + + if (from_glyph < 0 || from_glyph >= in_glyphs_count) { + *out_data_len = 0; + return ""; + } + + gdv_int64 out_glyphs_count = substring_length; + if (substring_length > in_glyphs_count - from_glyph) { + out_glyphs_count = in_glyphs_count - from_glyph; + } + + gdv_int64 in_data_len64 = static_cast<gdv_int64>(in_data_len); + gdv_int64 start_pos = 0; + gdv_int64 end_pos = in_data_len64; + + gdv_int64 current_glyph = 0; + gdv_int64 pos = 0; + while (pos < in_data_len64) { + if (current_glyph == from_glyph) { + start_pos = pos; + } + pos += static_cast<gdv_int64>(utf8_char_length(input[pos])); + if (current_glyph - from_glyph + 1 == out_glyphs_count) { + end_pos = pos; + } + current_glyph++; + } + + if (end_pos > in_data_len64 || end_pos > INT_MAX) { + end_pos = in_data_len64; + } + + *out_data_len 
= static_cast<gdv_int32>(end_pos - start_pos); + char* ret = + reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_data_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_data_len = 0; + return ""; + } + memcpy(ret, input + start_pos, *out_data_len); + return ret; +} + +FORCE_INLINE +const char* substr_utf8_int64(gdv_int64 context, const char* input, gdv_int32 in_len, + gdv_int64 offset64, gdv_int32* out_len) { + return substr_utf8_int64_int64(context, input, in_len, offset64, in_len, out_len); +} + +FORCE_INLINE +const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len, + gdv_int32 repeat_number, gdv_int32* out_len) { + // if the repeat number is zero, then return empty string + if (repeat_number == 0 || in_len <= 0) { + *out_len = 0; + return ""; + } + // if the repeat number is a negative number, an error is set on context + if (repeat_number < 0) { + gdv_fn_context_set_error_msg(context, "Repeat number can't be negative"); + *out_len = 0; + return ""; + } + *out_len = repeat_number * in_len; + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + for (int i = 0; i < repeat_number; ++i) { + memcpy(ret + (i * in_len), in, in_len); + } + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8(gdv_int64 context, const char* left, gdv_int32 left_len, + bool left_validity, const char* right, gdv_int32 right_len, + bool right_validity, gdv_int32* out_len) { + if (!left_validity) { + left_len = 0; + } + if (!right_validity) { + right_len = 0; + } + return concatOperator_utf8_utf8(context, left, left_len, right, right_len, out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8(gdv_int64 context, const char* left, + gdv_int32 left_len, const char* right, + gdv_int32 
right_len, gdv_int32* out_len) { + *out_len = left_len + right_len; + if (*out_len <= 0) { + *out_len = 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, left, left_len); + memcpy(ret + left_len, right, right_len); + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8_utf8(gdv_int64 context, const char* in1, gdv_int32 in1_len, + bool in1_validity, const char* in2, gdv_int32 in2_len, + bool in2_validity, const char* in3, gdv_int32 in3_len, + bool in3_validity, gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if (!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + return concatOperator_utf8_utf8_utf8(context, in1, in1_len, in2, in2_len, in3, in3_len, + out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8(gdv_int64 context, const char* in1, + gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, + gdv_int32 in3_len, gdv_int32* out_len) { + *out_len = in1_len + in2_len + in3_len; + if (*out_len <= 0) { + *out_len = 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1, + gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, + bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, + const char* in4, gdv_int32 in4_len, + bool in4_validity, gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if 
(!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + if (!in4_validity) { + in4_len = 0; + } + return concatOperator_utf8_utf8_utf8_utf8(context, in1, in1_len, in2, in2_len, in3, + in3_len, in4, in4_len, out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1, + gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, + gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, gdv_int32* out_len) { + *out_len = in1_len + in2_len + in3_len + in4_len; + if (*out_len <= 0) { + *out_len = 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + memcpy(ret + in1_len + in2_len + in3_len, in4, in4_len); + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if (!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + if (!in4_validity) { + in4_len = 0; + } + if (!in5_validity) { + in5_len = 0; + } + return concatOperator_utf8_utf8_utf8_utf8_utf8(context, in1, in1_len, in2, in2_len, in3, + in3_len, in4, in4_len, in5, in5_len, + out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + 
gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, gdv_int32* out_len) { + *out_len = in1_len + in2_len + in3_len + in4_len + in5_len; + if (*out_len <= 0) { + *out_len = 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + memcpy(ret + in1_len + in2_len + in3_len, in4, in4_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len, in5, in5_len); + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if (!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + if (!in4_validity) { + in4_len = 0; + } + if (!in5_validity) { + in5_len = 0; + } + if (!in6_validity) { + in6_len = 0; + } + return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(context, in1, in1_len, in2, in2_len, + in3, in3_len, in4, in4_len, in5, + in5_len, in6, in6_len, out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, gdv_int32* out_len) { + *out_len = in1_len + in2_len + in3_len + in4_len + in5_len + in6_len; + if (*out_len <= 0) { + *out_len 
= 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + memcpy(ret + in1_len + in2_len + in3_len, in4, in4_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len, in5, in5_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len, in6, in6_len); + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if (!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + if (!in4_validity) { + in4_len = 0; + } + if (!in5_validity) { + in5_len = 0; + } + if (!in6_validity) { + in6_len = 0; + } + if (!in7_validity) { + in7_len = 0; + } + return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + context, in1, in1_len, in2, in2_len, in3, in3_len, in4, in4_len, in5, in5_len, in6, + in6_len, in7, in7_len, out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, gdv_int32* out_len) { + *out_len = in1_len + in2_len + 
in3_len + in4_len + in5_len + in6_len + in7_len; + if (*out_len <= 0) { + *out_len = 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + memcpy(ret + in1_len + in2_len + in3_len, in4, in4_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len, in5, in5_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len, in6, in6_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len, in7, in7_len); + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len, + bool in8_validity, gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if (!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + if (!in4_validity) { + in4_len = 0; + } + if (!in5_validity) { + in5_len = 0; + } + if (!in6_validity) { + in6_len = 0; + } + if (!in7_validity) { + in7_len = 0; + } + if (!in8_validity) { + in8_len = 0; + } + return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + context, in1, in1_len, in2, in2_len, in3, in3_len, in4, in4_len, in5, in5_len, in6, + in6_len, in7, in7_len, in8, in8_len, out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* 
in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8, + gdv_int32 in8_len, gdv_int32* out_len) { + *out_len = + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len + in8_len; + if (*out_len <= 0) { + *out_len = 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + memcpy(ret + in1_len + in2_len + in3_len, in4, in4_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len, in5, in5_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len, in6, in6_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len, in7, in7_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len, in8, + in8_len); + return ret; +} + +FORCE_INLINE +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len, + bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity, + gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if (!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + if (!in4_validity) 
{ + in4_len = 0; + } + if (!in5_validity) { + in5_len = 0; + } + if (!in6_validity) { + in6_len = 0; + } + if (!in7_validity) { + in7_len = 0; + } + if (!in8_validity) { + in8_len = 0; + } + if (!in9_validity) { + in9_len = 0; + } + return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + context, in1, in1_len, in2, in2_len, in3, in3_len, in4, in4_len, in5, in5_len, in6, + in6_len, in7, in7_len, in8, in8_len, in9, in9_len, out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8, + gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, gdv_int32* out_len) { + *out_len = in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len + + in8_len + in9_len; + if (*out_len <= 0) { + *out_len = 0; + return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + memcpy(ret + in1_len + in2_len + in3_len, in4, in4_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len, in5, in5_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len, in6, in6_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len, in7, in7_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len, in8, + in8_len); + memcpy( + ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len + in8_len, + in9, in9_len); + return ret; +} + +FORCE_INLINE +const char* 
concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len, + bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity, + const char* in10, gdv_int32 in10_len, bool in10_validity, gdv_int32* out_len) { + if (!in1_validity) { + in1_len = 0; + } + if (!in2_validity) { + in2_len = 0; + } + if (!in3_validity) { + in3_len = 0; + } + if (!in4_validity) { + in4_len = 0; + } + if (!in5_validity) { + in5_len = 0; + } + if (!in6_validity) { + in6_len = 0; + } + if (!in7_validity) { + in7_len = 0; + } + if (!in8_validity) { + in8_len = 0; + } + if (!in9_validity) { + in9_len = 0; + } + if (!in10_validity) { + in10_len = 0; + } + return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + context, in1, in1_len, in2, in2_len, in3, in3_len, in4, in4_len, in5, in5_len, in6, + in6_len, in7, in7_len, in8, in8_len, in9, in9_len, in10, in10_len, out_len); +} + +FORCE_INLINE +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8, + gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, const char* in10, + gdv_int32 in10_len, gdv_int32* out_len) { + *out_len = in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len + + in8_len + in9_len + in10_len; + if (*out_len <= 0) { + *out_len = 0; + 
return ""; + } + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, in1, in1_len); + memcpy(ret + in1_len, in2, in2_len); + memcpy(ret + in1_len + in2_len, in3, in3_len); + memcpy(ret + in1_len + in2_len + in3_len, in4, in4_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len, in5, in5_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len, in6, in6_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len, in7, in7_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len, in8, + in8_len); + memcpy( + ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len + in8_len, + in9, in9_len); + memcpy(ret + in1_len + in2_len + in3_len + in4_len + in5_len + in6_len + in7_len + + in8_len + in9_len, + in10, in10_len); + return ret; +} + +// Returns the numeric value of the first character of str. 
+GANDIVA_EXPORT +gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len) { + if (data_len == 0) { + return 0; + } + return static_cast<gdv_int32>(data[0]); +} + +FORCE_INLINE +const char* convert_fromUTF8_binary(gdv_int64 context, const char* bin_in, gdv_int32 len, + gdv_int32* out_len) { + *out_len = len; + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, bin_in, *out_len); + return ret; +} + +FORCE_INLINE +const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char* text_in, + int32_t text_len, + const char* char_to_replace, + int32_t char_to_replace_len, + int32_t* out_len) { + if (char_to_replace_len > 1) { + gdv_fn_context_set_error_msg(context, "Replacement of multiple bytes not supported"); + *out_len = 0; + return ""; + } + // actually the convert_replace function replaces invalid chars with an ASCII + // character so the output length will be the same as the input length + *out_len = text_len; + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + int32_t valid_bytes_to_cpy = 0; + int32_t out_byte_counter = 0; + int32_t in_byte_counter = 0; + int32_t char_len; + // scan the base text from left to right and increment the start pointer till + // looking for invalid chars to substitute + for (int text_index = 0; text_index < text_len; text_index += char_len) { + char_len = utf8_char_length(text_in[text_index]); + // only memory copy the bytes when detect invalid char + if (char_len == 0 || text_index + char_len > text_len || + !validate_utf8_following_bytes(text_in, char_len, text_index)) { + // define char_len = 1 to increase text_index by 1 (as ASCII char fits in 1 
byte) + char_len = 1; + // first copy the valid bytes until now and then replace the invalid character + memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy); + // if the replacement char is empty, the invalid char should be ignored + if (char_to_replace_len == 0) { + out_byte_counter += valid_bytes_to_cpy; + } else { + ret[out_byte_counter + valid_bytes_to_cpy] = char_to_replace[0]; + out_byte_counter += valid_bytes_to_cpy + char_len; + } + in_byte_counter += valid_bytes_to_cpy + char_len; + valid_bytes_to_cpy = 0; + continue; + } + valid_bytes_to_cpy += char_len; + } + // if invalid chars were not found, return the original string + if (out_byte_counter == 0 && in_byte_counter == 0) return text_in; + // if there are still valid bytes to copy, do it + if (valid_bytes_to_cpy != 0) { + memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy); + } + // the out length will be the out bytes copied + the missing end bytes copied + *out_len = valid_bytes_to_cpy + out_byte_counter; + return ret; +} + +// The function reverse a char array in-place +static inline void reverse_char_buf(char* buf, int32_t len) { + char temp; + + for (int32_t i = 0; i < len / 2; i++) { + int32_t pos_swp = len - (1 + i); + temp = buf[pos_swp]; + buf[pos_swp] = buf[i]; + buf[i] = temp; + } +} + +// Converts a double variable to binary +FORCE_INLINE +const char* convert_toDOUBLE(int64_t context, double value, int32_t* out_len) { + *out_len = sizeof(value); + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for the output string"); + + *out_len = 0; + return ""; + } + + memcpy(ret, &value, *out_len); + + return ret; +} + +FORCE_INLINE +const char* convert_toDOUBLE_be(int64_t context, double value, int32_t* out_len) { + // The function behaves like convert_toDOUBLE, but always return the result + // in big endian format + 
char* ret = const_cast<char*>(convert_toDOUBLE(context, value, out_len)); + +#if ARROW_LITTLE_ENDIAN + reverse_char_buf(ret, *out_len); +#endif + + return ret; +} + +// Converts a float variable to binary +FORCE_INLINE +const char* convert_toFLOAT(int64_t context, float value, int32_t* out_len) { + *out_len = sizeof(value); + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for the output string"); + + *out_len = 0; + return ""; + } + + memcpy(ret, &value, *out_len); + + return ret; +} + +FORCE_INLINE +const char* convert_toFLOAT_be(int64_t context, float value, int32_t* out_len) { + // The function behaves like convert_toFLOAT, but always return the result + // in big endian format + char* ret = const_cast<char*>(convert_toFLOAT(context, value, out_len)); + +#if ARROW_LITTLE_ENDIAN + reverse_char_buf(ret, *out_len); +#endif + + return ret; +} + +// Converts a bigint(int with 64 bits) variable to binary +FORCE_INLINE +const char* convert_toBIGINT(int64_t context, int64_t value, int32_t* out_len) { + *out_len = sizeof(value); + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for the output string"); + + *out_len = 0; + return ""; + } + + memcpy(ret, &value, *out_len); + + return ret; +} + +FORCE_INLINE +const char* convert_toBIGINT_be(int64_t context, int64_t value, int32_t* out_len) { + // The function behaves like convert_toBIGINT, but always return the result + // in big endian format + char* ret = const_cast<char*>(convert_toBIGINT(context, value, out_len)); + +#if ARROW_LITTLE_ENDIAN + reverse_char_buf(ret, *out_len); +#endif + + return ret; +} + +// Converts an integer(with 32 bits) variable to binary +FORCE_INLINE +const char* convert_toINT(int64_t context, int32_t value, int32_t* out_len) { + 
*out_len = sizeof(value); + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for the output string"); + + *out_len = 0; + return ""; + } + + memcpy(ret, &value, *out_len); + + return ret; +} + +FORCE_INLINE +const char* convert_toINT_be(int64_t context, int32_t value, int32_t* out_len) { + // The function behaves like convert_toINT, but always return the result + // in big endian format + char* ret = const_cast<char*>(convert_toINT(context, value, out_len)); + +#if ARROW_LITTLE_ENDIAN + reverse_char_buf(ret, *out_len); +#endif + + return ret; +} + +// Converts a boolean variable to binary +FORCE_INLINE +const char* convert_toBOOLEAN(int64_t context, bool value, int32_t* out_len) { + *out_len = sizeof(value); + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for the output string"); + + *out_len = 0; + return ""; + } + + memcpy(ret, &value, *out_len); + + return ret; +} + +// Converts a time variable to binary +FORCE_INLINE +const char* convert_toTIME_EPOCH(int64_t context, int32_t value, int32_t* out_len) { + return convert_toINT(context, value, out_len); +} + +FORCE_INLINE +const char* convert_toTIME_EPOCH_be(int64_t context, int32_t value, int32_t* out_len) { + // The function behaves as convert_toTIME_EPOCH, but + // returns the bytes in big endian format + return convert_toINT_be(context, value, out_len); +} + +// Converts a timestamp variable to binary +FORCE_INLINE +const char* convert_toTIMESTAMP_EPOCH(int64_t context, int64_t timestamp, + int32_t* out_len) { + return convert_toBIGINT(context, timestamp, out_len); +} + +FORCE_INLINE +const char* convert_toTIMESTAMP_EPOCH_be(int64_t context, int64_t timestamp, + int32_t* out_len) { + // The function behaves as convert_toTIMESTAMP_EPOCH, but + // 
returns the bytes in big endian format + return convert_toBIGINT_be(context, timestamp, out_len); +} + +// Converts a date variable to binary +FORCE_INLINE +const char* convert_toDATE_EPOCH(int64_t context, int64_t date, int32_t* out_len) { + return convert_toBIGINT(context, date, out_len); +} + +FORCE_INLINE +const char* convert_toDATE_EPOCH_be(int64_t context, int64_t date, int32_t* out_len) { + // The function behaves as convert_toDATE_EPOCH, but + // returns the bytes in big endian format + return convert_toBIGINT_be(context, date, out_len); +} + +// Converts a string variable to binary +FORCE_INLINE +const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len, + int32_t* out_len) { + *out_len = value_len; + return value; +} + +// Search for a string within another string +// Same as "locate(substr, str)", except for the reverse order of the arguments. +FORCE_INLINE +gdv_int32 strpos_utf8_utf8(gdv_int64 context, const char* str, gdv_int32 str_len, + const char* sub_str, gdv_int32 sub_str_len) { + return locate_utf8_utf8_int32(context, sub_str, sub_str_len, str, str_len, 1); +} + +// Search for a string within another string +FORCE_INLINE +gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len, + const char* str, gdv_int32 str_len) { + return locate_utf8_utf8_int32(context, sub_str, sub_str_len, str, str_len, 1); +} + +// Search for a string within another string starting at position start-pos (1-indexed) +FORCE_INLINE +gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str, + gdv_int32 sub_str_len, const char* str, + gdv_int32 str_len, gdv_int32 start_pos) { + if (start_pos < 1) { + gdv_fn_context_set_error_msg(context, "Start position must be greater than 0"); + return 0; + } + + if (str_len == 0 || sub_str_len == 0) { + return 0; + } + + gdv_int32 byte_pos = utf8_byte_pos(context, str, str_len, start_pos - 1); + if (byte_pos < 0 || byte_pos >= str_len) { + return 0; + } + for (gdv_int32 i 
= byte_pos; i <= str_len - sub_str_len; ++i) { + if (memcmp(str + i, sub_str, sub_str_len) == 0) { + return utf8_length(context, str, i) + 1; + } + } + return 0; +} + +FORCE_INLINE +const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text, + gdv_int32 text_len, const char* from_str, + gdv_int32 from_str_len, + const char* to_str, gdv_int32 to_str_len, + gdv_int32 max_length, + gdv_int32* out_len) { + // if from_str is empty or its length exceeds that of original string, + // return the original string + if (from_str_len <= 0 || from_str_len > text_len) { + *out_len = text_len; + return text; + } + + bool found = false; + gdv_int32 text_index = 0; + char* out; + gdv_int32 out_index = 0; + gdv_int32 last_match_index = + 0; // defer copying string from last_match_index till next match is found + + for (; text_index <= text_len - from_str_len;) { + if (memcmp(text + text_index, from_str, from_str_len) == 0) { + if (out_index + text_index - last_match_index + to_str_len > max_length) { + gdv_fn_context_set_error_msg(context, "Buffer overflow for output string"); + *out_len = 0; + return ""; + } + if (!found) { + // found match for first time + out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, max_length)); + if (out == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + found = true; + } + // first copy the part deferred till now + memcpy(out + out_index, text + last_match_index, (text_index - last_match_index)); + out_index += text_index - last_match_index; + // then copy the target string + memcpy(out + out_index, to_str, to_str_len); + out_index += to_str_len; + + text_index += from_str_len; + last_match_index = text_index; + } else { + text_index++; + } + } + + if (!found) { + *out_len = text_len; + return text; + } + + if (out_index + text_len - last_match_index > max_length) { + gdv_fn_context_set_error_msg(context, "Buffer overflow for 
output string"); + *out_len = 0; + return ""; + } + memcpy(out + out_index, text + last_match_index, text_len - last_match_index); + out_index += text_len - last_match_index; + *out_len = out_index; + return out; +} + +FORCE_INLINE +const char* replace_utf8_utf8_utf8(gdv_int64 context, const char* text, + gdv_int32 text_len, const char* from_str, + gdv_int32 from_str_len, const char* to_str, + gdv_int32 to_str_len, gdv_int32* out_len) { + return replace_with_max_len_utf8_utf8_utf8(context, text, text_len, from_str, + from_str_len, to_str, to_str_len, 65535, + out_len); +} + +FORCE_INLINE +const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, const char* fill_text, + gdv_int32 fill_text_len, gdv_int32* out_len) { + // if the text length or the defined return length (number of characters to return) + // is <=0, then return an empty string. + if (text_len == 0 || return_length <= 0) { + *out_len = 0; + return ""; + } + + // count the number of utf8 characters on text, ignoring invalid bytes + int text_char_count = utf8_length_ignore_invalid(text, text_len); + + if (return_length == text_char_count || + (return_length > text_char_count && fill_text_len == 0)) { + // case where the return length is same as the text's length, or if it need to + // fill into text but "fill_text" is empty, then return text directly. + *out_len = text_len; + return text; + } else if (return_length < text_char_count) { + // case where it truncates the result on return length. + *out_len = utf8_byte_pos(context, text, text_len, return_length); + return text; + } else { + // case (return_length > text_char_count) + // case where it needs to copy "fill_text" on the string left. 
The total number + // of chars to copy is given by (return_length - text_char_count) + char* ret = + reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, return_length)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + // try to fulfill the return string with the "fill_text" continuously + int32_t copied_chars_count = 0; + int32_t copied_chars_position = 0; + while (copied_chars_count < return_length - text_char_count) { + int32_t char_len; + int32_t fill_index; + // for each char, evaluate its length to consider it when mem copying + for (fill_index = 0; fill_index < fill_text_len; fill_index += char_len) { + if (copied_chars_count >= return_length - text_char_count) { + break; + } + char_len = utf8_char_length(fill_text[fill_index]); + // ignore invalid char on the fill text, considering it as size 1 + if (char_len == 0) char_len += 1; + copied_chars_count++; + } + memcpy(ret + copied_chars_position, fill_text, fill_index); + copied_chars_position += fill_index; + } + // after fulfilling the text, copy the main string + memcpy(ret + copied_chars_position, text, text_len); + *out_len = copied_chars_position + text_len; + return ret; + } +} + +FORCE_INLINE +const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, const char* fill_text, + gdv_int32 fill_text_len, gdv_int32* out_len) { + // if the text length or the defined return length (number of characters to return) + // is <=0, then return an empty string. 
+ if (text_len == 0 || return_length <= 0) { + *out_len = 0; + return ""; + } + + // count the number of utf8 characters on text, ignoring invalid bytes + int text_char_count = utf8_length_ignore_invalid(text, text_len); + + if (return_length == text_char_count || + (return_length > text_char_count && fill_text_len == 0)) { + // case where the return length is same as the text's length, or if it need to + // fill into text but "fill_text" is empty, then return text directly. + *out_len = text_len; + return text; + } else if (return_length < text_char_count) { + // case where it truncates the result on return length. + *out_len = utf8_byte_pos(context, text, text_len, return_length); + return text; + } else { + // case (return_length > text_char_count) + // case where it needs to copy "fill_text" on the string right + char* ret = + reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, return_length)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + // fulfill the initial text copying the main input string + memcpy(ret, text, text_len); + // try to fulfill the return string with the "fill_text" continuously + int32_t copied_chars_count = 0; + int32_t copied_chars_position = 0; + while (text_char_count + copied_chars_count < return_length) { + int32_t char_len; + int32_t fill_length; + // for each char, evaluate its length to consider it when mem copying + for (fill_length = 0; fill_length < fill_text_len; fill_length += char_len) { + if (text_char_count + copied_chars_count >= return_length) { + break; + } + char_len = utf8_char_length(fill_text[fill_length]); + // ignore invalid char on the fill text, considering it as size 1 + if (char_len == 0) char_len += 1; + copied_chars_count++; + } + memcpy(ret + text_len + copied_chars_position, fill_text, fill_length); + copied_chars_position += fill_length; + } + *out_len = copied_chars_position + text_len; + 
return ret; + } +} + +FORCE_INLINE +const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, gdv_int32* out_len) { + return lpad_utf8_int32_utf8(context, text, text_len, return_length, " ", 1, out_len); +} + +FORCE_INLINE +const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, gdv_int32* out_len) { + return rpad_utf8_int32_utf8(context, text, text_len, return_length, " ", 1, out_len); +} + +FORCE_INLINE +const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len, + const char* delimiter, gdv_int32 delim_len, gdv_int32 index, + gdv_int32* out_len) { + *out_len = 0; + if (index < 1) { + char error_message[100]; + snprintf(error_message, sizeof(error_message), + "Index in split_part must be positive, value provided was %d", index); + gdv_fn_context_set_error_msg(context, error_message); + return ""; + } + + if (delim_len == 0 || text_len == 0) { + // output will just be text if no delimiter is provided + *out_len = text_len; + return text; + } + + int i = 0, match_no = 1; + + while (i < text_len) { + // find the position where delimiter matched for the first time + int match_pos = match_string(text, text_len, i, delimiter, delim_len); + if (match_pos == -1 && match_no != index) { + // reached the end without finding a match. + return ""; + } else { + // Found a match. 
If the match number is index then return this match + if (match_no == index) { + int end_pos = match_pos - delim_len; + + if (match_pos == -1) { + // end position should be last position of the string as we have the last + // delimiter + end_pos = text_len; + } + + *out_len = end_pos - i; + char* out_str = + reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (out_str == nullptr) { + gdv_fn_context_set_error_msg(context, + "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(out_str, text + i, *out_len); + return out_str; + } else { + i = match_pos; + match_no++; + } + } + } + + return ""; +} + +// Returns the x leftmost characters of a given string. Cases: +// LEFT("TestString", 10) => "TestString" +// LEFT("TestString", 3) => "Tes" +// LEFT("TestString", -3) => "TestStr" +FORCE_INLINE +const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 number, gdv_int32* out_len) { + // returns the 'number' left most characters of a given text + if (text_len == 0 || number == 0) { + *out_len = 0; + return ""; + } + + // iterate over the utf8 string validating each character + int char_len; + int char_count = 0; + int byte_index = 0; + for (int i = 0; i < text_len; i += char_len) { + char_len = utf8_char_length(text[i]); + if (char_len == 0 || i + char_len > text_len) { // invalid byte or incomplete glyph + set_error_for_invalid_utf(context, text[i]); + *out_len = 0; + return ""; + } + for (int j = 1; j < char_len; ++j) { + if ((text[i + j] & 0xC0) != 0x80) { // bytes following head-byte of glyph + set_error_for_invalid_utf(context, text[i + j]); + *out_len = 0; + return ""; + } + } + byte_index += char_len; + ++char_count; + // Define the rules to stop the iteration over the string + // case where left('abc', 5) -> 'abc' + if (number > 0 && char_count == number) break; + // case where left('abc', -5) ==> '' + if (number < 0 && char_count == number + text_len) break; + } + 
+ *out_len = byte_index; + return text; +} + +// Returns the x rightmost characters of a given string. Cases: +// RIGHT("TestString", 10) => "TestString" +// RIGHT("TestString", 3) => "ing" +// RIGHT("TestString", -3) => "tString" +FORCE_INLINE +const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 number, gdv_int32* out_len) { + // returns the 'number' left most characters of a given text + if (text_len == 0 || number == 0) { + *out_len = 0; + return ""; + } + + // initially counts the number of utf8 characters in the defined text + int32_t char_count = utf8_length(context, text, text_len); + // char_count is zero if input has invalid utf8 char + if (char_count == 0) { + *out_len = 0; + return ""; + } + + int32_t start_char_pos; // the char result start position (inclusive) + int32_t end_char_len; // the char result end position (inclusive) + if (number > 0) { + // case where right('abc', 5) ==> 'abc' start_char_pos=1. + start_char_pos = (char_count > number) ? 
char_count - number : 0; + end_char_len = char_count - start_char_pos; + } else { + start_char_pos = number * -1; + end_char_len = char_count - start_char_pos; + } + + // calculate the start byte position and the output length + int32_t start_byte_pos = utf8_byte_pos(context, text, text_len, start_char_pos); + *out_len = utf8_byte_pos(context, text, text_len, end_char_len); + + // try to allocate memory for the response + char* ret = + reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + memcpy(ret, text + start_byte_pos, *out_len); + return ret; +} + +FORCE_INLINE +const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32* out_len) { + gdv_binary ret = + reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, text_len)); + + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + + if (text_len == 0) { + *out_len = 0; + return ""; + } + + // converting hex encoded string to normal string + int j = 0; + for (int i = 0; i < text_len; i++, j++) { + if (text[i] == '\\' && i + 3 < text_len && + (text[i + 1] == 'x' || text[i + 1] == 'X')) { + char hd1 = text[i + 2]; + char hd2 = text[i + 3]; + if (isxdigit(hd1) && isxdigit(hd2)) { + // [a-fA-F0-9] + ret[j] = to_binary_from_hex(hd1) * 16 + to_binary_from_hex(hd2); + i += 3; + } else { + ret[j] = text[i]; + } + } else { + ret[j] = text[i]; + } + } + *out_len = j; + return ret; +} + +#define CAST_INT_BIGINT_VARBINARY(OUT_TYPE, TYPE_NAME) \ + FORCE_INLINE \ + OUT_TYPE \ + cast##TYPE_NAME##_varbinary(gdv_int64 context, const char* in, int32_t in_len) { \ + if (in_len == 0) { \ + gdv_fn_context_set_error_msg(context, "Can't cast an empty string."); \ + return -1; \ + } \ + char sign = in[0]; \ + \ + bool negative = false; \ 
+ if (sign == '-') { \ + negative = true; \ + /* Ignores the sign char in the hexadecimal string */ \ + in++; \ + in_len--; \ + } \ + \ + if (negative && in_len == 0) { \ + gdv_fn_context_set_error_msg(context, \ + "Can't cast hexadecimal with only a minus sign."); \ + return -1; \ + } \ + \ + OUT_TYPE result = 0; \ + int digit; \ + \ + int read_index = 0; \ + while (read_index < in_len) { \ + char c1 = in[read_index]; \ + if (isxdigit(c1)) { \ + digit = to_binary_from_hex(c1); \ + \ + OUT_TYPE next = result * 16 - digit; \ + \ + if (next > result) { \ + gdv_fn_context_set_error_msg(context, "Integer overflow."); \ + return -1; \ + } \ + result = next; \ + read_index++; \ + } else { \ + gdv_fn_context_set_error_msg(context, \ + "The hexadecimal given has invalid characters."); \ + return -1; \ + } \ + } \ + if (!negative) { \ + result *= -1; \ + \ + if (result < 0) { \ + gdv_fn_context_set_error_msg(context, "Integer overflow."); \ + return -1; \ + } \ + } \ + return result; \ + } + +CAST_INT_BIGINT_VARBINARY(int32_t, INT) +CAST_INT_BIGINT_VARBINARY(int64_t, BIGINT) + +#undef CAST_INT_BIGINT_VARBINARY + +// Produces the binary representation of a string y characters long derived by starting +// at offset 'x' and considering the defined length 'y'. Notice that the offset index +// may be a negative number (starting from the end of the string), or a positive number +// starting on index 1. 
Cases: +// BYTE_SUBSTR("TestString", 1, 10) => "TestString" +// BYTE_SUBSTR("TestString", 5, 10) => "String" +// BYTE_SUBSTR("TestString", -6, 10) => "String" +// BYTE_SUBSTR("TestString", -600, 10) => "TestString" +FORCE_INLINE +const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text, + gdv_int32 text_len, gdv_int32 offset, + gdv_int32 length, gdv_int32* out_len) { + // the first offset position for a string is 1, so not consider offset == 0 + // also, the length should be always a positive number + if (text_len == 0 || offset == 0 || length <= 0) { + *out_len = 0; + return ""; + } + + char* ret = + reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, text_len)); + + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + + int32_t startPos = 0; + if (offset >= 0) { + startPos = offset - 1; + } else if (text_len + offset >= 0) { + startPos = text_len + offset; + } + + // calculate end position from length and truncate to upper value bounds + if (startPos + length > text_len) { + *out_len = text_len - startPos; + } else { + *out_len = length; + } + + memcpy(ret, text + startPos, *out_len); + return ret; +} +} // extern "C" diff --git a/src/arrow/cpp/src/gandiva/precompiled/string_ops_test.cc b/src/arrow/cpp/src/gandiva/precompiled/string_ops_test.cc new file mode 100644 index 000000000..6221dffb3 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/string_ops_test.cc @@ -0,0 +1,1758 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include <limits> + +#include "gandiva/execution_context.h" +#include "gandiva/precompiled/types.h" + +namespace gandiva { + +TEST(TestStringOps, TestCompare) { + const char* left = "abcd789"; + const char* right = "abcd123"; + + // 0 for equal + EXPECT_EQ(mem_compare(left, 4, right, 4), 0); + + // compare lengths if the prefixes match + EXPECT_GT(mem_compare(left, 5, right, 4), 0); + EXPECT_LT(mem_compare(left, 4, right, 5), 0); + + // compare bytes if the prefixes don't match + EXPECT_GT(mem_compare(left, 5, right, 5), 0); + EXPECT_GT(mem_compare(left, 5, right, 7), 0); + EXPECT_GT(mem_compare(left, 7, right, 5), 0); +} + +TEST(TestStringOps, TestAscii) { + // ASCII + EXPECT_EQ(ascii_utf8("ABC", 3), 65); + EXPECT_EQ(ascii_utf8("abc", 3), 97); + EXPECT_EQ(ascii_utf8("Hello World!", 12), 72); + EXPECT_EQ(ascii_utf8("This is us", 10), 84); + EXPECT_EQ(ascii_utf8("", 0), 0); + EXPECT_EQ(ascii_utf8("123", 3), 49); + EXPECT_EQ(ascii_utf8("999", 3), 57); +} + +TEST(TestStringOps, TestBeginsEnds) { + // starts_with + EXPECT_TRUE(starts_with_utf8_utf8("hello sir", 9, "hello", 5)); + EXPECT_TRUE(starts_with_utf8_utf8("hellos", 6, "hello", 5)); + EXPECT_TRUE(starts_with_utf8_utf8("hello", 5, "hello", 5)); + EXPECT_FALSE(starts_with_utf8_utf8("hell", 4, "hello", 5)); + EXPECT_FALSE(starts_with_utf8_utf8("world hello", 11, "hello", 5)); + + // ends_with + EXPECT_TRUE(ends_with_utf8_utf8("hello sir", 9, "sir", 3)); + EXPECT_TRUE(ends_with_utf8_utf8("ssir", 4, "sir", 3)); + 
EXPECT_TRUE(ends_with_utf8_utf8("sir", 3, "sir", 3)); + EXPECT_FALSE(ends_with_utf8_utf8("ir", 2, "sir", 3)); + EXPECT_FALSE(ends_with_utf8_utf8("hello", 5, "sir", 3)); +} + +TEST(TestStringOps, TestSpace) { + // Space - returns a string with 'n' spaces + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + int32_t out_len = 0; + + auto out = space_int32(ctx_ptr, 1, &out_len); + EXPECT_EQ(std::string(out, out_len), " "); + out = space_int32(ctx_ptr, 10, &out_len); + EXPECT_EQ(std::string(out, out_len), " "); + out = space_int32(ctx_ptr, 5, &out_len); + EXPECT_EQ(std::string(out, out_len), " "); + out = space_int32(ctx_ptr, -5, &out_len); + EXPECT_EQ(std::string(out, out_len), ""); + + out = space_int64(ctx_ptr, 2, &out_len); + EXPECT_EQ(std::string(out, out_len), " "); + out = space_int64(ctx_ptr, 9, &out_len); + EXPECT_EQ(std::string(out, out_len), " "); + out = space_int64(ctx_ptr, 4, &out_len); + EXPECT_EQ(std::string(out, out_len), " "); + out = space_int64(ctx_ptr, -5, &out_len); + EXPECT_EQ(std::string(out, out_len), ""); +} + +TEST(TestStringOps, TestIsSubstr) { + EXPECT_TRUE(is_substr_utf8_utf8("hello world", 11, "world", 5)); + EXPECT_TRUE(is_substr_utf8_utf8("hello world", 11, "lo wo", 5)); + EXPECT_FALSE(is_substr_utf8_utf8("hello world", 11, "adsed", 5)); + EXPECT_FALSE(is_substr_utf8_utf8("hel", 3, "hello", 5)); + EXPECT_TRUE(is_substr_utf8_utf8("hello", 5, "hello", 5)); + EXPECT_TRUE(is_substr_utf8_utf8("hello world", 11, "", 0)); +} + +TEST(TestStringOps, TestCharLength) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + + EXPECT_EQ(utf8_length(ctx_ptr, "hello sir", 9), 9); + + std::string a("âpple"); + EXPECT_EQ(utf8_length(ctx_ptr, a.data(), static_cast<int>(a.length())), 5); + + std::string b("मदन"); + EXPECT_EQ(utf8_length(ctx_ptr, b.data(), static_cast<int>(b.length())), 3); + + // invalid utf8 + std::string c("\xf8\x28"); + EXPECT_EQ(utf8_length(ctx_ptr, c.data(), 
static_cast<int>(c.length())), 0); + EXPECT_TRUE(ctx.get_error().find( + "unexpected byte \\f8 encountered while decoding utf8 string") != + std::string::npos) + << ctx.get_error(); + ctx.Reset(); + + std::string d("aa\xc3"); + EXPECT_EQ(utf8_length(ctx_ptr, d.data(), static_cast<int>(d.length())), 0); + EXPECT_TRUE(ctx.get_error().find( + "unexpected byte \\c3 encountered while decoding utf8 string") != + std::string::npos) + << ctx.get_error(); + ctx.Reset(); + + std::string e( + "a\xc3" + "a"); + EXPECT_EQ(utf8_length(ctx_ptr, e.data(), static_cast<int>(e.length())), 0); + EXPECT_TRUE(ctx.get_error().find( + "unexpected byte \\61 encountered while decoding utf8 string") != + std::string::npos) + << ctx.get_error(); + ctx.Reset(); + + std::string f( + "a\xc3\xe3" + "a"); + EXPECT_EQ(utf8_length(ctx_ptr, f.data(), static_cast<int>(f.length())), 0); + EXPECT_TRUE(ctx.get_error().find( + "unexpected byte \\e3 encountered while decoding utf8 string") != + std::string::npos) + << ctx.get_error(); + ctx.Reset(); +} + +TEST(TestStringOps, TestConvertReplaceInvalidUtf8Char) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + + // invalid utf8 (xf8 is invalid but x28 is not - x28 = '(') + std::string a( + "ok-\xf8\x28" + "-a"); + auto a_in_out_len = static_cast<int>(a.length()); + const char* a_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, a.data(), a_in_out_len, "a", 1, &a_in_out_len); + EXPECT_EQ(std::string(a_str, a_in_out_len), "ok-a(-a"); + EXPECT_FALSE(ctx.has_error()); + + // invalid utf8 (xa0 and xa1 are invalid) + std::string b("ok-\xa0\xa1-valid"); + auto b_in_out_len = static_cast<int>(b.length()); + const char* b_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, b.data(), b_in_out_len, "b", 1, &b_in_out_len); + EXPECT_EQ(std::string(b_str, b_in_out_len), "ok-bb-valid"); + EXPECT_FALSE(ctx.has_error()); + + // full valid utf8 + std::string c("all-valid"); + auto c_in_out_len = 
static_cast<int>(c.length()); + const char* c_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, c.data(), c_in_out_len, "c", 1, &c_in_out_len); + EXPECT_EQ(std::string(c_str, c_in_out_len), "all-valid"); + EXPECT_FALSE(ctx.has_error()); + + // valid utf8 (महसुस is 4-char string, each char of which is likely a multibyte char) + std::string d("ok-महसुस-valid-new"); + auto d_in_out_len = static_cast<int>(d.length()); + const char* d_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, d.data(), d_in_out_len, "d", 1, &d_in_out_len); + EXPECT_EQ(std::string(d_str, d_in_out_len), "ok-महसुस-valid-new"); + EXPECT_FALSE(ctx.has_error()); + + // full valid utf8, but invalid replacement char length + std::string e("all-valid"); + auto e_in_out_len = static_cast<int>(e.length()); + const char* e_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, e.data(), e_in_out_len, "ee", 2, &e_in_out_len); + EXPECT_EQ(std::string(e_str, e_in_out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + // invalid utf8 (xa0 and xa1 are invalid) with empty replacement char length + std::string f("ok-\xa0\xa1-valid"); + auto f_in_out_len = static_cast<int>(f.length()); + const char* f_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, f.data(), f_in_out_len, "", 0, &f_in_out_len); + EXPECT_EQ(std::string(f_str, f_in_out_len), "ok--valid"); + EXPECT_FALSE(ctx.has_error()); + ctx.Reset(); + + // invalid utf8 (xa0 and xa1 are invalid) with empty replacement char length + std::string g("\xa0\xa1-ok-\xa0\xa1-valid-\xa0\xa1"); + auto g_in_out_len = static_cast<int>(g.length()); + const char* g_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, g.data(), g_in_out_len, "", 0, &g_in_out_len); + EXPECT_EQ(std::string(g_str, g_in_out_len), "-ok--valid-"); + EXPECT_FALSE(ctx.has_error()); + ctx.Reset(); + + std::string h("\xa0\xa1-valid"); + auto h_in_out_len = static_cast<int>(h.length()); + const char* h_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, 
h.data(), h_in_out_len, "", 0, &h_in_out_len); + EXPECT_EQ(std::string(h_str, h_in_out_len), "-valid"); + EXPECT_FALSE(ctx.has_error()); + ctx.Reset(); + + std::string i("\xa0\xa1-valid-\xa0\xa1-valid-\xa0\xa1"); + auto i_in_out_len = static_cast<int>(i.length()); + const char* i_str = convert_replace_invalid_fromUTF8_binary( + ctx_ptr, i.data(), i_in_out_len, "", 0, &i_in_out_len); + EXPECT_EQ(std::string(i_str, i_in_out_len), "-valid--valid-"); + EXPECT_FALSE(ctx.has_error()); + ctx.Reset(); +} + +TEST(TestStringOps, TestRepeat) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const char* out_str = repeat_utf8_int32(ctx_ptr, "abc", 3, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "abcabc"); + EXPECT_FALSE(ctx.has_error()); + + out_str = repeat_utf8_int32(ctx_ptr, "a", 1, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "aaaaa"); + EXPECT_FALSE(ctx.has_error()); + + out_str = repeat_utf8_int32(ctx_ptr, "", 0, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = repeat_utf8_int32(ctx_ptr, "", -20, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = repeat_utf8_int32(ctx_ptr, "a", 1, -10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Repeat number can't be negative")); + ctx.Reset(); +} + +TEST(TestStringOps, TestCastBoolToVarchar) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const char* out_str = castVARCHAR_bool_int64(ctx_ptr, true, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "tr"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_bool_int64(ctx_ptr, true, 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "true"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_bool_int64(ctx_ptr, 
false, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "fals"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_bool_int64(ctx_ptr, false, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "false"); + EXPECT_FALSE(ctx.has_error()); + + castVARCHAR_bool_int64(ctx_ptr, true, -3, &out_len); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Output buffer length can't be negative")); + ctx.Reset(); +} + +TEST(TestStringOps, TestCastVarcharToBool) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "true", 4), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, " true ", 14), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "true ", 9), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, " true", 9), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "TRUE", 4), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "TrUe", 4), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "1", 1), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, " 1", 3), true); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "false", 5), false); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "false ", 10), false); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, " false", 10), false); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "0", 1), false); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "0 ", 4), false); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "FALSE", 5), false); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "FaLsE", 5), false); + EXPECT_FALSE(ctx.has_error()); + + EXPECT_EQ(castBIT_utf8(ctx_ptr, "test", 4), false); + EXPECT_TRUE(ctx.has_error()); + 
EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Invalid value for boolean")); + ctx.Reset(); +} + +TEST(TestStringOps, TestCastVarchar) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + // BINARY TESTS + const char* out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "a"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + // do not truncate if output length is 0 + out_str = castVARCHAR_binary_int64(ctx_ptr, "asdf", 4, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "", 0, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 10, &out_len); + 
EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†", 9, 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "abc", 3, -1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Output buffer length can't be negative")); + ctx.Reset(); + + std::string z("aa\xc3"); + out_str = castVARCHAR_binary_int64(ctx_ptr, z.data(), static_cast<int>(z.length()), 2, + &out_len); + EXPECT_EQ(std::string(out_str, out_len), "aa"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234567812341234"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 15, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123456781234123"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 12, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123456781234"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 8, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "12345678"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234567"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812341234", 16, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "1234567812çåå†123456", 
25, 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234567812çåå†12"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "123456781234çåå†1234", 25, 15, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "12çåå†34567812123456", 25, 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "12çåå†3456781212"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†1234567812123456", 25, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "çåå†1234567812123456", 25, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_binary_int64(ctx_ptr, "123456781234çåå†", 21, 40, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå†"); + EXPECT_FALSE(ctx.has_error()); + + std::string f("123456781234çåå\xc3"); + out_str = castVARCHAR_binary_int64(ctx_ptr, f.data(), static_cast<int32_t>(f.length()), + 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr( + "unexpected byte \\c3 encountered while decoding utf8 string")); + ctx.Reset(); + + // UTF8 TESTS + out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "a"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + 
out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + // do not truncate if output length is 0 + out_str = castVARCHAR_utf8_int64(ctx_ptr, "asdf", 4, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "", 0, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†", 9, 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "abc", 3, -1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Output buffer length can't be negative")); + ctx.Reset(); + + std::string d("aa\xc3"); + out_str = castVARCHAR_utf8_int64(ctx_ptr, d.data(), static_cast<int>(d.length()), 2, + &out_len); + EXPECT_EQ(std::string(out_str, out_len), "aa"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234567812341234"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 15, &out_len); + 
EXPECT_EQ(std::string(out_str, out_len), "123456781234123"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 12, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123456781234"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 8, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "12345678"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234567"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812341234", 16, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "1234567812çåå†123456", 25, 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "1234567812çåå†12"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "123456781234çåå†1234", 25, 15, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123456781234çåå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "12çåå†34567812123456", 25, 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "12çåå†3456781212"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†1234567812123456", 25, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "çåå†1234567812123456", 25, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çåå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = castVARCHAR_utf8_int64(ctx_ptr, "123456781234çåå†", 21, 40, &out_len); + EXPECT_EQ(std::string(out_str, out_len), 
"123456781234çåå†"); + EXPECT_FALSE(ctx.has_error()); + + std::string y("123456781234çåå\xc3"); + out_str = castVARCHAR_utf8_int64(ctx_ptr, y.data(), static_cast<int32_t>(y.length()), + 16, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr( + "unexpected byte \\c3 encountered while decoding utf8 string")); + ctx.Reset(); +} + +TEST(TestStringOps, TestSubstring) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const char* out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 1, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 1, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "as"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 1, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, 0, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, -2, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "df"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "asdf", 4, -5, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "अपाचे एरो", 25, 1, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "अपाचे"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "अपाचे एरो", 25, 7, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "एरो"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 4, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "†"); + EXPECT_FALSE(ctx.has_error()); + + 
out_str = substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 2, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "åå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 0, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "çå"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "afg", 4, 0, -5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64_int64(ctx_ptr, "", 0, 5, 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64(ctx_ptr, "abcd", 4, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "bcd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64(ctx_ptr, "abcd", 4, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "abcd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = substr_utf8_int64(ctx_ptr, "çåå†", 9, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "åå†"); + EXPECT_FALSE(ctx.has_error()); +} + +TEST(TestStringOps, TestSubstringInvalidInputs) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + char bytes[] = {'\xA7', 'a'}; + const char* out_str = substr_utf8_int64_int64(ctx_ptr, bytes, 2, 1, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + char midbytes[] = {'c', '\xA7', 'a'}; + out_str = substr_utf8_int64_int64(ctx_ptr, midbytes, 3, 1, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + char midbytes2[] = {'\xC3', 'a', 'a'}; + out_str = substr_utf8_int64_int64(ctx_ptr, midbytes2, 3, 1, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + char endbytes[] = {'a', 'a', '\xA7'}; + out_str = substr_utf8_int64_int64(ctx_ptr, endbytes, 3, 1, 1, &out_len); + 
EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + char endbytes2[] = {'a', 'a', '\xC3'}; + out_str = substr_utf8_int64_int64(ctx_ptr, endbytes2, 3, 1, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + out_str = substr_utf8_int64_int64(ctx_ptr, "çåå†", 9, 2147483656, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); +} + +TEST(TestGdvFnStubs, TestCastVarbinaryUtf8) { + gandiva::ExecutionContext ctx; + + int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx); + int32_t out_len = 0; + const char* input = "abc"; + const char* out; + + out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 0, &out_len); + EXPECT_EQ(std::string(out, out_len), input); + + out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 1, &out_len); + EXPECT_EQ(std::string(out, out_len), "a"); + + out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, 500, &out_len); + EXPECT_EQ(std::string(out, out_len), input); + + out = castVARBINARY_utf8_int64(ctx_ptr, input, 3, -10, &out_len); + EXPECT_EQ(std::string(out, out_len), ""); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Output buffer length can't be negative")); + ctx.Reset(); +} + +TEST(TestGdvFnStubs, TestCastVarbinaryBinary) { + gandiva::ExecutionContext ctx; + + int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx); + int32_t out_len = 0; + const char* input = "\\x41\\x42\\x43"; + const char* out; + + out = castVARBINARY_binary_int64(ctx_ptr, input, 12, 0, &out_len); + EXPECT_EQ(std::string(out, out_len), input); + + out = castVARBINARY_binary_int64(ctx_ptr, input, 8, 8, &out_len); + EXPECT_EQ(std::string(out, out_len), "\\x41\\x42"); + + out = castVARBINARY_binary_int64(ctx_ptr, input, 12, 500, &out_len); + EXPECT_EQ(std::string(out, out_len), input); + + out = castVARBINARY_binary_int64(ctx_ptr, input, 12, -10, &out_len); + EXPECT_EQ(std::string(out, out_len), ""); + EXPECT_THAT(ctx.get_error(), + 
::testing::HasSubstr("Output buffer length can't be negative")); + ctx.Reset(); +} + +TEST(TestStringOps, TestConcat) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const char* out_str = + concat_utf8_utf8(ctx_ptr, "abcd", 4, true, "\npq", 3, false, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "abcd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8(ctx_ptr, "asdf", 4, "jkl", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdfjkl"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8(ctx_ptr, "asdf", 4, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "asdf"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8(ctx_ptr, "", 0, "jkl", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "jkl"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8(ctx_ptr, "", 0, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8(ctx_ptr, "abcd\n", 5, "a", 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "abcd\na"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concat_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, + true, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqard"); + EXPECT_FALSE(ctx.has_error()); + + out_str = + concatOperator_utf8_utf8_utf8(ctx_ptr, "abcd\n", 5, "a", 1, "bcd", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "abcd\nabcd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8(ctx_ptr, "abcd", 4, "a", 1, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "abcda"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8(ctx_ptr, "", 0, "a", 1, "pqrs", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "apqrs"); + EXPECT_FALSE(ctx.has_error()); + + out_str = 
concat_utf8_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", + 3, true, "uvw", 3, false, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqard"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8_utf8(ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4, "y", + 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concat_utf8_utf8_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true, + "ard", 3, true, "uvw", 3, false, "abc\n", 4, + true, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\n"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8_utf8_utf8(ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4, + "y", 1, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false, + "abc\n", 4, true, "sdfgs", 5, true, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4, "y", 1, "", 0, "\nbcd", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy\nbcd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false, + "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3, + &out_len); + EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjkl"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + 
ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false, + "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3, "", 0, + &out_len); + EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjkl"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false, + "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, "qwert|n", 7, + true, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgsqwert|n"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3, "", 0, + "sfl\n", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjklsfl\n"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false, + "abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, "qwert|n", 7, + true, "ewfwe", 5, false, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgsqwert|n"); + EXPECT_FALSE(ctx.has_error()); + + out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "", 0, "jkl", 3, + "sfl\n", 4, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjklsfl\n"); + EXPECT_FALSE(ctx.has_error()); +} + +TEST(TestStringOps, TestReverse) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const 
char* out_str; + out_str = reverse_utf8(ctx_ptr, "TestString", 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "gnirtStseT"); + EXPECT_FALSE(ctx.has_error()); + + out_str = reverse_utf8(ctx_ptr, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = reverse_utf8(ctx_ptr, "çåå†", 9, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "†ååç"); + EXPECT_FALSE(ctx.has_error()); + + std::string d("aa\xc3"); + out_str = reverse_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr( + "unexpected byte \\c3 encountered while decoding utf8 string")); + ctx.Reset(); +} + +TEST(TestStringOps, TestLtrim) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + out_str = ltrim_utf8(ctx_ptr, "TestString ", 12, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString "); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8(ctx_ptr, " TestString ", 18, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString "); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8(ctx_ptr, " Test çåå†bD", 18, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test çåå†bD"); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8(ctx_ptr, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8(ctx_ptr, " ", 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = 
ltrim_utf8_utf8(ctx_ptr, "abcbbaccabbcdef", 15, "abc", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "def"); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8_utf8(ctx_ptr, "abcbbaccabbcdef", 15, "ababbac", 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "def"); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8_utf8(ctx_ptr, "ååçåå†eç†Dd", 21, "çåå†", 9, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "eç†Dd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + std::string d( + "aa\xc3" + "bcd"); + out_str = + ltrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "\xc3" + "bcd"); + EXPECT_FALSE(ctx.has_error()); + + std::string e( + "åå\xe0\xa0" + "bcd"); + out_str = + ltrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "\xE0\xa0" + "bcd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = ltrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); +} + +TEST(TestStringOps, TestLpadString) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + // LPAD function tests - with defined fill pad text + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test"); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + + out_str = 
lpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "FillFillTestString"); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "FillFTestString"); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "FillFillFiTestString"); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "ддабвгд"); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд"); + + out_str = lpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "дhello"); + + // LPAD function tests - with NO pad text + out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test"); + + out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + + out_str = lpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = lpad_utf8_int32(ctx_ptr, 
"TestString", 10, -500, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len); + EXPECT_EQ(std::string(out_str, out_len), " TestString"); + + out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len); + EXPECT_EQ(std::string(out_str, out_len), " TestString"); + + out_str = lpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), " абвгд"); +} + +TEST(TestStringOps, TestRpadString) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + // RPAD function tests - with defined fill pad text + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test"); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFill"); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestStringFillF"); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFillFi"); + 
+ out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "абвгддд"); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд"); + + out_str = rpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "helloд"); + + // RPAD function tests - with NO pad text + out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test"); + + out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + + out_str = rpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString "); + + out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString "); + + out_str = rpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "абвгд "); +} + +TEST(TestStringOps, TestRtrim) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + out_str = rtrim_utf8(ctx_ptr, " TestString", 12, &out_len); + EXPECT_EQ(std::string(out_str, out_len), " TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8(ctx_ptr, " TestString ", 18, &out_len); + EXPECT_EQ(std::string(out_str, out_len), " TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8(ctx_ptr, "Test çåå†bD ", 20, 
&out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test çåå†bD"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8(ctx_ptr, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8(ctx_ptr, " ", 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "ring", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestSt"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "defabcbbaccabbc", 15, "abc", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "def"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "defabcbbaccabbc", 15, "ababbac", 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "def"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "eDdç†ååçåå†", 21, "çåå†", 9, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "eDd"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + std::string d( + "\xc3" + "aaa"); + out_str = + rtrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + std::string e( + "\xe0\xa0" + "åå"); + out_str = + rtrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + out_str = 
rtrim_utf8_utf8(ctx_ptr, "åeçå", 7, "çå", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "åe"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = rtrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); +} + +TEST(TestStringOps, TestBtrim) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + out_str = btrim_utf8(ctx_ptr, "TestString", 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8(ctx_ptr, " TestString ", 18, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8(ctx_ptr, " Test çåå†bD ", 21, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test çåå†bD"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8(ctx_ptr, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8(ctx_ptr, " ", 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "Test", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "String"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Tes"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), 
"TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "abcbbadefccabbc", 15, "abc", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "def"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "abcbbadefccabbc", 15, "ababbac", 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "def"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "ååçåå†Ddeç†", 21, "çåå†", 9, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Dde"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + ctx.Reset(); + + std::string d( + "acd\xc3" + "aaa"); + out_str = + btrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + std::string e( + "åbc\xe0\xa0" + "åå"); + out_str = + btrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_TRUE(ctx.has_error()); + ctx.Reset(); + + std::string f( + "aa\xc3" + "bcd"); + out_str = + btrim_utf8_utf8(ctx_ptr, f.data(), static_cast<int>(f.length()), "a", 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "\xc3" + "bcd"); + EXPECT_FALSE(ctx.has_error()); + + std::string g( + "åå\xe0\xa0" + "bcå"); + out_str = + btrim_utf8_utf8(ctx_ptr, g.data(), static_cast<int>(g.length()), "å", 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), + "\xe0\xa0" + "bc"); + + out_str = btrim_utf8_utf8(ctx_ptr, "åe†çå", 10, "çå", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "e†"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = btrim_utf8_utf8(ctx_ptr, 
"acbabbcabb", 10, "abcbd", 5, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); +} + +TEST(TestStringOps, TestLocate) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + + int pos; + + pos = locate_utf8_utf8(ctx_ptr, "String", 6, "TestString", 10); + EXPECT_EQ(pos, 5); + EXPECT_FALSE(ctx.has_error()); + + pos = locate_utf8_utf8_int32(ctx_ptr, "String", 6, "TestString", 10, 1); + EXPECT_EQ(pos, 5); + EXPECT_FALSE(ctx.has_error()); + + pos = locate_utf8_utf8_int32(ctx_ptr, "abc", 3, "abcabc", 6, 2); + EXPECT_EQ(pos, 4); + EXPECT_FALSE(ctx.has_error()); + + pos = locate_utf8_utf8(ctx_ptr, "çåå", 6, "s†å†emçåå†d", 21); + EXPECT_EQ(pos, 7); + EXPECT_FALSE(ctx.has_error()); + + pos = locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "†barbar", 9, 3); + EXPECT_EQ(pos, 5); + EXPECT_FALSE(ctx.has_error()); + + pos = locate_utf8_utf8_int32(ctx_ptr, "sub", 3, "", 0, 1); + EXPECT_EQ(pos, 0); + EXPECT_FALSE(ctx.has_error()); + + pos = locate_utf8_utf8_int32(ctx_ptr, "", 0, "str", 3, 1); + EXPECT_EQ(pos, 0); + EXPECT_FALSE(ctx.has_error()); + + pos = locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "barbar", 6, 0); + EXPECT_EQ(pos, 0); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Start position must be greater than 0")); + ctx.Reset(); + + pos = locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "barbar", 6, 7); + EXPECT_EQ(pos, 0); + EXPECT_FALSE(ctx.has_error()); + + std::string d( + "a\xff" + "c"); + pos = + locate_utf8_utf8_int32(ctx_ptr, "c", 1, d.data(), static_cast<int>(d.length()), 3); + EXPECT_EQ(pos, 0); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr( + "unexpected byte \\ff encountered while decoding utf8 string")); + ctx.Reset(); +} + +TEST(TestStringOps, TestByteSubstr) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const char* out_str; + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 10, 
&out_len); + EXPECT_EQ(std::string(out_str, out_len), "String"); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, -6, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "String"); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 0, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 0, -500, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test"); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 1, 1000, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Str"); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, 5, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "String"); + EXPECT_FALSE(ctx.has_error()); + + out_str = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, -100, 10, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); +} + +TEST(TestStringOps, TestStrPos) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + + int pos; + + pos = strpos_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6); + EXPECT_EQ(pos, 5); + EXPECT_FALSE(ctx.has_error()); + + pos = strpos_utf8_utf8(ctx_ptr, 
"TestString", 10, "String", 6); + EXPECT_EQ(pos, 5); + EXPECT_FALSE(ctx.has_error()); + + pos = strpos_utf8_utf8(ctx_ptr, "abcabc", 6, "abc", 3); + EXPECT_EQ(pos, 1); + EXPECT_FALSE(ctx.has_error()); + + pos = strpos_utf8_utf8(ctx_ptr, "s†å†emçåå†d", 21, "çåå", 6); + EXPECT_EQ(pos, 7); + EXPECT_FALSE(ctx.has_error()); + + pos = strpos_utf8_utf8(ctx_ptr, "†barbar", 9, "bar", 3); + EXPECT_EQ(pos, 2); + EXPECT_FALSE(ctx.has_error()); + + pos = strpos_utf8_utf8(ctx_ptr, "", 0, "sub", 3); + EXPECT_EQ(pos, 0); + EXPECT_FALSE(ctx.has_error()); + + pos = strpos_utf8_utf8(ctx_ptr, "str", 3, "", 0); + EXPECT_EQ(pos, 0); + EXPECT_FALSE(ctx.has_error()); + + std::string d( + "a\xff" + "c"); + pos = strpos_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "c", 1); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr( + "unexpected byte \\ff encountered while decoding utf8 string")); + ctx.Reset(); +} + +TEST(TestStringOps, TestReplace) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + + const char* out_str; + out_str = replace_utf8_utf8_utf8(ctx_ptr, "TestString1String2", 18, "String", 6, + "Replace", 7, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestReplace1Replace2"); + EXPECT_FALSE(ctx.has_error()); + + out_str = + replace_utf8_utf8_utf8(ctx_ptr, "TestString1", 11, "String", 6, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test1"); + EXPECT_FALSE(ctx.has_error()); + + out_str = replace_utf8_utf8_utf8(ctx_ptr, "", 0, "test", 4, "rep", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = replace_utf8_utf8_utf8(ctx_ptr, "Ç††çåå†", 17, "†", 3, "t", 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Çttçååt"); + EXPECT_FALSE(ctx.has_error()); + + out_str = replace_utf8_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, "rep", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + 
EXPECT_FALSE(ctx.has_error()); + + out_str = + replace_utf8_utf8_utf8(ctx_ptr, "Test", 4, "TestString", 10, "rep", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "Test"); + EXPECT_FALSE(ctx.has_error()); + + out_str = replace_utf8_utf8_utf8(ctx_ptr, "Test", 4, "Test", 4, "", 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_FALSE(ctx.has_error()); + + out_str = + replace_utf8_utf8_utf8(ctx_ptr, "TestString", 10, "abc", 3, "xyz", 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "TestString"); + EXPECT_FALSE(ctx.has_error()); + + replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "Hell", 4, "ell", 3, "ollow", 5, 5, + &out_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string")); + ctx.Reset(); + + replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "eeee", 4, "e", 1, "aaaa", 4, 14, + &out_len); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string")); + ctx.Reset(); +} + +TEST(TestStringOps, TestLeftString) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + out_str = left_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len); + std::string output = std::string(out_str, out_len); + EXPECT_EQ(output, "TestString"); + + out_str = left_utf8_int32(ctx_ptr, "", 0, 0, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, ""); + + out_str = left_utf8_int32(ctx_ptr, "", 0, 500, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, ""); + + out_str = left_utf8_int32(ctx_ptr, "TestString", 10, 3, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "Tes"); + + out_str = left_utf8_int32(ctx_ptr, "TestString", 10, -3, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "TestStr"); + + // the text length for this string is 10 (each utf8 char is represented by two bytes) + out_str = left_utf8_int32(ctx_ptr, "абвгд", 10, 
3, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "абв"); +} + +TEST(TestStringOps, TestRightString) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + out_str = right_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len); + std::string output = std::string(out_str, out_len); + EXPECT_EQ(output, "TestString"); + + out_str = right_utf8_int32(ctx_ptr, "", 0, 0, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, ""); + + out_str = right_utf8_int32(ctx_ptr, "", 0, 500, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, ""); + + out_str = right_utf8_int32(ctx_ptr, "TestString", 10, 3, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "ing"); + + out_str = right_utf8_int32(ctx_ptr, "TestString", 10, -3, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "tString"); + + // the text length for this string is 10 (each utf8 char is represented by two bytes) + out_str = right_utf8_int32(ctx_ptr, "абвгд", 10, 3, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "вгд"); +} + +TEST(TestStringOps, TestBinaryString) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + out_str = binary_string(ctx_ptr, "TestString", 10, &out_len); + std::string output = std::string(out_str, out_len); + EXPECT_EQ(output, "TestString"); + + out_str = binary_string(ctx_ptr, "", 0, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, ""); + + out_str = binary_string(ctx_ptr, "T", 1, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "T"); + + out_str = binary_string(ctx_ptr, "\\x41\\x42\\x43", 12, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "ABC"); + + out_str = binary_string(ctx_ptr, "\\x41", 4, &out_len); + output = 
std::string(out_str, out_len); + EXPECT_EQ(output, "A"); + + out_str = binary_string(ctx_ptr, "\\x6d\\x6D", 8, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "mm"); + + out_str = binary_string(ctx_ptr, "\\x6f\\x6d", 8, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "om"); + + out_str = binary_string(ctx_ptr, "\\x4f\\x4D", 8, &out_len); + output = std::string(out_str, out_len); + EXPECT_EQ(output, "OM"); +} + +TEST(TestStringOps, TestSplitPart) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 0, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + EXPECT_THAT( + ctx.get_error(), + ::testing::HasSubstr("Index in split_part must be positive, value provided was 0")); + + out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "A"); + + out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "B"); + + out_str = split_part(ctx_ptr, "A,B,C", 5, ",", 1, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "C"); + + out_str = split_part(ctx_ptr, "abc~@~def~@~ghi", 15, "~@~", 3, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "abc"); + + out_str = split_part(ctx_ptr, "abc~@~def~@~ghi", 15, "~@~", 3, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "def"); + + out_str = split_part(ctx_ptr, "abc~@~def~@~ghi", 15, "~@~", 3, 3, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "ghi"); + + // Result must be empty when the index is > no of elements + out_str = split_part(ctx_ptr, "123|456|789", 11, "|", 1, 4, &out_len); + EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = split_part(ctx_ptr, "123|", 4, "|", 1, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "123"); + + out_str = split_part(ctx_ptr, "|123", 4, "|", 1, 1, &out_len); + 
EXPECT_EQ(std::string(out_str, out_len), ""); + + out_str = split_part(ctx_ptr, "ç†ååçåå†", 18, "å", 2, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "ç†"); + + out_str = split_part(ctx_ptr, "ç†ååçåå†", 18, "†åå", 6, 1, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "ç"); + + out_str = split_part(ctx_ptr, "ç†ååçåå†", 18, "†", 3, 2, &out_len); + EXPECT_EQ(std::string(out_str, out_len), "ååçåå"); +} + +TEST(TestStringOps, TestConvertTo) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + gdv_int32 out_len = 0; + const char* out_str; + + const int32_t ALL_BYTES_MATCH = 0; + + int32_t integer_value = std::numeric_limits<int32_t>::max(); + out_str = convert_toINT(ctx_ptr, integer_value, &out_len); + EXPECT_EQ(out_len, sizeof(integer_value)); + EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &integer_value, out_len)); + + int64_t big_integer_value = std::numeric_limits<int64_t>::max(); + out_str = convert_toBIGINT(ctx_ptr, big_integer_value, &out_len); + EXPECT_EQ(out_len, sizeof(big_integer_value)); + EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &big_integer_value, out_len)); + + float float_value = std::numeric_limits<float>::max(); + out_str = convert_toFLOAT(ctx_ptr, float_value, &out_len); + EXPECT_EQ(out_len, sizeof(float_value)); + EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &float_value, out_len)); + + double double_value = std::numeric_limits<double>::max(); + out_str = convert_toDOUBLE(ctx_ptr, double_value, &out_len); + EXPECT_EQ(out_len, sizeof(double_value)); + EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, &double_value, out_len)); + + const char* test_string = "test string"; + int32_t str_len = 11; + out_str = convert_toUTF8(ctx_ptr, test_string, str_len, &out_len); + EXPECT_EQ(out_len, str_len); + EXPECT_EQ(ALL_BYTES_MATCH, memcmp(out_str, test_string, out_len)); +} + +TEST(TestStringOps, TestConvertToBigEndian) { + gandiva::ExecutionContext ctx; + uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx); + 
gdv_int32 out_len = 0; + gdv_int32 out_len_big_endian = 0; + const char* out_str; + const char* out_str_big_endian; + + int64_t big_integer_value = std::numeric_limits<int64_t>::max(); + out_str = convert_toBIGINT(ctx_ptr, big_integer_value, &out_len); + out_str_big_endian = + convert_toBIGINT_be(ctx_ptr, big_integer_value, &out_len_big_endian); + EXPECT_EQ(out_len_big_endian, sizeof(big_integer_value)); + EXPECT_EQ(out_len_big_endian, out_len); + +#if ARROW_LITTLE_ENDIAN + // Checks that bytes are in reverse order + for (auto i = 0; i < out_len; i++) { + EXPECT_EQ(out_str[i], out_str_big_endian[out_len - (i + 1)]); + } +#else + for (auto i = 0; i < out_len; i++) { + EXPECT_EQ(out_str[i], out_str_big_endian[i]); + } +#endif + + double double_value = std::numeric_limits<double>::max(); + out_str = convert_toDOUBLE(ctx_ptr, double_value, &out_len); + out_str_big_endian = convert_toDOUBLE_be(ctx_ptr, double_value, &out_len_big_endian); + EXPECT_EQ(out_len_big_endian, sizeof(double_value)); + EXPECT_EQ(out_len_big_endian, out_len); + +#if ARROW_LITTLE_ENDIAN + // Checks that bytes are in reverse order + for (auto i = 0; i < out_len; i++) { + EXPECT_EQ(out_str[i], out_str_big_endian[out_len - (i + 1)]); + } +#else + for (auto i = 0; i < out_len; i++) { + EXPECT_EQ(out_str[i], out_str_big_endian[i]); + } +#endif +} + +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/testing.h b/src/arrow/cpp/src/gandiva/precompiled/testing.h new file mode 100644 index 000000000..c41bc5471 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/testing.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <ctime> +#include <string> + +#include <gtest/gtest.h> + +#include "arrow/util/logging.h" +#include "arrow/util/value_parsing.h" + +#include "gandiva/date_utils.h" +#include "gandiva/precompiled/types.h" + +namespace gandiva { + +static inline gdv_timestamp StringToTimestamp(const std::string& s) { + int64_t out = 0; + bool success = ::arrow::internal::ParseTimestampStrptime( + s.c_str(), s.length(), "%Y-%m-%d %H:%M:%S", /*ignore_time_in_day=*/false, + /*allow_trailing_chars=*/false, ::arrow::TimeUnit::SECOND, &out); + DCHECK(success); + ARROW_UNUSED(success); + return out * 1000; +} + +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/time.cc b/src/arrow/cpp/src/gandiva/precompiled/time.cc new file mode 100644 index 000000000..336f69226 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/time.cc @@ -0,0 +1,894 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "./epoch_time_point.h" + +extern "C" { + +#define __STDC_FORMAT_MACROS +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "./time_constants.h" +#include "./time_fields.h" +#include "./types.h" + +#define MINS_IN_HOUR 60 +#define SECONDS_IN_MINUTE 60 +#define SECONDS_IN_HOUR (SECONDS_IN_MINUTE) * (MINS_IN_HOUR) + +#define HOURS_IN_DAY 24 + +// Expand inner macro for all date types. +#define DATE_TYPES(INNER) \ + INNER(date64) \ + INNER(timestamp) + +// Expand inner macro for all base numeric types. +#define NUMERIC_TYPES(INNER) \ + INNER(int8) \ + INNER(int16) \ + INNER(int32) \ + INNER(int64) \ + INNER(uint8) \ + INNER(uint16) \ + INNER(uint32) \ + INNER(uint64) \ + INNER(float32) \ + INNER(float64) + +// Extract millennium +#define EXTRACT_MILLENNIUM(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractMillennium##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return (1900 + tp.TmYear() - 1) / 1000 + 1; \ + } + +DATE_TYPES(EXTRACT_MILLENNIUM) + +// Extract century +#define EXTRACT_CENTURY(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractCentury##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return (1900 + tp.TmYear() - 1) / 100 + 1; \ + } + +DATE_TYPES(EXTRACT_CENTURY) + +// Extract decade +#define EXTRACT_DECADE(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractDecade##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return (1900 + tp.TmYear()) / 10; \ + } + +DATE_TYPES(EXTRACT_DECADE) + +// Extract year. 
+#define EXTRACT_YEAR(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractYear##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return 1900 + tp.TmYear(); \ + } + +DATE_TYPES(EXTRACT_YEAR) + +#define EXTRACT_DOY(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractDoy##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return 1 + tp.TmYday(); \ + } + +DATE_TYPES(EXTRACT_DOY) + +#define EXTRACT_QUARTER(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractQuarter##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return tp.TmMon() / 3 + 1; \ + } + +DATE_TYPES(EXTRACT_QUARTER) + +#define EXTRACT_MONTH(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractMonth##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return 1 + tp.TmMon(); \ + } + +DATE_TYPES(EXTRACT_MONTH) + +#define JAN1_WDAY(tp) ((tp.TmWday() - (tp.TmYday() % 7) + 7) % 7) + +bool IsLeapYear(int yy) { + if ((yy % 4) != 0) { + // not divisible by 4 + return false; + } + + // yy = 4x + if ((yy % 400) == 0) { + // yy = 400x + return true; + } + + // yy = 4x, return true if yy != 100x + return ((yy % 100) != 0); +} + +// Day belongs to current year +// Note that TmYday is 0 for Jan 1 (subtract 1 from day in the below examples) +// +// If Jan 1 is Mon, (TmYday) / 7 + 1 (Jan 1->WK1, Jan 8->WK2, etc) +// If Jan 1 is Tues, (TmYday + 1) / 7 + 1 (Jan 1->WK1, Jan 7->WK2, etc) +// If Jan 1 is Wed, (TmYday + 2) / 7 + 1 +// If Jan 1 is Thu, (TmYday + 3) / 7 + 1 +// +// If Jan 1 is Fri, Sat or Sun, the first few days belong to the previous year +// If Jan 1 is Fri, (TmYday - 3) / 7 + 1 (Jan 4->WK1, Jan 11->WK2) +// If Jan 1 is Sat, (TmYday - 2) / 7 + 1 (Jan 3->WK1, Jan 10->WK2) +// If Jan 1 is Sun, (TmYday - 1) / 7 + 1 (Jan 2->WK1, Jan 9->WK2) +int weekOfCurrentYear(const EpochTimePoint& tp) { + int jan1_wday = JAN1_WDAY(tp); + switch (jan1_wday) { + // Monday + case 1: + // Tuesday + case 2: + // Wednesday + case 3: + // Thursday + case 4: { + return (tp.TmYday() + jan1_wday - 1) / 7 + 1; + } + // 
Friday + case 5: + // Saturday + case 6: { + return (tp.TmYday() - (8 - jan1_wday)) / 7 + 1; + } + // Sunday + case 0: { + return (tp.TmYday() - 1) / 7 + 1; + } + } + + // cannot reach here + // keep compiler happy + return 0; +} + +// Jan 1-3 +// If Jan 1 is one of Mon, Tue, Wed, Thu - belongs to week of current year +// If Jan 1 is Fri/Sat/Sun - belongs to previous year +int getJanWeekOfYear(const EpochTimePoint& tp) { + int jan1_wday = JAN1_WDAY(tp); + + if ((jan1_wday >= 1) && (jan1_wday <= 4)) { + // Jan 1-3 with the week belonging to this year + return 1; + } + + if (jan1_wday == 5) { + // Jan 1 is a Fri + // Jan 1-3 belong to previous year. Dec 31 of previous year same week # as Jan 1-3 + // previous year is a leap year: + // Prev Jan 1 is a Wed. Jan 6th is Mon + // Dec 31 - Jan 6 = 366 - 5 = 361 + // week from Jan 6 = (361 - 1) / 7 + 1 = 52 + // week # in previous year = 52 + 1 = 53 + // + // previous year is not a leap year. Jan 1 is Thu. Jan 5th is Mon + // Dec 31 - Jan 5 = 365 - 4 = 361 + // week from Jan 5 = (361 - 1) / 7 + 1 = 52 + // week # in previous year = 52 + 1 = 53 + return 53; + } + + if (jan1_wday == 0) { + // Jan 1 is a Sun + if (tp.TmMday() > 1) { + // Jan 2 and 3 belong to current year + return 1; + } + + // day belongs to previous year. 
Same as Dec 31 + // Same as the case where Jan 1 is a Fri, except that previous year + // does not have an extra week + // Hence, return 52 + return 52; + } + + // Jan 1 is a Sat + // Jan 1-2 belong to previous year + if (tp.TmMday() == 3) { + // Jan 3, return 1 + return 1; + } + + // prev Jan 1 is leap year + // prev Jan 1 is a Thu + // return 53 (extra week) + if (IsLeapYear(1900 + tp.TmYear() - 1)) { + return 53; + } + + // prev Jan 1 is not a leap year + // prev Jan 1 is a Fri + // return 52 (no extra week) + return 52; +} + +// Dec 29-31 +int getDecWeekOfYear(const EpochTimePoint& tp) { + int next_jan1_wday = (tp.TmWday() + (31 - tp.TmMday()) + 1) % 7; + + if (next_jan1_wday == 4) { + // next Jan 1 is a Thu + // day belongs to week 1 of next year + return 1; + } + + if (next_jan1_wday == 3) { + // next Jan 1 is a Wed + // Dec 31 and 30 belong to next year - return 1 + if (tp.TmMday() != 29) { + return 1; + } + + // Dec 29 belongs to current year + return weekOfCurrentYear(tp); + } + + if (next_jan1_wday == 2) { + // next Jan 1 is a Tue + // Dec 31 belongs to next year - return 1 + if (tp.TmMday() == 31) { + return 1; + } + + // Dec 29 and 30 belong to current year + return weekOfCurrentYear(tp); + } + + // next Jan 1 is a Fri/Sat/Sun. No day from this year belongs to that week + // next Jan 1 is a Mon. 
No day from this year belongs to that week + return weekOfCurrentYear(tp); +} + +// Week of year is determined by ISO 8601 standard +// Take a look at: https://en.wikipedia.org/wiki/ISO_week_date +// +// Important points to note: +// Week starts with a Monday and ends with a Sunday +// A week can have some days in this year and some days in the previous/next year +// This is true for the first and last weeks +// +// The first week of the year should have at-least 4 days in the current year +// The last week of the year should have at-least 4 days in the current year +// +// A given day might belong to the first week of the next year - e.g Dec 29, 30 and 31 +// A given day might belong to the last week of the previous year - e.g. Jan 1, 2 and 3 +// +// Algorithm: +// If day belongs to week in current year, weekOfCurrentYear +// +// If day is Jan 1-3, see getJanWeekOfYear +// If day is Dec 29-21, see getDecWeekOfYear +// +gdv_int64 weekOfYear(const EpochTimePoint& tp) { + if (tp.TmYday() < 3) { + // Jan 1-3 + return getJanWeekOfYear(tp); + } + + if ((tp.TmMon() == 11) && (tp.TmMday() >= 29)) { + // Dec 29-31 + return getDecWeekOfYear(tp); + } + + return weekOfCurrentYear(tp); +} + +#define EXTRACT_WEEK(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractWeek##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return weekOfYear(tp); \ + } + +DATE_TYPES(EXTRACT_WEEK) + +#define EXTRACT_DOW(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractDow##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return 1 + tp.TmWday(); \ + } + +DATE_TYPES(EXTRACT_DOW) + +#define EXTRACT_DAY(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractDay##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return tp.TmMday(); \ + } + +DATE_TYPES(EXTRACT_DAY) + +#define EXTRACT_HOUR(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractHour##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return tp.TmHour(); \ + } + +DATE_TYPES(EXTRACT_HOUR) + +#define EXTRACT_MINUTE(TYPE) \ 
+ FORCE_INLINE \ + gdv_int64 extractMinute##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return tp.TmMin(); \ + } + +DATE_TYPES(EXTRACT_MINUTE) + +#define EXTRACT_SECOND(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractSecond##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return tp.TmSec(); \ + } + +DATE_TYPES(EXTRACT_SECOND) + +#define EXTRACT_EPOCH(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractEpoch##_##TYPE(gdv_##TYPE millis) { return MILLIS_TO_SEC(millis); } + +DATE_TYPES(EXTRACT_EPOCH) + +// Functions that work on millis in a day +#define EXTRACT_SECOND_TIME(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractSecond##_##TYPE(gdv_##TYPE millis) { \ + gdv_int64 seconds_of_day = MILLIS_TO_SEC(millis); \ + gdv_int64 sec = seconds_of_day % SECONDS_IN_MINUTE; \ + return sec; \ + } + +EXTRACT_SECOND_TIME(time32) + +#define EXTRACT_MINUTE_TIME(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractMinute##_##TYPE(gdv_##TYPE millis) { \ + gdv_##TYPE mins = MILLIS_TO_MINS(millis); \ + return (mins % (MINS_IN_HOUR)); \ + } + +EXTRACT_MINUTE_TIME(time32) + +#define EXTRACT_HOUR_TIME(TYPE) \ + FORCE_INLINE \ + gdv_int64 extractHour##_##TYPE(gdv_##TYPE millis) { return MILLIS_TO_HOUR(millis); } + +EXTRACT_HOUR_TIME(time32) + +#define DATE_TRUNC_FIXED_UNIT(NAME, TYPE, NMILLIS_IN_UNIT) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \ + return ((millis / NMILLIS_IN_UNIT) * NMILLIS_IN_UNIT); \ + } + +#define DATE_TRUNC_WEEK(TYPE) \ + FORCE_INLINE \ + gdv_##TYPE date_trunc_Week_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + int ndays_to_trunc = 0; \ + if (tp.TmWday() == 0) { \ + /* Sunday */ \ + ndays_to_trunc = 6; \ + } else { \ + /* All other days */ \ + ndays_to_trunc = tp.TmWday() - 1; \ + } \ + return tp.AddDays(-ndays_to_trunc).ClearTimeOfDay().MillisSinceEpoch(); \ + } + +#define DATE_TRUNC_MONTH_UNITS(NAME, TYPE, NMONTHS_IN_UNIT) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint 
tp(millis); \ + int ndays_to_trunc = tp.TmMday() - 1; \ + int nmonths_to_trunc = \ + tp.TmMon() - ((tp.TmMon() / NMONTHS_IN_UNIT) * NMONTHS_IN_UNIT); \ + return tp.AddDays(-ndays_to_trunc) \ + .AddMonths(-nmonths_to_trunc) \ + .ClearTimeOfDay() \ + .MillisSinceEpoch(); \ + } + +#define DATE_TRUNC_YEAR_UNITS(NAME, TYPE, NYEARS_IN_UNIT, OFF_BY) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + int ndays_to_trunc = tp.TmMday() - 1; \ + int nmonths_to_trunc = tp.TmMon(); \ + int year = 1900 + tp.TmYear(); \ + year = ((year - OFF_BY) / NYEARS_IN_UNIT) * NYEARS_IN_UNIT + OFF_BY; \ + int nyears_to_trunc = tp.TmYear() - (year - 1900); \ + return tp.AddDays(-ndays_to_trunc) \ + .AddMonths(-nmonths_to_trunc) \ + .AddYears(-nyears_to_trunc) \ + .ClearTimeOfDay() \ + .MillisSinceEpoch(); \ + } + +#define DATE_TRUNC_FUNCTIONS(TYPE) \ + DATE_TRUNC_FIXED_UNIT(date_trunc_Second, TYPE, MILLIS_IN_SEC) \ + DATE_TRUNC_FIXED_UNIT(date_trunc_Minute, TYPE, MILLIS_IN_MIN) \ + DATE_TRUNC_FIXED_UNIT(date_trunc_Hour, TYPE, MILLIS_IN_HOUR) \ + DATE_TRUNC_FIXED_UNIT(date_trunc_Day, TYPE, MILLIS_IN_DAY) \ + DATE_TRUNC_WEEK(TYPE) \ + DATE_TRUNC_MONTH_UNITS(date_trunc_Month, TYPE, 1) \ + DATE_TRUNC_MONTH_UNITS(date_trunc_Quarter, TYPE, 3) \ + DATE_TRUNC_MONTH_UNITS(date_trunc_Year, TYPE, 12) \ + DATE_TRUNC_YEAR_UNITS(date_trunc_Decade, TYPE, 10, 0) \ + DATE_TRUNC_YEAR_UNITS(date_trunc_Century, TYPE, 100, 1) \ + DATE_TRUNC_YEAR_UNITS(date_trunc_Millennium, TYPE, 1000, 1) + +DATE_TRUNC_FUNCTIONS(date64) +DATE_TRUNC_FUNCTIONS(timestamp) + +#define LAST_DAY_FUNC(TYPE) \ + FORCE_INLINE \ + gdv_date64 last_day_from_##TYPE(gdv_date64 millis) { \ + EpochTimePoint received_day(millis); \ + const auto& day_without_hours_and_sec = received_day.ClearTimeOfDay(); \ + \ + int received_day_in_month = day_without_hours_and_sec.TmMday(); \ + const auto& first_day_in_month = \ + day_without_hours_and_sec.AddDays(1 - received_day_in_month); \ + \ + const auto& 
month_last_day = first_day_in_month.AddMonths(1).AddDays(-1); \ + \ + return month_last_day.MillisSinceEpoch(); \ + } + +DATE_TYPES(LAST_DAY_FUNC) + +FORCE_INLINE +gdv_date64 castDATE_int64(gdv_int64 in) { return in; } + +FORCE_INLINE +gdv_date32 castDATE_int32(gdv_int32 in) { return in; } + +FORCE_INLINE +gdv_date64 castDATE_date32(gdv_date32 days) { + return days * static_cast<gdv_date64>(MILLIS_IN_DAY); +} + +static int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +bool IsLastDayOfMonth(const EpochTimePoint& tp) { + if (tp.TmMon() != 1) { + // not February. Don't worry about leap year + return (tp.TmMday() == days_in_month[tp.TmMon()]); + } + + // this is February, check if the day is 28 or 29 + if (tp.TmMday() < 28) { + return false; + } + + if (tp.TmMday() == 29) { + // Feb 29th + return true; + } + + // check if year is non-leap year + return !IsLeapYear(tp.TmYear()); +} + +FORCE_INLINE +bool is_valid_time(const int hours, const int minutes, const int seconds) { + return hours >= 0 && hours < 24 && minutes >= 0 && minutes < 60 && seconds >= 0 && + seconds < 60; +} + +// MONTHS_BETWEEN returns number of months between dates date1 and date2. +// If date1 is later than date2, then the result is positive. +// If date1 is earlier than date2, then the result is negative. +// If date1 and date2 are either the same days of the month or both last days of months, +// then the result is always an integer. 
Otherwise Oracle Database calculates the +// fractional portion of the result based on a 31-day month and considers the difference +// in time components date1 and date2 +#define MONTHS_BETWEEN(TYPE) \ + FORCE_INLINE \ + double months_between##_##TYPE##_##TYPE(uint64_t endEpoch, uint64_t startEpoch) { \ + EpochTimePoint endTime(endEpoch); \ + EpochTimePoint startTime(startEpoch); \ + int endYear = endTime.TmYear(); \ + int endMonth = endTime.TmMon(); \ + int startYear = startTime.TmYear(); \ + int startMonth = startTime.TmMon(); \ + int monthsDiff = (endYear - startYear) * 12 + (endMonth - startMonth); \ + if ((endTime.TmMday() == startTime.TmMday()) || \ + (IsLastDayOfMonth(endTime) && IsLastDayOfMonth(startTime))) { \ + return static_cast<double>(monthsDiff); \ + } \ + double diffDays = static_cast<double>(endTime.TmMday() - startTime.TmMday()) / \ + static_cast<double>(31); \ + double diffHours = static_cast<double>(endTime.TmHour() - startTime.TmHour()) + \ + static_cast<double>(endTime.TmMin() - startTime.TmMin()) / \ + static_cast<double>(MINS_IN_HOUR) + \ + static_cast<double>(endTime.TmSec() - startTime.TmSec()) / \ + static_cast<double>(SECONDS_IN_HOUR); \ + return static_cast<double>(monthsDiff) + diffDays + \ + diffHours / static_cast<double>(HOURS_IN_DAY * 31); \ + } + +DATE_TYPES(MONTHS_BETWEEN) + +FORCE_INLINE +void set_error_for_date(gdv_int32 length, const char* input, const char* msg, + int64_t execution_context) { + int size = length + static_cast<int>(strlen(msg)) + 1; + char* error = reinterpret_cast<char*>(malloc(size)); + snprintf(error, size, "%s%s", msg, input); + gdv_fn_context_set_error_msg(execution_context, error); + free(error); +} + +gdv_date64 castDATE_utf8(int64_t context, const char* input, gdv_int32 length) { + using arrow_vendored::date::day; + using arrow_vendored::date::month; + using arrow_vendored::date::sys_days; + using arrow_vendored::date::year; + using arrow_vendored::date::year_month_day; + using gandiva::TimeFields; + 
// format : 0 is year, 1 is month and 2 is day. + int dateFields[3]; + int dateIndex = 0, index = 0, value = 0; + int year_str_len = 0; + while (dateIndex < 3 && index < length) { + if (!isdigit(input[index])) { + dateFields[dateIndex++] = value; + value = 0; + } else { + value = (value * 10) + (input[index] - '0'); + if (dateIndex == TimeFields::kYear) { + year_str_len++; + } + } + index++; + } + + if (dateIndex < 3) { + // If we reached the end of input, we would have not encountered a separator + // store the last value + dateFields[dateIndex++] = value; + } + const char* msg = "Not a valid date value "; + if (dateIndex != 3) { + set_error_for_date(length, input, msg, context); + return 0; + } + + /* Handle two digit years + * If range of two digits is between 70 - 99 then year = 1970 - 1999 + * Else if two digits is between 00 - 69 = 2000 - 2069 + */ + if (dateFields[TimeFields::kYear] < 100 && year_str_len < 4) { + if (dateFields[TimeFields::kYear] < 70) { + dateFields[TimeFields::kYear] += 2000; + } else { + dateFields[TimeFields::kYear] += 1900; + } + } + year_month_day date = year(dateFields[TimeFields::kYear]) / + month(dateFields[TimeFields::kMonth]) / + day(dateFields[TimeFields::kDay]); + if (!date.ok()) { + set_error_for_date(length, input, msg, context); + return 0; + } + return std::chrono::time_point_cast<std::chrono::milliseconds>(sys_days(date)) + .time_since_epoch() + .count(); +} + +/* + * Input consists of mandatory and optional fields. + * Mandatory fields are year, month and day. + * Optional fields are time, displacement and zone. 
+ * Format is <year-month-day>[ hours:minutes:seconds][.millis][ displacement|zone] + */ +gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 length) { + using arrow_vendored::date::day; + using arrow_vendored::date::month; + using arrow_vendored::date::sys_days; + using arrow_vendored::date::year; + using arrow_vendored::date::year_month_day; + using gandiva::TimeFields; + using std::chrono::hours; + using std::chrono::milliseconds; + using std::chrono::minutes; + using std::chrono::seconds; + + int ts_fields[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + gdv_boolean add_displacement = true; + gdv_boolean encountered_zone = false; + int year_str_len = 0, sub_seconds_len = 0; + int ts_field_index = TimeFields::kYear, index = 0, value = 0; + while (ts_field_index < TimeFields::kMax && index < length) { + if (isdigit(input[index])) { + value = (value * 10) + (input[index] - '0'); + if (ts_field_index == TimeFields::kYear) { + year_str_len++; + } + if (ts_field_index == TimeFields::kSubSeconds) { + sub_seconds_len++; + } + } else { + ts_fields[ts_field_index] = value; + value = 0; + + switch (input[index]) { + case '.': + case ':': + case ' ': + ts_field_index++; + break; + case '+': + // +08:00, means time zone is 8 hours ahead. Need to subtract. + add_displacement = false; + ts_field_index = TimeFields::kDisplacementHours; + break; + case '-': + // Overloaded as date separator and negative displacement. + ts_field_index = (ts_field_index < 3) ? 
(ts_field_index + 1) + : TimeFields::kDisplacementHours; + break; + default: + encountered_zone = true; + break; + } + } + if (encountered_zone) { + break; + } + index++; + } + + // Store the last value + if (ts_field_index < TimeFields::kMax) { + ts_fields[ts_field_index++] = value; + } + + // adjust the year + if (ts_fields[TimeFields::kYear] < 100 && year_str_len < 4) { + if (ts_fields[TimeFields::kYear] < 70) { + ts_fields[TimeFields::kYear] += 2000; + } else { + ts_fields[TimeFields::kYear] += 1900; + } + } + + // adjust the milliseconds + if (sub_seconds_len > 0) { + if (sub_seconds_len > 3) { + const char* msg = "Invalid millis for timestamp value "; + set_error_for_date(length, input, msg, context); + return 0; + } + while (sub_seconds_len < 3) { + ts_fields[TimeFields::kSubSeconds] *= 10; + sub_seconds_len++; + } + } + // handle timezone + if (encountered_zone) { + int err = 0; + gdv_timestamp ret_time = 0; + err = gdv_fn_time_with_zone(&ts_fields[0], (input + index), (length - index), + &ret_time); + if (err) { + const char* msg = "Invalid timestamp or unknown zone for timestamp value "; + set_error_for_date(length, input, msg, context); + return 0; + } + return ret_time; + } + + year_month_day date = year(ts_fields[TimeFields::kYear]) / + month(ts_fields[TimeFields::kMonth]) / + day(ts_fields[TimeFields::kDay]); + if (!date.ok()) { + const char* msg = "Not a valid day for timestamp value "; + set_error_for_date(length, input, msg, context); + return 0; + } + + if (!is_valid_time(ts_fields[TimeFields::kHours], ts_fields[TimeFields::kMinutes], + ts_fields[TimeFields::kSeconds])) { + const char* msg = "Not a valid time for timestamp value "; + set_error_for_date(length, input, msg, context); + return 0; + } + + auto date_time = sys_days(date) + hours(ts_fields[TimeFields::kHours]) + + minutes(ts_fields[TimeFields::kMinutes]) + + seconds(ts_fields[TimeFields::kSeconds]) + + milliseconds(ts_fields[TimeFields::kSubSeconds]); + if 
(ts_fields[TimeFields::kDisplacementHours] || + ts_fields[TimeFields::kDisplacementMinutes]) { + auto displacement_time = hours(ts_fields[TimeFields::kDisplacementHours]) + + minutes(ts_fields[TimeFields::kDisplacementMinutes]); + date_time = (add_displacement) ? (date_time + displacement_time) + : (date_time - displacement_time); + } + return std::chrono::time_point_cast<milliseconds>(date_time).time_since_epoch().count(); +} + +gdv_timestamp castTIMESTAMP_date64(gdv_date64 date_in_millis) { return date_in_millis; } + +gdv_timestamp castTIMESTAMP_int64(gdv_int64 in) { return in; } + +gdv_date64 castDATE_timestamp(gdv_timestamp timestamp_in_millis) { + EpochTimePoint tp(timestamp_in_millis); + return tp.ClearTimeOfDay().MillisSinceEpoch(); +} + +gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis) { + // Retrieves a timestamp and returns the number of milliseconds since the midnight + EpochTimePoint tp(timestamp_in_millis); + auto tp_at_midnight = tp.ClearTimeOfDay(); + + int64_t millis_since_midnight = + tp.MillisSinceEpoch() - tp_at_midnight.MillisSinceEpoch(); + + return static_cast<int32_t>(millis_since_midnight); +} + +const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in, + gdv_int64 length, gdv_int32* out_len) { + gdv_int64 year = extractYear_timestamp(in); + gdv_int64 month = extractMonth_timestamp(in); + gdv_int64 day = extractDay_timestamp(in); + gdv_int64 hour = extractHour_timestamp(in); + gdv_int64 minute = extractMinute_timestamp(in); + gdv_int64 second = extractSecond_timestamp(in); + gdv_int64 millis = in % MILLIS_IN_SEC; + + static const int kTimeStampStringLen = 23; + const int char_buffer_length = kTimeStampStringLen + 1; // snprintf adds \0 + char char_buffer[char_buffer_length]; + + // yyyy-MM-dd hh:mm:ss.sss + int res = snprintf(char_buffer, char_buffer_length, + "%04" PRId64 "-%02" PRId64 "-%02" PRId64 " %02" PRId64 ":%02" PRId64 + ":%02" PRId64 ".%03" PRId64, + year, month, day, hour, minute, second, 
millis); + if (res < 0) { + gdv_fn_context_set_error_msg(context, "Could not format the timestamp"); + return ""; + } + + *out_len = static_cast<gdv_int32>(length); + if (*out_len > kTimeStampStringLen) { + *out_len = kTimeStampStringLen; + } + + if (*out_len <= 0) { + if (*out_len < 0) { + gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative"); + } + *out_len = 0; + return ""; + } + + char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len)); + if (ret == nullptr) { + gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string"); + *out_len = 0; + return ""; + } + + memcpy(ret, char_buffer, *out_len); + return ret; +} + +FORCE_INLINE +gdv_int64 extractDay_daytimeinterval(gdv_day_time_interval in) { + gdv_int32 days = static_cast<gdv_int32>(in & 0x00000000FFFFFFFF); + return static_cast<gdv_int64>(days); +} + +FORCE_INLINE +gdv_int64 extractMillis_daytimeinterval(gdv_day_time_interval in) { + gdv_int32 millis = static_cast<gdv_int32>((in & 0xFFFFFFFF00000000) >> 32); + return static_cast<gdv_int64>(millis); +} + +FORCE_INLINE +gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) { + return extractMillis_daytimeinterval(in) + + extractDay_daytimeinterval(in) * MILLIS_IN_DAY; +} + +// Convert the seconds since epoch argument to timestamp +#define TO_TIMESTAMP(TYPE) \ + FORCE_INLINE \ + gdv_timestamp to_timestamp##_##TYPE(gdv_##TYPE seconds) { \ + return static_cast<gdv_timestamp>(seconds * MILLIS_IN_SEC); \ + } + +NUMERIC_TYPES(TO_TIMESTAMP) + +// Convert the seconds since epoch argument to time +#define TO_TIME(TYPE) \ + FORCE_INLINE \ + gdv_time32 to_time##_##TYPE(gdv_##TYPE seconds) { \ + EpochTimePoint tp(static_cast<int64_t>(seconds * MILLIS_IN_SEC)); \ + return static_cast<gdv_time32>(tp.TimeOfDay().to_duration().count()); \ + } + +NUMERIC_TYPES(TO_TIME) + +#define CAST_INT_YEAR_INTERVAL(TYPE, OUT_TYPE) \ + FORCE_INLINE \ + gdv_##OUT_TYPE 
TYPE##_year_interval(gdv_month_interval in) { \ + return static_cast<gdv_##OUT_TYPE>(in / 12.0); \ + } + +CAST_INT_YEAR_INTERVAL(castBIGINT, int64) +CAST_INT_YEAR_INTERVAL(castINT, int32) + +#define CAST_NULLABLE_INTERVAL_DAY(TYPE) \ + FORCE_INLINE \ + gdv_day_time_interval castNULLABLEINTERVALDAY_##TYPE(gdv_##TYPE in) { \ + return static_cast<gdv_day_time_interval>(in); \ + } + +CAST_NULLABLE_INTERVAL_DAY(int32) +CAST_NULLABLE_INTERVAL_DAY(int64) + +#define CAST_NULLABLE_INTERVAL_YEAR(TYPE) \ + FORCE_INLINE \ + gdv_month_interval castNULLABLEINTERVALYEAR_##TYPE(int64_t context, gdv_##TYPE in) { \ + gdv_month_interval value = static_cast<gdv_month_interval>(in); \ + if (value != in) { \ + gdv_fn_context_set_error_msg(context, "Integer overflow"); \ + } \ + return value; \ + } + +CAST_NULLABLE_INTERVAL_YEAR(int32) +CAST_NULLABLE_INTERVAL_YEAR(int64) + +} // extern "C" diff --git a/src/arrow/cpp/src/gandiva/precompiled/time_constants.h b/src/arrow/cpp/src/gandiva/precompiled/time_constants.h new file mode 100644 index 000000000..015ef4bf9 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/time_constants.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 

#pragma once

// Number of milliseconds in each fixed-length unit. Each constant is built
// from the previous one, so all of them are plain `int` expressions.
#define MILLIS_IN_SEC (1000)
#define MILLIS_IN_MIN (60 * MILLIS_IN_SEC)
#define MILLIS_IN_HOUR (60 * MILLIS_IN_MIN)
#define MILLIS_IN_DAY (24 * MILLIS_IN_HOUR)
#define MILLIS_IN_WEEK (7 * MILLIS_IN_DAY)

// Convert a millisecond count to whole units using integer division, so the
// quotient is truncated toward zero (relevant for negative inputs).
#define MILLIS_TO_SEC(millis) ((millis) / MILLIS_IN_SEC)
#define MILLIS_TO_MINS(millis) ((millis) / MILLIS_IN_MIN)
#define MILLIS_TO_HOUR(millis) ((millis) / MILLIS_IN_HOUR)
#define MILLIS_TO_DAY(millis) ((millis) / MILLIS_IN_DAY)
#define MILLIS_TO_WEEK(millis) ((millis) / MILLIS_IN_WEEK)
diff --git a/src/arrow/cpp/src/gandiva/precompiled/time_fields.h b/src/arrow/cpp/src/gandiva/precompiled/time_fields.h new file mode 100644 index 000000000..d5277e743 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/time_fields.h @@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
+ +#pragma once + +namespace gandiva { + +// Names for the individual components of a date/time value, from the year down +// to sub-seconds, followed by the hour and minute parts of a displacement. +// The enumerators take the default consecutive values starting at 0. +// NOTE(review): kMax looks like a count sentinel (number of fields, e.g. for +// sizing an array indexed by this enum) - confirm against the users. +enum TimeFields { + kYear, + kMonth, + kDay, + kHours, + kMinutes, + kSeconds, + kSubSeconds, + kDisplacementHours, + kDisplacementMinutes, + kMax +}; + +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/time_test.cc b/src/arrow/cpp/src/gandiva/precompiled/time_test.cc new file mode 100644 index 000000000..332ffa332 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/time_test.cc @@ -0,0 +1,953 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <gtest/gtest.h> +#include <time.h> + +#include "../execution_context.h" +#include "gandiva/precompiled/testing.h" +#include "gandiva/precompiled/types.h" + +namespace gandiva { + +TEST(TestTime, TestCastDate) { + ExecutionContext context; + int64_t context_ptr = reinterpret_cast<int64_t>(&context); + + EXPECT_EQ(castDATE_utf8(context_ptr, "1967-12-1", 9), -65836800000); + EXPECT_EQ(castDATE_utf8(context_ptr, "2067-12-1", 9), 3089923200000); + + EXPECT_EQ(castDATE_utf8(context_ptr, "7-12-1", 6), 1196467200000); + EXPECT_EQ(castDATE_utf8(context_ptr, "67-12-1", 7), 3089923200000); + EXPECT_EQ(castDATE_utf8(context_ptr, "067-12-1", 8), 3089923200000); + EXPECT_EQ(castDATE_utf8(context_ptr, "0067-12-1", 9), -60023980800000); + EXPECT_EQ(castDATE_utf8(context_ptr, "00067-12-1", 10), -60023980800000); + EXPECT_EQ(castDATE_utf8(context_ptr, "167-12-1", 8), -56868307200000); + + EXPECT_EQ(castDATE_utf8(context_ptr, "1972-12-1", 9), 92016000000); + EXPECT_EQ(castDATE_utf8(context_ptr, "72-12-1", 7), 92016000000); + + EXPECT_EQ(castDATE_utf8(context_ptr, "1972222222", 10), 0); + EXPECT_EQ(context.get_error(), "Not a valid date value 1972222222"); + context.Reset(); + + EXPECT_EQ(castDATE_utf8(context_ptr, "blahblah", 8), 0); + EXPECT_EQ(castDATE_utf8(context_ptr, "1967-12-1bb", 11), -65836800000); + + EXPECT_EQ(castDATE_utf8(context_ptr, "67-12-1", 7), 3089923200000); + EXPECT_EQ(castDATE_utf8(context_ptr, "67-1-1", 6), 3061065600000); + EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 6), 31536000000); + EXPECT_EQ(castDATE_utf8(context_ptr, "71-45-1", 7), 0); + EXPECT_EQ(castDATE_utf8(context_ptr, "71-12-XX", 8), 0); + + EXPECT_EQ(castDATE_date32(1), 86400000); +} + +TEST(TestTime, TestCastTimestamp) { + ExecutionContext context; + int64_t context_ptr = reinterpret_cast<int64_t>(&context); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1967-12-1", 9), -65836800000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2067-12-1", 9), 3089923200000); + + 
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "7-12-1", 6), 1196467200000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-12-1", 7), 3089923200000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "067-12-1", 8), 3089923200000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "0067-12-1", 9), -60023980800000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "00067-12-1", 10), -60023980800000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "167-12-1", 8), -56868307200000); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1972-12-1", 9), 92016000000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "72-12-1", 7), 92016000000); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1972-12-1", 9), 92016000000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-12-1", 7), 3089923200000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-1-1", 6), 3061065600000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "71-1-1", 6), 31536000000); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18), 969702330000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920", 22), 969702330920); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 +08:00", 29), + 969673530920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 -11:45", 29), + 969744630920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "65-03-04 00:20:40.920 +00:30", 28), + 3003349840920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1932-05-18 11:30:00.920 +11:30", 30), + -1187308799080); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1857-02-11 20:31:40.920 -05:30", 30), + -3562264699080); + EXPECT_EQ(castTIMESTAMP_date64( + castDATE_utf8(context_ptr, "2000-09-23 9:45:30.920 +08:00", 29)), + castTIMESTAMP_utf8(context_ptr, "2000-09-23 0:00:00.000 +00:00", 29)); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.1", 20), + castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18) + 100); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.10", 20), + 
castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18) + 100); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.100", 20), + castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18) + 100); + + // error cases + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 24:00:00", 19), 0); + EXPECT_EQ(context.get_error(), + "Not a valid time for timestamp value 2000-01-01 24:00:00"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:60:00", 19), 0); + EXPECT_EQ(context.get_error(), + "Not a valid time for timestamp value 2000-01-01 00:60:00"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:100", 20), 0); + EXPECT_EQ(context.get_error(), + "Not a valid time for timestamp value 2000-01-01 00:00:100"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), 0); + EXPECT_EQ(context.get_error(), + "Invalid millis for timestamp value 2000-01-01 00:00:00.0001"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), 0); + EXPECT_EQ(context.get_error(), + "Invalid millis for timestamp value 2000-01-01 00:00:00.1000"); + context.Reset(); +} + +#ifndef _WIN32 + +// TODO(wesm): ARROW-4495. 
Need to address TZ database issues on Windows + +TEST(TestTime, TestCastTimestampWithTZ) { + ExecutionContext context; + int64_t context_ptr = reinterpret_cast<int64_t>(&context); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 Canada/Pacific", 37), + 969727530920); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2012-02-28 23:30:59 Asia/Kolkata", 32), + 1330452059000); + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1923-10-07 03:03:03 America/New_York", 36), + -1459094217000); +} + +TEST(TestTime, TestCastTimestampErrors) { + ExecutionContext context; + int64_t context_ptr = reinterpret_cast<int64_t>(&context); + + // error cases + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "20000923", 8), 0); + EXPECT_EQ(context.get_error(), "Not a valid day for timestamp value 20000923"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-2b", 10), 0); + EXPECT_EQ(context.get_error(), + "Invalid timestamp or unknown zone for timestamp value 2000-09-2b"); + context.Reset(); + + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 Unknown/Zone", 35), + 0); + EXPECT_EQ(context.get_error(), + "Invalid timestamp or unknown zone for timestamp value 2000-09-23 " + "9:45:30.920 Unknown/Zone"); + context.Reset(); +} + +#endif + +TEST(TestTime, TestExtractTime) { + // 10:20:33 + gdv_int32 time_as_millis_in_day = 37233000; + + EXPECT_EQ(extractHour_time32(time_as_millis_in_day), 10); + EXPECT_EQ(extractMinute_time32(time_as_millis_in_day), 20); + EXPECT_EQ(extractSecond_time32(time_as_millis_in_day), 33); +} + +TEST(TestTime, TestTimestampDiffMonth) { + gdv_timestamp ts1 = StringToTimestamp("2019-06-30 00:00:00"); + gdv_timestamp ts2 = StringToTimestamp("2019-05-31 00:00:00"); + EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), -1); + + ts1 = StringToTimestamp("2019-06-30 00:00:00"); + ts2 = StringToTimestamp("2019-02-28 00:00:00"); + EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), -4); + + ts1 = 
StringToTimestamp("2019-06-30 00:00:00"); + ts2 = StringToTimestamp("2019-03-31 00:00:00"); + EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), -3); + + ts1 = StringToTimestamp("2019-06-30 00:00:00"); + ts2 = StringToTimestamp("2019-06-30 00:00:00"); + EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 0); + + ts1 = StringToTimestamp("2019-06-30 00:00:00"); + ts2 = StringToTimestamp("2019-07-31 00:00:00"); + EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 1); + + ts1 = StringToTimestamp("2019-06-30 00:00:00"); + ts2 = StringToTimestamp("2019-07-30 00:00:00"); + EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 1); + + ts1 = StringToTimestamp("2019-06-30 00:00:00"); + ts2 = StringToTimestamp("2019-07-29 00:00:00"); + EXPECT_EQ(timestampdiffMonth_timestamp_timestamp(ts1, ts2), 0); +} + +TEST(TestTime, TestExtractTimestamp) { + gdv_timestamp ts = StringToTimestamp("1970-05-02 10:20:33"); + + EXPECT_EQ(extractMillennium_timestamp(ts), 2); + EXPECT_EQ(extractCentury_timestamp(ts), 20); + EXPECT_EQ(extractDecade_timestamp(ts), 197); + EXPECT_EQ(extractYear_timestamp(ts), 1970); + EXPECT_EQ(extractDoy_timestamp(ts), 122); + EXPECT_EQ(extractMonth_timestamp(ts), 5); + EXPECT_EQ(extractDow_timestamp(ts), 7); + EXPECT_EQ(extractDay_timestamp(ts), 2); + EXPECT_EQ(extractHour_timestamp(ts), 10); + EXPECT_EQ(extractMinute_timestamp(ts), 20); + EXPECT_EQ(extractSecond_timestamp(ts), 33); +} + +TEST(TestTime, TimeStampTrunc) { + EXPECT_EQ(date_trunc_Second_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2015-05-05 10:20:34")); + EXPECT_EQ(date_trunc_Minute_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2015-05-05 10:20:00")); + EXPECT_EQ(date_trunc_Hour_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2015-05-05 10:00:00")); + EXPECT_EQ(date_trunc_Day_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2015-05-05 00:00:00")); + 
EXPECT_EQ(date_trunc_Month_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2015-05-01 00:00:00")); + EXPECT_EQ(date_trunc_Quarter_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2015-04-01 00:00:00")); + EXPECT_EQ(date_trunc_Year_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2015-01-01 00:00:00")); + EXPECT_EQ(date_trunc_Decade_date64(StringToTimestamp("2015-05-05 10:20:34")), + StringToTimestamp("2010-01-01 00:00:00")); + EXPECT_EQ(date_trunc_Century_date64(StringToTimestamp("2115-05-05 10:20:34")), + StringToTimestamp("2101-01-01 00:00:00")); + EXPECT_EQ(date_trunc_Millennium_date64(StringToTimestamp("2115-05-05 10:20:34")), + StringToTimestamp("2001-01-01 00:00:00")); + + // truncate week going to previous year + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-01 10:10:10")), + StringToTimestamp("2010-12-27 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-02 10:10:10")), + StringToTimestamp("2010-12-27 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-03 10:10:10")), + StringToTimestamp("2011-01-03 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-04 10:10:10")), + StringToTimestamp("2011-01-03 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-05 10:10:10")), + StringToTimestamp("2011-01-03 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-06 10:10:10")), + StringToTimestamp("2011-01-03 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-07 10:10:10")), + StringToTimestamp("2011-01-03 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-08 10:10:10")), + StringToTimestamp("2011-01-03 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2011-01-09 10:10:10")), + StringToTimestamp("2011-01-03 00:00:00")); + + // truncate week for Feb in a leap year + 
EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-02-28 10:10:10")), + StringToTimestamp("2000-02-28 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-02-29 10:10:10")), + StringToTimestamp("2000-02-28 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-03-01 10:10:10")), + StringToTimestamp("2000-02-28 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-03-02 10:10:10")), + StringToTimestamp("2000-02-28 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-03-03 10:10:10")), + StringToTimestamp("2000-02-28 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-03-04 10:10:10")), + StringToTimestamp("2000-02-28 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-03-05 10:10:10")), + StringToTimestamp("2000-02-28 00:00:00")); + EXPECT_EQ(date_trunc_Week_timestamp(StringToTimestamp("2000-03-06 10:10:10")), + StringToTimestamp("2000-03-06 00:00:00")); +} + +TEST(TestTime, TimeStampAdd) { + EXPECT_EQ( + timestampaddSecond_int32_timestamp(30, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("2000-05-01 10:21:04")); + + EXPECT_EQ( + timestampaddSecond_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 30), + StringToTimestamp("2000-05-01 10:21:04")); + + EXPECT_EQ( + timestampaddMinute_int64_timestamp(-30, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("2000-05-01 09:50:34")); + + EXPECT_EQ( + timestampaddMinute_timestamp_int64(StringToTimestamp("2000-05-01 10:20:34"), -30), + StringToTimestamp("2000-05-01 09:50:34")); + + EXPECT_EQ( + timestampaddHour_int32_timestamp(20, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("2000-05-02 06:20:34")); + + EXPECT_EQ( + timestampaddHour_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 20), + StringToTimestamp("2000-05-02 06:20:34")); + + EXPECT_EQ( + timestampaddDay_int64_timestamp(-35, StringToTimestamp("2000-05-01 10:20:34")), 
+ StringToTimestamp("2000-03-27 10:20:34")); + + EXPECT_EQ( + timestampaddDay_timestamp_int64(StringToTimestamp("2000-05-01 10:20:34"), -35), + StringToTimestamp("2000-03-27 10:20:34")); + + EXPECT_EQ(timestampaddWeek_int32_timestamp(4, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("2000-05-29 10:20:34")); + + EXPECT_EQ(timestampaddWeek_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 4), + StringToTimestamp("2000-05-29 10:20:34")); + + EXPECT_EQ(timestampaddWeek_timestamp_int32(StringToTimestamp("2000-05-01 10:20:34"), 4), + StringToTimestamp("2000-05-29 10:20:34")); + + EXPECT_EQ( + timestampaddMonth_int64_timestamp(10, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("2001-03-01 10:20:34")); + + EXPECT_EQ( + timestampaddMonth_int64_timestamp(1, StringToTimestamp("2000-01-31 10:20:34")), + StringToTimestamp("2000-2-29 10:20:34")); + EXPECT_EQ( + timestampaddMonth_int64_timestamp(13, StringToTimestamp("2001-01-31 10:20:34")), + StringToTimestamp("2002-02-28 10:20:34")); + + EXPECT_EQ( + timestampaddMonth_int64_timestamp(11, StringToTimestamp("2000-05-31 10:20:34")), + StringToTimestamp("2001-04-30 10:20:34")); + + EXPECT_EQ( + timestampaddMonth_timestamp_int64(StringToTimestamp("2000-05-31 10:20:34"), 11), + StringToTimestamp("2001-04-30 10:20:34")); + + EXPECT_EQ( + timestampaddQuarter_int32_timestamp(-2, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("1999-11-01 10:20:34")); + + EXPECT_EQ(timestampaddYear_int64_timestamp(2, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("2002-05-01 10:20:34")); + + EXPECT_EQ( + timestampaddQuarter_int32_timestamp(-5, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("1999-02-01 10:20:34")); + EXPECT_EQ( + timestampaddQuarter_int32_timestamp(-6, StringToTimestamp("2000-05-01 10:20:34")), + StringToTimestamp("1998-11-01 10:20:34")); + + // date_add + EXPECT_EQ(date_add_int32_timestamp(7, StringToTimestamp("2000-05-01 00:00:00")), + 
StringToTimestamp("2000-05-08 00:00:00")); + + EXPECT_EQ(add_int32_timestamp(4, StringToTimestamp("2000-05-01 00:00:00")), + StringToTimestamp("2000-05-05 00:00:00")); + + EXPECT_EQ(add_int64_timestamp(7, StringToTimestamp("2000-05-01 00:00:00")), + StringToTimestamp("2000-05-08 00:00:00")); + + EXPECT_EQ(date_add_int64_timestamp(4, StringToTimestamp("2000-05-01 00:00:00")), + StringToTimestamp("2000-05-05 00:00:00")); + + EXPECT_EQ(date_add_int64_timestamp(4, StringToTimestamp("2000-02-27 00:00:00")), + StringToTimestamp("2000-03-02 00:00:00")); + + EXPECT_EQ(add_date64_int64(StringToTimestamp("2000-02-27 00:00:00"), 4), + StringToTimestamp("2000-03-02 00:00:00")); + + // date_sub + EXPECT_EQ(date_sub_timestamp_int32(StringToTimestamp("2000-05-01 00:00:00"), 7), + StringToTimestamp("2000-04-24 00:00:00")); + + EXPECT_EQ(subtract_timestamp_int32(StringToTimestamp("2000-05-01 00:00:00"), -7), + StringToTimestamp("2000-05-08 00:00:00")); + + EXPECT_EQ(date_diff_timestamp_int64(StringToTimestamp("2000-05-01 00:00:00"), 365), + StringToTimestamp("1999-05-02 00:00:00")); + + EXPECT_EQ(date_diff_timestamp_int64(StringToTimestamp("2000-03-01 00:00:00"), 1), + StringToTimestamp("2000-02-29 00:00:00")); + + EXPECT_EQ(date_diff_timestamp_int64(StringToTimestamp("2000-02-29 00:00:00"), 365), + StringToTimestamp("1999-03-01 00:00:00")); +} + +// test cases from http://www.staff.science.uu.nl/~gent0113/calendar/isocalendar.htm +TEST(TestTime, TestExtractWeek) { + std::vector<std::string> data; + + // A type + // Jan 1, 2 and 3 + data.push_back("2006-01-01 10:10:10"); + data.push_back("52"); + data.push_back("2006-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2006-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2006-04-24 10:10:10"); + data.push_back("17"); + data.push_back("2006-04-30 10:10:10"); + data.push_back("17"); + // Dec 29-31 + data.push_back("2006-12-29 10:10:10"); + data.push_back("52"); + 
data.push_back("2006-12-30 10:10:10"); + data.push_back("52"); + data.push_back("2006-12-31 10:10:10"); + data.push_back("52"); + // B(C) type + // Jan 1, 2 and 3 + data.push_back("2011-01-01 10:10:10"); + data.push_back("52"); + data.push_back("2011-01-02 10:10:10"); + data.push_back("52"); + data.push_back("2011-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2011-07-18 10:10:10"); + data.push_back("29"); + data.push_back("2011-07-24 10:10:10"); + data.push_back("29"); + // Dec 29-31 + data.push_back("2011-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2011-12-30 10:10:10"); + data.push_back("52"); + data.push_back("2011-12-31 10:10:10"); + data.push_back("52"); + // B(DC) type + // Jan 1, 2 and 3 + data.push_back("2005-01-01 10:10:10"); + data.push_back("53"); + data.push_back("2005-01-02 10:10:10"); + data.push_back("53"); + data.push_back("2005-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2005-11-07 10:10:10"); + data.push_back("45"); + data.push_back("2005-11-13 10:10:10"); + data.push_back("45"); + // Dec 29-31 + data.push_back("2005-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2005-12-30 10:10:10"); + data.push_back("52"); + data.push_back("2005-12-31 10:10:10"); + data.push_back("52"); + // C type + // Jan 1, 2 and 3 + data.push_back("2010-01-01 10:10:10"); + data.push_back("53"); + data.push_back("2010-01-02 10:10:10"); + data.push_back("53"); + data.push_back("2010-01-03 10:10:10"); + data.push_back("53"); + // middle, Monday and Sunday + data.push_back("2010-09-13 10:10:10"); + data.push_back("37"); + data.push_back("2010-09-19 10:10:10"); + data.push_back("37"); + // Dec 29-31 + data.push_back("2010-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2010-12-30 10:10:10"); + data.push_back("52"); + data.push_back("2010-12-31 10:10:10"); + data.push_back("52"); + // D type + // Jan 1, 2 and 3 + data.push_back("2037-01-01 10:10:10"); 
+ data.push_back("1"); + data.push_back("2037-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2037-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2037-08-17 10:10:10"); + data.push_back("34"); + data.push_back("2037-08-23 10:10:10"); + data.push_back("34"); + // Dec 29-31 + data.push_back("2037-12-29 10:10:10"); + data.push_back("53"); + data.push_back("2037-12-30 10:10:10"); + data.push_back("53"); + data.push_back("2037-12-31 10:10:10"); + data.push_back("53"); + // E type + // Jan 1, 2 and 3 + data.push_back("2014-01-01 10:10:10"); + data.push_back("1"); + data.push_back("2014-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2014-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2014-01-13 10:10:10"); + data.push_back("3"); + data.push_back("2014-01-19 10:10:10"); + data.push_back("3"); + // Dec 29-31 + data.push_back("2014-12-29 10:10:10"); + data.push_back("1"); + data.push_back("2014-12-30 10:10:10"); + data.push_back("1"); + data.push_back("2014-12-31 10:10:10"); + data.push_back("1"); + // F type + // Jan 1, 2 and 3 + data.push_back("2019-01-01 10:10:10"); + data.push_back("1"); + data.push_back("2019-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2019-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2019-02-11 10:10:10"); + data.push_back("7"); + data.push_back("2019-02-17 10:10:10"); + data.push_back("7"); + // Dec 29-31 + data.push_back("2019-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2019-12-30 10:10:10"); + data.push_back("1"); + data.push_back("2019-12-31 10:10:10"); + data.push_back("1"); + // G type + // Jan 1, 2 and 3 + data.push_back("2001-01-01 10:10:10"); + data.push_back("1"); + data.push_back("2001-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2001-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2001-03-19 10:10:10"); 
+ data.push_back("12"); + data.push_back("2001-03-25 10:10:10"); + data.push_back("12"); + // Dec 29-31 + data.push_back("2001-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2001-12-30 10:10:10"); + data.push_back("52"); + data.push_back("2001-12-31 10:10:10"); + data.push_back("1"); + // AG type + // Jan 1, 2 and 3 + data.push_back("2012-01-01 10:10:10"); + data.push_back("52"); + data.push_back("2012-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2012-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2012-04-02 10:10:10"); + data.push_back("14"); + data.push_back("2012-04-08 10:10:10"); + data.push_back("14"); + // Dec 29-31 + data.push_back("2012-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2012-12-30 10:10:10"); + data.push_back("52"); + data.push_back("2012-12-31 10:10:10"); + data.push_back("1"); + // BA type + // Jan 1, 2 and 3 + data.push_back("2000-01-01 10:10:10"); + data.push_back("52"); + data.push_back("2000-01-02 10:10:10"); + data.push_back("52"); + data.push_back("2000-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2000-05-22 10:10:10"); + data.push_back("21"); + data.push_back("2000-05-28 10:10:10"); + data.push_back("21"); + // Dec 29-31 + data.push_back("2000-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2000-12-30 10:10:10"); + data.push_back("52"); + data.push_back("2000-12-31 10:10:10"); + data.push_back("52"); + // CB type + // Jan 1, 2 and 3 + data.push_back("2016-01-01 10:10:10"); + data.push_back("53"); + data.push_back("2016-01-02 10:10:10"); + data.push_back("53"); + data.push_back("2016-01-03 10:10:10"); + data.push_back("53"); + // middle, Monday and Sunday + data.push_back("2016-06-20 10:10:10"); + data.push_back("25"); + data.push_back("2016-06-26 10:10:10"); + data.push_back("25"); + // Dec 29-31 + data.push_back("2016-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2016-12-30 
10:10:10"); + data.push_back("52"); + data.push_back("2016-12-31 10:10:10"); + data.push_back("52"); + // DC type + // Jan 1, 2 and 3 + data.push_back("2004-01-01 10:10:10"); + data.push_back("1"); + data.push_back("2004-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2004-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2004-07-19 10:10:10"); + data.push_back("30"); + data.push_back("2004-07-25 10:10:10"); + data.push_back("30"); + // Dec 29-31 + data.push_back("2004-12-29 10:10:10"); + data.push_back("53"); + data.push_back("2004-12-30 10:10:10"); + data.push_back("53"); + data.push_back("2004-12-31 10:10:10"); + data.push_back("53"); + // ED type + // Jan 1, 2 and 3 + data.push_back("2020-01-01 10:10:10"); + data.push_back("1"); + data.push_back("2020-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2020-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2020-08-17 10:10:10"); + data.push_back("34"); + data.push_back("2020-08-23 10:10:10"); + data.push_back("34"); + // Dec 29-31 + data.push_back("2020-12-29 10:10:10"); + data.push_back("53"); + data.push_back("2020-12-30 10:10:10"); + data.push_back("53"); + data.push_back("2020-12-31 10:10:10"); + data.push_back("53"); + // FE type + // Jan 1, 2 and 3 + data.push_back("2008-01-01 10:10:10"); + data.push_back("1"); + data.push_back("2008-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2008-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2008-09-15 10:10:10"); + data.push_back("38"); + data.push_back("2008-09-21 10:10:10"); + data.push_back("38"); + // Dec 29-31 + data.push_back("2008-12-29 10:10:10"); + data.push_back("1"); + data.push_back("2008-12-30 10:10:10"); + data.push_back("1"); + data.push_back("2008-12-31 10:10:10"); + data.push_back("1"); + // GF type + // Jan 1, 2 and 3 + data.push_back("2024-01-01 10:10:10"); + data.push_back("1"); + 
data.push_back("2024-01-02 10:10:10"); + data.push_back("1"); + data.push_back("2024-01-03 10:10:10"); + data.push_back("1"); + // middle, Monday and Sunday + data.push_back("2024-10-07 10:10:10"); + data.push_back("41"); + data.push_back("2024-10-13 10:10:10"); + data.push_back("41"); + // Dec 29-31 + data.push_back("2024-12-29 10:10:10"); + data.push_back("52"); + data.push_back("2024-12-30 10:10:10"); + data.push_back("1"); + data.push_back("2024-12-31 10:10:10"); + data.push_back("1"); + + for (uint32_t i = 0; i < data.size(); i += 2) { + gdv_timestamp ts = StringToTimestamp(data.at(i).c_str()); + gdv_int64 exp = atol(data.at(i + 1).c_str()); + EXPECT_EQ(extractWeek_timestamp(ts), exp); + } +} + +TEST(TestTime, TestMonthsBetween) { + std::vector<std::string> testStrings = { + "1995-03-02 00:00:00", "1995-02-02 00:00:00", "1.0", + "1995-02-02 00:00:00", "1995-03-02 00:00:00", "-1.0", + "1995-03-31 00:00:00", "1995-02-28 00:00:00", "1.0", + "1996-03-31 00:00:00", "1996-02-28 00:00:00", "1.09677418", + "1996-03-31 00:00:00", "1996-02-29 00:00:00", "1.0", + "1996-05-31 00:00:00", "1996-04-30 00:00:00", "1.0", + "1996-05-31 00:00:00", "1996-03-31 00:00:00", "2.0", + "1996-05-31 00:00:00", "1996-03-30 00:00:00", "2.03225806", + "1996-03-15 00:00:00", "1996-02-14 00:00:00", "1.03225806", + "1995-02-02 00:00:00", "1995-01-01 00:00:00", "1.03225806", + "1995-02-02 10:00:00", "1995-01-01 11:00:00", "1.03091397"}; + + for (uint32_t i = 0; i < testStrings.size();) { + gdv_timestamp endTs = StringToTimestamp(testStrings[i++].c_str()); + gdv_timestamp startTs = StringToTimestamp(testStrings[i++].c_str()); + + double expectedResult = atof(testStrings[i++].c_str()); + double actualResult = months_between_timestamp_timestamp(endTs, startTs); + + double diff = actualResult - expectedResult; + if (diff < 0) { + diff = expectedResult - actualResult; + } + + EXPECT_TRUE(diff < 0.001); + } +} + +TEST(TestTime, castVarcharTimestamp) { + ExecutionContext context; + int64_t context_ptr 
= reinterpret_cast<int64_t>(&context); + gdv_int32 out_len; + gdv_timestamp ts = StringToTimestamp("2000-05-01 10:20:34"); + const char* out = castVARCHAR_timestamp_int64(context_ptr, ts, 30L, &out_len); + EXPECT_EQ(std::string(out, out_len), "2000-05-01 10:20:34.000"); + + out = castVARCHAR_timestamp_int64(context_ptr, ts, 19L, &out_len); + EXPECT_EQ(std::string(out, out_len), "2000-05-01 10:20:34"); + + out = castVARCHAR_timestamp_int64(context_ptr, ts, 0L, &out_len); + EXPECT_EQ(std::string(out, out_len), ""); + + ts = StringToTimestamp("2-5-1 00:00:04"); + out = castVARCHAR_timestamp_int64(context_ptr, ts, 24L, &out_len); + EXPECT_EQ(std::string(out, out_len), "0002-05-01 00:00:04.000"); +} + +TEST(TestTime, TestCastTimestampToDate) { + gdv_timestamp ts = StringToTimestamp("2000-05-01 10:20:34"); + auto out = castDATE_timestamp(ts); + EXPECT_EQ(StringToTimestamp("2000-05-01 00:00:00"), out); +} + +TEST(TestTime, TestCastTimestampToTime) { + gdv_timestamp ts = StringToTimestamp("2000-05-01 10:20:34"); + auto expected_response = + static_cast<int32_t>(ts - StringToTimestamp("2000-05-01 00:00:00")); + auto out = castTIME_timestamp(ts); + EXPECT_EQ(expected_response, out); + + // Test when the defined value is midnight, so the returned value must 0 + ts = StringToTimestamp("1998-12-01 00:00:00"); + expected_response = 0; + out = castTIME_timestamp(ts); + EXPECT_EQ(expected_response, out); + + ts = StringToTimestamp("2015-09-16 23:59:59"); + expected_response = static_cast<int32_t>(ts - StringToTimestamp("2015-09-16 00:00:00")); + out = castTIME_timestamp(ts); + EXPECT_EQ(expected_response, out); +} + +TEST(TestTime, TestLastDay) { + // leap year test + gdv_timestamp ts = StringToTimestamp("2016-02-11 03:20:34"); + auto out = last_day_from_timestamp(ts); + EXPECT_EQ(StringToTimestamp("2016-02-29 00:00:00"), out); + + ts = StringToTimestamp("2016-02-29 23:59:59"); + out = last_day_from_timestamp(ts); + EXPECT_EQ(StringToTimestamp("2016-02-29 00:00:00"), out); + + ts 
= StringToTimestamp("2016-01-30 23:59:00"); + out = last_day_from_timestamp(ts); + EXPECT_EQ(StringToTimestamp("2016-01-31 00:00:00"), out); + + // normal year + ts = StringToTimestamp("2017-02-03 23:59:59"); + out = last_day_from_timestamp(ts); + EXPECT_EQ(StringToTimestamp("2017-02-28 00:00:00"), out); + + // december + ts = StringToTimestamp("2015-12-03 03:12:59"); + out = last_day_from_timestamp(ts); + EXPECT_EQ(StringToTimestamp("2015-12-31 00:00:00"), out); +} + +TEST(TestTime, TestToTimestamp) { + auto ts = StringToTimestamp("1970-01-01 00:00:00"); + EXPECT_EQ(ts, to_timestamp_int32(0)); + EXPECT_EQ(ts, to_timestamp_int64(0)); + EXPECT_EQ(ts, to_timestamp_float32(0)); + EXPECT_EQ(ts, to_timestamp_float64(0)); + + ts = StringToTimestamp("1970-01-01 00:00:01"); + EXPECT_EQ(ts, to_timestamp_int32(1)); + EXPECT_EQ(ts, to_timestamp_int64(1)); + EXPECT_EQ(ts, to_timestamp_float32(1)); + EXPECT_EQ(ts, to_timestamp_float64(1)); + + ts = StringToTimestamp("1970-01-01 00:01:00"); + EXPECT_EQ(ts, to_timestamp_int32(60)); + EXPECT_EQ(ts, to_timestamp_int64(60)); + EXPECT_EQ(ts, to_timestamp_float32(60)); + EXPECT_EQ(ts, to_timestamp_float64(60)); + + ts = StringToTimestamp("1970-01-01 01:00:00"); + EXPECT_EQ(ts, to_timestamp_int32(3600)); + EXPECT_EQ(ts, to_timestamp_int64(3600)); + EXPECT_EQ(ts, to_timestamp_float32(3600)); + EXPECT_EQ(ts, to_timestamp_float64(3600)); + + ts = StringToTimestamp("1970-01-02 00:00:00"); + EXPECT_EQ(ts, to_timestamp_int32(86400)); + EXPECT_EQ(ts, to_timestamp_int64(86400)); + EXPECT_EQ(ts, to_timestamp_float32(86400)); + EXPECT_EQ(ts, to_timestamp_float64(86400)); + + // tests with fractional part + ts = StringToTimestamp("1970-01-01 00:00:01") + 500; + EXPECT_EQ(ts, to_timestamp_float32(1.500f)); + EXPECT_EQ(ts, to_timestamp_float64(1.500)); + + ts = StringToTimestamp("1970-01-01 00:01:01") + 600; + EXPECT_EQ(ts, to_timestamp_float32(61.600f)); + EXPECT_EQ(ts, to_timestamp_float64(61.600)); + + ts = StringToTimestamp("1970-01-01 
01:00:01") + 400; + EXPECT_EQ(ts, to_timestamp_float32(3601.400f)); + EXPECT_EQ(ts, to_timestamp_float64(3601.400)); +} + +TEST(TestTime, TestToTimeNumeric) { + // input timestamp in seconds: 1970-01-01 00:00:00 + int64_t expected_output = 0; // 0 milliseconds + EXPECT_EQ(expected_output, to_time_int32(0)); + EXPECT_EQ(expected_output, to_time_int64(0)); + EXPECT_EQ(expected_output, to_time_float32(0.000f)); + EXPECT_EQ(expected_output, to_time_float64(0.000)); + + // input timestamp in seconds: 1970-01-01 00:00:01 + expected_output = 1000; // 1 seconds + EXPECT_EQ(expected_output, to_time_int32(1)); + EXPECT_EQ(expected_output, to_time_int64(1)); + EXPECT_EQ(expected_output, to_time_float32(1.000f)); + EXPECT_EQ(expected_output, to_time_float64(1.000)); + + // input timestamp in seconds: 1970-01-01 01:00:00 + expected_output = 3600000; // 3600 seconds + EXPECT_EQ(expected_output, to_time_int32(3600)); + EXPECT_EQ(expected_output, to_time_int64(3600)); + EXPECT_EQ(expected_output, to_time_float32(3600.000f)); + EXPECT_EQ(expected_output, to_time_float64(3600.000)); + + // input timestamp in seconds: 1970-01-01 23:59:59 + expected_output = 86399000; // 86399 seconds + EXPECT_EQ(expected_output, to_time_int32(86399)); + EXPECT_EQ(expected_output, to_time_int64(86399)); + EXPECT_EQ(expected_output, to_time_float32(86399.000f)); + EXPECT_EQ(expected_output, to_time_float64(86399.000)); + + // input timestamp in seconds: 2020-01-01 00:00:01 + expected_output = 1000; // 1 second + EXPECT_EQ(expected_output, to_time_int64(1577836801)); + EXPECT_EQ(expected_output, to_time_float64(1577836801.000)); + + // tests with fractional part + // input timestamp in seconds: 1970-01-01 00:00:01.500 + expected_output = 1500; // 1.5 seconds + EXPECT_EQ(expected_output, to_time_float32(1.500f)); + EXPECT_EQ(expected_output, to_time_float64(1.500)); + + // input timestamp in seconds: 1970-01-01 00:01:01.500 + expected_output = 61500; // 61.5 seconds + EXPECT_EQ(expected_output, 
to_time_float32(61.500f)); + EXPECT_EQ(expected_output, to_time_float64(61.500)); + + // input timestamp in seconds: 1970-01-01 01:00:01.500 + expected_output = 3601500; // 3601.5 seconds + EXPECT_EQ(expected_output, to_time_float32(3601.500f)); + EXPECT_EQ(expected_output, to_time_float64(3601.500)); +} + +TEST(TestTime, TestCastIntDayInterval) { + EXPECT_EQ(castBIGINT_daytimeinterval(10), 864000000); + EXPECT_EQ(castBIGINT_daytimeinterval(-100), -8640000001); + EXPECT_EQ(castBIGINT_daytimeinterval(-0), 0); +} + +TEST(TestTime, TestCastIntYearInterval) { + EXPECT_EQ(castINT_year_interval(24), 2); + EXPECT_EQ(castINT_year_interval(-24), -2); + EXPECT_EQ(castINT_year_interval(-23), -1); + + EXPECT_EQ(castBIGINT_year_interval(24), 2); + EXPECT_EQ(castBIGINT_year_interval(-24), -2); + EXPECT_EQ(castBIGINT_year_interval(-23), -1); +} + +TEST(TestTime, TestCastNullableInterval) { + ExecutionContext context; + auto context_ptr = reinterpret_cast<int64_t>(&context); + // Test castNULLABLEINTERVALDAY for int and bigint + EXPECT_EQ(castNULLABLEINTERVALDAY_int32(1), 1); + EXPECT_EQ(castNULLABLEINTERVALDAY_int32(12), 12); + EXPECT_EQ(castNULLABLEINTERVALDAY_int32(-55), -55); + EXPECT_EQ(castNULLABLEINTERVALDAY_int32(-1201), -1201); + EXPECT_EQ(castNULLABLEINTERVALDAY_int64(1), 1); + EXPECT_EQ(castNULLABLEINTERVALDAY_int64(12), 12); + EXPECT_EQ(castNULLABLEINTERVALDAY_int64(-55), -55); + EXPECT_EQ(castNULLABLEINTERVALDAY_int64(-1201), -1201); + + // Test castNULLABLEINTERVALYEAR for int and bigint + EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 1), 1); + EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 12), 12); + EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 55), 55); + EXPECT_EQ(castNULLABLEINTERVALYEAR_int32(context_ptr, 1201), 1201); + EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 1), 1); + EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 12), 12); + EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 55), 55); + 
EXPECT_EQ(castNULLABLEINTERVALYEAR_int64(context_ptr, 1201), 1201); + // validate overflow error when using bigint as input + castNULLABLEINTERVALYEAR_int64(context_ptr, INT64_MAX); + EXPECT_EQ(context.get_error(), "Integer overflow"); + context.Reset(); +} + +} // namespace gandiva diff --git a/src/arrow/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc b/src/arrow/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc new file mode 100644 index 000000000..695605b3c --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc @@ -0,0 +1,283 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "./epoch_time_point.h" + +// The first row is for non-leap years +static int days_in_a_month[2][12] = {{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + +bool is_leap_year(int yy) { + if ((yy % 4) != 0) { + // not divisible by 4 + return false; + } + // yy = 4x + if ((yy % 400) == 0) { + // yy = 400x + return true; + } + // yy = 4x, return true if yy != 100x + return ((yy % 100) != 0); +} + +bool is_last_day_of_month(const EpochTimePoint& tp) { + int matrix_index = is_leap_year(tp.TmYear()) ? 
1 : 0; + + return (tp.TmMday() == days_in_a_month[matrix_index][tp.TmMon()]); +} + +bool did_days_overflow(arrow_vendored::date::year_month_day ymd) { + int year = static_cast<int>(ymd.year()); + int month = static_cast<unsigned int>(ymd.month()); + int days = static_cast<unsigned int>(ymd.day()); + + int matrix_index = is_leap_year(year) ? 1 : 0; + + return days > days_in_a_month[matrix_index][month - 1]; +} + +int last_possible_day_in_month(int year, int month) { + int matrix_index = is_leap_year(year) ? 1 : 0; + + return days_in_a_month[matrix_index][month - 1]; +} + +extern "C" { + +#include <time.h> + +#include "./time_constants.h" +#include "./types.h" + +#define TIMESTAMP_DIFF_FIXED_UNITS(TYPE, NAME, FROM_MILLIS) \ + FORCE_INLINE \ + gdv_int32 NAME##_##TYPE##_##TYPE(gdv_##TYPE start_millis, gdv_##TYPE end_millis) { \ + return static_cast<int32_t>(FROM_MILLIS(end_millis - start_millis)); \ + } + +#define SIGN_ADJUST_DIFF(is_positive, diff) ((is_positive) ? (diff) : -(diff)) +#define MONTHS_TO_TIMEUNIT(diff, num_months) (diff) / (num_months) + +// Assuming end_millis > start_millis, the algorithm to find the diff in months is: +// diff_in_months = year_diff * 12 + month_diff +// This is approximately correct, except when the last month has not fully elapsed +// +// a) If end_day > start_day, return diff_in_months e.g. diff(2015-09-10, 2017-03-31) +// b) If end_day < start_day, return diff_in_months - 1 e.g. diff(2015-09-30, 2017-03-10) +// c) If end_day = start_day, check for millis e.g. 
diff(2015-03-10, 2017-03-10) +// Need to check if end_millis_in_day >= start_millis_in_day +// c1) If end_millis_in_day >= start_millis_in_day, return diff_in_months +// c2) else return diff_in_months - 1 +#define TIMESTAMP_DIFF_MONTH_UNITS(TYPE, NAME, N_MONTHS) \ + FORCE_INLINE \ + gdv_int32 NAME##_##TYPE##_##TYPE(gdv_##TYPE start_millis, gdv_##TYPE end_millis) { \ + gdv_int32 diff; \ + bool is_positive = (end_millis > start_millis); \ + if (!is_positive) { \ + /* if end_millis < start_millis, swap and multiply by -1 at the end */ \ + gdv_##TYPE tmp = start_millis; \ + start_millis = end_millis; \ + end_millis = tmp; \ + } \ + EpochTimePoint start_tm(start_millis); \ + EpochTimePoint end_tm(end_millis); \ + gdv_int32 months_diff; \ + months_diff = static_cast<gdv_int32>(12 * (end_tm.TmYear() - start_tm.TmYear()) + \ + (end_tm.TmMon() - start_tm.TmMon())); \ + if (end_tm.TmMday() > start_tm.TmMday()) { \ + /* case a */ \ + diff = MONTHS_TO_TIMEUNIT(months_diff, N_MONTHS); \ + return SIGN_ADJUST_DIFF(is_positive, diff); \ + } \ + if (end_tm.TmMday() < start_tm.TmMday()) { \ + /* case b */ \ + months_diff += (is_last_day_of_month(end_tm) ? 
1 : 0); \ + diff = MONTHS_TO_TIMEUNIT(months_diff - 1, N_MONTHS); \ + return SIGN_ADJUST_DIFF(is_positive, diff); \ + } \ + gdv_int32 end_day_millis = \ + static_cast<gdv_int32>(end_tm.TmHour() * MILLIS_IN_HOUR + \ + end_tm.TmMin() * MILLIS_IN_MIN + end_tm.TmSec()); \ + gdv_int32 start_day_millis = \ + static_cast<gdv_int32>(start_tm.TmHour() * MILLIS_IN_HOUR + \ + start_tm.TmMin() * MILLIS_IN_MIN + start_tm.TmSec()); \ + if (end_day_millis >= start_day_millis) { \ + /* case c1 */ \ + diff = MONTHS_TO_TIMEUNIT(months_diff, N_MONTHS); \ + return SIGN_ADJUST_DIFF(is_positive, diff); \ + } \ + /* case c2 */ \ + diff = MONTHS_TO_TIMEUNIT(months_diff - 1, N_MONTHS); \ + return SIGN_ADJUST_DIFF(is_positive, diff); \ + } + +#define TIMESTAMP_DIFF(TYPE) \ + TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffSecond, MILLIS_TO_SEC) \ + TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffMinute, MILLIS_TO_MINS) \ + TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffHour, MILLIS_TO_HOUR) \ + TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffDay, MILLIS_TO_DAY) \ + TIMESTAMP_DIFF_FIXED_UNITS(TYPE, timestampdiffWeek, MILLIS_TO_WEEK) \ + TIMESTAMP_DIFF_MONTH_UNITS(TYPE, timestampdiffMonth, 1) \ + TIMESTAMP_DIFF_MONTH_UNITS(TYPE, timestampdiffQuarter, 3) \ + TIMESTAMP_DIFF_MONTH_UNITS(TYPE, timestampdiffYear, 12) + +TIMESTAMP_DIFF(timestamp) + +#define ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_int32_##TYPE(gdv_int32 count, gdv_##TYPE millis) { \ + return millis + TO_MILLIS * static_cast<gdv_##TYPE>(count); \ + } + +// Documentation of mktime suggests that it handles +// TmMon() being negative, and also TmMon() being >= 12 by +// adjusting TmYear() accordingly +// +// Using gmtime_r() and timegm() instead of localtime_r() and mktime() +// since the input millis are since epoch +#define ADD_INT32_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, N_MONTHS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_int32_##TYPE(gdv_int32 count, gdv_##TYPE millis) { \ + 
EpochTimePoint tp(millis); \ + return tp.AddMonths(static_cast<int>(count * N_MONTHS)).MillisSinceEpoch(); \ + } + +// TODO: Handle overflow while converting gdv_int64 to millis +#define ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_int64_##TYPE(gdv_int64 count, gdv_##TYPE millis) { \ + return millis + TO_MILLIS * static_cast<gdv_##TYPE>(count); \ + } + +#define ADD_INT64_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, N_MONTHS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_int64_##TYPE(gdv_int64 count, gdv_##TYPE millis) { \ + EpochTimePoint tp(millis); \ + return tp.AddMonths(static_cast<int>(count * N_MONTHS)).MillisSinceEpoch(); \ + } + +#define ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE##_int32(gdv_##TYPE millis, gdv_int32 count) { \ + return millis + TO_MILLIS * static_cast<gdv_##TYPE>(count); \ + } + +#define ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE##_int64(gdv_##TYPE millis, gdv_int64 count) { \ + return millis + TO_MILLIS * static_cast<gdv_##TYPE>(count); \ + } + +#define ADD_TIMESTAMP_TO_INT32_MONTH_UNITS(TYPE, NAME, N_MONTHS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE##_int32(gdv_##TYPE millis, gdv_int32 count) { \ + EpochTimePoint tp(millis); \ + return tp.AddMonths(static_cast<int>(count * N_MONTHS)).MillisSinceEpoch(); \ + } + +#define ADD_TIMESTAMP_TO_INT64_MONTH_UNITS(TYPE, NAME, N_MONTHS) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE##_int64(gdv_##TYPE millis, gdv_int64 count) { \ + EpochTimePoint tp(millis); \ + return tp.AddMonths(static_cast<int>(count * N_MONTHS)).MillisSinceEpoch(); \ + } + +#define ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, NAME, TO_MILLIS) \ + ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \ + ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(TYPE, NAME, TO_MILLIS) + +#define ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, NAME, N_MONTHS) \ + ADD_INT32_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, 
N_MONTHS) \ + ADD_TIMESTAMP_TO_INT32_MONTH_UNITS(TYPE, NAME, N_MONTHS) + +#define TIMESTAMP_ADD_INT32(TYPE) \ + ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddSecond, MILLIS_IN_SEC) \ + ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddMinute, MILLIS_IN_MIN) \ + ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddHour, MILLIS_IN_HOUR) \ + ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddDay, MILLIS_IN_DAY) \ + ADD_TIMESTAMP_INT32_FIXEDUNITS(TYPE, timestampaddWeek, MILLIS_IN_WEEK) \ + ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, timestampaddMonth, 1) \ + ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, timestampaddQuarter, 3) \ + ADD_TIMESTAMP_INT32_MONTHUNITS(TYPE, timestampaddYear, 12) + +#define ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, NAME, TO_MILLIS) \ + ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(TYPE, NAME, TO_MILLIS) \ + ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(TYPE, NAME, TO_MILLIS) + +#define ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, NAME, N_MONTHS) \ + ADD_INT64_TO_TIMESTAMP_MONTH_UNITS(TYPE, NAME, N_MONTHS) \ + ADD_TIMESTAMP_TO_INT64_MONTH_UNITS(TYPE, NAME, N_MONTHS) + +#define TIMESTAMP_ADD_INT64(TYPE) \ + ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddSecond, MILLIS_IN_SEC) \ + ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddMinute, MILLIS_IN_MIN) \ + ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddHour, MILLIS_IN_HOUR) \ + ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddDay, MILLIS_IN_DAY) \ + ADD_TIMESTAMP_INT64_FIXEDUNITS(TYPE, timestampaddWeek, MILLIS_IN_WEEK) \ + ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, timestampaddMonth, 1) \ + ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, timestampaddQuarter, 3) \ + ADD_TIMESTAMP_INT64_MONTHUNITS(TYPE, timestampaddYear, 12) + +#define TIMESTAMP_ADD_INT(TYPE) \ + TIMESTAMP_ADD_INT32(TYPE) \ + TIMESTAMP_ADD_INT64(TYPE) + +TIMESTAMP_ADD_INT(date64) +TIMESTAMP_ADD_INT(timestamp) + +// add gdv_int32 to timestamp +ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY) +ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(date64, add, MILLIS_IN_DAY) 
+ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY) +ADD_INT32_TO_TIMESTAMP_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY) + +// add gdv_int64 to timestamp +ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY) +ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(date64, add, MILLIS_IN_DAY) +ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY) +ADD_INT64_TO_TIMESTAMP_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY) + +// date_sub, subtract, date_diff on gdv_int32 +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, date_sub, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, subtract, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, date_diff, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, date_sub, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, subtract, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, date_diff, -1 * MILLIS_IN_DAY) + +// date_sub, subtract, date_diff on gdv_int64 +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, date_sub, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, subtract, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, date_diff, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, date_sub, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, subtract, -1 * MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, date_diff, -1 * MILLIS_IN_DAY) + +// add timestamp to gdv_int32 +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(date64, add, MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT32_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY) + +// add timestamp to gdv_int64 +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, date_add, MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(date64, add, MILLIS_IN_DAY) +ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, date_add, MILLIS_IN_DAY) 
+ADD_TIMESTAMP_TO_INT64_FIXED_UNITS(timestamp, add, MILLIS_IN_DAY) + +} // extern "C" diff --git a/src/arrow/cpp/src/gandiva/precompiled/types.h b/src/arrow/cpp/src/gandiva/precompiled/types.h new file mode 100644 index 000000000..987ee2c6d --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/types.h @@ -0,0 +1,592 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> + +#include "gandiva/gdv_function_stubs.h" + +// Use the same names as in arrow data types. Makes it easy to write pre-processor macros. +using gdv_boolean = bool; +using gdv_int8 = int8_t; +using gdv_int16 = int16_t; +using gdv_int32 = int32_t; +using gdv_int64 = int64_t; +using gdv_uint8 = uint8_t; +using gdv_uint16 = uint16_t; +using gdv_uint32 = uint32_t; +using gdv_uint64 = uint64_t; +using gdv_float32 = float; +using gdv_float64 = double; +using gdv_date64 = int64_t; +using gdv_date32 = int32_t; +using gdv_time32 = int32_t; +using gdv_timestamp = int64_t; +using gdv_utf8 = char*; +using gdv_binary = char*; +using gdv_day_time_interval = int64_t; + +#ifdef GANDIVA_UNIT_TEST +// unit tests may be compiled without O2, so inlining may not happen. 
+#define FORCE_INLINE +#else +#define FORCE_INLINE __attribute__((always_inline)) +#endif + +extern "C" { + +bool bitMapGetBit(const unsigned char* bmap, int64_t position); +void bitMapSetBit(unsigned char* bmap, int64_t position, bool value); +void bitMapClearBitIfFalse(unsigned char* bmap, int64_t position, bool value); + +gdv_int64 extractMillennium_timestamp(gdv_timestamp millis); +gdv_int64 extractCentury_timestamp(gdv_timestamp millis); +gdv_int64 extractDecade_timestamp(gdv_timestamp millis); +gdv_int64 extractYear_timestamp(gdv_timestamp millis); +gdv_int64 extractDoy_timestamp(gdv_timestamp millis); +gdv_int64 extractQuarter_timestamp(gdv_timestamp millis); +gdv_int64 extractMonth_timestamp(gdv_timestamp millis); +gdv_int64 extractWeek_timestamp(gdv_timestamp millis); +gdv_int64 extractDow_timestamp(gdv_timestamp millis); +gdv_int64 extractDay_timestamp(gdv_timestamp millis); +gdv_int64 extractHour_timestamp(gdv_timestamp millis); +gdv_int64 extractMinute_timestamp(gdv_timestamp millis); +gdv_int64 extractSecond_timestamp(gdv_timestamp millis); +gdv_int64 extractHour_time32(gdv_int32 millis_in_day); +gdv_int64 extractMinute_time32(gdv_int32 millis_in_day); +gdv_int64 extractSecond_time32(gdv_int32 millis_in_day); + +gdv_int32 hash32(double val, gdv_int32 seed); +gdv_int32 hash32_buf(const gdv_uint8* buf, int len, gdv_int32 seed); +gdv_int64 hash64(double val, gdv_int64 seed); +gdv_int64 hash64_buf(const gdv_uint8* buf, int len, gdv_int64 seed); + +gdv_int32 timestampdiffMonth_timestamp_timestamp(gdv_timestamp, gdv_timestamp); + +gdv_int64 timestampaddSecond_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddMinute_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddHour_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddDay_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddWeek_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddMonth_int32_timestamp(gdv_int32, gdv_timestamp); 
+gdv_int64 timestampaddQuarter_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 timestampaddYear_int32_timestamp(gdv_int32, gdv_timestamp); + +gdv_int64 timestampaddSecond_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddMinute_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddHour_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddDay_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddWeek_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddMonth_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddQuarter_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 timestampaddYear_timestamp_int32(gdv_timestamp, gdv_int32); + +gdv_int64 timestampaddSecond_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddMinute_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddHour_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddDay_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddWeek_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddMonth_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddQuarter_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_int64 timestampaddYear_int64_timestamp(gdv_int64, gdv_timestamp); + +gdv_int64 timestampaddSecond_timestamp_int64(gdv_timestamp, gdv_int64); +gdv_int64 timestampaddMinute_timestamp_int64(gdv_timestamp, gdv_int64); +gdv_int64 timestampaddHour_timestamp_int64(gdv_timestamp, gdv_int64); +gdv_int64 timestampaddDay_timestamp_int64(gdv_timestamp, gdv_int64); +gdv_int64 timestampaddWeek_timestamp_int64(gdv_timestamp, gdv_int64); +gdv_int64 timestampaddMonth_timestamp_int64(gdv_timestamp, gdv_int64); +gdv_int64 timestampaddQuarter_timestamp_int64(gdv_timestamp, gdv_int64); +gdv_int64 timestampaddYear_timestamp_int64(gdv_timestamp, gdv_int64); + +gdv_int64 date_add_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 add_int64_timestamp(gdv_int64, gdv_timestamp); 
+gdv_int64 add_int32_timestamp(gdv_int32, gdv_timestamp); +gdv_int64 date_add_int64_timestamp(gdv_int64, gdv_timestamp); +gdv_timestamp add_date64_int64(gdv_date64, gdv_int64); + +gdv_timestamp to_timestamp_int32(gdv_int32); +gdv_timestamp to_timestamp_int64(gdv_int64); +gdv_timestamp to_timestamp_float32(gdv_float32); +gdv_timestamp to_timestamp_float64(gdv_float64); + +gdv_time32 to_time_int32(gdv_int32); +gdv_time32 to_time_int64(gdv_int64); +gdv_time32 to_time_float32(gdv_float32); +gdv_time32 to_time_float64(gdv_float64); + +gdv_int64 date_sub_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 subtract_timestamp_int32(gdv_timestamp, gdv_int32); +gdv_int64 date_diff_timestamp_int64(gdv_timestamp, gdv_int64); + +gdv_boolean castBIT_utf8(gdv_int64 context, const char* data, gdv_int32 data_len); + +bool is_distinct_from_timestamp_timestamp(gdv_int64, bool, gdv_int64, bool); +bool is_not_distinct_from_int32_int32(gdv_int32, bool, gdv_int32, bool); + +gdv_int64 date_trunc_Second_date64(gdv_date64); +gdv_int64 date_trunc_Minute_date64(gdv_date64); +gdv_int64 date_trunc_Hour_date64(gdv_date64); +gdv_int64 date_trunc_Day_date64(gdv_date64); +gdv_int64 date_trunc_Month_date64(gdv_date64); +gdv_int64 date_trunc_Quarter_date64(gdv_date64); +gdv_int64 date_trunc_Year_date64(gdv_date64); +gdv_int64 date_trunc_Decade_date64(gdv_date64); +gdv_int64 date_trunc_Century_date64(gdv_date64); +gdv_int64 date_trunc_Millennium_date64(gdv_date64); + +gdv_int64 date_trunc_Week_timestamp(gdv_timestamp); +double months_between_timestamp_timestamp(gdv_uint64, gdv_uint64); + +gdv_int32 mem_compare(const char* left, gdv_int32 left_len, const char* right, + gdv_int32 right_len); + +gdv_int32 mod_int64_int32(gdv_int64 left, gdv_int32 right); +gdv_float64 mod_float64_float64(gdv_int64 context, gdv_float64 left, gdv_float64 right); + +gdv_int64 divide_int64_int64(gdv_int64 context, gdv_int64 in1, gdv_int64 in2); + +gdv_int64 div_int64_int64(gdv_int64 context, gdv_int64 in1, gdv_int64 in2); 
+gdv_float32 div_float32_float32(gdv_int64 context, gdv_float32 in1, gdv_float32 in2); +gdv_float64 div_float64_float64(gdv_int64 context, gdv_float64 in1, gdv_float64 in2); + +gdv_float32 round_float32(gdv_float32); +gdv_float64 round_float64(gdv_float64); +gdv_float32 round_float32_int32(gdv_float32 number, gdv_int32 out_scale); +gdv_float64 round_float64_int32(gdv_float64 number, gdv_int32 out_scale); +gdv_float64 get_scale_multiplier(gdv_int32); +gdv_int32 round_int32_int32(gdv_int32 number, gdv_int32 precision); +gdv_int64 round_int64_int32(gdv_int64 number, gdv_int32 precision); +gdv_int32 round_int32(gdv_int32); +gdv_int64 round_int64(gdv_int64); +gdv_int64 get_power_of_10(gdv_int32); + +const char* bin_int32(int64_t context, gdv_int32 value, int32_t* out_len); +const char* bin_int64(int64_t context, gdv_int64 value, int32_t* out_len); + +gdv_float64 cbrt_int32(gdv_int32); +gdv_float64 cbrt_int64(gdv_int64); +gdv_float64 cbrt_float32(gdv_float32); +gdv_float64 cbrt_float64(gdv_float64); + +gdv_float64 exp_int32(gdv_int32); +gdv_float64 exp_int64(gdv_int64); +gdv_float64 exp_float32(gdv_float32); +gdv_float64 exp_float64(gdv_float64); + +gdv_float64 log_int32(gdv_int32); +gdv_float64 log_int64(gdv_int64); +gdv_float64 log_float32(gdv_float32); +gdv_float64 log_float64(gdv_float64); + +gdv_float64 log10_int32(gdv_int32); +gdv_float64 log10_int64(gdv_int64); +gdv_float64 log10_float32(gdv_float32); +gdv_float64 log10_float64(gdv_float64); + +gdv_float64 sin_int32(gdv_int32); +gdv_float64 sin_int64(gdv_int64); +gdv_float64 sin_float32(gdv_float32); +gdv_float64 sin_float64(gdv_float64); +gdv_float64 cos_int32(gdv_int32); +gdv_float64 cos_int64(gdv_int64); +gdv_float64 cos_float32(gdv_float32); +gdv_float64 cos_float64(gdv_float64); +gdv_float64 asin_int32(gdv_int32); +gdv_float64 asin_int64(gdv_int64); +gdv_float64 asin_float32(gdv_float32); +gdv_float64 asin_float64(gdv_float64); +gdv_float64 acos_int32(gdv_int32); +gdv_float64 acos_int64(gdv_int64); 
+gdv_float64 acos_float32(gdv_float32); +gdv_float64 acos_float64(gdv_float64); +gdv_float64 tan_int32(gdv_int32); +gdv_float64 tan_int64(gdv_int64); +gdv_float64 tan_float32(gdv_float32); +gdv_float64 tan_float64(gdv_float64); +gdv_float64 atan_int32(gdv_int32); +gdv_float64 atan_int64(gdv_int64); +gdv_float64 atan_float32(gdv_float32); +gdv_float64 atan_float64(gdv_float64); +gdv_float64 sinh_int32(gdv_int32); +gdv_float64 sinh_int64(gdv_int64); +gdv_float64 sinh_float32(gdv_float32); +gdv_float64 sinh_float64(gdv_float64); +gdv_float64 cosh_int32(gdv_int32); +gdv_float64 cosh_int64(gdv_int64); +gdv_float64 cosh_float32(gdv_float32); +gdv_float64 cosh_float64(gdv_float64); +gdv_float64 tanh_int32(gdv_int32); +gdv_float64 tanh_int64(gdv_int64); +gdv_float64 tanh_float32(gdv_float32); +gdv_float64 tanh_float64(gdv_float64); +gdv_float64 atan2_int32_int32(gdv_int32 in1, gdv_int32 in2); +gdv_float64 atan2_int64_int64(gdv_int64 in1, gdv_int64 in2); +gdv_float64 atan2_float32_float32(gdv_float32 in1, gdv_float32 in2); +gdv_float64 atan2_float64_float64(gdv_float64 in1, gdv_float64 in2); +gdv_float64 cot_float32(gdv_float32); +gdv_float64 cot_float64(gdv_float64); +gdv_float64 radians_int32(gdv_int32); +gdv_float64 radians_int64(gdv_int64); +gdv_float64 radians_float32(gdv_float32); +gdv_float64 radians_float64(gdv_float64); +gdv_float64 degrees_int32(gdv_int32); +gdv_float64 degrees_int64(gdv_int64); +gdv_float64 degrees_float32(gdv_float32); +gdv_float64 degrees_float64(gdv_float64); + +gdv_int32 bitwise_and_int32_int32(gdv_int32 in1, gdv_int32 in2); +gdv_int64 bitwise_and_int64_int64(gdv_int64 in1, gdv_int64 in2); +gdv_int32 bitwise_or_int32_int32(gdv_int32 in1, gdv_int32 in2); +gdv_int64 bitwise_or_int64_int64(gdv_int64 in1, gdv_int64 in2); +gdv_int32 bitwise_xor_int32_int32(gdv_int32 in1, gdv_int32 in2); +gdv_int64 bitwise_xor_int64_int64(gdv_int64 in1, gdv_int64 in2); +gdv_int32 bitwise_not_int32(gdv_int32); +gdv_int64 bitwise_not_int64(gdv_int64); + +gdv_float64 
power_float64_float64(gdv_float64, gdv_float64); + /* NOTE(review): this section ends with a closing brace annotated extern C, so these prototypes presumably have C linkage; the opening brace is outside this excerpt. */ +gdv_float64 log_int32_int32(gdv_int64 context, gdv_int32 base, gdv_int32 value); + /* String predicates returning bool: each takes two (pointer, length) pairs and no context argument. */ +bool starts_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* prefix, + gdv_int32 prefix_len); +bool ends_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* suffix, + gdv_int32 suffix_len); +bool is_substr_utf8_utf8(const char* data, gdv_int32 data_len, const char* substr, + gdv_int32 substr_len); + +gdv_int32 utf8_length(gdv_int64 context, const char* data, gdv_int32 data_len); + +gdv_int32 utf8_last_char_pos(gdv_int64 context, const char* data, gdv_int32 data_len); + /* castDATE_* and castTIMESTAMP_* prototypes: only the utf8 variants take an execution_context argument; the other variants take the value alone. */ +gdv_date64 castDATE_utf8(int64_t execution_context, const char* input, gdv_int32 length); + +gdv_date64 castDATE_int64(gdv_int64 date); + +gdv_date64 castDATE_date32(gdv_date32 date); + /* Unlike the other castDATE_* prototypes in this group, which return gdv_date64, castDATE_int32 returns gdv_date32. */ +gdv_date32 castDATE_int32(gdv_int32 date); + +gdv_timestamp castTIMESTAMP_utf8(int64_t execution_context, const char* input, + gdv_int32 length); +gdv_timestamp castTIMESTAMP_date64(gdv_date64); +gdv_timestamp castTIMESTAMP_int64(gdv_int64); +gdv_date64 castDATE_timestamp(gdv_timestamp); +gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis); +const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64, gdv_int32*); +gdv_date64 last_day_from_timestamp(gdv_date64 millis); + +gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale); + +const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len, + gdv_int32 repeat_times, gdv_int32* out_len); + /* substr_* comes in two forms: with an explicit gdv_int64 length, and with only an offset. */ +const char* substr_utf8_int64_int64(gdv_int64 context, const char* input, + gdv_int32 in_len, gdv_int64 offset64, + gdv_int64 length, gdv_int32* out_len); +const char* substr_utf8_int64(gdv_int64 context, const char* input, gdv_int32 in_len, + gdv_int64 offset64, gdv_int32* out_len); + /* concat_* overloads for 2 to 10 inputs. Every input is a (pointer, length, validity flag) triple and the last parameter is a gdv_int32* out_len. NOTE(review): presumably the result length is written through out_len; confirm against the definitions. */ +const char* concat_utf8_utf8(gdv_int64 context, const char* left, gdv_int32 left_len, + bool left_validity, const char* right, gdv_int32 right_len, + bool right_validity, 
gdv_int32* out_len); +const char* concat_utf8_utf8_utf8(gdv_int64 context, const char* in1, gdv_int32 in1_len, + bool in1_validity, const char* in2, gdv_int32 in2_len, + bool in2_validity, const char* in3, gdv_int32 in3_len, + bool in3_validity, gdv_int32* out_len); +const char* concat_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1, + gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, + bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, + const char* in4, gdv_int32 in4_len, + bool in4_validity, gdv_int32* out_len); /* space_int32 and space_int64 sit between the 4-input and 5-input concat overloads; they take a count n instead of input strings. */ +const char* space_int32(gdv_int64 ctx, gdv_int32 n, int32_t* out_len); +const char* space_int64(gdv_int64 ctx, gdv_int64 n, int32_t* out_len); +const char* concat_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + gdv_int32* out_len); +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, gdv_int32* out_len); +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, 
gdv_int32* out_len); +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len, + bool in8_validity, gdv_int32* out_len); +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len, + bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity, + gdv_int32* out_len); +const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity, + const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3, + gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len, + bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity, + const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7, + gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len, + bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity, + const char* in10, gdv_int32 in10_len, bool in10_validity, gdv_int32* out_len); + /* concatOperator_* overloads for 2 to 10 inputs. Unlike the concat_* overloads, each input is a (pointer, length) pair with no validity flag. */ +const char* concatOperator_utf8_utf8(gdv_int64 context, const char* left, + gdv_int32 left_len, const char* right, + 
gdv_int32 right_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8(gdv_int64 context, const char* in1, + gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, + gdv_int32 in3_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1, + gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, + gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8, + gdv_int32 in8_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* 
in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8, + gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, gdv_int32* out_len); +const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8( + gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2, + gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4, + gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6, + gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8, + gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, const char* in10, + gdv_int32 in10_len, gdv_int32* out_len); + /* castVARCHAR_* and castVARBINARY_* on utf8 or binary input. These have two distinct trailing parameters: an int64_t out_len passed by value and an int32_t* out_length. NOTE(review): out_len looks like a requested length and out_length the resulting one; confirm against the definitions. */ +const char* castVARCHAR_binary_int64(gdv_int64 context, const char* data, + gdv_int32 data_len, int64_t out_len, + int32_t* out_length); + +const char* castVARCHAR_utf8_int64(gdv_int64 context, const char* data, + gdv_int32 data_len, int64_t out_len, + int32_t* out_length); + +const char* castVARBINARY_utf8_int64(gdv_int64 context, const char* data, + gdv_int32 data_len, int64_t out_len, + int32_t* out_length); + +const char* castVARBINARY_binary_int64(gdv_int64 context, const char* data, + gdv_int32 data_len, int64_t out_len, + int32_t* out_length); + +const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len); + /* Trim prototypes: the single-string forms take only the text; the *_utf8_utf8 forms additionally take a trimtext (pointer, length) pair. */ +const char* ltrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len); + +const char* rtrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len); + +const char* btrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len, + int32_t* out_len); + +const char* ltrim_utf8_utf8(gdv_int64 context, const char* basetext, + gdv_int32 basetext_len, const char* trimtext, + gdv_int32 trimtext_len, int32_t* out_len); + +const char* rtrim_utf8_utf8(gdv_int64 context, const 
char* basetext, + gdv_int32 basetext_len, const char* trimtext, + gdv_int32 trimtext_len, int32_t* out_len); + +const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext, + gdv_int32 basetext_len, const char* trimtext, + gdv_int32 trimtext_len, int32_t* out_len); + +gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len); + /* Argument order differs between locate_* and strpos_*: locate_* takes the substring first, strpos_* takes the searched string first. */ +gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len, + const char* str, gdv_int32 str_len); + +gdv_int32 strpos_utf8_utf8(gdv_int64 context, const char* str, gdv_int32 str_len, + const char* sub_str, gdv_int32 sub_str_len); + +gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str, + gdv_int32 sub_str_len, const char* str, + gdv_int32 str_len, gdv_int32 start_pos); + /* Pad prototypes: the *_utf8_int32_utf8 forms take an explicit fill_text (pointer, length) pair; the *_utf8_int32 forms omit it. */ +const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, const char* fill_text, + gdv_int32 fill_text_len, gdv_int32* out_len); + +const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, const char* fill_text, + gdv_int32 fill_text_len, gdv_int32* out_len); + +const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, gdv_int32* out_len); + +const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 return_length, gdv_int32* out_len); + /* replace_with_max_len_utf8_utf8_utf8 has the same parameters as replace_utf8_utf8_utf8 plus a gdv_int32 max_length placed before out_len. */ +const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text, + gdv_int32 text_len, const char* from_str, + gdv_int32 from_str_len, + const char* to_str, gdv_int32 to_str_len, + gdv_int32 max_length, gdv_int32* out_len); + +const char* replace_utf8_utf8_utf8(gdv_int64 context, const char* text, + gdv_int32 text_len, const char* from_str, + gdv_int32 from_str_len, const char* to_str, + gdv_int32 to_str_len, gdv_int32* out_len); + +const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char* text_in, + int32_t text_len, + const char* 
char_to_replace, + int32_t char_to_replace_len, + int32_t* out_len); + /* convert_to* prototypes: each takes a context, the input, and a trailing int32_t* out_len, and returns const char*. Most have a plain and a _be variant. NOTE(review): _be presumably means big-endian; confirm against the definitions. */ +const char* convert_toDOUBLE(int64_t context, double value, int32_t* out_len); + +const char* convert_toDOUBLE_be(int64_t context, double value, int32_t* out_len); + +const char* convert_toFLOAT(int64_t context, float value, int32_t* out_len); + +const char* convert_toFLOAT_be(int64_t context, float value, int32_t* out_len); + +const char* convert_toBIGINT(int64_t context, int64_t value, int32_t* out_len); + +const char* convert_toBIGINT_be(int64_t context, int64_t value, int32_t* out_len); + +const char* convert_toINT(int64_t context, int32_t value, int32_t* out_len); + +const char* convert_toINT_be(int64_t context, int32_t value, int32_t* out_len); + +const char* convert_toBOOLEAN(int64_t context, bool value, int32_t* out_len); + +const char* convert_toTIME_EPOCH(int64_t context, int32_t value, int32_t* out_len); + +const char* convert_toTIME_EPOCH_be(int64_t context, int32_t value, int32_t* out_len); + +const char* convert_toTIMESTAMP_EPOCH(int64_t context, int64_t timestamp, + int32_t* out_len); +const char* convert_toTIMESTAMP_EPOCH_be(int64_t context, int64_t timestamp, + int32_t* out_len); + +const char* convert_toDATE_EPOCH(int64_t context, int64_t date, int32_t* out_len); + +const char* convert_toDATE_EPOCH_be(int64_t context, int64_t date, int32_t* out_len); + +const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len, + int32_t* out_len); + +const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len, + const char* splitter, gdv_int32 split_len, gdv_int32 index, + gdv_int32* out_len); + +const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text, + gdv_int32 text_len, gdv_int32 offset, + gdv_int32 length, gdv_int32* out_len); + /* castVARCHAR_* from scalar values: each takes a context, the value, a 64-bit length passed by value, and an output length pointer. */ +const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value, + gdv_int64 out_len, gdv_int32* out_length); + +const char* castVARCHAR_int32_int64(int64_t context, int32_t value, int64_t 
len, + int32_t* out_len); + +const char* castVARCHAR_int64_int64(int64_t context, int64_t value, int64_t len, + int32_t* out_len); + +const char* castVARCHAR_float32_int64(int64_t context, float value, int64_t len, + int32_t* out_len); + +const char* castVARCHAR_float64_int64(int64_t context, double value, int64_t len, + int32_t* out_len); + +const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 number, gdv_int32* out_len); + +const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32 number, gdv_int32* out_len); + +const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len, + gdv_int32* out_len); + /* Numeric casts: the *_utf8 variants take a context plus a (pointer, length) pair; the casts from float32, float64 and interval values further down take only the value. */ +int32_t castINT_utf8(int64_t context, const char* data, int32_t len); + +int64_t castBIGINT_utf8(int64_t context, const char* data, int32_t len); + +float castFLOAT4_utf8(int64_t context, const char* data, int32_t len); + +double castFLOAT8_utf8(int64_t context, const char* data, int32_t len); + +int32_t castINT_float32(gdv_float32 value); + +int32_t castINT_float64(gdv_float64 value); + +int64_t castBIGINT_float32(gdv_float32 value); + +int64_t castBIGINT_float64(gdv_float64 value); + +int64_t castBIGINT_daytimeinterval(gdv_day_time_interval in); + +int32_t castINT_year_interval(gdv_month_interval in); + +int64_t castBIGINT_year_interval(gdv_month_interval in); + /* Interval casts: only the castNULLABLEINTERVALYEAR_* variants take a context argument; the castNULLABLEINTERVALDAY_* variants take the value alone. */ +gdv_day_time_interval castNULLABLEINTERVALDAY_int32(gdv_int32 in); + +gdv_day_time_interval castNULLABLEINTERVALDAY_int64(gdv_int64 in); + +gdv_month_interval castNULLABLEINTERVALYEAR_int32(int64_t context, gdv_int32 in); + +gdv_month_interval castNULLABLEINTERVALYEAR_int64(int64_t context, gdv_int64 in); + +} // extern "C" |