diff options
Diffstat (limited to 'src/arrow/cpp/src/parquet/encoding_benchmark.cc')
-rw-r--r-- | src/arrow/cpp/src/parquet/encoding_benchmark.cc | 802 |
1 files changed, 802 insertions, 0 deletions
diff --git a/src/arrow/cpp/src/parquet/encoding_benchmark.cc b/src/arrow/cpp/src/parquet/encoding_benchmark.cc new file mode 100644 index 000000000..7c5eafd15 --- /dev/null +++ b/src/arrow/cpp/src/parquet/encoding_benchmark.cc @@ -0,0 +1,802 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "benchmark/benchmark.h" + +#include "arrow/array.h" +#include "arrow/array/builder_binary.h" +#include "arrow/array/builder_dict.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/testing/util.h" +#include "arrow/type.h" +#include "arrow/util/byte_stream_split.h" + +#include "parquet/encoding.h" +#include "parquet/platform.h" +#include "parquet/schema.h" + +#include <cmath> +#include <random> + +using arrow::default_memory_pool; +using arrow::MemoryPool; + +namespace { + +// The min/max number of values used to drive each family of encoding benchmarks +constexpr int MIN_RANGE = 1024; +constexpr int MAX_RANGE = 65536; +} // namespace + +namespace parquet { + +using schema::PrimitiveNode; + +std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) { + auto node = PrimitiveNode::Make("int64", repetition, Type::INT64); + return std::make_shared<ColumnDescriptor>(node, repetition != Repetition::REQUIRED, + repetition == Repetition::REPEATED); +} + +static void BM_PlainEncodingBoolean(benchmark::State& state) { + std::vector<bool> values(state.range(0), true); + auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN); + auto typed_encoder = dynamic_cast<BooleanEncoder*>(encoder.get()); + + for (auto _ : state) { + typed_encoder->Put(values, static_cast<int>(values.size())); + typed_encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); +} + +BENCHMARK(BM_PlainEncodingBoolean)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainDecodingBoolean(benchmark::State& state) { + std::vector<bool> values(state.range(0), true); + bool* output = new bool[state.range(0)]; + auto encoder = MakeEncoder(Type::BOOLEAN, Encoding::PLAIN); + auto typed_encoder = dynamic_cast<BooleanEncoder*>(encoder.get()); + typed_encoder->Put(values, static_cast<int>(values.size())); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + for (auto _ : state) { + auto decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN); + decoder->SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder->Decode(output, static_cast<int>(values.size())); + } + + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(bool)); + delete[] output; +} + +BENCHMARK(BM_PlainDecodingBoolean)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainEncodingInt64(benchmark::State& state) { + std::vector<int64_t> values(state.range(0), 64); + auto encoder = MakeTypedEncoder<Int64Type>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); +} + +BENCHMARK(BM_PlainEncodingInt64)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainDecodingInt64(benchmark::State& state) { + std::vector<int64_t> values(state.range(0), 64); + auto encoder = MakeTypedEncoder<Int64Type>(Encoding::PLAIN); + encoder->Put(values.data(), static_cast<int>(values.size())); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + for (auto _ : state) { + auto decoder = MakeTypedDecoder<Int64Type>(Encoding::PLAIN); + decoder->SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder->Decode(values.data(), static_cast<int>(values.size())); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(int64_t)); +} + +BENCHMARK(BM_PlainDecodingInt64)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainEncodingDouble(benchmark::State& state) { + std::vector<double> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double)); +} + +BENCHMARK(BM_PlainEncodingDouble)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainEncodingDoubleNaN(benchmark::State& state) { + std::vector<double> values(state.range(0), nan("")); + auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double)); +} + +BENCHMARK(BM_PlainEncodingDoubleNaN)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainDecodingDouble(benchmark::State& state) { + std::vector<double> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<DoubleType>(Encoding::PLAIN); + encoder->Put(values.data(), static_cast<int>(values.size())); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + for (auto _ : state) { + auto decoder = MakeTypedDecoder<DoubleType>(Encoding::PLAIN); + decoder->SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder->Decode(values.data(), static_cast<int>(values.size())); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(double)); +} + +BENCHMARK(BM_PlainDecodingDouble)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainEncodingFloat(benchmark::State& state) { + std::vector<float> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float)); +} + +BENCHMARK(BM_PlainEncodingFloat)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainEncodingFloatNaN(benchmark::State& state) { + std::vector<float> values(state.range(0), nanf("")); + auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->Put(values.data(), static_cast<int>(values.size())); + encoder->FlushValues(); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float)); +} + +BENCHMARK(BM_PlainEncodingFloatNaN)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_PlainDecodingFloat(benchmark::State& state) { + std::vector<float> values(state.range(0), 64.0); + auto encoder = MakeTypedEncoder<FloatType>(Encoding::PLAIN); + encoder->Put(values.data(), static_cast<int>(values.size())); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + for (auto _ : state) { + auto decoder = MakeTypedDecoder<FloatType>(Encoding::PLAIN); + decoder->SetData(static_cast<int>(values.size()), buf->data(), + static_cast<int>(buf->size())); + decoder->Decode(values.data(), static_cast<int>(values.size())); + } + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(float)); +} + +BENCHMARK(BM_PlainDecodingFloat)->Range(MIN_RANGE, MAX_RANGE); + +template <typename ParquetType> +struct BM_SpacedEncodingTraits { + using ArrowType = typename EncodingTraits<ParquetType>::ArrowType; + using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType; + using CType = typename ParquetType::c_type; +}; + +template <> +struct BM_SpacedEncodingTraits<BooleanType> { + // Leverage UInt8 vector array data for Boolean, the input src of PutSpaced is bool* + using ArrowType = ::arrow::UInt8Type; + using ArrayType = ::arrow::UInt8Array; + using CType = bool; +}; + +static void BM_PlainSpacedArgs(benchmark::internal::Benchmark* bench) { + constexpr auto kPlainSpacedSize = 32 * 1024; // 32k + + bench->Args({/*size*/ kPlainSpacedSize, /*null_in_ten_thousand*/ 1}); + bench->Args({/*size*/ kPlainSpacedSize, /*null_in_ten_thousand*/ 100}); + bench->Args({/*size*/ kPlainSpacedSize, /*null_in_ten_thousand*/ 1000}); + bench->Args({/*size*/ kPlainSpacedSize, /*null_in_ten_thousand*/ 5000}); + bench->Args({/*size*/ kPlainSpacedSize, /*null_in_ten_thousand*/ 10000}); +} + +template <typename ParquetType> +static void BM_PlainEncodingSpaced(benchmark::State& state) { + using ArrowType = typename BM_SpacedEncodingTraits<ParquetType>::ArrowType; + using ArrayType = typename BM_SpacedEncodingTraits<ParquetType>::ArrayType; + using CType = typename BM_SpacedEncodingTraits<ParquetType>::CType; + + const int num_values = static_cast<int>(state.range(0)); + const double null_percent = static_cast<double>(state.range(1)) / 10000.0; + + auto rand = ::arrow::random::RandomArrayGenerator(1923); + const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent); + const auto valid_bits = array->null_bitmap_data(); + const auto array_actual = ::arrow::internal::checked_pointer_cast<ArrayType>(array); + const auto raw_values = array_actual->raw_values(); + // Guarantee the type cast between raw_values and input of PutSpaced. + static_assert(sizeof(CType) == sizeof(*raw_values), "Type mismatch"); + // Cast only happens for BooleanType as it use UInt8 for the array data to match a bool* + // input to PutSpaced. + const auto src = reinterpret_cast<const CType*>(raw_values); + + auto encoder = MakeTypedEncoder<ParquetType>(Encoding::PLAIN); + for (auto _ : state) { + encoder->PutSpaced(src, num_values, valid_bits, 0); + encoder->FlushValues(); + } + state.counters["null_percent"] = null_percent * 100; + state.SetBytesProcessed(state.iterations() * num_values * sizeof(CType)); +} + +static void BM_PlainEncodingSpacedBoolean(benchmark::State& state) { + BM_PlainEncodingSpaced<BooleanType>(state); +} +BENCHMARK(BM_PlainEncodingSpacedBoolean)->Apply(BM_PlainSpacedArgs); + +static void BM_PlainEncodingSpacedFloat(benchmark::State& state) { + BM_PlainEncodingSpaced<FloatType>(state); +} +BENCHMARK(BM_PlainEncodingSpacedFloat)->Apply(BM_PlainSpacedArgs); + +static void BM_PlainEncodingSpacedDouble(benchmark::State& state) { + BM_PlainEncodingSpaced<DoubleType>(state); +} +BENCHMARK(BM_PlainEncodingSpacedDouble)->Apply(BM_PlainSpacedArgs); + +template <typename ParquetType> +static void BM_PlainDecodingSpaced(benchmark::State& state) { + using ArrowType = typename BM_SpacedEncodingTraits<ParquetType>::ArrowType; + using ArrayType = typename BM_SpacedEncodingTraits<ParquetType>::ArrayType; + using CType = typename BM_SpacedEncodingTraits<ParquetType>::CType; + + const int num_values = static_cast<int>(state.range(0)); + const auto null_percent = static_cast<double>(state.range(1)) / 10000.0; + + auto rand = ::arrow::random::RandomArrayGenerator(1923); + const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent); + const auto valid_bits = array->null_bitmap_data(); + const int null_count = static_cast<int>(array->null_count()); + const auto array_actual = ::arrow::internal::checked_pointer_cast<ArrayType>(array); + const auto raw_values = array_actual->raw_values(); + // Guarantee the type cast between raw_values and input of PutSpaced. + static_assert(sizeof(CType) == sizeof(*raw_values), "Type mismatch"); + // Cast only happens for BooleanType as it use UInt8 for the array data to match a bool* + // input to PutSpaced. + const auto src = reinterpret_cast<const CType*>(raw_values); + + auto encoder = MakeTypedEncoder<ParquetType>(Encoding::PLAIN); + encoder->PutSpaced(src, num_values, valid_bits, 0); + std::shared_ptr<Buffer> buf = encoder->FlushValues(); + + auto decoder = MakeTypedDecoder<ParquetType>(Encoding::PLAIN); + std::vector<uint8_t> decode_values(num_values * sizeof(CType)); + auto decode_buf = reinterpret_cast<CType*>(decode_values.data()); + for (auto _ : state) { + decoder->SetData(num_values - null_count, buf->data(), static_cast<int>(buf->size())); + decoder->DecodeSpaced(decode_buf, num_values, null_count, valid_bits, 0); + } + state.counters["null_percent"] = null_percent * 100; + state.SetBytesProcessed(state.iterations() * num_values * sizeof(CType)); +} + +static void BM_PlainDecodingSpacedBoolean(benchmark::State& state) { + BM_PlainDecodingSpaced<BooleanType>(state); +} +BENCHMARK(BM_PlainDecodingSpacedBoolean)->Apply(BM_PlainSpacedArgs); + +static void BM_PlainDecodingSpacedFloat(benchmark::State& state) { + BM_PlainDecodingSpaced<FloatType>(state); +} +BENCHMARK(BM_PlainDecodingSpacedFloat)->Apply(BM_PlainSpacedArgs); + +static void BM_PlainDecodingSpacedDouble(benchmark::State& state) { + BM_PlainDecodingSpaced<DoubleType>(state); +} +BENCHMARK(BM_PlainDecodingSpacedDouble)->Apply(BM_PlainSpacedArgs); + +template <typename T, typename DecodeFunc> +static void BM_ByteStreamSplitDecode(benchmark::State& state, DecodeFunc&& decode_func) { + std::vector<T> values(state.range(0), 64.0); + const uint8_t* values_raw = reinterpret_cast<const uint8_t*>(values.data()); + std::vector<T> output(state.range(0), 0); + + for (auto _ : state) { + decode_func(values_raw, static_cast<int64_t>(values.size()), + static_cast<int64_t>(values.size()), output.data()); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(state.iterations() * values.size() * sizeof(T)); +} + +template <typename T, typename EncodeFunc> +static void BM_ByteStreamSplitEncode(benchmark::State& state, EncodeFunc&& encode_func) { + std::vector<T> values(state.range(0), 64.0); + const uint8_t* values_raw = reinterpret_cast<const uint8_t*>(values.data()); + std::vector<uint8_t> output(state.range(0) * sizeof(T), 0); + + for (auto _ : state) { + encode_func(values_raw, values.size(), output.data()); + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(state.iterations() * values.size() * sizeof(T)); +} + +static void BM_ByteStreamSplitDecode_Float_Scalar(benchmark::State& state) { + BM_ByteStreamSplitDecode<float>( + state, ::arrow::util::internal::ByteStreamSplitDecodeScalar<float>); +} + +static void BM_ByteStreamSplitDecode_Double_Scalar(benchmark::State& state) { + BM_ByteStreamSplitDecode<double>( + state, ::arrow::util::internal::ByteStreamSplitDecodeScalar<double>); +} + +static void BM_ByteStreamSplitEncode_Float_Scalar(benchmark::State& state) { + BM_ByteStreamSplitEncode<float>( + state, ::arrow::util::internal::ByteStreamSplitEncodeScalar<float>); +} + +static void BM_ByteStreamSplitEncode_Double_Scalar(benchmark::State& state) { + BM_ByteStreamSplitEncode<double>( + state, ::arrow::util::internal::ByteStreamSplitEncodeScalar<double>); +} + +BENCHMARK(BM_ByteStreamSplitDecode_Float_Scalar)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitDecode_Double_Scalar)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Float_Scalar)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Double_Scalar)->Range(MIN_RANGE, MAX_RANGE); + +#if defined(ARROW_HAVE_SSE4_2) +static void BM_ByteStreamSplitDecode_Float_Sse2(benchmark::State& state) { + BM_ByteStreamSplitDecode<float>( + state, ::arrow::util::internal::ByteStreamSplitDecodeSse2<float>); +} + +static void BM_ByteStreamSplitDecode_Double_Sse2(benchmark::State& state) { + BM_ByteStreamSplitDecode<double>( + state, ::arrow::util::internal::ByteStreamSplitDecodeSse2<double>); +} + +static void BM_ByteStreamSplitEncode_Float_Sse2(benchmark::State& state) { + BM_ByteStreamSplitEncode<float>( + state, ::arrow::util::internal::ByteStreamSplitEncodeSse2<float>); +} + +static void BM_ByteStreamSplitEncode_Double_Sse2(benchmark::State& state) { + BM_ByteStreamSplitEncode<double>( + state, ::arrow::util::internal::ByteStreamSplitEncodeSse2<double>); +} + +BENCHMARK(BM_ByteStreamSplitDecode_Float_Sse2)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitDecode_Double_Sse2)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Float_Sse2)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Double_Sse2)->Range(MIN_RANGE, MAX_RANGE); +#endif + +#if defined(ARROW_HAVE_AVX2) +static void BM_ByteStreamSplitDecode_Float_Avx2(benchmark::State& state) { + BM_ByteStreamSplitDecode<float>( + state, ::arrow::util::internal::ByteStreamSplitDecodeAvx2<float>); +} + +static void BM_ByteStreamSplitDecode_Double_Avx2(benchmark::State& state) { + BM_ByteStreamSplitDecode<double>( + state, ::arrow::util::internal::ByteStreamSplitDecodeAvx2<double>); +} + +static void BM_ByteStreamSplitEncode_Float_Avx2(benchmark::State& state) { + BM_ByteStreamSplitEncode<float>( + state, ::arrow::util::internal::ByteStreamSplitEncodeAvx2<float>); +} + +static void BM_ByteStreamSplitEncode_Double_Avx2(benchmark::State& state) { + BM_ByteStreamSplitEncode<double>( + state, ::arrow::util::internal::ByteStreamSplitEncodeAvx2<double>); +} + +BENCHMARK(BM_ByteStreamSplitDecode_Float_Avx2)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitDecode_Double_Avx2)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Float_Avx2)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Double_Avx2)->Range(MIN_RANGE, MAX_RANGE); +#endif + +#if defined(ARROW_HAVE_AVX512) +static void BM_ByteStreamSplitDecode_Float_Avx512(benchmark::State& state) { + BM_ByteStreamSplitDecode<float>( + state, ::arrow::util::internal::ByteStreamSplitDecodeAvx512<float>); +} + +static void BM_ByteStreamSplitDecode_Double_Avx512(benchmark::State& state) { + BM_ByteStreamSplitDecode<double>( + state, ::arrow::util::internal::ByteStreamSplitDecodeAvx512<double>); +} + +static void BM_ByteStreamSplitEncode_Float_Avx512(benchmark::State& state) { + BM_ByteStreamSplitEncode<float>( + state, ::arrow::util::internal::ByteStreamSplitEncodeAvx512<float>); +} + +static void BM_ByteStreamSplitEncode_Double_Avx512(benchmark::State& state) { + BM_ByteStreamSplitEncode<double>( + state, ::arrow::util::internal::ByteStreamSplitEncodeAvx512<double>); +} + +BENCHMARK(BM_ByteStreamSplitDecode_Float_Avx512)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitDecode_Double_Avx512)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Float_Avx512)->Range(MIN_RANGE, MAX_RANGE); +BENCHMARK(BM_ByteStreamSplitEncode_Double_Avx512)->Range(MIN_RANGE, MAX_RANGE); +#endif + +template <typename Type> +static void DecodeDict(std::vector<typename Type::c_type>& values, + benchmark::State& state) { + typedef typename Type::c_type T; + int num_values = static_cast<int>(values.size()); + + MemoryPool* allocator = default_memory_pool(); + std::shared_ptr<ColumnDescriptor> descr = Int64Schema(Repetition::REQUIRED); + + auto base_encoder = + MakeEncoder(Type::type_num, Encoding::PLAIN, true, descr.get(), allocator); + auto encoder = + dynamic_cast<typename EncodingTraits<Type>::Encoder*>(base_encoder.get()); + auto dict_traits = dynamic_cast<DictEncoder<Type>*>(base_encoder.get()); + encoder->Put(values.data(), num_values); + + std::shared_ptr<ResizableBuffer> dict_buffer = + AllocateBuffer(allocator, dict_traits->dict_encoded_size()); + + std::shared_ptr<ResizableBuffer> indices = + AllocateBuffer(allocator, encoder->EstimatedDataEncodedSize()); + + dict_traits->WriteDict(dict_buffer->mutable_data()); + int actual_bytes = dict_traits->WriteIndices(indices->mutable_data(), + static_cast<int>(indices->size())); + + PARQUET_THROW_NOT_OK(indices->Resize(actual_bytes)); + + for (auto _ : state) { + auto dict_decoder = MakeTypedDecoder<Type>(Encoding::PLAIN, descr.get()); + dict_decoder->SetData(dict_traits->num_entries(), dict_buffer->data(), + static_cast<int>(dict_buffer->size())); + + auto decoder = MakeDictDecoder<Type>(descr.get()); + decoder->SetDict(dict_decoder.get()); + decoder->SetData(num_values, indices->data(), static_cast<int>(indices->size())); + decoder->Decode(values.data(), num_values); + } + + state.SetBytesProcessed(state.iterations() * state.range(0) * sizeof(T)); +} + +static void BM_DictDecodingInt64_repeats(benchmark::State& state) { + typedef Int64Type Type; + typedef typename Type::c_type T; + + std::vector<T> values(state.range(0), 64); + DecodeDict<Type>(values, state); +} + +BENCHMARK(BM_DictDecodingInt64_repeats)->Range(MIN_RANGE, MAX_RANGE); + +static void BM_DictDecodingInt64_literals(benchmark::State& state) { + typedef Int64Type Type; + typedef typename Type::c_type T; + + std::vector<T> values(state.range(0)); + for (size_t i = 0; i < values.size(); ++i) { + values[i] = i; + } + DecodeDict<Type>(values, state); +} + +BENCHMARK(BM_DictDecodingInt64_literals)->Range(MIN_RANGE, MAX_RANGE); + +// ---------------------------------------------------------------------- +// Shared benchmarks for decoding using arrow builders + +using ::arrow::BinaryBuilder; +using ::arrow::BinaryDictionary32Builder; + +class BenchmarkDecodeArrow : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State& state) override { + num_values_ = static_cast<int>(state.range()); + InitDataInputs(); + DoEncodeArrow(); + } + + void TearDown(const ::benchmark::State& state) override { + buffer_.reset(); + input_array_.reset(); + values_.clear(); + } + + void InitDataInputs() { + // Generate a random string dictionary without any nulls so that this dataset can be + // used for benchmarking the DecodeArrowNonNull API + constexpr int repeat_factor = 8; + constexpr int64_t min_length = 2; + constexpr int64_t max_length = 10; + ::arrow::random::RandomArrayGenerator rag(0); + input_array_ = rag.StringWithRepeats(num_values_, num_values_ / repeat_factor, + min_length, max_length, /*null_probability=*/0); + valid_bits_ = input_array_->null_bitmap_data(); + total_size_ = input_array_->data()->buffers[2]->size(); + + values_.reserve(num_values_); + const auto& binary_array = static_cast<const ::arrow::BinaryArray&>(*input_array_); + for (int64_t i = 0; i < binary_array.length(); i++) { + auto view = binary_array.GetView(i); + values_.emplace_back(static_cast<uint32_t>(view.length()), + reinterpret_cast<const uint8_t*>(view.data())); + } + } + + virtual void DoEncodeArrow() = 0; + virtual void DoEncodeLowLevel() = 0; + + virtual std::unique_ptr<ByteArrayDecoder> InitializeDecoder() = 0; + + void EncodeArrowBenchmark(benchmark::State& state) { + for (auto _ : state) { + DoEncodeArrow(); + } + state.SetBytesProcessed(state.iterations() * total_size_); + } + + void EncodeLowLevelBenchmark(benchmark::State& state) { + for (auto _ : state) { + DoEncodeLowLevel(); + } + state.SetBytesProcessed(state.iterations() * total_size_); + } + + void DecodeArrowDenseBenchmark(benchmark::State& state) { + for (auto _ : state) { + auto decoder = InitializeDecoder(); + typename EncodingTraits<ByteArrayType>::Accumulator acc; + acc.builder.reset(new BinaryBuilder); + decoder->DecodeArrow(num_values_, 0, valid_bits_, 0, &acc); + } + state.SetBytesProcessed(state.iterations() * total_size_); + } + + void DecodeArrowNonNullDenseBenchmark(benchmark::State& state) { + for (auto _ : state) { + auto decoder = InitializeDecoder(); + typename EncodingTraits<ByteArrayType>::Accumulator acc; + acc.builder.reset(new BinaryBuilder); + decoder->DecodeArrowNonNull(num_values_, &acc); + } + state.SetBytesProcessed(state.iterations() * total_size_); + } + + void DecodeArrowDictBenchmark(benchmark::State& state) { + for (auto _ : state) { + auto decoder = InitializeDecoder(); + BinaryDictionary32Builder builder(default_memory_pool()); + decoder->DecodeArrow(num_values_, 0, valid_bits_, 0, &builder); + } + + state.SetBytesProcessed(state.iterations() * total_size_); + } + + void DecodeArrowNonNullDictBenchmark(benchmark::State& state) { + for (auto _ : state) { + auto decoder = InitializeDecoder(); + BinaryDictionary32Builder builder(default_memory_pool()); + decoder->DecodeArrowNonNull(num_values_, &builder); + } + + state.SetBytesProcessed(state.iterations() * total_size_); + } + + protected: + int num_values_; + std::shared_ptr<::arrow::Array> input_array_; + std::vector<ByteArray> values_; + uint64_t total_size_; + const uint8_t* valid_bits_; + std::shared_ptr<Buffer> buffer_; +}; + +// ---------------------------------------------------------------------- +// Benchmark Decoding from Plain Encoding +class BM_ArrowBinaryPlain : public BenchmarkDecodeArrow { + public: + void DoEncodeArrow() override { + auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN); + encoder->Put(*input_array_); + buffer_ = encoder->FlushValues(); + } + + void DoEncodeLowLevel() override { + auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN); + encoder->Put(values_.data(), num_values_); + buffer_ = encoder->FlushValues(); + } + + std::unique_ptr<ByteArrayDecoder> InitializeDecoder() override { + auto decoder = MakeTypedDecoder<ByteArrayType>(Encoding::PLAIN); + decoder->SetData(num_values_, buffer_->data(), static_cast<int>(buffer_->size())); + return decoder; + } +}; + +BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, EncodeArrow) +(benchmark::State& state) { EncodeArrowBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, EncodeArrow)->Range(1 << 18, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, EncodeLowLevel) +(benchmark::State& state) { EncodeLowLevelBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, EncodeLowLevel)->Range(1 << 18, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrow_Dense) +(benchmark::State& state) { DecodeArrowDenseBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrow_Dense)->Range(MIN_RANGE, MAX_RANGE); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dense) +(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dense) + ->Range(MIN_RANGE, MAX_RANGE); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrow_Dict) +(benchmark::State& state) { DecodeArrowDictBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrow_Dict)->Range(MIN_RANGE, MAX_RANGE); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dict) +(benchmark::State& state) { DecodeArrowNonNullDictBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryPlain, DecodeArrowNonNull_Dict) + ->Range(MIN_RANGE, MAX_RANGE); + +// ---------------------------------------------------------------------- +// Benchmark Decoding from Dictionary Encoding +class BM_ArrowBinaryDict : public BenchmarkDecodeArrow { + public: + template <typename PutValuesFunc> + void DoEncode(PutValuesFunc&& put_values) { + auto node = schema::ByteArray("name"); + descr_ = std::unique_ptr<ColumnDescriptor>(new ColumnDescriptor(node, 0, 0)); + + auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN, + /*use_dictionary=*/true, descr_.get()); + put_values(encoder.get()); + buffer_ = encoder->FlushValues(); + + auto dict_encoder = dynamic_cast<DictEncoder<ByteArrayType>*>(encoder.get()); + ASSERT_NE(dict_encoder, nullptr); + dict_buffer_ = + AllocateBuffer(default_memory_pool(), dict_encoder->dict_encoded_size()); + dict_encoder->WriteDict(dict_buffer_->mutable_data()); + num_dict_entries_ = dict_encoder->num_entries(); + } + + template <typename IndexType> + void EncodeDictBenchmark(benchmark::State& state) { + constexpr int64_t nunique = 100; + constexpr int64_t min_length = 32; + constexpr int64_t max_length = 32; + ::arrow::random::RandomArrayGenerator rag(0); + auto dict = rag.String(nunique, min_length, max_length, + /*null_probability=*/0); + auto indices = rag.Numeric<IndexType, int32_t>(num_values_, 0, nunique - 1); + + auto PutValues = [&](ByteArrayEncoder* encoder) { + auto dict_encoder = dynamic_cast<DictEncoder<ByteArrayType>*>(encoder); + dict_encoder->PutDictionary(*dict); + dict_encoder->PutIndices(*indices); + }; + for (auto _ : state) { + DoEncode(std::move(PutValues)); + } + state.SetItemsProcessed(state.iterations() * num_values_); + } + + void DoEncodeArrow() override { + auto PutValues = [&](ByteArrayEncoder* encoder) { + ASSERT_NO_THROW(encoder->Put(*input_array_)); + }; + DoEncode(std::move(PutValues)); + } + + void DoEncodeLowLevel() override { + auto PutValues = [&](ByteArrayEncoder* encoder) { + encoder->Put(values_.data(), num_values_); + }; + DoEncode(std::move(PutValues)); + } + + std::unique_ptr<ByteArrayDecoder> InitializeDecoder() override { + auto decoder = MakeTypedDecoder<ByteArrayType>(Encoding::PLAIN, descr_.get()); + decoder->SetData(num_dict_entries_, dict_buffer_->data(), + static_cast<int>(dict_buffer_->size())); + auto dict_decoder = MakeDictDecoder<ByteArrayType>(descr_.get()); + dict_decoder->SetDict(decoder.get()); + dict_decoder->SetData(num_values_, buffer_->data(), + static_cast<int>(buffer_->size())); + return std::unique_ptr<ByteArrayDecoder>( + dynamic_cast<ByteArrayDecoder*>(dict_decoder.release())); + } + + void TearDown(const ::benchmark::State& state) override { + BenchmarkDecodeArrow::TearDown(state); + dict_buffer_.reset(); + descr_.reset(); + } + + protected: + std::unique_ptr<ColumnDescriptor> descr_; + std::shared_ptr<Buffer> dict_buffer_; + int num_dict_entries_; +}; + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeArrow) +(benchmark::State& state) { EncodeArrowBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeArrow)->Range(1 << 18, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt8) +(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int8Type>(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt8)->Range(1 << 20, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt16) +(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int16Type>(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt16)->Range(1 << 20, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt32) +(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int32Type>(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt32)->Range(1 << 20, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeDictDirectInt64) +(benchmark::State& state) { EncodeDictBenchmark<::arrow::Int64Type>(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeDictDirectInt64)->Range(1 << 20, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, EncodeLowLevel) +(benchmark::State& state) { EncodeLowLevelBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, EncodeLowLevel)->Range(1 << 18, 1 << 20); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrow_Dense)(benchmark::State& state) { + DecodeArrowDenseBenchmark(state); +} +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrow_Dense)->Range(MIN_RANGE, MAX_RANGE); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dense) +(benchmark::State& state) { DecodeArrowNonNullDenseBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dense) + ->Range(MIN_RANGE, MAX_RANGE); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrow_Dict) +(benchmark::State& state) { DecodeArrowDictBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrow_Dict)->Range(MIN_RANGE, MAX_RANGE); + +BENCHMARK_DEFINE_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dict) +(benchmark::State& state) { DecodeArrowNonNullDictBenchmark(state); } +BENCHMARK_REGISTER_F(BM_ArrowBinaryDict, DecodeArrowNonNull_Dict) + ->Range(MIN_RANGE, MAX_RANGE); + +} // namespace parquet |