summaryrefslogtreecommitdiffstats
path: root/src/arrow/cpp/src/arrow/array/builder_union.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/cpp/src/arrow/array/builder_union.h')
-rw-r--r--src/arrow/cpp/src/arrow/array/builder_union.h242
1 files changed, 242 insertions, 0 deletions
diff --git a/src/arrow/cpp/src/arrow/array/builder_union.h b/src/arrow/cpp/src/arrow/array/builder_union.h
new file mode 100644
index 000000000..c1a799e56
--- /dev/null
+++ b/src/arrow/cpp/src/arrow/array/builder_union.h
@@ -0,0 +1,242 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array/array_nested.h"
+#include "arrow/array/builder_base.h"
+#include "arrow/array/data.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+/// \brief Base class for union array builds.
+///
+/// Note that while we subclass ArrayBuilder, as union types do not have a
+/// validity bitmap, the bitmap builder member of ArrayBuilder is not used.
+class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
+ public:
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ /// \cond FALSE
+ using ArrayBuilder::Finish;
+ /// \endcond
+
+ Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
+
+ /// \brief Make a new child builder available to the UnionArray
+ ///
+ /// \param[in] new_child the child builder
+ /// \param[in] field_name the name of the field in the union array type
+ /// if type inference is used
+ /// \return child index, which is the "type" argument that needs
+ /// to be passed to the "Append" method to add a new element to
+ /// the union array.
+ int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
+ const std::string& field_name = "");
+
+ std::shared_ptr<DataType> type() const override;
+
+ int64_t length() const override { return types_builder_.length(); }
+
+ protected:
+ BasicUnionBuilder(MemoryPool* pool,
+ const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+ const std::shared_ptr<DataType>& type);
+
+ int8_t NextTypeId();
+
+ std::vector<std::shared_ptr<Field>> child_fields_;
+ std::vector<int8_t> type_codes_;
+ UnionMode::type mode_;
+
+ std::vector<ArrayBuilder*> type_id_to_children_;
+ std::vector<int> type_id_to_child_id_;
+ // for all type_id < dense_type_id_, type_id_to_children_[type_id] != nullptr
+ int8_t dense_type_id_ = 0;
+ TypedBufferBuilder<int8_t> types_builder_;
+};
+
+/// \class DenseUnionBuilder
+///
+/// This API is EXPERIMENTAL.
+class ARROW_EXPORT DenseUnionBuilder : public BasicUnionBuilder {
+ public:
+ /// Use this constructor to initialize the UnionBuilder with no child builders,
+ /// allowing type to be inferred. You will need to call AppendChild for each of the
+ /// children builders you want to use.
+ explicit DenseUnionBuilder(MemoryPool* pool)
+ : BasicUnionBuilder(pool, {}, dense_union(FieldVector{})), offsets_builder_(pool) {}
+
+ /// Use this constructor to specify the type explicitly.
+ /// You can still add child builders to the union after using this constructor
+ DenseUnionBuilder(MemoryPool* pool,
+ const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+ const std::shared_ptr<DataType>& type)
+ : BasicUnionBuilder(pool, children, type), offsets_builder_(pool) {}
+
+ Status AppendNull() final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
+ // Append a null arbitrarily to the first child
+ return child_builder->AppendNull();
+ }
+
+ Status AppendNulls(int64_t length) final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
+ // Append just a single null to the first child
+ return child_builder->AppendNull();
+ }
+
+ Status AppendEmptyValue() final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
+ // Append an empty value arbitrarily to the first child
+ return child_builder->AppendEmptyValue();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(length, static_cast<int32_t>(child_builder->length())));
+ // Append just a single empty value to the first child
+ return child_builder->AppendEmptyValue();
+ }
+
+ /// \brief Append an element to the UnionArray. This must be followed
+ /// by an append to the appropriate child builder.
+ ///
+ /// \param[in] next_type type_id of the child to which the next value will be appended.
+ ///
+ /// The corresponding child builder must be appended to independently after this method
+ /// is called.
+ Status Append(int8_t next_type) {
+ ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
+ if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
+ return Status::CapacityError(
+ "a dense UnionArray cannot contain more than 2^31 - 1 elements from a single "
+ "child");
+ }
+ auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
+ return offsets_builder_.Append(offset);
+ }
+
+ Status AppendArraySlice(const ArrayData& array, int64_t offset,
+ int64_t length) override;
+
+ Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+
+ private:
+ TypedBufferBuilder<int32_t> offsets_builder_;
+};
+
+/// \class SparseUnionBuilder
+///
+/// This API is EXPERIMENTAL.
+class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
+ public:
+ /// Use this constructor to initialize the UnionBuilder with no child builders,
+ /// allowing type to be inferred. You will need to call AppendChild for each of the
+ /// children builders you want to use.
+ explicit SparseUnionBuilder(MemoryPool* pool)
+ : BasicUnionBuilder(pool, {}, sparse_union(FieldVector{})) {}
+
+ /// Use this constructor to specify the type explicitly.
+ /// You can still add child builders to the union after using this constructor
+ SparseUnionBuilder(MemoryPool* pool,
+ const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+ const std::shared_ptr<DataType>& type)
+ : BasicUnionBuilder(pool, children, type) {}
+
+ /// \brief Append a null value.
+ ///
+ /// A null is appended to the first child, empty values to the other children.
+ Status AppendNull() final {
+ const auto first_child_code = type_codes_[0];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
+ ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNull());
+ for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
+ ARROW_RETURN_NOT_OK(type_id_to_children_[type_codes_[i]]->AppendEmptyValue());
+ }
+ return Status::OK();
+ }
+
+ /// \brief Append multiple null values.
+ ///
+ /// Nulls are appended to the first child, empty values to the other children.
+ Status AppendNulls(int64_t length) final {
+ const auto first_child_code = type_codes_[0];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
+ ARROW_RETURN_NOT_OK(type_id_to_children_[first_child_code]->AppendNulls(length));
+ for (int i = 1; i < static_cast<int>(type_codes_.size()); ++i) {
+ ARROW_RETURN_NOT_OK(
+ type_id_to_children_[type_codes_[i]]->AppendEmptyValues(length));
+ }
+ return Status::OK();
+ }
+
+ Status AppendEmptyValue() final {
+ ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
+ for (int8_t code : type_codes_) {
+ ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValue());
+ }
+ return Status::OK();
+ }
+
+ Status AppendEmptyValues(int64_t length) final {
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
+ for (int8_t code : type_codes_) {
+ ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendEmptyValues(length));
+ }
+ return Status::OK();
+ }
+
+ /// \brief Append an element to the UnionArray. This must be followed
+ /// by an append to the appropriate child builder.
+ ///
+ /// \param[in] next_type type_id of the child to which the next value will be appended.
+ ///
+ /// The corresponding child builder must be appended to independently after this method
+ /// is called, and all other child builders must have null or empty value appended.
+ Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
+
+ Status AppendArraySlice(const ArrayData& array, int64_t offset,
+ int64_t length) override;
+};
+
+} // namespace arrow