summaryrefslogtreecommitdiffstats
path: root/src/arrow/ruby/red-arrow/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/ruby/red-arrow/lib')
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow.rb30
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb35
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb46
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb214
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/array.rb234
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb28
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb27
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb35
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/buffer.rb32
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb91
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb147
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/column.rb76
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/compression-type.rb37
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb25
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/csv-loader.rb384
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/csv-read-options.rb43
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/data-type.rb198
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/date32-array-builder.rb32
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/date32-array.rb30
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/date64-array-builder.rb33
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/date64-array.rb29
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/datum.rb100
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb58
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal128-array.rb24
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal128-data-type.rb71
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal128.rb60
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb61
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal256-array.rb25
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal256-data-type.rb73
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/decimal256.rb60
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/dense-union-data-type.rb90
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/dictionary-array.rb24
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/dictionary-data-type.rb117
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/equal-options.rb38
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/expression.rb48
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/field-containable.rb38
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/field.rb118
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/file-output-stream.rb34
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/file-system.rb34
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array-builder.rb38
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array.rb26
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/generic-filterable.rb43
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/generic-takeable.rb38
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/group.rb164
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/list-array-builder.rb96
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/list-data-type.rb118
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/loader.rb216
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/map-array-builder.rb109
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/map-array.rb26
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/map-data-type.rb89
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/null-array-builder.rb26
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/null-array.rb24
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/path-extension.rb45
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/raw-table-converter.rb47
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record-batch-builder.rb114
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record-batch-file-reader.rb28
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record-batch-iterator.rb22
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record-batch-reader.rb41
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record-batch-stream-reader.rb30
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record-batch.rb75
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record-containable.rb38
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/record.rb60
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/rolling-window.rb48
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/scalar.rb32
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/schema.rb100
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/slicer.rb355
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/sort-key.rb193
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/sort-options.rb109
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/source-node-options.rb32
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/sparse-union-data-type.rb90
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/string-dictionary-array-builder.rb27
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/struct-array-builder.rb146
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/struct-array.rb68
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/struct-data-type.rb128
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/symbol-values-appendable.rb34
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/table-concatenate-options.rb36
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/table-formatter.rb190
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/table-list-formatter.rb41
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/table-loader.rb225
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/table-saver.rb195
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/table-table-formatter.rb49
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/table.rb519
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/tensor.rb24
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/time.rb159
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/time32-array-builder.rb49
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/time32-array.rb28
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/time32-data-type.rb61
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/time64-array-builder.rb49
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/time64-array.rb28
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/time64-data-type.rb61
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb65
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/timestamp-array.rb42
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/timestamp-data-type.rb57
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/version.rb26
-rw-r--r--src/arrow/ruby/red-arrow/lib/arrow/writable.rb22
95 files changed, 7680 insertions, 0 deletions
diff --git a/src/arrow/ruby/red-arrow/lib/arrow.rb b/src/arrow/ruby/red-arrow/lib/arrow.rb
new file mode 100644
index 000000000..8fbc537bc
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow.rb
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "extpp/setup"
+require "gio2"
+
+require "arrow/version"
+
+require "arrow/loader"
+
+module Arrow
+ class Error < StandardError
+ end
+
+ Loader.load
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb
new file mode 100644
index 000000000..f3a6ace58
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class AggregateNodeOptions
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when Hash
+ aggregations = value[:aggregations]
+ return nil if aggregations.nil?
+ keys = value[:keys]
+ new(aggregations, keys)
+ else
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb b/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb
new file mode 100644
index 000000000..9aac8239d
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Aggregation
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when Hash
+ function = value[:function]
+ return nil if function.nil?
+ function = function.to_s if function.is_a?(Symbol)
+ return nil unless function.is_a?(String)
+ # TODO: Improve this when we have non hash based aggregate function
+ function = "hash_#{function}" unless function.start_with?("hash_")
+ options = value[:options]
+ input = value[:input]
+ return nil if input.nil?
+ output = value[:output]
+ if output.nil?
+ normalized_function = function.gsub(/\Ahash_/, "")
+ output = "#{normalized_function}(#{input})"
+ end
+ new(function, options, input, output)
+ else
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb
new file mode 100644
index 000000000..651aed962
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb
@@ -0,0 +1,214 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "date"
+
+module Arrow
+ class ArrayBuilder
+ class << self
+ def build(values)
+ if self != ArrayBuilder
+ builder = new
+ return builder.build(values)
+ end
+
+ builder_info = nil
+ values.each do |value|
+ builder_info = detect_builder_info(value, builder_info)
+ break if builder_info and builder_info[:detected]
+ end
+ if builder_info
+ builder = builder_info[:builder]
+ builder.build(values)
+ else
+ Arrow::StringArray.new(values)
+ end
+ end
+
+ def buildable?(args)
+ args.size == method(:build).arity
+ end
+
+ private
+ # Chooses (or refines) the builder to use, based on one sample value.
+ #
+ # The result is a Hash with a `:builder` entry and, when no further
+ # values need to be inspected, `detected: true`. A result without
+ # `:detected` is tentative: the caller keeps scanning and passes it
+ # back in as `builder_info`.
+ #
+ # The order of the `when` clauses matters: `Time` comes before
+ # `::Time`, and `DateTime` (a subclass of `Date`) comes before `Date`.
+ #
+ # @param value [Object] The sample value.
+ # @param builder_info [Hash, nil] The result for the previous values.
+ # @return [Hash, nil] The updated builder information.
+ def detect_builder_info(value, builder_info)
+ case value
+ when nil
+ # A nil says nothing about the type: keep what we have.
+ builder_info
+ when true, false
+ {
+ builder: BooleanArrayBuilder.new,
+ detected: true,
+ }
+ when String
+ {
+ builder: StringArrayBuilder.new,
+ detected: true,
+ }
+ when Symbol
+ {
+ builder: StringDictionaryArrayBuilder.new,
+ detected: true,
+ }
+ when Float
+ {
+ builder: DoubleArrayBuilder.new,
+ detected: true,
+ }
+ when Integer
+ if value < 0
+ # A negative value settles on the signed builder.
+ {
+ builder: IntArrayBuilder.new,
+ detected: true,
+ }
+ else
+ # Tentative: no :detected, so a later value can still change
+ # the choice.
+ {
+ builder: UIntArrayBuilder.new,
+ }
+ end
+ when Time
+ # NOTE(review): inside `module Arrow` this presumably resolves to
+ # Arrow::Time (it responds to #data_type); core Time is handled
+ # by the `::Time` clause below -- confirm.
+ data_type = value.data_type
+ case data_type.unit
+ when TimeUnit::SECOND
+ # Keep any previous choice; otherwise start with Time32.
+ builder_info || {
+ builder: Time32ArrayBuilder.new(data_type)
+ }
+ when TimeUnit::MILLI
+ # Keep a previously chosen Time64 builder; otherwise use Time32
+ # with this value's data type.
+ if builder_info and builder_info[:builder].is_a?(Time64ArrayBuilder)
+ builder_info
+ else
+ {
+ builder: Time32ArrayBuilder.new(data_type),
+ }
+ end
+ when TimeUnit::MICRO
+ # Tentative Time64.
+ {
+ builder: Time64ArrayBuilder.new(data_type),
+ }
+ when TimeUnit::NANO
+ # Final Time64.
+ {
+ builder: Time64ArrayBuilder.new(data_type),
+ detected: true
+ }
+ end
+ # No else branch: any other unit makes the inner case evaluate to
+ # nil, which discards the previous builder_info.
+ when ::Time
+ data_type = TimestampDataType.new(:nano)
+ {
+ builder: TimestampArrayBuilder.new(data_type),
+ detected: true,
+ }
+ when DateTime
+ {
+ builder: Date64ArrayBuilder.new,
+ detected: true,
+ }
+ when Date
+ {
+ builder: Date32ArrayBuilder.new,
+ detected: true,
+ }
+ when BigDecimal
+ if value.to_arrow.is_a?(Decimal128)
+ # Tentative: a later value may still select Decimal256.
+ {
+ builder: Decimal128ArrayBuilder.new,
+ }
+ else
+ {
+ builder: Decimal256ArrayBuilder.new,
+ detected: true,
+ }
+ end
+ when ::Array
+ # Detect the element type recursively from the nested values.
+ sub_builder_info = nil
+ value.each do |sub_value|
+ sub_builder_info = detect_builder_info(sub_value, sub_builder_info)
+ break if sub_builder_info and sub_builder_info[:detected]
+ end
+ if sub_builder_info and sub_builder_info[:detected]
+ sub_value_data_type = sub_builder_info[:builder].value_data_type
+ field = Field.new("item", sub_value_data_type)
+ {
+ builder: ListArrayBuilder.new(ListDataType.new(field)),
+ detected: true,
+ }
+ else
+ # Element type is not final (or unknown): keep what we have.
+ builder_info
+ end
+ else
+ # Any other object falls back to the string builder.
+ {
+ builder: StringArrayBuilder.new,
+ detected: true,
+ }
+ end
+ end
+ end
+
+ # Appends all of `values` to this builder and returns the result of
+ # `finish`.
+ #
+ # @param values [::Array] The raw values; splatted into {#append}.
+ def build(values)
+ append(*values)
+ finish
+ end
+
+ # @since 0.12.0
+ def append(*values)
+ value_convertable = respond_to?(:convert_to_arrow_value, true)
+ start_index = 0
+ current_index = 0
+ status = :value
+
+ values.each do |value|
+ if value.nil?
+ if status == :value
+ if start_index != current_index
+ target_values = values[start_index...current_index]
+ if value_convertable
+ target_values = target_values.collect do |v|
+ convert_to_arrow_value(v)
+ end
+ end
+ append_values(target_values, nil)
+ start_index = current_index
+ end
+ status = :null
+ end
+ else
+ if status == :null
+ append_nulls(current_index - start_index)
+ start_index = current_index
+ status = :value
+ end
+ end
+ current_index += 1
+ end
+ if start_index != current_index
+ if status == :value
+ if start_index == 0 and current_index == values.size
+ target_values = values
+ else
+ target_values = values[start_index...current_index]
+ end
+ if value_convertable
+ target_values = target_values.collect do |v|
+ convert_to_arrow_value(v)
+ end
+ end
+ append_values(target_values, nil)
+ else
+ append_nulls(current_index - start_index)
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/array.rb b/src/arrow/ruby/red-arrow/lib/arrow/array.rb
new file mode 100644
index 000000000..c6c0daaec
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/array.rb
@@ -0,0 +1,234 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Array
+ include Enumerable
+ include GenericFilterable
+ include GenericTakeable
+
+ class << self
+ def new(*args)
+ _builder_class = builder_class
+ return super if _builder_class.nil?
+ return super unless _builder_class.buildable?(args)
+ _builder_class.build(*args)
+ end
+
+ def builder_class
+ builder_class_name = "#{name}Builder"
+ return nil unless const_defined?(builder_class_name)
+ const_get(builder_class_name)
+ end
+ end
+
+ # @param i [Integer]
+ # The index of the value to be gotten.
+ #
+ # You can specify negative index like for `::Array#[]`.
+ #
+ # @return [Object, nil]
+ # The `i`-th value.
+ #
+ # `nil` for NULL value or out of range `i`.
+ def [](i)
+ i += length if i < 0
+ return nil if i < 0 or i >= length
+ if null?(i)
+ nil
+ else
+ get_value(i)
+ end
+ end
+
+ # @param other [Arrow::Array] The array to be compared.
+ # @param options [Arrow::EqualOptions, Hash] (nil)
+ # The options to customize how to compare.
+ #
+ # @return [Boolean]
+ # `true` if both of them have the same data, `false` otherwise.
+ #
+ # @since 5.0.0
+ def equal_array?(other, options=nil)
+ # Pure delegation: both arguments are passed to equal_options
+ # unchanged.
+ equal_options(other, options)
+ end
+
+ def each
+ return to_enum(__method__) unless block_given?
+
+ length.times do |i|
+ yield(self[i])
+ end
+ end
+
+ def reverse_each
+ return to_enum(__method__) unless block_given?
+
+ (length - 1).downto(0) do |i|
+ yield(self[i])
+ end
+ end
+
+ # Already an Arrow array, so no conversion is needed.
+ #
+ # @return [Arrow::Array] `self`.
+ def to_arrow
+ self
+ end
+
+ # Keep the original implementation reachable as value_data_type_raw,
+ # then redefine value_data_type to memoize its result in
+ # @value_data_type.
+ alias_method :value_data_type_raw, :value_data_type
+ def value_data_type
+ @value_data_type ||= value_data_type_raw
+ end
+
+ # Returns whatever `values` returns.
+ # NOTE(review): `values` is defined outside this file; presumably it
+ # is a raw Ruby Array of the elements -- confirm.
+ def to_a
+ values
+ end
+
+ alias_method :is_in_raw, :is_in
+ def is_in(values)
+ case values
+ when ::Array
+ if self.class.builder_class.buildable?([values])
+ values = self.class.new(values)
+ else
+ values = self.class.new(value_data_type, values)
+ end
+ is_in_raw(values)
+ when ChunkedArray
+ is_in_chunked_array(values)
+ else
+ is_in_raw(values)
+ end
+ end
+
+ # @api private
+ alias_method :concatenate_raw, :concatenate
+ # Concatenates the given other arrays to the array.
+ #
+ # @param other_arrays [::Array, Arrow::Array] The arrays to be
+ # concatenated.
+ #
+ # Each other array is processed by {#resolve} before they're
+ # concatenated.
+ #
+ # @example Raw Ruby Array
+ # array = Arrow::Int32Array.new([1])
+ # array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
+ #
+ # @example Arrow::Array
+ # array = Arrow::Int32Array.new([1])
+ # array.concatenate(Arrow::Int32Array.new([2, 3]),
+ # Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
+ #
+ # @since 4.0.0
+ def concatenate(*other_arrays)
+ other_arrays = other_arrays.collect do |other_array|
+ resolve(other_array)
+ end
+ concatenate_raw(other_arrays)
+ end
+
+ # Concatenates the given other array to the array.
+ #
+ # If you have multiple arrays to be concatenated, you should use
+ # {#concatenate} to concatenate multiple arrays at once.
+ #
+ # @param other_array [::Array, Arrow::Array] The array to be concatenated.
+ #
+ # `@other_array` is processed by {#resolve} before it's
+ # concatenated.
+ #
+ # @example Raw Ruby Array
+ # Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
+ #
+ # @example Arrow::Array
+ # Arrow::Int32Array.new([1]) +
+ # Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
+ #
+ # @since 4.0.0
+ def +(other_array)
+ # Single-argument form of #concatenate.
+ concatenate(other_array)
+ end
+
+ # Ensures returning the same data type array from the given array.
+ #
+ # @return [Arrow::Array]
+ #
+ # @overload resolve(other_raw_array)
+ #
+ # @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
+ # is built by `self.class.new`.
+ #
+ # @example Raw Ruby Array
+ # int32_array = Arrow::Int32Array.new([1])
+ # other_array = int32_array.resolve([2, 3, 4])
+ # other_array # => Arrow::Int32Array.new([2, 3, 4])
+ #
+ # @overload resolve(other_array)
+ #
+ # @param other_array [Arrow::Array] Another Arrow::Array.
+ #
+ # If the given other array has the same data type as
+ # `self`, the given other array is returned as-is.
+ #
+ # If the given other array doesn't have the same data type as
+ # `self`, the given other array is cast to that data type.
+ #
+ # @example Same data type
+ # int32_array = Arrow::Int32Array.new([1])
+ # other_int32_array = Arrow::Int32Array.new([2, 3, 4])
+ # other_array = int32_array.resolve(other_int32_array)
+ # other_array.object_id == other_int32_array.object_id
+ #
+ # @example Other data type
+ # int32_array = Arrow::Int32Array.new([1])
+ # other_int8_array = Arrow::Int8Array.new([2, 3, 4])
+ # other_array = int32_array.resolve(other_int8_array)
+ # other_array #=> Arrow::Int32Array.new([2, 3, 4])
+ #
+ # @since 4.0.0
+ def resolve(other_array)
+ if other_array.is_a?(::Array)
+ builder_class = self.class.builder_class
+ if builder_class.nil?
+ message =
+ "[array][resolve] can't build #{value_data_type} array " +
+ "from raw Ruby Array"
+ raise ArgumentError, message
+ end
+ if builder_class.buildable?([other_array])
+ other_array = builder_class.build(other_array)
+ elsif builder_class.buildable?([value_data_type, other_array])
+ other_array = builder_class.build(value_data_type, other_array)
+ else
+ message =
+ "[array][resolve] need to implement " +
+ "a feature that building #{value_data_type} array " +
+ "from raw Ruby Array"
+ raise NotImpelemented, message
+ end
+ other_array
+ elsif other_array.respond_to?(:value_data_type)
+ return other_array if value_data_type == other_array.value_data_type
+ other_array.cast(value_data_type)
+ else
+ message =
+ "[array][resolve] can't build #{value_data_type} array: " +
+ "#{other_array.inspect}"
+ raise ArgumentError, message
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb b/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb
new file mode 100644
index 000000000..338efe696
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "bigdecimal"
+
+class BigDecimal
+ def to_arrow
+ if precision <= Arrow::Decimal128DataType::MAX_PRECISION
+ Arrow::Decimal128.new(to_s)
+ else
+ Arrow::Decimal256.new(to_s)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb
new file mode 100644
index 000000000..6d05e2c41
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ # Adds SymbolValuesAppendable to the binary dictionary array builder.
+ class BinaryDictionaryArrayBuilder
+ include SymbolValuesAppendable
+
+ private
+ # Returns a new BinaryArrayBuilder.
+ # NOTE(review): presumably a hook called by SymbolValuesAppendable to
+ # build the values it appends -- confirm in
+ # symbol-values-appendable.rb.
+ def create_values_array_builder
+ BinaryArrayBuilder.new
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb b/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb
new file mode 100644
index 000000000..ec236bd15
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ module BlockClosable
+ def open(*args, &block)
+ io = new(*args)
+ return io unless block
+
+ begin
+ yield(io)
+ ensure
+ if io.respond_to?(:closed?)
+ io.close unless io.closed?
+ else
+ io.close
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb b/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb
new file mode 100644
index 000000000..9f3a3f61b
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Buffer
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when String
+ new(value)
+ else
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb
new file mode 100644
index 000000000..30dffa856
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class ChunkedArray
+ include Enumerable
+ include GenericFilterable
+ include GenericTakeable
+
+ # `size` always aliases `n_rows`; `length` is aliased only when the
+ # class doesn't define a `length` method already.
+ alias_method :size, :n_rows
+ unless method_defined?(:length)
+ alias_method :length, :n_rows
+ end
+
+ # Keep the original implementation reachable as chunks_raw, then
+ # redefine chunks to memoize its result in @chunks.
+ alias_method :chunks_raw, :chunks
+ def chunks
+ @chunks ||= chunks_raw
+ end
+
+ def null?(i)
+ chunks.each do |array|
+ return array.null?(i) if i < array.length
+ i -= array.length
+ end
+ nil
+ end
+
+ def valid?(i)
+ chunks.each do |array|
+ return array.valid?(i) if i < array.length
+ i -= array.length
+ end
+ nil
+ end
+
+ def [](i)
+ i += length if i < 0
+ chunks.each do |array|
+ return array[i] if i < array.length
+ i -= array.length
+ end
+ nil
+ end
+
+ def each(&block)
+ return to_enum(__method__) unless block_given?
+
+ chunks.each do |array|
+ array.each(&block)
+ end
+ end
+
+ def reverse_each(&block)
+ return to_enum(__method__) unless block_given?
+
+ chunks.reverse_each do |array|
+ array.reverse_each(&block)
+ end
+ end
+
+ # Iterates over the chunks themselves (not their values) by
+ # forwarding the block to `chunks.each`.
+ def each_chunk(&block)
+ chunks.each(&block)
+ end
+
+ def pack
+ first_chunk = chunks.first
+ data_type = first_chunk.value_data_type
+ case data_type
+ when TimestampDataType
+ builder = TimestampArrayBuilder.new(data_type)
+ builder.build(to_a)
+ else
+ first_chunk.class.new(to_a)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb b/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb
new file mode 100644
index 000000000..7d7de66bd
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb
@@ -0,0 +1,147 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  # Column access helpers for containers that provide `schema` and
+  # `n_columns` and whose class can be instantiated from an array of
+  # columns.
+  module ColumnContainable
+    # @return [::Array<Column>] All columns. The list is built once and
+    #   cached in `@columns`.
+    def columns
+      @columns ||= schema.n_fields.times.collect do |i|
+        Column.new(self, i)
+      end
+    end
+
+    # Yields each column.
+    def each_column(&block)
+      columns.each(&block)
+    end
+
+    # @overload find_column(name)
+    #   Find a column that has the given name.
+    #
+    #   @param name [String, Symbol] The column name to be found.
+    #   @return [Column, nil] The found column or `nil` when no column
+    #     has the name.
+    #
+    # @overload find_column(index)
+    #   Find the `index`-th column.
+    #
+    #   @param index [Integer] The index to be found. A negative index
+    #     counts from the last column.
+    #   @return [Column, nil] The found column or `nil` when the index
+    #     is out of range.
+    #
+    # @raise [ArgumentError] When the argument is none of `String`,
+    #   `Symbol` and `Integer`.
+    def find_column(name_or_index)
+      case name_or_index
+      when String, Symbol
+        name = name_or_index.to_s
+        index = schema.get_field_index(name)
+        return nil if index == -1
+        Column.new(self, index)
+      when Integer
+        index = name_or_index
+        index += n_columns if index < 0
+        return nil if index < 0 || index >= n_columns
+        Column.new(self, index)
+      else
+        message =
+          "column name or index must be String, Symbol or Integer: " +
+          name_or_index.inspect
+        raise ArgumentError, message
+      end
+    end
+
+    # Selects columns that are selected by `selectors` and/or `block`
+    # and creates a new container only with the selected columns.
+    #
+    # @param selectors [Array<String, Symbol, Integer, Range>]
+    #   If a selector is `String`, `Symbol` or `Integer`, the selector
+    #   selects a column by {#find_column}.
+    #
+    #   If a selector is `Range`, the selector selects columns by `::Array#[]`.
+    # @yield [column] Gives a column to the block to select columns.
+    #   This uses `::Array#select`.
+    # @yieldparam column [Column] A target column.
+    # @yieldreturn [Boolean] Whether the given column is selected or not.
+    # @return [self.class] The newly created container that only has selected
+    #   columns.
+    # @raise [KeyError] When a name selector matches no column.
+    # @raise [IndexError] When an index or range selector is out of range.
+    def select_columns(*selectors, &block)
+      if selectors.empty?
+        return to_enum(__method__) unless block_given?
+        selected_columns = columns.select(&block)
+      else
+        selected_columns = []
+        selectors.each do |selector|
+          case selector
+          when Range
+            columns_in_range = columns[selector]
+            # ::Array#[] returns nil when the range starts after the
+            # last column. Report it like an out of range index.
+            if columns_in_range.nil?
+              message = "out of index (0..#{n_columns - 1}): " +
+                        "#{selector.inspect}: #{inspect}"
+              raise IndexError, message
+            end
+            selected_columns.concat(columns_in_range)
+          else
+            column = find_column(selector)
+            if column.nil?
+              case selector
+              when String, Symbol
+                message = "unknown column: #{selector.inspect}: #{inspect}"
+                raise KeyError, message
+              else
+                message = "out of index (0..#{n_columns - 1}): " +
+                          "#{selector.inspect}: #{inspect}"
+                raise IndexError, message
+              end
+            end
+            selected_columns << column
+          end
+        end
+        selected_columns = selected_columns.select(&block) if block_given?
+      end
+      self.class.new(selected_columns)
+    end
+
+    # @overload [](name)
+    #   Find a column that has the given name.
+    #
+    #   @param name [String, Symbol] The column name to be found.
+    #   @return [Column] The found column.
+    #   @see #find_column
+    #
+    # @overload [](index)
+    #   Find the `index`-th column.
+    #
+    #   @param index [Integer] The index to be found.
+    #   @return [Column] The found column.
+    #   @see #find_column
+    #
+    # @overload [](range)
+    #   Selects columns that are in `range` and creates a new container
+    #   only with the selected columns.
+    #
+    #   @param range [Range] The range to be selected.
+    #   @return [self.class] The newly created container that only has selected
+    #     columns.
+    #   @see #select_columns
+    #
+    # @overload [](selectors)
+    #   Selects columns that are selected by `selectors` and creates a
+    #   new container only with the selected columns.
+    #
+    #   @param selectors [Array] The selectors that are used to select columns.
+    #   @return [self.class] The newly created container that only has selected
+    #     columns.
+    #   @see #select_columns
+    def [](selector)
+      case selector
+      when ::Array
+        select_columns(*selector)
+      when Range
+        select_columns(selector)
+      else
+        find_column(selector)
+      end
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/column.rb b/src/arrow/ruby/red-arrow/lib/arrow/column.rb
new file mode 100644
index 000000000..06f3dbdc0
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/column.rb
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  # One column of a container: the schema field at an index together
+  # with the column data the container returns for the same index.
+  class Column
+    include Enumerable
+
+    attr_reader :container, :field, :data
+
+    # @param container [Object] The owner. It must provide `schema`
+    #   (indexable by position) and `get_column_data(index)`.
+    # @param index [Integer] The position of the column in `container`.
+    def initialize(container, index)
+      @container = container
+      @index = index
+      @field = container.schema[index]
+      @data = container.get_column_data(index)
+    end
+
+    # The name of the field.
+    def name
+      @field.name
+    end
+
+    # The data type of the field.
+    def data_type
+      @field.data_type
+    end
+
+    # The following methods only delegate to the column data.
+
+    def null?(i)
+      @data.null?(i)
+    end
+
+    def valid?(i)
+      @data.valid?(i)
+    end
+
+    def [](i)
+      @data[i]
+    end
+
+    def each(&block)
+      @data.each(&block)
+    end
+
+    def reverse_each(&block)
+      @data.reverse_each(&block)
+    end
+
+    def n_rows
+      @data.n_rows
+    end
+    alias_method :size, :n_rows
+    alias_method :length, :n_rows
+
+    def n_nulls
+      @data.n_nulls
+    end
+
+    # Two columns are equal when `other` is the same kind of column and
+    # both the fields and the data are equal.
+    def ==(other)
+      return false unless other.is_a?(self.class)
+      @field == other.field && @data == other.data
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/compression-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/compression-type.rb
new file mode 100644
index 000000000..b913e48ff
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/compression-type.rb
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class CompressionType
+    # File extension (without the leading dot) => compression type.
+    # UNCOMPRESSED has no entry, GZIP is registered as "gz" and every
+    # other type is registered under its nick.
+    EXTENSIONS = {}
+    values.each do |type|
+      extension =
+        case type
+        when UNCOMPRESSED then next
+        when GZIP then "gz"
+        else type.nick
+        end
+      EXTENSIONS[extension] = type
+    end
+
+    # @param extension [#to_s] The extension to look up.
+    # @return [CompressionType, nil] The type registered for the
+    #   extension, `nil` for an unknown extension.
+    def self.resolve_extension(extension)
+      EXTENSIONS[extension.to_s]
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb b/src/arrow/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb
new file mode 100644
index 000000000..16669be93
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ # Mixin whose constructor stores the arguments it received in
+ # @arguments after running the original constructor.
+ #
+ # NOTE(review): judging by the module name, the stored reference is
+ # presumably meant to keep the arguments from being garbage collected
+ # while this object is alive -- confirm against the classes that use it.
+ module ConstructorArgumentsGCGuardable
+ def initialize(*args)
+ # Bare `super` passes the received arguments (and block) through as is.
+ super
+ @arguments = args
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/csv-loader.rb b/src/arrow/ruby/red-arrow/lib/arrow/csv-loader.rb
new file mode 100644
index 000000000..f82263e46
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/csv-loader.rb
@@ -0,0 +1,384 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "csv"
+require "pathname"
+require "time"
+
+module Arrow
+ class CSVLoader
+ # Shortcut for `new(path_or_data, **options).load`.
+ def self.load(path_or_data, **options)
+   loader = new(path_or_data, **options)
+   loader.load
+ end
+
+ # @param path_or_data [Pathname, String, Object] The CSV source. See
+ #   {#load} for how it is interpreted.
+ # @param options [Hash] The load options. `:delimiter` is stored as
+ #   `:col_sep` and `:compression` is taken out of the options.
+ def initialize(path_or_data, **options)
+   @path_or_data = path_or_data
+   @options = options
+   @options[:col_sep] = @options.delete(:delimiter) if @options.key?(:delimiter)
+   @compression = @options.delete(:compression)
+ end
+
+ # Loads the source given to the constructor.
+ #
+ # A `Pathname`, or a string that is a single line ending with ".csv"
+ # (case insensitive), is loaded as a file path. Anything else is
+ # loaded as CSV data.
+ def load
+   source = @path_or_data
+   return load_from_path(source.to_path) if source.is_a?(Pathname)
+   return load_from_path(source) if /\A.+\.csv\z/i === source
+   load_data(source)
+ end
+
+ private
+ # Opens the file at `path` with CSV and yields the CSV object.
+ # Returns what the block returns.
+ def open_csv(path, **options)
+   CSV.open(path, **options) { |csv| yield(csv) }
+ end
+
+ # Wraps `data` with CSV, yields the CSV object and closes it when the
+ # block is left. Returns what the block returns.
+ def parse_csv_data(data, **options)
+   csv = CSV.new(data, **options)
+   yield(csv)
+ ensure
+   # csv is nil only when CSV.new itself failed.
+   csv.close if csv
+ end
+
+ # Reads all rows of `csv` and builds a table from them.
+ #
+ # The rows are transposed into one list of values per column and each
+ # list is built into an array with ArrayBuilder. The column names are
+ # the CSV headers; without headers the column indexes ("0", "1", ...)
+ # are used.
+ #
+ # @param csv [CSV] The CSV to be read.
+ # @return [Table, nil] The table, `nil` when no value was read.
+ def read_csv(csv)
+   columns = []
+   csv.each do |row|
+     # CSV::Row yields [header, value] pairs: keep only the values.
+     row = row.collect(&:last) if row.is_a?(CSV::Row)
+     row.each_with_index do |value, i|
+       (columns[i] ||= []) << value
+     end
+   end
+   return nil if columns.empty?
+
+   arrays = columns.collect { |values| ArrayBuilder.build(values) }
+   names = csv.headers || arrays.size.times.collect(&:to_s)
+   Table.new(names.zip(arrays).to_h)
+ end
+
+ # Translates the load options to CSVReadOptions.
+ #
+ # @return [CSVReadOptions, nil] The read options, or `nil` when an
+ #   option can't be expressed: a String `:headers` value or a key the
+ #   read options have no setter for.
+ def reader_options
+   read_options = CSVReadOptions.new
+   @options.each do |key, value|
+     case key
+     when :headers
+       case value
+       when ::Array
+         read_options.column_names = value
+       when String
+         return nil
+       else
+         # Names are generated only when the data has no header row.
+         read_options.generate_column_names = !value
+       end
+     when :column_types
+       value.each { |name, type| read_options.add_column_type(name, type) }
+     when :schema
+       read_options.add_schema(value)
+     when :encoding
+       # process encoding on opening input
+     when :col_sep
+       read_options.delimiter = value
+     else
+       setter = "#{key}="
+       return nil unless read_options.respond_to?(setter)
+       read_options.__send__(setter, value)
+     end
+   end
+   read_options
+ end
+
+ # Yields `raw_input` as is when no compression was given. Otherwise
+ # yields a compressed input stream that reads `raw_input` through the
+ # codec of the given compression.
+ def open_decompress_input(raw_input)
+   return yield(raw_input) unless @compression
+
+   codec = Codec.new(@compression)
+   CompressedInputStream.open(codec, raw_input) { |input| yield(input) }
+ end
+
+ # Yields `raw_input` as is when no `:encoding` option was given.
+ # Otherwise yields an input that converts `raw_input` from the given
+ # encoding to UTF-8.
+ def open_encoding_convert_stream(raw_input, &block)
+   source_encoding = @options[:encoding]
+   return yield(raw_input) unless source_encoding
+
+   converter = Gio::CharsetConverter.new("UTF-8", source_encoding)
+   converted_stream = Gio::ConverterInputStream.new(raw_input, converter)
+   GIOInputStream.open(converted_stream, &block)
+ end
+
+ # Yields `raw_input` wrapped first for decompression and then for
+ # encoding conversion.
+ def wrap_input(raw_input)
+   open_decompress_input(raw_input) do |decompressed|
+     open_encoding_convert_stream(decompressed) { |converted| yield(converted) }
+   end
+ end
+
+ # Loads the CSV file at `path`.
+ #
+ # The Arrow CSV reader is tried first on a memory mapped input, but
+ # only when reader_options could translate the load options. When it
+ # is skipped, or raises Arrow::Error::Invalid or Gio::Error, the file
+ # is read with Ruby's CSV instead.
+ def load_from_path(path)
+ options = reader_options
+ if options
+ begin
+ MemoryMappedInputStream.open(path) do |raw_input|
+ wrap_input(raw_input) do |input|
+ # Returns from load_from_path itself, leaving both blocks.
+ return CSVReader.new(input, options).read
+ end
+ end
+ rescue Arrow::Error::Invalid, Gio::Error
+ # Ignored on purpose: fall through to the Ruby CSV path below.
+ end
+ end
+
+ options = update_csv_parse_options(@options, :open_csv, path)
+ open_csv(path, **options) do |csv|
+ read_csv(csv)
+ end
+ end
+
+ # Loads CSV from in-memory `data`.
+ #
+ # The Arrow CSV reader is tried first on a buffer that wraps `data`,
+ # but only when reader_options could translate the load options. When
+ # it is skipped, or raises Arrow::Error::Invalid or Gio::Error, the
+ # data is parsed with Ruby's CSV instead.
+ def load_data(data)
+ options = reader_options
+ if options
+ begin
+ BufferInputStream.open(Buffer.new(data)) do |raw_input|
+ wrap_input(raw_input) do |input|
+ # Returns from load_data itself, leaving both blocks.
+ return CSVReader.new(input, options).read
+ end
+ end
+ rescue Arrow::Error::Invalid, Gio::Error
+ # Ignored on purpose: fall through to the Ruby CSV path below.
+ end
+ end
+
+ options = update_csv_parse_options(@options, :parse_csv_data, data)
+ parse_csv_data(data, **options) do |csv|
+ read_csv(csv)
+ end
+ end
+
+ # Creates a CSV converter that applies the given block only to the
+ # fields of the `target_index`-th column. Other fields are returned
+ # untouched. A `nil` target applies the block to every field.
+ def selective_converter(target_index)
+   lambda do |field, field_info|
+     targeted = target_index.nil? || field_info.index == target_index
+     targeted ? yield(field) : field
+   end
+ end
+
+ # CSV converter: "true" and "false" become `true` and `false`. Any
+ # other field, or a field that can't be encoded to
+ # CSV::ConverterEncoding, is returned as is.
+ BOOLEAN_CONVERTER = lambda do |field|
+   encoded_field =
+     begin
+       field.encode(CSV::ConverterEncoding)
+     rescue EncodingError
+       next field
+     end
+   case encoded_field
+   when "true" then true
+   when "false" then false
+   else field
+   end
+ end
+
+ # CSV converter: a field that `Time.iso8601` accepts becomes a Time.
+ # Any other field, or a field that can't be encoded to
+ # CSV::ConverterEncoding, is returned as is.
+ ISO8601_CONVERTER = lambda do |field|
+   encoded_field =
+     begin
+       field.encode(CSV::ConverterEncoding)
+     rescue EncodingError
+       next field
+     end
+   begin
+     ::Time.iso8601(encoded_field)
+   rescue ArgumentError
+     field
+   end
+ end
+
+ # Names of the optional keyword parameters of CSV#initialize, used as
+ # a lookup table (name => true).
+ AVAILABLE_CSV_PARSE_OPTIONS = {}
+ CSV.instance_method(:initialize).parameters.each do |kind, name|
+   next unless kind == :key
+   AVAILABLE_CSV_PARSE_OPTIONS[name] = true
+ end
+
+ # Builds the options for Ruby's CSV from the load options.
+ #
+ # `create_csv` is the name of the method that opens a CSV (:open_csv
+ # or :parse_csv_data in this file) and `args` are its positional
+ # arguments. It is called once per detection pass because each pass
+ # consumes rows of the CSV it is given.
+ #
+ # Returns a new Hash; `options` itself is not modified.
+ def update_csv_parse_options(options, create_csv, *args)
+ if options.key?(:converters)
+ new_options = options.dup
+ else
+ # Provisional converters. They give the detection passes below
+ # typed values and are replaced by the detected converters.
+ converters = [:all, BOOLEAN_CONVERTER, ISO8601_CONVERTER]
+ new_options = options.merge(converters: converters)
+ end
+
+ # TODO: Support :schema and :column_types
+
+ # Drop the options that CSV#initialize doesn't accept. Nothing is
+ # dropped when no keyword parameter could be collected from it.
+ unless AVAILABLE_CSV_PARSE_OPTIONS.empty?
+ new_options.select! do |key, value|
+ AVAILABLE_CSV_PARSE_OPTIONS.key?(key)
+ end
+ end
+
+ # Pass 1: guess whether the first row is a header row.
+ unless options.key?(:headers)
+ __send__(create_csv, *args, **new_options) do |csv|
+ new_options[:headers] = have_header?(csv)
+ end
+ end
+ # Pass 2: choose converters per column. This pass already uses the
+ # :headers value of pass 1.
+ unless options.key?(:converters)
+ __send__(create_csv, *args, **new_options) do |csv|
+ new_options[:converters] = detect_robust_converters(csv)
+ end
+ end
+
+ new_options
+ end
+
+ # Guesses whether the first row of `csv` is a header row by looking
+ # at the first two rows. An explicit `:headers` option wins.
+ #
+ # @return [Object] The `:headers` option when given. Otherwise `true`
+ #   or `false` when the rows allow a guess and `nil` when they don't.
+ def have_header?(csv)
+   return @options[:headers] if @options.key?(:headers)
+
+   first_row = csv.shift
+   # No data, or a missing name: not a header row.
+   return false if first_row.nil? || first_row.any?(&:nil?)
+
+   second_row = csv.shift
+   return nil if second_row.nil?
+   return true if second_row.any?(&:nil?)
+
+   return false unless first_row.all? { |value| value.is_a?(String) }
+   # All names are strings but the next row has other types.
+   return true unless first_row.collect(&:class) == second_row.collect(&:class)
+
+   nil
+ end
+
+ # Scans all rows of `csv` (already converted by the converters the
+ # CSV was opened with) and decides one type per column. Returns the
+ # converters, each limited to its own column, that produce the
+ # decided types.
+ #
+ # A column whose values disagree on the type becomes :string. Columns
+ # decided as :string, and columns that never had a typed value, get
+ # no converter.
+ def detect_robust_converters(csv)
+ column_types = []
+ csv.each do |row|
+ if row.is_a?(CSV::Row)
+ # CSV::Row yields [name, value] pairs: enumerate only the values.
+ each_value = Enumerator.new do |yielder|
+ row.each do |_name, value|
+ yielder << value
+ end
+ end
+ else
+ each_value = row.each
+ end
+ each_value.with_index do |value, i|
+ current_column_type = column_types[i]
+ # :string is final: nothing can change it anymore.
+ next if current_column_type == :string
+
+ candidate_type = nil
+ case value
+ when nil
+ # A missing value says nothing about the type.
+ next
+ when "true", "false", true, false
+ candidate_type = :boolean
+ when Integer
+ candidate_type = :integer
+ # An integer in a float column keeps the column float.
+ if current_column_type == :float
+ candidate_type = :float
+ end
+ when Float
+ candidate_type = :float
+ # A float in an integer column promotes the column to float.
+ if current_column_type == :integer
+ column_types[i] = candidate_type
+ end
+ when ::Time
+ candidate_type = :time
+ # DateTime must be tested before Date: DateTime is a subclass of Date.
+ when DateTime
+ candidate_type = :date_time
+ when Date
+ candidate_type = :date
+ when String
+ # An empty string is treated like a missing value.
+ next if value.empty?
+ candidate_type = :string
+ else
+ candidate_type = :string
+ end
+
+ # The first typed value decides; a later mismatch degrades to :string.
+ column_types[i] ||= candidate_type
+ if column_types[i] != candidate_type
+ column_types[i] = :string
+ end
+ end
+ end
+
+ converters = []
+ column_types.each_with_index do |type, i|
+ case type
+ when :boolean
+ converters << selective_converter(i, &BOOLEAN_CONVERTER)
+ when :integer
+ converters << selective_converter(i) do |field|
+ # Missing and empty fields become nil instead of being converted.
+ if field.nil? or field.empty?
+ nil
+ else
+ CSV::Converters[:integer].call(field)
+ end
+ end
+ when :float
+ converters << selective_converter(i) do |field|
+ # Missing and empty fields become nil instead of being converted.
+ if field.nil? or field.empty?
+ nil
+ else
+ CSV::Converters[:float].call(field)
+ end
+ end
+ when :time
+ converters << selective_converter(i, &ISO8601_CONVERTER)
+ when :date_time
+ converters << selective_converter(i, &CSV::Converters[:date_time])
+ when :date
+ converters << selective_converter(i, &CSV::Converters[:date])
+ end
+ end
+ converters
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/csv-read-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/csv-read-options.rb
new file mode 100644
index 000000000..dec3dec95
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/csv-read-options.rb
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class CSVReadOptions
+    alias_method :add_column_type_raw, :add_column_type
+    # Same as the original `add_column_type` but `type` may be anything
+    # that `DataType.resolve` accepts.
+    def add_column_type(name, type)
+      resolved_type = DataType.resolve(type)
+      add_column_type_raw(name, resolved_type)
+    end
+
+    alias_method :delimiter_raw, :delimiter
+    # @return [String] The delimiter as a one character string (the
+    #   original value converted with `chr`).
+    def delimiter
+      delimiter_raw.chr
+    end
+
+    alias_method :delimiter_raw=, :delimiter=
+    # @param delimiter [String, Object] A one byte String is converted
+    #   with `ord`. Any other object is passed to the original setter
+    #   as is.
+    # @raise [ArgumentError] When a String isn't exactly one byte.
+    def delimiter=(delimiter)
+      if delimiter.is_a?(String)
+        unless delimiter.bytesize == 1
+          message = "delimiter must be 1 byte character: #{delimiter.inspect}"
+          raise ArgumentError, message
+        end
+        delimiter = delimiter.ord
+      end
+      self.delimiter_raw = delimiter
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/data-type.rb
new file mode 100644
index 000000000..07b452521
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/data-type.rb
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class DataType
+ class << self
+ # Ensure returning suitable {Arrow::DataType}.
+ #
+ # @overload resolve(data_type)
+ #
+ # Returns the given data type itself. This is convenient to
+ # use this method as {Arrow::DataType} converter.
+ #
+ # @param data_type [Arrow::DataType] The data type.
+ #
+ # @return [Arrow::DataType] The given data type itself.
+ #
+ # @overload resolve(name)
+ #
+ # Creates a suitable data type from the given type name. For
+ # example, you can create {Arrow::BooleanDataType} from
+ # `:boolean`.
+ #
+ # @param name [String, Symbol] The type name of the data type.
+ #
+ # @return [Arrow::DataType] A new suitable data type.
+ #
+ # @example Create a boolean data type
+ # Arrow::DataType.resolve(:boolean)
+ #
+ # @overload resolve(name_with_arguments)
+ #
+ # Creates a new suitable data type from the given type name
+ # with arguments.
+ #
+ # @param name_with_arguments [::Array<String, ...>]
+ # The type name of the data type as the first element.
+ #
+ # The rest elements are additional information of the data type.
+ #
+ # For example, {Arrow::TimestampDataType} needs unit as
+ # additional information.
+ #
+ # @return [Arrow::DataType] A new suitable data type.
+ #
+ # @example Create a boolean data type
+ # Arrow::DataType.resolve([:boolean])
+ #
+ # @example Create a milliseconds unit timestamp data type
+ # Arrow::DataType.resolve([:timestamp, :milli])
+ #
+ # @overload resolve(description)
+ #
+ # Creates a new suitable data type from the given data type
+ # description.
+ #
+ # Data type description is a raw `Hash`. Data type description
+ # must have `:type` value. `:type` is the type of the data type.
+ #
+ # If the type needs additional information, you need to
+ # specify it. See constructor document what information is
+ # needed. For example, {Arrow::ListDataType#initialize} needs
+ # `:field` value.
+ #
+ # @param description [Hash] The description of the data type.
+ #
+ # @option description [String, Symbol] :type The type name of
+ # the data type.
+ #
+ # @return [Arrow::DataType] A new suitable data type.
+ #
+ # @example Create a boolean data type
+ # Arrow::DataType.resolve(type: :boolean)
+ #
+ # @example Create a list data type
+ # Arrow::DataType.resolve(type: :list,
+ # field: {name: "visible", type: :boolean})
+ def resolve(data_type)
+   case data_type
+   when DataType
+     data_type
+   when String, Symbol
+     resolve_class(data_type).new
+   when ::Array
+     type, *arguments = data_type
+     resolve_class(type).new(*arguments)
+   when Hash
+     # Split the description into the type name and the rest. Keys
+     # may be strings or symbols.
+     type = nil
+     description = {}
+     data_type.each do |key, value|
+       key = key.to_sym
+       if key == :type
+         type = value
+       else
+         description[key] = value
+       end
+     end
+     if type.nil?
+       message =
+         "data type description must have :type value: #{data_type.inspect}"
+       raise ArgumentError, message
+     end
+     data_type_class = resolve_class(type)
+     # A description that has only :type takes no constructor argument.
+     return data_type_class.new if description.empty?
+     data_type_class.new(description)
+   else
+     message =
+       "data type must be " +
+       "Arrow::DataType, String, Symbol, [String, ...], [Symbol, ...] " +
+       "{type: String, ...} or {type: Symbol, ...}: #{data_type.inspect}"
+     raise ArgumentError, message
+   end
+ end
+
+ # @return [::Array<Class>] The classes of all descendant types: the
+ #   children of this class's GType, each followed by its own sub
+ #   types, without duplicates.
+ def sub_types
+   collected = gtype.children.each_with_object({}) do |child, types|
+     sub_type = child.to_class
+     types[sub_type] = true
+     sub_type.sub_types.each { |sub_sub_type| types[sub_sub_type] = true }
+   end
+   collected.keys
+ end
+
+ # Same as {resolve} but returns `nil` instead of raising
+ # ArgumentError when `value` can't be resolved.
+ def try_convert(value)
+   resolve(value)
+ rescue ArgumentError
+   nil
+ end
+
+ private
+ # Finds the concrete data type class for a type name such as
+ # `:boolean` or `"uint8"`.
+ #
+ # @raise [ArgumentError] When Arrow has no such data type class or
+ #   when the class is abstract. The message lists the usable types.
+ def resolve_class(data_type)
+   camelized = data_type.to_s.split("_").collect(&:capitalize).join
+   data_type_class_name = "#{camelized.gsub(/\AUint/, "UInt")}DataType"
+   unless Arrow.const_defined?(data_type_class_name)
+     # List every Arrow::*DataType constant as a snake_case type name.
+     candidates = Arrow.constants.collect(&:to_s).select do |name|
+       name != "DataType" && name.end_with?("DataType")
+     end
+     available_types = candidates.collect do |name|
+       words = name.gsub(/DataType\z/, "").scan(/(UInt[0-9]+|[A-Z][a-z\d]+)/).flatten
+       words.collect(&:downcase).join("_").to_sym
+     end
+     message =
+       "unknown type: <#{data_type.inspect}>: " +
+       "available types: #{available_types.inspect}"
+     raise ArgumentError, message
+   end
+   data_type_class = Arrow.const_get(data_type_class_name)
+   return data_type_class unless data_type_class.gtype.abstract?
+
+   not_abstract_types = data_type_class.sub_types.reject do |sub_type|
+     sub_type.gtype.abstract?
+   end
+   not_abstract_types = not_abstract_types.sort_by(&:name)
+   message =
+     "abstract type: <#{data_type.inspect}>: " +
+     "use one of not abstract type: #{not_abstract_types.inspect}"
+   raise ArgumentError, message
+ end
+ end
+
+ # Builds an array of this data type from `values` with the array
+ # builder class named after this class ("XXXDataType" uses
+ # "XXXArrayBuilder").
+ #
+ # The data type itself is passed to the builder as the first
+ # argument unless the builder reports `[values]` alone as buildable.
+ def build_array(values)
+   builder_name = self.class.name.sub(/DataType\z/, "") + "ArrayBuilder"
+   builder_class = self.class.const_get(builder_name)
+   args = builder_class.buildable?([values]) ? [values] : [self, values]
+   builder_class.build(*args)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date32-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/date32-array-builder.rb
new file mode 100644
index 000000000..dedbba85e
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/date32-array-builder.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Date32ArrayBuilder
+    private
+    UNIX_EPOCH = Date.new(1970, 1, 1)
+    # Converts a value that is, or can be converted with `to_date` to,
+    # a Date into the number of days since 1970-01-01. Any other value
+    # is returned as is.
+    def convert_to_arrow_value(value)
+      date = value.respond_to?(:to_date) ? value.to_date : value
+      return date unless date.is_a?(Date)
+
+      (date - UNIX_EPOCH).to_i
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date32-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/date32-array.rb
new file mode 100644
index 000000000..121dbcb55
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/date32-array.rb
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Date32Array
+    # @return [Date] The `i`-th raw value converted to a Date.
+    def get_value(i)
+      raw_value = get_raw_value(i)
+      to_date(raw_value)
+    end
+
+    private
+    # The Julian day number of 1970-01-01.
+    UNIX_EPOCH = 2440588
+    # Converts a number of days since 1970-01-01 to a Date.
+    def to_date(raw_value)
+      Date.jd(raw_value + UNIX_EPOCH)
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date64-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/date64-array-builder.rb
new file mode 100644
index 000000000..658118122
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/date64-array-builder.rb
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Date64ArrayBuilder
+    private
+    # Converts a value that is, or can be converted with `to_time` to,
+    # a Time into the number of milliseconds since the UNIX epoch
+    # (anything below a millisecond is dropped). Any other value is
+    # returned as is.
+    def convert_to_arrow_value(value)
+      time = value
+      if !value.is_a?(::Time) && value.respond_to?(:to_time)
+        time = value.to_time
+      end
+      return time unless time.is_a?(::Time)
+
+      seconds = time.to_i
+      milliseconds = time.usec / 1_000
+      seconds * 1_000 + milliseconds
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date64-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/date64-array.rb
new file mode 100644
index 000000000..9b8a92476
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/date64-array.rb
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Date64Array
+    # @return [DateTime] The `i`-th raw value converted to a DateTime.
+    def get_value(i)
+      to_datetime(get_raw_value(i))
+    end
+
+    private
+    # Converts a number of milliseconds since the UNIX epoch to a
+    # DateTime.
+    def to_datetime(raw_value)
+      seconds, milliseconds = raw_value.divmod(1_000)
+      # The second argument of Time.at is in microseconds, so the
+      # millisecond remainder must be scaled before it is passed.
+      ::Time.at(seconds, milliseconds * 1_000).to_datetime
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/datum.rb b/src/arrow/ruby/red-arrow/lib/arrow/datum.rb
new file mode 100644
index 000000000..196a18f54
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/datum.rb
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Datum
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when Table
+ TableDatum.new(value)
+ when Array
+ ArrayDatum.new(value)
+ when ChunkedArray
+ ChunkedArrayDatum.new(value)
+ when Scalar
+ ScalarDatum.new(value)
+ when ::Array
+ ArrayDatum.new(ArrayBuilder.build(value))
+ when Integer
+ case value
+ when (0..((2 ** 8) - 1))
+ try_convert(UInt8Scalar.new(value))
+ when ((-(2 ** 7))..((2 ** 7) - 1))
+ try_convert(Int8Scalar.new(value))
+ when (0..((2 ** 16) - 1))
+ try_convert(UInt16Scalar.new(value))
+ when ((-(2 ** 15))..((2 ** 15) - 1))
+ try_convert(Int16Scalar.new(value))
+ when (0..((2 ** 32) - 1))
+ try_convert(UInt32Scalar.new(value))
+ when ((-(2 ** 31))..((2 ** 31) - 1))
+ try_convert(Int32Scalar.new(value))
+ when (0..((2 ** 64) - 1))
+ try_convert(UInt64Scalar.new(value))
+ when ((-(2 ** 63))..((2 ** 63) - 1))
+ try_convert(Int64Scalar.new(value))
+ else
+ nil
+ end
+ when Float
+ try_convert(DoubleScalar.new(value))
+ when true, false
+ try_convert(BooleanScalar.new(value))
+ when String
+ if value.ascii_only? or value.encoding == Encoding::UTF_8
+ if value.bytesize <= ((2 ** 31) - 1)
+ try_convert(StringScalar.new(value))
+ else
+ try_convert(LargeStringScalar.new(value))
+ end
+ else
+ if value.bytesize <= ((2 ** 31) - 1)
+ try_convert(BinaryScalar.new(value))
+ else
+ try_convert(LargeBinaryScalar.new(value))
+ end
+ end
+ when Date
+ date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
+ try_convert(Date32Scalar.new(date32_value))
+ when Time
+ case value.unit
+ when TimeUnit::SECOND, TimeUnit::MILLI
+ data_type = Time32DataType.new(value.unit)
+ scalar_class = Time32Scalar
+ else
+ data_type = Time64DataType.new(value.unit)
+ scalar_class = Time64Scalar
+ end
+ try_convert(scalar_class.new(data_type, value.value))
+ when ::Time
+ data_type = TimestampDataType.new(:nano)
+ timestamp_value = value.to_i * 1_000_000_000 + value.nsec
+ try_convert(TimestampScalar.new(data_type, timestamp_value))
+ when Decimal128
+ data_type = TimestampDataType.new(:nano)
+ timestamp_value = value.to_i * 1_000_000_000 + value.nsec
+ try_convert(Decimal128Scalar.new(data_type, timestamp_value))
+ else
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
new file mode 100644
index 000000000..d380ce070
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal128ArrayBuilder
+ class << self
+ def build(data_type, values)
+ builder = new(data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :append_value_raw, :append_value
+ def append_value(value)
+ append_value_raw(normalize_value(value))
+ end
+
+ alias_method :append_values_raw, :append_values
+ def append_values(values, is_valids=nil)
+ if values.is_a?(::Array)
+ values = values.collect do |value|
+ normalize_value(value)
+ end
+ append_values_raw(values, is_valids)
+ else
+ append_values_packed(values, is_valids)
+ end
+ end
+
+ private
+ def normalize_value(value)
+ case value
+ when String
+ Decimal128.new(value)
+ when Float
+ Decimal128.new(value.to_s)
+ when BigDecimal
+ Decimal128.new(value.to_s)
+ else
+ value
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array.rb
new file mode 100644
index 000000000..a5ee53be7
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array.rb
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal128Array
+ def get_value(i)
+ BigDecimal(format_value(i))
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-data-type.rb
new file mode 100644
index 000000000..4b5583896
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-data-type.rb
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal128DataType
+ MAX_PRECISION = max_precision
+
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::Decimal128DataType}.
+ #
+ # @overload initialize(precision, scale)
+ #
+ # @param precision [Integer] The precision of the decimal data
+ # type. It's the number of digits including the number of
+ # digits after the decimal point.
+ #
+ # @param scale [Integer] The scale of the decimal data
+ # type. It's the number of digits after the decimal point.
+ #
+ # @example Create a decimal data type for "XXXXXX.YY" decimal
+ # Arrow::Decimal128DataType.new(8, 2)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the decimal data
+ # type. It must have `:precision` and `:scale` values.
+ #
+ # @option description [Integer] :precision The precision of the
+ # decimal data type. It's the number of digits including the
+ # number of digits after the decimal point.
+ #
+ # @option description [Integer] :scale The scale of the decimal
+ # data type. It's the number of digits after the decimal
+ # point.
+ #
+ # @example Create a decimal data type for "XXXXXX.YY" decimal
+ # Arrow::Decimal128DataType.new(precision: 8,
+ # scale: 2)
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ description = args[0]
+ precision = description[:precision]
+ scale = description[:scale]
+ when 2
+ precision, scale = args
+ else
+ message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
+ raise ArgumentError, message
+ end
+ initialize_raw(precision, scale)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128.rb
new file mode 100644
index 000000000..bf853ae7f
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128.rb
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal128
+ alias_method :to_s_raw, :to_s
+
+ # @overload to_s
+ #
+ # @return [String]
+ # The string representation of the decimal.
+ #
+ # @overload to_s(scale)
+ #
+ # @param scale [Integer] The scale of the decimal.
+ # @return [String]
+ # The string representation of the decimal including the scale.
+ #
+ # @since 0.13.0
+ def to_s(scale=nil)
+ if scale
+ to_string_scale(scale)
+ else
+ to_s_raw
+ end
+ end
+
+ alias_method :abs!, :abs
+
+ # @since 3.0.0
+ def abs
+ copied = dup
+ copied.abs!
+ copied
+ end
+
+ alias_method :negate!, :negate
+
+ # @since 3.0.0
+ def negate
+ copied = dup
+ copied.negate!
+ copied
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
new file mode 100644
index 000000000..fb89ff00b
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal256ArrayBuilder
+ class << self
+ # @since 3.0.0
+ def build(data_type, values)
+ builder = new(data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :append_value_raw, :append_value
+ # @since 3.0.0
+ def append_value(value)
+ append_value_raw(normalize_value(value))
+ end
+
+ alias_method :append_values_raw, :append_values
+ # @since 3.0.0
+ def append_values(values, is_valids=nil)
+ if values.is_a?(::Array)
+ values = values.collect do |value|
+ normalize_value(value)
+ end
+ append_values_raw(values, is_valids)
+ else
+ append_values_packed(values, is_valids)
+ end
+ end
+
+ private
+ def normalize_value(value)
+ case value
+ when String
+ Decimal256.new(value)
+ when Float
+ Decimal256.new(value.to_s)
+ when BigDecimal
+ Decimal256.new(value.to_s)
+ else
+ value
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array.rb
new file mode 100644
index 000000000..8c2306dfe
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array.rb
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal256Array
+ # @since 3.0.0
+ def get_value(i)
+ BigDecimal(format_value(i))
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-data-type.rb
new file mode 100644
index 000000000..8264e388e
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-data-type.rb
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal256DataType
+ MAX_PRECISION = max_precision
+
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::Decimal256DataType}.
+ #
+ # @overload initialize(precision, scale)
+ #
+ # @param precision [Integer] The precision of the decimal data
+ # type. It's the number of digits including the number of
+ # digits after the decimal point.
+ #
+ # @param scale [Integer] The scale of the decimal data
+ # type. It's the number of digits after the decimal point.
+ #
+ # @example Create a decimal data type for "XXXXXX.YY" decimal
+ # Arrow::Decimal256DataType.new(8, 2)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the decimal data
+ # type. It must have `:precision` and `:scale` values.
+ #
+ # @option description [Integer] :precision The precision of the
+ # decimal data type. It's the number of digits including the
+ # number of digits after the decimal point.
+ #
+ # @option description [Integer] :scale The scale of the decimal
+ # data type. It's the number of digits after the decimal
+ # point.
+ #
+ # @example Create a decimal data type for "XXXXXX.YY" decimal
+ # Arrow::Decimal256DataType.new(precision: 8,
+ # scale: 2)
+ #
+ # @since 3.0.0
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ description = args[0]
+ precision = description[:precision]
+ scale = description[:scale]
+ when 2
+ precision, scale = args
+ else
+ message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
+ raise ArgumentError, message
+ end
+ initialize_raw(precision, scale)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256.rb
new file mode 100644
index 000000000..1a7097a4d
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256.rb
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Decimal256
+ alias_method :to_s_raw, :to_s
+
+ # @overload to_s
+ #
+ # @return [String]
+ # The string representation of the decimal.
+ #
+ # @overload to_s(scale)
+ #
+ # @param scale [Integer] The scale of the decimal.
+ # @return [String]
+ # The string representation of the decimal including the scale.
+ #
+ # @since 3.0.0
+ def to_s(scale=nil)
+ if scale
+ to_string_scale(scale)
+ else
+ to_s_raw
+ end
+ end
+
+ alias_method :abs!, :abs
+
+ # @since 3.0.0
+ def abs
+ copied = dup
+ copied.abs!
+ copied
+ end
+
+ alias_method :negate!, :negate
+
+ # @since 3.0.0
+ def negate
+ copied = dup
+ copied.negate!
+ copied
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/dense-union-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/dense-union-data-type.rb
new file mode 100644
index 000000000..6d2bf5e70
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/dense-union-data-type.rb
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class DenseUnionDataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::DenseUnionDataType}.
+ #
+ # @overload initialize(fields, type_codes)
+ #
+ # @param fields [::Array<Arrow::Field, Hash>] The fields of the
+ # dense union data type. You can mix {Arrow::Field} and field
+ # description in the fields.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @param type_codes [::Array<Integer>] The IDs that indicates
+ # corresponding fields.
+ #
+ # @example Create a dense union data type for `{2: visible, 9: count}`
+ # fields = [
+ # Arrow::Field.new("visible", :boolean),
+ # {
+ # name: "count",
+ # type: :int32,
+ # },
+ # ]
+ # Arrow::DenseUnionDataType.new(fields, [2, 9])
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the dense union
+ # data type. It must have `:fields` and `:type_codes` values.
+ #
+ # @option description [::Array<Arrow::Field, Hash>] :fields The
+ # fields of the dense union data type. You can mix
+ # {Arrow::Field} and field description in the fields.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @option description [::Array<Integer>] :type_codes The IDs
+ # that indicates corresponding fields.
+ #
+ # @example Create a dense union data type for `{2: visible, 9: count}`
+ # fields = [
+ # Arrow::Field.new("visible", :boolean),
+ # {
+ # name: "count",
+ # type: :int32,
+ # },
+ # ]
+ # Arrow::DenseUnionDataType.new(fields: fields,
+ # type_codes: [2, 9])
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ description = args[0]
+ fields = description[:fields]
+ type_codes = description[:type_codes]
+ when 2
+ fields, type_codes = args
+ else
+ message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
+ raise ArgumentError, message
+ end
+ fields = fields.collect do |field|
+ field = Field.new(field) unless field.is_a?(Field)
+ field
+ end
+ initialize_raw(fields, type_codes)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/dictionary-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-array.rb
new file mode 100644
index 000000000..70591ab7c
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-array.rb
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class DictionaryArray
+ def get_value(i)
+ dictionary[indices[i]]
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/dictionary-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-data-type.rb
new file mode 100644
index 000000000..8396e311c
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-data-type.rb
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class DictionaryDataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::DictionaryDataType}.
+ #
+ # @overload initialize(index_data_type, value_data_type, ordered)
+ #
+ # @param index_data_type [Arrow::DataType, Hash, String, Symbol]
+ # The index data type of the dictionary data type. It must be
+ # signed integer data types. Here are available signed integer
+ # data types:
+ #
+ # * Arrow::Int8DataType
+ # * Arrow::Int16DataType
+ # * Arrow::Int32DataType
+ # * Arrow::Int64DataType
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @param value_data_type [Arrow::DataType, Hash, String, Symbol]
+ # The value data type of the dictionary data type.
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @param ordered [Boolean] Whether dictionary contents are
+ # ordered or not.
+ #
+ # @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
+ # index_data_type = :int8
+ # value_data_type = :string
+ # ordered = true
+ # Arrow::DictionaryDataType.new(index_data_type,
+ # value_data_type,
+ # ordered)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the dictionary
+ # data type. It must have `:index_data_type`, `:dictionary`
+ # and `:ordered` values.
+ #
+ # @option description [Arrow::DataType, Hash, String, Symbol]
+ # :index_data_type The index data type of the dictionary data
+ # type. It must be signed integer data types. Here are
+ # available signed integer data types:
+ #
+ # * Arrow::Int8DataType
+ # * Arrow::Int16DataType
+ # * Arrow::Int32DataType
+ # * Arrow::Int64DataType
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @option description [Arrow::DataType, Hash, String, Symbol]
+ # :value_data_type
+ # The value data type of the dictionary data type.
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @option description [Boolean] :ordered Whether dictionary
+ # contents are ordered or not.
+ #
+ # @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
+ # Arrow::DictionaryDataType.new(index_data_type: :int8,
+ # value_data_type: :string,
+ # ordered: true)
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ description = args[0]
+ index_data_type = description[:index_data_type]
+ value_data_type = description[:value_data_type]
+ ordered = description[:ordered]
+ when 3
+ index_data_type, value_data_type, ordered = args
+ else
+ message = "wrong number of arguments (given, #{n_args}, expected 1 or 3)"
+ raise ArgumentError, message
+ end
+ index_data_type = DataType.resolve(index_data_type)
+ value_data_type = DataType.resolve(value_data_type)
+ initialize_raw(index_data_type, value_data_type, ordered)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/equal-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/equal-options.rb
new file mode 100644
index 000000000..4eb9964ad
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/equal-options.rb
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class EqualOptions
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when Hash
+ options = new
+ value.each do |k, v|
+ setter = :"#{k}="
+ return unless options.respond_to?(setter)
+ options.__send__(setter, v)
+ end
+ options
+ else
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/expression.rb b/src/arrow/ruby/red-arrow/lib/arrow/expression.rb
new file mode 100644
index 000000000..a33cc53c2
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/expression.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Expression
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when Symbol
+ FieldExpression.new(value.to_s)
+ when ::Array
+ function_name, *arguments = value
+ case function_name
+ when String, Symbol
+ function_name = function_name.to_s
+ else
+ return nil
+ end
+ if arguments.last.is_a?(FunctionOptions)
+ options = arguments.pop
+ else
+ options = nil
+ end
+ CallExpression.new(function_name, arguments, options)
+ else
+ datum = Datum.try_convert(value)
+ return nil if datum.nil?
+ LiteralExpression.new(datum)
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/field-containable.rb b/src/arrow/ruby/red-arrow/lib/arrow/field-containable.rb
new file mode 100644
index 000000000..e4dbf4ec2
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/field-containable.rb
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ module FieldContainable
+ def find_field(name_or_index)
+ case name_or_index
+ when String, Symbol
+ name = name_or_index
+ get_field_by_name(name)
+ when Integer
+ index = name_or_index
+ raise if index < 0
+ index += n_fields if index < 0
+ return nil if index < 0 or index >= n_fields
+ get_field(index)
+ else
+ message = "field name or index must be String, Symbol or Integer"
+ message << ": <#{name_or_index.inspect}>"
+ raise ArgumentError, message
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/field.rb b/src/arrow/ruby/red-arrow/lib/arrow/field.rb
new file mode 100644
index 000000000..e439cb960
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/field.rb
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Field
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::Field}.
+ #
+ # @overload initialize(name, data_type)
+ #
+ # @param name [String, Symbol] The name of the field.
+ #
+ # @param data_type [Arrow::DataType, Hash, String, Symbol] The
+ # data type of the field.
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @example Create a field with {Arrow::DataType}s
+ # Arrow::Field.new("visible", Arrow::BooleanDataType.new)
+ #
+ # @example Create a field with data type description
+ # Arrow::Field.new("visible", :boolean)
+ #
+ # @example Create a field with name as `Symbol`
+ # Arrow::Field.new(:visible, :boolean)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the field.
+ #
+ # Field description is a raw `Hash`. Field description must
+ # have `:name` and `:data_type` values. `:name` is the name of
+ # the field. `:data_type` is the data type of the field. You
+ # can use {Arrow::DataType} or data type description as
+ # `:data_type` value.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # There is a shortcut for convenience. If field description
+ # doesn't have `:data_type`, all keys except `:name` are
+ # processes as data type description. For example, the
+ # following field descriptions are the same:
+ #
+ # ```ruby
+ # {name: "visible", data_type: {type: :boolean}}
+ # {name: "visible", type: :boolean} # Shortcut version
+ # ```
+ #
+ # @option description [String, Symbol] :name The name of the field.
+ #
+ # @option description [Arrow::DataType, Hash] :data_type The
+ # data type of the field. You can specify data type description
+ # by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @example Create a field with {Arrow::DataType}s
+ # Arrow::Field.new(name: "visible",
+ # data_type: Arrow::BooleanDataType.new)
+ #
+ # @example Create a field with data type description
+ # Arrow::Field.new(name: "visible", data_type: {type: :boolean}
+ #
+ # @example Create a field with shortcut form
+ # Arrow::Field.new(name: "visible", type: :boolean)
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ description = args[0]
+ name = nil
+ data_type = nil
+ data_type_description = {}
+ description.each do |key, value|
+ key = key.to_sym
+ case key
+ when :name
+ name = value
+ when :data_type
+ data_type = DataType.resolve(value)
+ else
+ data_type_description[key] = value
+ end
+ end
+ data_type ||= DataType.resolve(data_type_description)
+ when 2
+ name = args[0]
+ data_type = DataType.resolve(args[1])
+ else
+ message = "wrong number of arguments (given #{n_args}, expected 1..2)"
+ raise ArgumentError, message
+ end
+
+ initialize_raw(name, data_type)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/file-output-stream.rb b/src/arrow/ruby/red-arrow/lib/arrow/file-output-stream.rb
new file mode 100644
index 000000000..f39ad14ca
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/file-output-stream.rb
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class FileOutputStream
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+ def initialize(path, options={})
+ append = nil
+ case options
+ when true, false
+ append = options
+ when Hash
+ append = options[:append]
+ end
+ append = false if append.nil?
+ initialize_raw(path, append)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/file-system.rb b/src/arrow/ruby/red-arrow/lib/arrow/file-system.rb
new file mode 100644
index 000000000..7d105b42a
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/file-system.rb
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class FileSystem
+ alias_method :open_output_stream_raw, :open_output_stream
+ def open_output_stream(path)
+ stream = open_output_stream_raw(path)
+ if block_given?
+ begin
+ yield(stream)
+ ensure
+ stream.close
+ end
+ else
+ stream
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array-builder.rb
new file mode 100644
index 000000000..516d8143d
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array-builder.rb
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class FixedSizeBinaryArrayBuilder
+ class << self
+ # @since 3.0.0
+ def build(data_type, values)
+ builder = new(data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :append_values_raw, :append_values
+ # @since 3.0.0
+ def append_values(values, is_valids=nil)
+ if values.is_a?(::Array)
+ append_values_raw(values, is_valids)
+ else
+ append_values_packed(values, is_valids)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array.rb
new file mode 100644
index 000000000..37c121d8e
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class FixedSizeBinaryArray
+ alias_method :get_value_raw, :get_value
+ # @since 3.0.0
+ def get_value(i)
+ get_value_raw(i).to_s
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/generic-filterable.rb b/src/arrow/ruby/red-arrow/lib/arrow/generic-filterable.rb
new file mode 100644
index 000000000..50a79142a
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/generic-filterable.rb
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ module GenericFilterable
+ class << self
+ def included(base)
+ base.__send__(:alias_method, :filter_raw, :filter)
+ base.__send__(:alias_method, :filter, :filter_generic)
+ end
+ end
+
+ def filter_generic(filter, options=nil)
+ case filter
+ when ::Array
+ filter_raw(BooleanArray.new(filter), options)
+ when ChunkedArray
+ if respond_to?(:filter_chunked_array)
+ filter_chunked_array(filter, options)
+ else
+ # TODO: Implement this in C++
+ filter_raw(filter.pack, options)
+ end
+ else
+ filter_raw(filter, options)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/generic-takeable.rb b/src/arrow/ruby/red-arrow/lib/arrow/generic-takeable.rb
new file mode 100644
index 000000000..f32b43f22
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/generic-takeable.rb
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ module GenericTakeable
+ class << self
+ def included(base)
+ base.__send__(:alias_method, :take_raw, :take)
+ base.__send__(:alias_method, :take, :take_generic)
+ end
+ end
+
+ def take_generic(indices)
+ case indices
+ when ::Array
+ take_raw(IntArrayBuilder.build(indices))
+ when ChunkedArray
+ take_chunked_array(indices)
+ else
+ take_raw(indices)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/group.rb b/src/arrow/ruby/red-arrow/lib/arrow/group.rb
new file mode 100644
index 000000000..7827ac0bd
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/group.rb
@@ -0,0 +1,164 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Group
+ def initialize(table, keys)
+ @table = table
+ @keys = keys
+ end
+
+ def count(*target_names)
+ aggregate(*build_aggregations("hash_count", target_names))
+ end
+
+ def sum(*target_names)
+ aggregate(*build_aggregations("hash_sum", target_names))
+ end
+
+ def product(*target_names)
+ aggregate(*build_aggregations("hash_product", target_names))
+ end
+
+ def mean(*target_names)
+ aggregate(*build_aggregations("hash_mean", target_names))
+ end
+
+ def min(*target_names)
+ aggregate(*build_aggregations("hash_min", target_names))
+ end
+
+ def max(*target_names)
+ aggregate(*build_aggregations("hash_max", target_names))
+ end
+
+ def stddev(*target_names)
+ aggregate(*build_aggregations("hash_stddev", target_names))
+ end
+
+ def variance(*target_names)
+ aggregate(*build_aggregations("hash_variance", target_names))
+ end
+
+ def aggregate(aggregation, *more_aggregations)
+ aggregations = [aggregation] + more_aggregations
+ normalized_aggregations = normalize_aggregations(aggregations)
+ plan = ExecutePlan.new
+ source_node = plan.build_source_node(@table)
+ aggregate_node =
+ plan.build_aggregate_node(source_node,
+ {
+ aggregations: normalized_aggregations,
+ keys: @keys
+ })
+ sink_node_options = SinkNodeOptions.new
+ plan.build_sink_node(aggregate_node, sink_node_options)
+ plan.validate
+ plan.start
+ plan.wait
+ reader = sink_node_options.get_reader(aggregate_node.output_schema)
+ reader.read_all
+ end
+
+ private
+ def build_aggregations(function_name, target_names)
+ if target_names.empty?
+ [function_name]
+ else
+ target_names.collect do |name|
+ "#{function_name}(#{name})"
+ end
+ end
+ end
+
+ def normalize_aggregations(aggregations)
+ normalized_aggregations = []
+ aggregations.each do |aggregation|
+ case aggregation
+ when :all
+ all_functions = [
+ "hash_count",
+ "hash_sum",
+ "hash_product",
+ "hash_mean",
+ "hash_stddev",
+ "hash_variance",
+ # "hash_tdigest",
+ "hash_min",
+ "hash_max",
+ "hash_any",
+ "hash_all",
+ ]
+ normalized_aggregations.concat(normalize_aggregations(all_functions))
+ when /\A([a-zA-Z0-9_].+?)\((.+?)\)\z/
+ function = $1
+ input = $2.strip
+ normalized_aggregations << {function: function, input: input}
+ when "count", "hash_count"
+ function = aggregation
+ target_columns.each do |column|
+ normalized_aggregations << {function: function, input: column.name}
+ end
+ when "any", "hash_any", "all", "hash_all"
+ function = aggregation
+ boolean_target_columns.each do |column|
+ normalized_aggregations << {function: function, input: column.name}
+ end
+ when String
+ function = aggregation
+ numeric_target_columns.each do |column|
+ normalized_aggregations << {function: function, input: column.name}
+ end
+ else
+ normalized_aggregations << aggregation
+ end
+ end
+ normalized_aggregations
+ end
+
+ def target_columns
+ @target_columns ||= find_target_columns
+ end
+
+ def find_target_columns
+ key_names = @keys.collect(&:to_s)
+ @table.columns.find_all do |column|
+ not key_names.include?(column.name)
+ end
+ end
+
+ def boolean_target_columns
+ @boolean_target_columns ||= find_boolean_target_columns
+ end
+
+ def find_boolean_target_columns
+ target_columns.find_all do |column|
+ column.data_type.is_a?(BooleanDataType)
+ end
+ end
+
+ def numeric_target_columns
+ @numeric_target_columns ||= find_numeric_target_columns
+ end
+
+ def find_numeric_target_columns
+ target_columns.find_all do |column|
+ column.data_type.is_a?(NumericDataType)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/list-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/list-array-builder.rb
new file mode 100644
index 000000000..d889c8a0c
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/list-array-builder.rb
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class ListArrayBuilder
+ class << self
+ def build(data_type, values)
+ builder = new(data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :append_value_raw, :append_value
+
+ # @overload append_value
+ #
+ # Starts appending a list record. You also need to append list
+ # value by {#value_builder}.
+ #
+ # @overload append_value(list)
+ #
+ # Appends a list record including list value.
+ #
+ # @param value [nil, ::Array] The list value of the record.
+ #
+ # If this is `nil`, the list record is null.
+ #
+ # If this is `Array`, it's the list value of the record.
+ #
+ # @since 0.12.0
+ def append_value(*args)
+ n_args = args.size
+
+ case n_args
+ when 0
+ append_value_raw
+ when 1
+ value = args[0]
+ case value
+ when nil
+ append_null
+ when ::Array
+ append_value_raw
+ @value_builder ||= value_builder
+ @value_builder.append(*value)
+ else
+ message = "list value must be nil or Array: #{value.inspect}"
+ raise ArgumentError, message
+ end
+ else
+ message = "wrong number of arguments (given #{n_args}, expected 0..1)"
+ raise ArgumentError, message
+ end
+ end
+
+ def append_values(lists, is_valids=nil)
+ if is_valids
+ is_valids.each_with_index do |is_valid, i|
+ if is_valid
+ append_value(lists[i])
+ else
+ append_null
+ end
+ end
+ else
+ lists.each do |list|
+ append_value(list)
+ end
+ end
+ end
+
+ # @since 0.12.0
+ def append(*values)
+ if values.empty?
+ # For backward compatibility
+ append_value
+ else
+ super
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/list-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/list-data-type.rb
new file mode 100644
index 000000000..cfcdd2a9e
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/list-data-type.rb
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class ListDataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::ListDataType}.
+ #
+ # @overload initialize(field)
+ #
+ # @param field [Arrow::Field, Hash] The field of the list data
+ # type. You can also specify field description by `Hash`.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @example Create a list data type with {Arrow::Field}
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # Arrow::ListDataType.new(visible_field)
+ #
+ # @example Create a list data type with field description
+ # Arrow::ListDataType.new(name: "visible", type: :boolean)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the list data
+ # type. It must have `:field` value.
+ #
+ # @option description [Arrow::Field, Hash] :field The field of
+ # the list data type. You can also specify field description
+ # by `Hash`.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @example Create a list data type with {Arrow::Field}
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # Arrow::ListDataType.new(field: visible_field)
+ #
+ # @example Create a list data type with field description
+ # Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
+ #
+ # @overload initialize(data_type)
+ #
+ # @param data_type [Arrow::DataType, String, Symbol,
+ # ::Array<String>, ::Array<Symbol>, Hash] The element data
+ # type of the list data type. A field is created with the
+ # default name `"item"` from the data type automatically.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type.
+ #
+ # @example Create a list data type with {Arrow::DataType}
+ # Arrow::ListDataType.new(Arrow::BooleanDataType.new)
+ #
+ # @example Create a list data type with data type name as String
+ # Arrow::ListDataType.new("boolean")
+ #
+ # @example Create a list data type with data type name as Symbol
+ # Arrow::ListDataType.new(:boolean)
+ #
+ # @example Create a list data type with data type as Array
+ # Arrow::ListDataType.new([:time32, :milli])
+ def initialize(arg)
+ data_type = resolve_data_type(arg)
+ if data_type
+ field = Field.new(default_field_name, data_type)
+ else
+ field = resolve_field(arg)
+ end
+ initialize_raw(field)
+ end
+
+ private
+ def resolve_data_type(arg)
+ case arg
+ when DataType, String, Symbol, ::Array
+ DataType.resolve(arg)
+ when Hash
+ return nil if arg[:name]
+ return nil unless arg[:type]
+ DataType.resolve(arg)
+ else
+ nil
+ end
+ end
+
+ def default_field_name
+ "item"
+ end
+
+ def resolve_field(arg)
+ if arg.is_a?(Hash) and arg.key?(:field)
+ description = arg
+ arg = description[:field]
+ end
+ if arg.is_a?(Hash)
+ field_description = arg
+ Field.new(field_description)
+ else
+ arg
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/loader.rb b/src/arrow/ruby/red-arrow/lib/arrow/loader.rb
new file mode 100644
index 000000000..804a94894
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/loader.rb
@@ -0,0 +1,216 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "arrow/block-closable"
+
+module Arrow
+ class Loader < GObjectIntrospection::Loader
+ class << self
+ def load
+ super("Arrow", Arrow)
+ end
+ end
+
+ private
+ def post_load(repository, namespace)
+ require_libraries
+ require_extension_library
+ gc_guard
+ end
+
+ def require_libraries
+ require "arrow/column-containable"
+ require "arrow/field-containable"
+ require "arrow/generic-filterable"
+ require "arrow/generic-takeable"
+ require "arrow/record-containable"
+ require "arrow/symbol-values-appendable"
+
+ require "arrow/aggregate-node-options"
+ require "arrow/aggregation"
+ require "arrow/array"
+ require "arrow/array-builder"
+ require "arrow/bigdecimal-extension"
+ require "arrow/binary-dictionary-array-builder"
+ require "arrow/buffer"
+ require "arrow/chunked-array"
+ require "arrow/column"
+ require "arrow/compression-type"
+ require "arrow/csv-loader"
+ require "arrow/csv-read-options"
+ require "arrow/data-type"
+ require "arrow/date32-array"
+ require "arrow/date32-array-builder"
+ require "arrow/date64-array"
+ require "arrow/date64-array-builder"
+ require "arrow/datum"
+ require "arrow/decimal128"
+ require "arrow/decimal128-array"
+ require "arrow/decimal128-array-builder"
+ require "arrow/decimal128-data-type"
+ require "arrow/decimal256"
+ require "arrow/decimal256-array"
+ require "arrow/decimal256-array-builder"
+ require "arrow/decimal256-data-type"
+ require "arrow/dense-union-data-type"
+ require "arrow/dictionary-array"
+ require "arrow/dictionary-data-type"
+ require "arrow/equal-options"
+ require "arrow/expression"
+ require "arrow/field"
+ require "arrow/file-output-stream"
+ require "arrow/file-system"
+ require "arrow/fixed-size-binary-array"
+ require "arrow/fixed-size-binary-array-builder"
+ require "arrow/group"
+ require "arrow/list-array-builder"
+ require "arrow/list-data-type"
+ require "arrow/map-array"
+ require "arrow/map-array-builder"
+ require "arrow/map-data-type"
+ require "arrow/null-array"
+ require "arrow/null-array-builder"
+ require "arrow/path-extension"
+ require "arrow/record"
+ require "arrow/record-batch"
+ require "arrow/record-batch-builder"
+ require "arrow/record-batch-file-reader"
+ require "arrow/record-batch-iterator"
+ require "arrow/record-batch-reader"
+ require "arrow/record-batch-stream-reader"
+ require "arrow/rolling-window"
+ require "arrow/scalar"
+ require "arrow/schema"
+ require "arrow/slicer"
+ require "arrow/sort-key"
+ require "arrow/sort-options"
+ require "arrow/source-node-options"
+ require "arrow/sparse-union-data-type"
+ require "arrow/string-dictionary-array-builder"
+ require "arrow/struct-array"
+ require "arrow/struct-array-builder"
+ require "arrow/struct-data-type"
+ require "arrow/table"
+ require "arrow/table-concatenate-options"
+ require "arrow/table-formatter"
+ require "arrow/table-list-formatter"
+ require "arrow/table-table-formatter"
+ require "arrow/table-loader"
+ require "arrow/table-saver"
+ require "arrow/tensor"
+ require "arrow/time"
+ require "arrow/time32-array"
+ require "arrow/time32-array-builder"
+ require "arrow/time32-data-type"
+ require "arrow/time64-array"
+ require "arrow/time64-array-builder"
+ require "arrow/time64-data-type"
+ require "arrow/timestamp-array"
+ require "arrow/timestamp-array-builder"
+ require "arrow/timestamp-data-type"
+ require "arrow/writable"
+ end
+
+ def require_extension_library
+ require "arrow.so"
+ end
+
+ def gc_guard
+ require "arrow/constructor-arguments-gc-guardable"
+
+ [
+ @base_module::BinaryScalar,
+ @base_module::Buffer,
+ @base_module::DenseUnionScalar,
+ @base_module::FixedSizeBinaryScalar,
+ @base_module::LargeBinaryScalar,
+ @base_module::LargeListScalar,
+ @base_module::LargeStringScalar,
+ @base_module::ListScalar,
+ @base_module::MapScalar,
+ @base_module::SparseUnionScalar,
+ @base_module::StringScalar,
+ @base_module::StructScalar,
+ ].each do |klass|
+ klass.prepend(ConstructorArgumentsGCGuardable)
+ end
+ end
+
+ def load_object_info(info)
+ super
+
+ klass = @base_module.const_get(rubyish_class_name(info))
+ if klass.method_defined?(:close)
+ klass.extend(BlockClosable)
+ end
+ end
+
+ def load_method_info(info, klass, method_name)
+ case klass.name
+ when /Array\z/
+ case method_name
+ when "values"
+ method_name = "values_raw"
+ end
+ end
+
+ case klass.name
+ when /Builder\z/
+ case method_name
+ when "append"
+ return
+ else
+ super
+ end
+ when "Arrow::StringArray"
+ case method_name
+ when "get_value"
+ method_name = "get_raw_value"
+ when "get_string"
+ method_name = "get_value"
+ end
+ super(info, klass, method_name)
+ when "Arrow::Date32Array",
+ "Arrow::Date64Array",
+ "Arrow::Decimal128Array",
+ "Arrow::Decimal256Array",
+ "Arrow::Time32Array",
+ "Arrow::Time64Array",
+ "Arrow::TimestampArray"
+ case method_name
+ when "get_value"
+ method_name = "get_raw_value"
+ end
+ super(info, klass, method_name)
+ when "Arrow::Decimal128", "Arrow::Decimal256"
+ case method_name
+ when "copy"
+ method_name = "dup"
+ end
+ super(info, klass, method_name)
+ when "Arrow::BooleanScalar"
+ case method_name
+ when "value?"
+ method_name = "value"
+ end
+ super(info, klass, method_name)
+ else
+ super
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/map-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/map-array-builder.rb
new file mode 100644
index 000000000..9e269d1c5
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/map-array-builder.rb
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class MapArrayBuilder
+ class << self
+ def build(data_type, values)
+ builder = new(data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :append_value_raw, :append_value
+
+ # @overload append_value
+ #
+ # Starts appending a map record. You need to append
+ # values of map by {#key_builder} and {#item_builder}.
+ #
+ # @overload append_value(value)
+ #
+ # Appends a map record including key and item values.
+ #
+ # @param value [nil, #each] The map record.
+ #
+ # If this is `nil`, the map record is null.
+ #
+ # If this is an `Object` that has `#each`, each value is a pair of key and item.
+ #
+ # @since 6.0.0
+ def append_value(*args)
+ n_args = args.size
+
+ case n_args
+ when 0
+ append_value_raw
+ when 1
+ value = args[0]
+ case value
+ when nil
+ append_null
+ else
+ unless value.respond_to?(:each)
+ message = "map value must be nil, Hash or Object that has #each: #{value.inspect}"
+ raise ArgumentError, message
+ end
+ append_value_raw
+ @key_builder ||= key_builder
+ @item_builder ||= item_builder
+ case value
+ when Hash
+ keys = value.keys
+ values = value.values
+ else
+ keys = []
+ values = []
+ value.each do |key, item|
+ keys << key
+ values << item
+ end
+ end
+ @key_builder.append(*keys)
+ @item_builder.append(*values)
+ end
+ else
+ message = "wrong number of arguments (given #{n_args}, expected 0..1)"
+ raise ArgumentError, message
+ end
+ end
+
+ alias_method :append_values_raw, :append_values
+
+ def append_values(values, is_valids=nil)
+ value = values[0]
+ case value
+ when Integer
+ append_values_raw(values, is_valids)
+ else
+ if is_valids
+ is_valids.each_with_index do |is_valid, i|
+ if is_valid
+ append_value(values[i])
+ else
+ append_null
+ end
+ end
+ else
+ values.each do |value|
+ append_value(value)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/map-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/map-array.rb
new file mode 100644
index 000000000..96b8c01b1
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/map-array.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class MapArray
+    # Converts the entries returned by the parent implementation into a
+    # Hash that maps each entry's "key" to its "value".
+    def get_value(i)
+      super.map {|item| [item["key"], item["value"]]}.to_h
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/map-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/map-data-type.rb
new file mode 100644
index 000000000..67e134329
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/map-data-type.rb
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class MapDataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::MapDataType}.
+ #
+ # @overload initialize(key, item)
+ #
+ # @param key [Arrow::DataType, Hash, String, Symbol]
+ # The key data type of the map data type.
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @param item [Arrow::DataType, Hash, String, Symbol]
+ # The item data type of the map data type.
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @example Create a map data type for `{0: "Hello", 1: "World"}`
+ # key = :int8
+ # item = :string
+ # Arrow::MapDataType.new(key, item)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the map data
+ # type. It must have `:key`, `:item` values.
+ #
+ # @option description [Arrow::DataType, Hash, String, Symbol]
+ # :key The key data type of the map data type.
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @option description [Arrow::DataType, Hash, String, Symbol]
+ # :item The item data type of the map data type.
+ #
+ # You can specify data type as a description by `Hash`.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @example Create a map data type for `{0: "Hello", 1: "World"}`
+ # Arrow::MapDataType.new(key: :int8, item: :string)
+    def initialize(*args)
+      n_args = args.size
+      case n_args
+      when 1
+        # Single-argument form: a description holding :key and :item.
+        description = args[0]
+        key = description[:key]
+        item = description[:item]
+      when 2
+        key, item = args
+      else
+        # Same wording as Ruby's own arity errors (no comma after "given").
+        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
+        raise ArgumentError, message
+      end
+      initialize_raw(DataType.resolve(key), DataType.resolve(item))
+    end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/null-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/null-array-builder.rb
new file mode 100644
index 000000000..26e58ccdc
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/null-array-builder.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class NullArrayBuilder
+    class << self
+      def buildable?(args) # a lone Integer argument is never buildable here
+        super && !(args.size == 1 && args[0].is_a?(Integer))
+      end
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/null-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/null-array.rb
new file mode 100644
index 000000000..7426bb345
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/null-array.rb
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class NullArray
+ def get_value(i) # the index is ignored; the result is always nil
+ nil
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/path-extension.rb b/src/arrow/ruby/red-arrow/lib/arrow/path-extension.rb
new file mode 100644
index 000000000..1273f298c
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/path-extension.rb
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class PathExtension
+    def initialize(path)
+      @path = path
+    end
+
+    # Guesses the format and compression from the base name of the path.
+    #
+    # @return [Hash] `{}` when the base name has no extension. Otherwise
+    #   `:format` is the last extension, unless there are two or more
+    #   extensions and the last one resolves to a compression type: then
+    #   `:compression` is that type and `:format` is the extension
+    #   before it. Extensions are downcased.
+    def extract
+      parts = ::File.basename(@path).split(".")
+      return {} if parts.size < 2
+
+      extension = parts.last.downcase
+      if parts.size > 2
+        compression = CompressionType.resolve_extension(extension)
+        if compression
+          return {format: parts[-2].downcase, compression: compression}
+        end
+      end
+      {format: extension}
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/raw-table-converter.rb b/src/arrow/ruby/red-arrow/lib/arrow/raw-table-converter.rb
new file mode 100644
index 000000000..41d331fb3
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/raw-table-converter.rb
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class RawTableConverter
+    attr_reader :n_rows, :schema, :values
+    def initialize(raw_table)
+      @raw_table = raw_table
+      convert
+    end
+
+    private
+    # Fills @schema, @values and @n_rows from the raw table, which is
+    # either an Array of Column or an #each source of name/array pairs.
+    def convert
+      if @raw_table.is_a?(::Array) && @raw_table[0].is_a?(Column)
+        @schema = Schema.new(@raw_table.collect(&:field))
+        @values = @raw_table.collect(&:data)
+      else
+        fields = []
+        @values = []
+        @raw_table.each do |name, array|
+          array = ArrayBuilder.build(array) if array.is_a?(::Array)
+          fields << Field.new(name.to_s, array.value_data_type)
+          @values << array
+        end
+        @schema = Schema.new(fields)
+      end
+      # NOTE(review): with an empty raw table @values[0] is nil and this raises.
+      @n_rows = @values[0].length
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-builder.rb
new file mode 100644
index 000000000..dc20312f2
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-builder.rb
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class RecordBatchBuilder
+    class << self
+      # Creates a builder for `schema`, appends `data` and flushes it.
+      # @since 0.12.0
+      def build(schema, data)
+        builder = new(schema)
+        builder.append(data)
+        builder.flush
+      end
+    end
+
+    alias_method :initialize_raw, :initialize
+    private :initialize_raw
+    def initialize(schema)
+      schema = Schema.new(schema) unless schema.is_a?(Schema)
+      initialize_raw(schema)
+      # Column positions keyed by field name; used by #resolve_name.
+      @name_to_index = {}
+      schema.fields.each_with_index do |field, i|
+        @name_to_index[field.name] = i
+      end
+    end
+
+    # Returns the column builder for a column name or a column index.
+    # @since 0.12.0
+    def [](name_or_index)
+      if name_or_index.is_a?(String) || name_or_index.is_a?(Symbol)
+        self[resolve_name(name_or_index)]
+      else
+        column_builders[name_or_index]
+      end
+    end
+
+    # Appends each value: a Hash is taken as columns, anything else as
+    # a list of records.
+    # @since 0.12.0
+    def append(*values)
+      values.each do |value|
+        if value.is_a?(Hash)
+          append_columns(value)
+        else
+          append_records(value)
+        end
+      end
+    end
+
+    # Appends records given as Hash (column name => value), as ordered
+    # values, or as nil. Columns a record does not set get nil.
+    # @since 0.12.0
+    def append_records(records)
+      columns = Array.new(n_columns) {[]}
+      records.each_with_index do |record, nth_record|
+        case record
+        when nil
+          # Nothing to collect: every column is padded with nil below.
+        when Hash
+          record.each do |name, value|
+            nth_column = resolve_name(name)
+            # Keys that do not name a column are ignored.
+            columns[nth_column] << value unless nth_column.nil?
+          end
+        else
+          record.each_with_index do |value, nth_column|
+            columns[nth_column] << value
+          end
+        end
+        expected_size = nth_record + 1
+        columns.each do |column|
+          column << nil if column.size != expected_size
+        end
+      end
+      columns.each_with_index do |column, i|
+        self[i].append(*column)
+      end
+    end
+
+    # Appends the given values to the builder of each named column.
+    # @since 0.12.0
+    def append_columns(columns)
+      columns.each do |name, values|
+        self[name].append(*values)
+      end
+    end
+
+    # Returns the builders of all columns, created on first use.
+    # @since 0.13.0
+    def column_builders
+      @column_builders ||= Array.new(n_columns) {|i| get_column_builder(i)}
+    end
+
+    private
+    def resolve_name(name)
+      @name_to_index[name.to_s]
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-file-reader.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-file-reader.rb
new file mode 100644
index 000000000..86a757e32
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-file-reader.rb
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class RecordBatchFileReader
+    include Enumerable
+
+    # Yields each record batch; returns an Enumerator without a block.
+    def each
+      return to_enum(__method__) unless block_given?
+      n_record_batches.times {|i| yield(get_record_batch(i))}
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-iterator.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-iterator.rb
new file mode 100644
index 000000000..4b828c6dc
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-iterator.rb
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class RecordBatchIterator
+ alias_method :to_a, :to_list # makes the existing #to_list also available as #to_a
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-reader.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-reader.rb
new file mode 100644
index 000000000..e030e4f3b
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-reader.rb
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class RecordBatchReader
+    class << self
+      # Tries to turn `value` into a reader. A record batch, a non-empty
+      # Array holding only record batches, and a table are convertible;
+      # nil is returned for everything else.
+      #
+      # @api private
+      def try_convert(value)
+        if value.is_a?(::Array)
+          return nil if value.empty?
+          only_batches = value.all? {|v| v.is_a?(RecordBatch)}
+          only_batches ? new(value) : nil
+        elsif value.is_a?(RecordBatch)
+          new([value])
+        elsif value.is_a?(Table)
+          TableBatchReader.new(value)
+        else
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch-stream-reader.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-stream-reader.rb
new file mode 100644
index 000000000..fa15c8000
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch-stream-reader.rb
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class RecordBatchStreamReader
+    include Enumerable
+
+    def each # yields batches until next_record_batch is nil; Enumerator without a block
+      return to_enum(__method__) unless block_given?
+      loop do
+        break if (record_batch = next_record_batch).nil?
+        yield(record_batch)
+      end
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-batch.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-batch.rb
new file mode 100644
index 000000000..c5aaf876b
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-batch.rb
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "arrow/raw-table-converter"
+
+module Arrow
+  class RecordBatch
+    include ColumnContainable
+    include RecordContainable
+    include Enumerable
+
+    class << self
+      # Creates a record batch. The argument count selects the form:
+      #
+      # * 1: a raw table, converted by RawTableConverter
+      # * 2: a schema and data, built by RecordBatchBuilder
+      # * 3: passed as is to the original constructor
+      def new(*args)
+        n_args = args.size
+        case n_args
+        when 1
+          converter = RawTableConverter.new(args[0])
+          super(converter.schema, converter.n_rows, converter.values)
+        when 2
+          RecordBatchBuilder.build(*args)
+        when 3
+          super
+        else
+          message = "wrong number of arguments (given #{n_args}, expected 1..3)"
+          raise ArgumentError, message
+        end
+      end
+    end
+
+    alias_method :each, :each_record
+
+    alias_method :size, :n_rows
+    alias_method :length, :n_rows
+
+    # Wraps this record batch in a new {Arrow::Table} with the same schema.
+    #
+    # @return [Arrow::Table]
+    #
+    # @since 0.12.0
+    def to_table
+      Table.new(schema, [self])
+    end
+
+    # Names that find_column resolves are answered by #method_missing.
+    def respond_to_missing?(name, include_private)
+      find_column(name) ? true : super
+    end
+
+    # Returns the column found for `name` when called without
+    # arguments; every other call falls through to super.
+    def method_missing(name, *args, &block)
+      column = args.empty? ? find_column(name) : nil
+      column || super
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record-containable.rb b/src/arrow/ruby/red-arrow/lib/arrow/record-containable.rb
new file mode 100644
index 000000000..20c9ac2f5
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record-containable.rb
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  module RecordContainable
+    # Yields one record per row, or returns an Enumerator when no block
+    # is given.
+    #
+    # @param reuse_record [Boolean] When true, one record object is
+    #   yielded for every row with only its index updated.
+    def each_record(reuse_record: false)
+      return to_enum(__method__, reuse_record: reuse_record) unless block_given?
+      shared = reuse_record ? Record.new(self, nil) : nil
+      n_rows.times do |i|
+        if shared
+          shared.index = i
+          yield(shared)
+        else
+          yield(Record.new(self, i))
+        end
+      end
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/record.rb b/src/arrow/ruby/red-arrow/lib/arrow/record.rb
new file mode 100644
index 000000000..6f83dded0
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/record.rb
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Record
+    attr_reader :container
+    attr_accessor :index
+    def initialize(container, index)
+      @container = container
+      @index = index
+    end
+
+    # Returns this record's value in the given column, or nil when the
+    # container has no such column.
+    def [](column_name_or_column_index)
+      column = @container.find_column(column_name_or_column_index)
+      column.nil? ? nil : column[@index]
+    end
+
+    # Returns this record's values, one per column of the container,
+    # in the container's column order.
+    def to_a
+      @container.columns.collect {|column| column[@index]}
+    end
+
+    # Returns this record as a Hash of column name => value.
+    def to_h
+      @container.columns.each_with_object({}) do |column, attributes|
+        attributes[column.name] = column[@index]
+      end
+    end
+
+    # Names that the container resolves to a column are answered here.
+    def respond_to_missing?(name, include_private)
+      @container.find_column(name) ? true : super
+    end
+
+    # Returns the value of the column found for `name` when called
+    # without arguments; every other call falls through to super.
+    def method_missing(name, *args, &block)
+      column = args.empty? ? @container.find_column(name) : nil
+      return column[@index] if column
+      super
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/rolling-window.rb b/src/arrow/ruby/red-arrow/lib/arrow/rolling-window.rb
new file mode 100644
index 000000000..1db03bb23
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/rolling-window.rb
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ # Experimental
+ #
+ # TODO: Most of this code should be implemented in Apache Arrow C++.
+  class RollingWindow
+    def initialize(table, size)
+      @table = table
+      @size = size
+    end
+
+    # Computes, for each entry of the `key` column, its difference from
+    # the entry `diff` positions earlier. The first `diff` results, and
+    # results where either operand is nil, are nil.
+    # NOTE(review): when a size is set the operands are slices of that
+    # many entries rather than single values - confirm this is intended.
+    def lag(key, diff: 1)
+      column = @table[key]
+      windows = @size ? column.each_slice(@size) : column
+      lag_values = [nil] * diff
+      windows.each_cons(diff + 1) do |values|
+        # A window holds diff + 1 entries, so the entry `diff` positions
+        # after the first one is the last one (values[1] only if diff is 1).
+        target = values.first
+        current = values.last
+        missing = target.nil? || current.nil?
+        lag_values << (missing ? nil : current - target)
+      end
+      ArrayBuilder.build(lag_values)
+    end
+  end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/scalar.rb b/src/arrow/ruby/red-arrow/lib/arrow/scalar.rb
new file mode 100644
index 000000000..b2bf1ac59
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/scalar.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Scalar
+ # Compares this scalar with `other` by calling `equal_options`.
+ #
+ # @param other [Arrow::Scalar] The scalar to be compared.
+ # @param options [Arrow::EqualOptions, Hash] (nil)
+ #   The options to customize how to compare.
+ # @return [Boolean]
+ #   `true` if both of them have the same data, `false` otherwise.
+ # @since 5.0.0
+ def equal_scalar?(other, options=nil)
+ equal_options(other, options)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/schema.rb b/src/arrow/ruby/red-arrow/lib/arrow/schema.rb
new file mode 100644
index 000000000..03354c862
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/schema.rb
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Schema
+ include FieldContainable
+
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::Schema}.
+ #
+ # @overload initialize(fields)
+ #
+ # @param fields [::Array<Arrow::Field, Hash>] The fields of the
+ # schema. You can mix {Arrow::Field} and field description in
+ # the fields.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @example Create a schema with {Arrow::Field}s
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # Arrow::Schema.new([visible_field])
+ #
+ # @example Create a schema with field descriptions
+ # visible_field_description = {
+ # name: "visible",
+ # data_type: :boolean,
+ # }
+ # Arrow::Schema.new([visible_field_description])
+ #
+ # @example Create a schema with {Arrow::Field}s and field descriptions
+ # fields = [
+ # Arrow::Field.new("visible", :boolean),
+ # {
+ # name: "count",
+ # type: :int32,
+ # },
+ # ]
+ # Arrow::Schema.new(fields)
+ #
+ # @overload initialize(fields)
+ #
+ # @param fields [Hash{String, Symbol => Arrow::DataType, Hash}]
+ # The pairs of field name and field data type of the schema.
+ # You can mix {Arrow::DataType} and data description for field
+ # data type.
+ #
+ # See {Arrow::DataType.new} how to specify data type description.
+ #
+ # @example Create a schema with fields
+ # fields = {
+ # "visible" => Arrow::BooleanDataType.new,
+ # :count => :int32,
+ # :tags => {
+ # type: :list,
+ # field: {
+ # name: "tag",
+ # type: :string,
+ # },
+ # },
+ # }
+ # Arrow::Schema.new(fields)
+    def initialize(fields)
+      case fields
+      when ::Array
+        # Anything that is not already a Field is passed to Field.new.
+        fields = fields.collect do |field|
+          field.is_a?(Field) ? field : Field.new(field)
+        end
+      when Hash
+        # Each pair becomes a Field built from its name and data type.
+        fields = fields.collect {|name, data_type| Field.new(name, data_type)}
+      end
+      # Other inputs are handed to the original constructor untouched.
+      initialize_raw(fields)
+    end
+
+ alias_method :[], :find_field
+
+ alias_method :to_s_raw, :to_s
+ def to_s(show_metadata: false) # formats via to_string_metadata, forwarding the flag
+ to_string_metadata(show_metadata)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/slicer.rb b/src/arrow/ruby/red-arrow/lib/arrow/slicer.rb
new file mode 100644
index 000000000..6cca7f75e
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/slicer.rb
@@ -0,0 +1,355 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Slicer
+ # @param table The object whose columns are looked up with
+ # `table[column_name]` (presumably an Arrow::Table). It is only
+ # stored here.
+ def initialize(table)
+ @table = table
+ end
+
+ def [](column_name)
+ column = @table[column_name]
+ return nil if column.nil?
+ ColumnCondition.new(column)
+ end
+
+ def respond_to_missing?(name, include_private)
+ return true if self[name]
+ super
+ end
+
+ def method_missing(name, *args, &block)
+ if args.empty?
+ column_condition = self[name]
+ return column_condition if column_condition
+ end
+ super
+ end
+
+ module Helper
+ class << self
+ def ensure_boolean(column)
+ case column.data_type
+ when Arrow::BooleanDataType
+ column.data
+ else
+ options = CastOptions.new
+ options.to_data_type = Arrow::BooleanDataType.new
+ Function.find("cast").execute([column.data], options).value
+ end
+ end
+ end
+ end
+
+ class Condition
+ def evaluate
+ message = "Slicer::Condition must define \#evaluate: #{inspect}"
+ raise NotImplementedError.new(message)
+ end
+
+ def &(condition)
+ AndCondition.new(self, condition)
+ end
+
+ def |(condition)
+ OrCondition.new(self, condition)
+ end
+
+ def ^(condition)
+ XorCondition.new(self, condition)
+ end
+ end
+
+ class LogicalCondition < Condition
+ def initialize(condition1, condition2)
+ @condition1 = condition1
+ @condition2 = condition2
+ end
+
+ def evaluate
+ function.execute([@condition1.evaluate, @condition2.evaluate]).value
+ end
+ end
+
+ # Logical condition combined with the "and" compute function.
+ class AndCondition < LogicalCondition
+ private
+ # Looked up on every call; used by LogicalCondition#evaluate.
+ def function
+ Function.find("and")
+ end
+ end
+
+ # Logical condition combined with the "or" compute function.
+ class OrCondition < LogicalCondition
+ private
+ # Looked up on every call; used by LogicalCondition#evaluate.
+ def function
+ Function.find("or")
+ end
+ end
+
+ # Logical condition combined with the "xor" compute function.
+ class XorCondition < LogicalCondition
+ private
+ # Looked up on every call; used by LogicalCondition#evaluate.
+ def function
+ Function.find("xor")
+ end
+ end
+
+ # Condition backed by a single column. Its operators and
+ # predicate-style methods compute nothing themselves; each returns
+ # another condition object wrapping the same column.
+ class ColumnCondition < Condition
+ def initialize(column)
+ @column = column
+ end
+
+ # Returns the column's data as booleans (see Helper.ensure_boolean).
+ def evaluate
+ Helper.ensure_boolean(@column)
+ end
+
+ # Unary `!`: the negated form of this column condition.
+ def !@
+ NotColumnCondition.new(@column)
+ end
+
+ # `==` is overridden below, so this returns an EqualCondition against
+ # `nil` rather than a Boolean.
+ def null?
+ self == nil
+ end
+
+ # `!=` is overridden below, so this returns a NotEqualCondition
+ # against `nil` rather than a Boolean.
+ def valid?
+ self != nil
+ end
+
+ # @return [EqualCondition]
+ def ==(value)
+ EqualCondition.new(@column, value)
+ end
+
+ # @return [NotEqualCondition]
+ def !=(value)
+ NotEqualCondition.new(@column, value)
+ end
+
+ # @return [LessCondition]
+ def <(value)
+ LessCondition.new(@column, value)
+ end
+
+ # @return [LessEqualCondition]
+ def <=(value)
+ LessEqualCondition.new(@column, value)
+ end
+
+ # @return [GreaterCondition]
+ def >(value)
+ GreaterCondition.new(@column, value)
+ end
+
+ # @return [GreaterEqualCondition]
+ def >=(value)
+ GreaterEqualCondition.new(@column, value)
+ end
+
+ # @return [InCondition]
+ def in?(values)
+ InCondition.new(@column, values)
+ end
+
+ # The block is stored, not called, until the condition is evaluated.
+ #
+ # @return [SelectCondition]
+ def select(&block)
+ SelectCondition.new(@column, block)
+ end
+
+ # The block is stored, not called, until the condition is evaluated.
+ #
+ # @return [RejectCondition]
+ def reject(&block)
+ RejectCondition.new(@column, block)
+ end
+ end
+
+ class NotColumnCondition < Condition
+ def initialize(column)
+ @column = column
+ end
+
+ def evaluate
+ data = Helper.ensure_boolean(@column)
+ Function.find("invert").execute([data]).value
+ end
+
+ def !@
+ ColumnCondition.new(@column)
+ end
+ end
+
+ class EqualCondition < Condition
+ def initialize(column, value)
+ @column = column
+ @value = value
+ end
+
+ def !@
+ NotEqualCondition.new(@column, @value)
+ end
+
+ def evaluate
+ if @value.nil?
+ Function.find("is_null").execute([@column.data]).value
+ else
+ Function.find("equal").execute([@column.data, @value]).value
+ end
+ end
+ end
+
+ class NotEqualCondition < Condition
+ def initialize(column, value)
+ @column = column
+ @value = value
+ end
+
+ def !@
+ EqualCondition.new(@column, @value)
+ end
+
+ def evaluate
+ if @value.nil?
+ Function.find("is_valid").execute([@column.data]).value
+ else
+ Function.find("not_equal").execute([@column.data, @value]).value
+ end
+ end
+ end
+
+ # `column < value`, evaluated with the "less" compute function.
+ class LessCondition < Condition
+ def initialize(column, value)
+ @column = column
+ @value = value
+ end
+
+ # The negation of `<` is `>=`.
+ def !@
+ GreaterEqualCondition.new(@column, @value)
+ end
+
+ def evaluate
+ Function.find("less").execute([@column.data, @value]).value
+ end
+ end
+
+ # `column <= value`, evaluated with the "less_equal" compute function.
+ class LessEqualCondition < Condition
+ def initialize(column, value)
+ @column = column
+ @value = value
+ end
+
+ # The negation of `<=` is `>`.
+ def !@
+ GreaterCondition.new(@column, @value)
+ end
+
+ def evaluate
+ Function.find("less_equal").execute([@column.data, @value]).value
+ end
+ end
+
+ # `column > value`, evaluated with the "greater" compute function.
+ class GreaterCondition < Condition
+ def initialize(column, value)
+ @column = column
+ @value = value
+ end
+
+ # The negation of `>` is `<=`.
+ def !@
+ LessEqualCondition.new(@column, @value)
+ end
+
+ def evaluate
+ Function.find("greater").execute([@column.data, @value]).value
+ end
+ end
+
+ # `column >= value`, evaluated with the "greater_equal" compute
+ # function.
+ class GreaterEqualCondition < Condition
+ def initialize(column, value)
+ @column = column
+ @value = value
+ end
+
+ # The negation of `>=` is `<`.
+ def !@
+ LessCondition.new(@column, @value)
+ end
+
+ def evaluate
+ Function.find("greater_equal").execute([@column.data, @value]).value
+ end
+ end
+
+ class InCondition < Condition
+ def initialize(column, values)
+ @column = column
+ @values = values
+ end
+
+ def !@
+ NotInCondition.new(@column, @values)
+ end
+
+ def evaluate
+ values = @values
+ values = Array.new(values) unless values.is_a?(Array)
+ options = SetLookupOptions.new(values)
+ Function.find("is_in").execute([@column.data], options).value
+ end
+ end
+
+ class NotInCondition < Condition
+ def initialize(column, values)
+ @column = column
+ @values = values
+ end
+
+ def !@
+ InCondition.new(@column, @values)
+ end
+
+ def evaluate
+ values = @values
+ values = Array.new(values) unless values.is_a?(Array)
+ options = SetLookupOptions.new(values)
+ booleans = Function.find("is_in").execute([@column.data], options).value
+ Function.find("invert").execute([booleans]).value
+ end
+ end
+
+ # Condition computed in Ruby: the stored block is called for each
+ # column value and the results become a BooleanArray.
+ class SelectCondition < Condition
+ def initialize(column, block)
+ @column = column
+ @block = block
+ end
+
+ # Negation keeps the same block and flips select to reject.
+ def !@
+ RejectCondition.new(@column, @block)
+ end
+
+ def evaluate
+ BooleanArray.new(@column.collect(&@block))
+ end
+ end
+
+ class RejectCondition < Condition
+ def initialize(column, block)
+ @column = column
+ @block = block
+ end
+
+ def !@
+ SelectCondition.new(@column, @block)
+ end
+
+ def evaluate
+ raw_array = @column.collect do |value|
+ evaluated_value = @block.call(value)
+ if evaluated_value.nil?
+ nil
+ else
+ not evaluated_value
+ end
+ end
+ BooleanArray.new(raw_array)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/sort-key.rb b/src/arrow/ruby/red-arrow/lib/arrow/sort-key.rb
new file mode 100644
index 000000000..987027256
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/sort-key.rb
@@ -0,0 +1,193 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class SortKey
+ class << self
+ # Ensure returning suitable {Arrow::SortKey}.
+ #
+ # @overload resolve(sort_key)
+ #
+ # Returns the given sort key itself. This makes it convenient to
+ # use this method as an {Arrow::SortKey} converter.
+ #
+ # @param sort_key [Arrow::SortKey] The sort key.
+ #
+ # @return [Arrow::SortKey] The given sort key itself.
+ #
+ # @overload resolve(name)
+ #
+ # Creates a new suitable sort key from column name with
+ # leading order mark. See {#initialize} for details about
+ # order mark.
+ #
+ # @return [Arrow::SortKey] A new suitable sort key.
+ #
+ # @overload resolve(name, order)
+ #
+ # Creates a new suitable sort key from column name without
+ # leading order mark and order. See {#initialize} for details.
+ #
+ # @return [Arrow::SortKey] A new suitable sort key.
+ #
+ # @since 4.0.0
+ def resolve(name, order=nil)
+ return name if name.is_a?(self)
+ new(name, order)
+ end
+
+ # @api private
+ def try_convert(value)
+ case value
+ when Symbol, String
+ new(value.to_s, :ascending)
+ else
+ nil
+ end
+ end
+ end
+
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+ # Creates a new {Arrow::SortKey}.
+ #
+ # @overload initialize(name)
+ #
+ # @param name [Symbol, String] The name of the sort column.
+ #
+ # If `name` is a String, the first character may be processed
+ # as the "leading order mark". If the first character is `"+"`
+ # or `"-"`, they are processed as a leading order mark. If the
+ # first character is processed as a leading order mark, the
+ # first character is removed from sort column name and
+ # corresponding order is used. `"+"` uses ascending order and
+ # `"-"` uses descending order.
+ #
+ # If `name` is not a String or doesn't start with a leading
+ # order mark, the sort column name is `name` as-is and
+ # ascending order is used.
+ #
+ # @example String without the leading order mark
+ # key = Arrow::SortKey.new("count")
+ # key.name # => "count"
+ # key.order # => Arrow::SortOrder::ASCENDING
+ #
+ # @example String with the "+" leading order mark
+ # key = Arrow::SortKey.new("+count")
+ # key.name # => "count"
+ # key.order # => Arrow::SortOrder::ASCENDING
+ #
+ # @example String with the "-" leading order mark
+ # key = Arrow::SortKey.new("-count")
+ # key.name # => "count"
+ # key.order # => Arrow::SortOrder::DESCENDING
+ #
+ # @example Symbol that starts with "-"
+ # key = Arrow::SortKey.new(:"-count")
+ # key.name # => "-count"
+ # key.order # => Arrow::SortOrder::ASCENDING
+ #
+ # @overload initialize(name, order)
+ #
+ # @param name [Symbol, String] The name of the sort column.
+ #
+ # No leading order mark processing. The given `name` is used
+ # as-is.
+ #
+ # @param order [Symbol, String, Arrow::SortOrder] How to order
+ # by this sort key.
+ #
+ # If this is a Symbol or String, this must be `:ascending`,
+ # `"ascending"`, `:asc`, `"asc"`, `:descending`,
+ # `"descending"`, `:desc` or `"desc"`.
+ #
+ # @example No leading order mark processing
+ # key = Arrow::SortKey.new("-count", :ascending)
+ # key.name # => "-count"
+ # key.order # => Arrow::SortOrder::ASCENDING
+ #
+ # @example Order by abbreviated name with Symbol
+ # key = Arrow::SortKey.new("count", :desc)
+ # key.name # => "count"
+ # key.order # => Arrow::SortOrder::DESCENDING
+ #
+ # @example Order by String
+ # key = Arrow::SortKey.new("count", "descending")
+ # key.name # => "count"
+ # key.order # => Arrow::SortOrder::DESCENDING
+ #
+ # @example Order by Arrow::SortOrder
+ # key = Arrow::SortKey.new("count", Arrow::SortOrder::DESCENDING)
+ # key.name # => "count"
+ # key.order # => Arrow::SortOrder::DESCENDING
+ #
+ # @since 4.0.0
+ def initialize(name, order=nil)
+ name, order = normalize_name(name, order)
+ order = normalize_order(order) || :ascending
+ initialize_raw(name, order)
+ end
+
+ # @return [String] The string representation of this sort key. You
+ # can recreate the {Arrow::SortKey} with
+ # `Arrow::SortKey.new(key.to_s)`.
+ #
+ # @example Recreate Arrow::SortKey
+ # key = Arrow::SortKey.new("-count")
+ # key.to_s # => "-count"
+ # key == Arrow::SortKey.new(key.to_s) # => true
+ #
+ # @since 4.0.0
+ def to_s
+ if order == SortOrder::ASCENDING
+ "+#{name}"
+ else
+ "-#{name}"
+ end
+ end
+
+ private
+ def normalize_name(name, order)
+ case name
+ when Symbol
+ return name.to_s, order
+ when String
+ return name, order if order
+ if name.start_with?("-")
+ return name[1..-1], order || :descending
+ elsif name.start_with?("+")
+ return name[1..-1], order || :ascending
+ else
+ return name, order
+ end
+ else
+ return name, order
+ end
+ end
+
+ def normalize_order(order)
+ case order
+ when :asc, "asc"
+ :ascending
+ when :desc, "desc"
+ :descending
+ else
+ order
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/sort-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/sort-options.rb
new file mode 100644
index 000000000..a7c2d6431
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/sort-options.rb
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class SortOptions
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when Symbol, String
+ new(value)
+ when ::Array
+ new(*value)
+ else
+ nil
+ end
+ end
+ end
+
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+ # @param sort_keys [::Array<String, Symbol, Arrow::SortKey>] The
+ # sort keys to be used. See {Arrow::SortKey.resolve} how to
+ # resolve each sort key in `sort_keys`.
+ #
+ # You can add more sort keys by {#add_sort_key} later.
+ #
+ # @example No initial sort keys
+ # options = Arrow::SortOptions.new
+ # options.sort_keys # => []
+ #
+ # @example String sort keys
+ # options = Arrow::SortOptions.new("count", "-age")
+ # options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
+ #
+ # @example Symbol sort keys
+ # options = Arrow::SortOptions.new(:count, :age)
+ # options.sort_keys.collect(&:to_s) # => ["+count", "+age"]
+ #
+ # @example Mixed sort keys
+ # options = Arrow::SortOptions.new(:count, "-age")
+ # options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
+ #
+ # @since 4.0.0
+ def initialize(*sort_keys)
+ initialize_raw
+ sort_keys.each do |sort_key|
+ add_sort_key(sort_key)
+ end
+ end
+
+ # @api private
+ alias_method :add_sort_key_raw, :add_sort_key
+ # Add a sort key.
+ #
+ # @return [void]
+ #
+ # @overload add_sort_key(key)
+ #
+ # @param key [Arrow::SortKey] The sort key to be added.
+ #
+ # @example Add a key to sort by "price" column in descending order
+ # options = Arrow::SortOptions.new
+ # options.add_sort_key(Arrow::SortKey.new(:price, :descending))
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
+ #
+ # @overload add_sort_key(name)
+ #
+ # @param name [Symbol, String] The sort key name to be
+ # added. See also {Arrow::SortKey#initialize} for the leading
+ # order mark for String name.
+ #
+ # @example Add a key to sort by "price" column in descending order
+ # options = Arrow::SortOptions.new
+ # options.add_sort_key("-price")
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
+ #
+ # @overload add_sort_key(name, order)
+ #
+ # @param name [Symbol, String] The sort key name.
+ #
+ # @param order [Symbol, String, Arrow::SortOrder] The sort
+ # order. See {Arrow::SortKey#initialize} for details.
+ #
+ # @example Add a key to sort by "price" column in descending order
+ # options = Arrow::SortOptions.new
+ # options.add_sort_key("price", :desc)
+ # options.sort_keys.collect(&:to_s) # => ["-price"]
+ #
+ # @since 4.0.0
+ def add_sort_key(name, order=nil)
+ add_sort_key_raw(SortKey.resolve(name, order))
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/source-node-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/source-node-options.rb
new file mode 100644
index 000000000..402ea85f7
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/source-node-options.rb
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class SourceNodeOptions
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when RecordBatchReader, RecordBatch, Table
+ new(value)
+ else
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/sparse-union-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/sparse-union-data-type.rb
new file mode 100644
index 000000000..14f3e5a7e
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/sparse-union-data-type.rb
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class SparseUnionDataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::SparseUnionDataType}.
+ #
+ # @overload initialize(fields, type_codes)
+ #
+ # @param fields [::Array<Arrow::Field, Hash>] The fields of the
+ # sparse union data type. You can mix {Arrow::Field} and field
+ # description in the fields.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @param type_codes [::Array<Integer>] The IDs that indicate
+ # corresponding fields.
+ #
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
+ # fields = [
+ # Arrow::Field.new("visible", :boolean),
+ # {
+ # name: "count",
+ # type: :int32,
+ # },
+ # ]
+ # Arrow::SparseUnionDataType.new(fields, [2, 9])
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the sparse union
+ # data type. It must have `:fields` and `:type_codes` values.
+ #
+ # @option description [::Array<Arrow::Field, Hash>] :fields The
+ # fields of the sparse union data type. You can mix
+ # {Arrow::Field} and field description in the fields.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @option description [::Array<Integer>] :type_codes The IDs
+ # that indicate corresponding fields.
+ #
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
+ # fields = [
+ # Arrow::Field.new("visible", :boolean),
+ # {
+ # name: "count",
+ # type: :int32,
+ # },
+ # ]
+ # Arrow::SparseUnionDataType.new(fields: fields,
+ # type_codes: [2, 9])
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ description = args[0]
+ fields = description[:fields]
+ type_codes = description[:type_codes]
+ when 2
+ fields, type_codes = args
+ else
+ message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
+ raise ArgumentError, message
+ end
+ fields = fields.collect do |field|
+ field = Field.new(field) unless field.is_a?(Field)
+ field
+ end
+ initialize_raw(fields, type_codes)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/string-dictionary-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/string-dictionary-array-builder.rb
new file mode 100644
index 000000000..fc2f90b80
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/string-dictionary-array-builder.rb
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class StringDictionaryArrayBuilder
+ # Mixes in `append_values` from SymbolValuesAppendable.
+ include SymbolValuesAppendable
+
+ private
+ # Hook for the mixin; presumably called by its `append_values` to get
+ # a fresh builder for the values — confirm against
+ # SymbolValuesAppendable.
+ def create_values_array_builder
+ StringArrayBuilder.new
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/struct-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/struct-array-builder.rb
new file mode 100644
index 000000000..ce883166a
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/struct-array-builder.rb
@@ -0,0 +1,146 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class StructArrayBuilder
+ class << self
+ def build(data_type, values)
+ builder = new(data_type)
+ builder.build(values)
+ end
+ end
+
+ # Shorthand for {#find_field_builder}.
+ def [](index_or_name)
+ find_field_builder(index_or_name)
+ end
+
+ def find_field_builder(index_or_name)
+ case index_or_name
+ when String, Symbol
+ name = index_or_name
+ cached_name_to_builder[name.to_s]
+ else
+ index = index_or_name
+ cached_field_builders[index]
+ end
+ end
+
+ alias_method :append_value_raw, :append_value
+
+ # @overload append_value
+ #
+ # Starts appending a struct record. You need to append values of
+ # fields.
+ #
+ # @overload append_value(value)
+ #
+ # Appends a struct record including values of fields.
+ #
+ # @param value [nil, ::Array, Hash] The struct record value.
+ #
+ # If this is `nil`, the struct record is null.
+ #
+ # If this is `Array` or `Hash`, they are values of fields.
+ #
+ # @since 0.12.0
+ # @raise [ArgumentError] If more than one argument is given, or the
+ # single argument is not nil, an Array or a Hash.
+ def append_value(*args)
+ n_args = args.size
+
+ case n_args
+ when 0
+ # Only starts the struct record; the caller appends field values.
+ append_value_raw
+ when 1
+ value = args[0]
+ case value
+ when nil
+ append_null
+ when ::Array
+ # Start the record first, then fill fields positionally. `zip`
+ # pads with nil when `value` is shorter than the field builders.
+ append_value_raw
+ cached_field_builders.zip(value) do |builder, sub_value|
+ builder.append(sub_value)
+ end
+ when Hash
+ append_value_raw
+ # Work on a copy so that used builders can be removed; whatever
+ # remains afterwards was not mentioned in `value`.
+ local_name_to_builder = cached_name_to_builder.dup
+ value.each do |name, sub_value|
+ # NOTE(review): a name that matches no field (or that repeats
+ # after `to_s`) makes `delete` return nil, so the next line
+ # raises NoMethodError after the record was already started.
+ builder = local_name_to_builder.delete(name.to_s)
+ builder.append(sub_value)
+ end
+ # Fields missing from the Hash become null.
+ local_name_to_builder.each do |_, builder|
+ builder.append_null
+ end
+ else
+ message =
+ "struct value must be nil, Array or Hash: #{value.inspect}"
+ raise ArgumentError, message
+ end
+ else
+ message = "wrong number of arguments (given #{n_args}, expected 0..1)"
+ raise ArgumentError, message
+ end
+ end
+
+ def append_values(values, is_valids=nil)
+ if is_valids
+ is_valids.each_with_index do |is_valid, i|
+ if is_valid
+ append_value(values[i])
+ else
+ append_null
+ end
+ end
+ else
+ values.each do |value|
+ append_value(value)
+ end
+ end
+ end
+
+ # Keep the original implementation reachable as `append_null_raw`.
+ alias_method :append_null_raw, :append_null
+ # Currently a plain pass-through to the original implementation.
+ def append_null
+ append_null_raw
+ end
+
+ # @since 0.12.0
+ def append(*values)
+ if values.empty?
+ # For backward compatibility
+ append_value_raw
+ else
+ super
+ end
+ end
+
+ private
+ # Memoizes `field_builders` so that it is called only once per
+ # builder.
+ def cached_field_builders
+ @field_builders ||= field_builders
+ end
+
+ def build_name_to_builder
+ name_to_builder = {}
+ builders = cached_field_builders
+ value_data_type.fields.each_with_index do |field, i|
+ name_to_builder[field.name] = builders[i]
+ end
+ name_to_builder
+ end
+
+ # Memoizes the name => builder map built by `build_name_to_builder`.
+ def cached_name_to_builder
+ @name_to_builder ||= build_name_to_builder
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/struct-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/struct-array.rb
new file mode 100644
index 000000000..0b293dfc1
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/struct-array.rb
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class StructArray
+ # @param i [Integer]
+ # The index of the value to be gotten. You must specify the value index.
+ #
+ # You can use {Arrow::Array#[]} for convenient value access.
+ #
+ # @return [Hash] The `i`-th struct.
+ def get_value(i)
+ value = {}
+ value_data_type.fields.zip(fields) do |field, field_array|
+ value[field.name] = field_array[i]
+ end
+ value
+ end
+
+ # @overload find_field(index)
+ # @param index [Integer] The index of the field to be found.
+ # @return [Arrow::Array, nil]
+ # The `index`-th field or `nil` for out of range.
+ #
+ # @overload find_field(name)
+ # @param name [String, Symbol] The name of the field to be found.
+ # @return [Arrow::Array, nil]
+ # The field that has `name` or `nil` for nonexistent name.
+ def find_field(index_or_name)
+ case index_or_name
+ when String, Symbol
+ name = index_or_name
+ (@name_to_field ||= build_name_to_field)[name.to_s]
+ else
+ index = index_or_name
+ fields[index]
+ end
+ end
+
+ # Keep the original implementation reachable as `fields_raw`.
+ alias_method :fields_raw, :fields
+ # Memoized `fields_raw`: the child arrays are fetched only once per
+ # struct array.
+ def fields
+ @fields ||= fields_raw
+ end
+
+ private
+ def build_name_to_field
+ name_to_field = {}
+ value_data_type.fields.zip(fields) do |field, field_array|
+ name_to_field[field.name] = field_array
+ end
+ name_to_field
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/struct-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/struct-data-type.rb
new file mode 100644
index 000000000..a89a01689
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/struct-data-type.rb
@@ -0,0 +1,128 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class StructDataType
+ include FieldContainable
+
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::StructDataType}.
+ #
+ # @overload initialize(fields)
+ #
+ # @param fields [::Array<Arrow::Field, Hash>] The fields of the
+ # struct data type. You can also specify field description as
+ # a field. You can mix {Arrow::Field} and field description.
+ #
+ # See {Arrow::Field.new} how to specify field description.
+ #
+ # @example Create a struct data type with {Arrow::Field}s
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # count_field = Arrow::Field.new("count", :int32)
+ # Arrow::StructDataType.new([visible_field, count_field])
+ #
+ # @example Create a struct data type with field descriptions
+ # field_descriptions = [
+ # {name: "visible", type: :boolean},
+ # {name: "count", type: :int32},
+ # ]
+ # Arrow::StructDataType.new(field_descriptions)
+ #
+ # @example Create a struct data type with {Arrow::Field} and field description
+ # fields = [
+ # Arrow::Field.new("visible", :boolean),
+ # {name: "count", type: :int32},
+ # ]
+ # Arrow::StructDataType.new(fields)
+ #
+ # @overload initialize(fields)
+ #
+ # @param fields [Hash{String, Symbol => Arrow::DataType, Hash}]
+ # The pairs of field name and field data type of the struct
+ # data type. You can also specify data type description by
+ # `Hash`. You can mix {Arrow::DataType} and data type description.
+ #
+ # See {Arrow::DataType.resolve} how to specify data type
+ # description.
+ #
+ # @example Create a struct data type with {Arrow::DataType}s
+ # fields = {
+ # "visible" => Arrow::BooleanDataType.new,
+ # "count" => Arrow::Int32DataType.new,
+ # }
+ # Arrow::StructDataType.new(fields)
+ #
+ # @example Create a struct data type with data type descriptions
+ # fields = {
+ # "visible" => :boolean,
+ # "count" => {type: :int32},
+ # }
+ # Arrow::StructDataType.new(fields)
+ #
+ # @example Create a struct data type with {Arrow::DataType} and data type description
+ # fields = {
+ # "visible" => Arrow::BooleanDataType.new,
+ # "count" => {type: :int32},
+ # }
+ # Arrow::StructDataType.new(fields)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the struct data
+ # type. It must have `:fields` value.
+ #
+ # @option description
+ # [::Array<Arrow::Field, Hash>,
+ # Hash{String, Symbol => Arrow::DataType, Hash, String, Symbol}]
+ # :fields The fields of the struct data type.
+ #
+ # @example Create a struct data type with {Arrow::Field} and field description
+ # fields = [
+ # Arrow::Field.new("visible", :boolean),
+ # {name: "count", type: :int32},
+ # ]
+ # Arrow::StructDataType.new(fields: fields)
+ #
+ # @example Create a struct data type with {Arrow::DataType} and data type description
+ # fields = {
+ # "visible" => Arrow::BooleanDataType.new,
+ # "count" => {type: :int32},
+ # }
+ # Arrow::StructDataType.new(fields: fields)
+ def initialize(fields)
+ if fields.is_a?(Hash) and fields.key?(:fields)
+ description = fields
+ fields = description[:fields]
+ end
+ if fields.is_a?(Hash)
+ fields = fields.collect do |name, data_type|
+ Field.new(name, data_type)
+ end
+ else
+ fields = fields.collect do |field|
+ field = Field.new(field) unless field.is_a?(Field)
+ field
+ end
+ end
+ initialize_raw(fields)
+ end
+
+ alias_method :[], :find_field
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/symbol-values-appendable.rb b/src/arrow/ruby/red-arrow/lib/arrow/symbol-values-appendable.rb
new file mode 100644
index 000000000..66ab0a490
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/symbol-values-appendable.rb
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ module SymbolValuesAppendable
+ def append_values(values, is_valids=nil)
+ builder = create_values_array_builder
+ values = values.collect do |value|
+ case value
+ when Symbol
+ value.to_s
+ else
+ value
+ end
+ end
+ builder.append_values(values, is_valids)
+ append_array(builder.finish)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/table-concatenate-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/table-concatenate-options.rb
new file mode 100644
index 000000000..730bce1c8
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/table-concatenate-options.rb
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class TableConcatenateOptions
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when Hash
+ options = new
+ value.each do |k, v|
+ options.public_send("#{k}=", value)
+ end
+ options
+ else
+ nil
+ end
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/table-formatter.rb b/src/arrow/ruby/red-arrow/lib/arrow/table-formatter.rb
new file mode 100644
index 000000000..d039679f9
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/table-formatter.rb
@@ -0,0 +1,190 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ # TODO: Most of this code should be implemented in Apache Arrow C++.
+ class TableFormatter
+ # @private
+ class ColumnFormatter
+ attr_reader :column
+ attr_reader :head_values
+ attr_reader :tail_values
+ attr_reader :sample_values
+ def initialize(column, head_values, tail_values)
+ @column = column
+ @head_values = head_values
+ @tail_values = tail_values
+ @sample_values = head_values + tail_values # all shown values; used to estimate widths
+ @field_value_widths = {} # cache: struct field => formatted value width
+ end
+
+ def data_type
+ @data_type ||= @column.data_type # memoized
+ end
+
+ def name
+ @name ||= @column.name # memoized
+ end
+
+ def aligned_name
+ @aligned_name ||= format_aligned_name(name, data_type, @sample_values) # memoized; its size is used as the cell width
+ end
+
+ FLOAT_N_DIGITS = 10
+ FORMATTED_NULL = "(null)"
+
+ def format_value(value, width=0)
+ case value
+ when ::Time
+ value.iso8601
+ when Float
+ "%*f" % [[width, FLOAT_N_DIGITS].max, value]
+ when Integer
+ "%*d" % [width, value]
+ when Hash
+ formatted_values = data_type.fields.collect do |field|
+ field_name = field.name
+ field_value_width = compute_field_value_width(field, @sample_values)
+ formatted_name = format_value(field_name, 0)
+ formatted_value = format_value(value[field_name], field_value_width)
+ "#{formatted_name}: #{formatted_value}"
+ end
+ formatted = "{"
+ formatted << formatted_values.join(", ")
+ formatted << "}"
+ "%-*s" % [width, formatted]
+ when nil
+ "%*s" % [width, FORMATTED_NULL]
+ else
+ "%-*s" % [width, value.to_s]
+ end
+ end
+
+ private
+ def compute_field_value_width(field, sample_values)
+ unless @field_value_widths.key?(field)
+ field_name = field.name
+ field_sample_values = sample_values.collect do |v|
+ (v || {})[field_name]
+ end
+ field_aligned_name = format_aligned_name("",
+ field.data_type,
+ field_sample_values)
+ @field_value_widths[field] = field_aligned_name.size
+ end
+ @field_value_widths[field]
+ end
+
+ def format_aligned_name(name, data_type, sample_values)
+ case data_type
+ when TimestampDataType
+ "%*s" % [::Time.now.iso8601.size, name]
+ when IntegerDataType
+ have_null = false
+ have_negative = false
+ max_value = nil
+ sample_values.each do |value|
+ if value.nil?
+ have_null = true
+ else
+ if max_value.nil?
+ max_value = value.abs
+ else
+ max_value = [value.abs, max_value].max
+ end
+ have_negative = true if value.negative?
+ end
+ end
+ if max_value.nil?
+ width = 0
+ elsif max_value.zero?
+ width = 1
+ else
+ width = (Math.log10(max_value) + 1).truncate
+ end
+ width += 1 if have_negative # Need "-"
+ width = [width, FORMATTED_NULL.size].max if have_null
+ "%*s" % [width, name]
+ when FloatDataType, DoubleDataType
+ "%*s" % [FLOAT_N_DIGITS, name]
+ when StructDataType
+ field_widths = data_type.fields.collect do |field|
+ field_value_width = compute_field_value_width(field, sample_values)
+ field.name.size + ": ".size + field_value_width
+ end
+ width = "{}".size + field_widths.sum
+ if field_widths.size > 0
+ width += (", ".size * (field_widths.size - 1))
+ end
+ "%*s" % [width, name]
+ else
+ name
+ end
+ end
+ end
+
+ def initialize(table, options={})
+ @table = table
+ @options = options # only :border is read by #format
+ end
+
+ def format
+ text = ""
+ n_rows = @table.n_rows
+ border = @options[:border] || 10
+
+ head_limit = [border, n_rows].min
+
+ tail_start = [border, n_rows - border].max
+ tail_limit = n_rows - tail_start
+
+ column_formatters = @table.columns.collect do |column|
+ head_values = column.each.take(head_limit)
+ if tail_limit > 0
+ tail_values = column.reverse_each.take(tail_limit).reverse
+ else
+ tail_values = []
+ end
+ ColumnFormatter.new(column, head_values, tail_values)
+ end
+
+ format_header(text, column_formatters)
+ return text if n_rows.zero?
+
+ n_digits = (Math.log10(n_rows) + 1).truncate
+ format_rows(text,
+ column_formatters,
+ column_formatters.collect(&:head_values).transpose,
+ n_digits,
+ 0)
+ return text if n_rows <= border
+
+
+ if head_limit != tail_start
+ format_ellipsis(text)
+ end
+
+ format_rows(text,
+ column_formatters,
+ column_formatters.collect(&:tail_values).transpose,
+ n_digits,
+ tail_start)
+
+ text
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/table-list-formatter.rb b/src/arrow/ruby/red-arrow/lib/arrow/table-list-formatter.rb
new file mode 100644
index 000000000..4fe293416
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/table-list-formatter.rb
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ # TODO: Almost codes should be implemented in Apache Arrow C++.
+ class TableListFormatter < TableFormatter
+ private
+ def format_header(text, columns)
+ end
+
+ def format_rows(text, column_formatters, rows, n_digits, start_offset)
+ rows.each_with_index do |row, nth_row|
+ text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
+ row.each_with_index do |column_value, nth_column|
+ column_formatter = column_formatters[nth_column]
+ formatted_name = column_formatter.name
+ formatted_value = column_formatter.format_value(column_value)
+ text << "#{formatted_name}: #{formatted_value}\n"
+ end
+ end
+ end
+
+ def format_ellipsis(text)
+ text << "...\n"
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/table-loader.rb b/src/arrow/ruby/red-arrow/lib/arrow/table-loader.rb
new file mode 100644
index 000000000..8f43b69df
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/table-loader.rb
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "uri"
+
+module Arrow
+ class TableLoader
+ class << self
+ def load(input, options={})
+ new(input, options).load
+ end
+ end
+
+ def initialize(input, options={})
+ input = input.to_path if input.respond_to?(:to_path) # use the path of path-like objects
+ @input = input
+ @options = options
+ fill_options # fills in :format and :compression unless both are given
+ end
+
+ def load
+ if @input.is_a?(URI)
+ custom_load_method = "load_from_uri"
+ elsif @input.is_a?(String) and ::File.directory?(@input)
+ custom_load_method = "load_from_directory"
+ else
+ custom_load_method = "load_from_file"
+ end
+ unless respond_to?(custom_load_method, true)
+ available_schemes = []
+ (methods(true) | private_methods(true)).each do |name|
+ match_data = /\Aload_from_/.match(name.to_s)
+ if match_data
+ available_schemes << match_data.post_match
+ end
+ end
+ message = "Arrow::Table load source must be one of ["
+ message << available_schemes.join(", ")
+ message << "]: #{@input.inspect}"
+ raise ArgumentError, message
+ end
+ __send__(custom_load_method)
+ end
+
+ private
+ def load_from_file
+ format = @options[:format]
+ custom_load_method = "load_as_#{format}"
+ unless respond_to?(custom_load_method, true)
+ available_formats = []
+ (methods(true) | private_methods(true)).each do |name|
+ match_data = /\Aload_as_/.match(name.to_s)
+ if match_data
+ available_formats << match_data.post_match
+ end
+ end
+ deprecated_formats = ["batch", "stream"]
+ available_formats -= deprecated_formats
+ message = "Arrow::Table load format must be one of ["
+ message << available_formats.join(", ")
+ message << "]: #{format.inspect}"
+ raise ArgumentError, message
+ end
+ if method(custom_load_method).arity.zero?
+ __send__(custom_load_method)
+ else
+ # For backward compatibility.
+ __send__(custom_load_method, @input)
+ end
+ end
+
+ def fill_options
+ if @options[:format] and @options.key?(:compression)
+ return
+ end
+
+ case @input
+ when Buffer
+ info = {}
+ when URI
+ extension = PathExtension.new(@input.path)
+ info = extension.extract
+ else
+ extension = PathExtension.new(@input)
+ info = extension.extract
+ end
+ format = info[:format]
+ @options = @options.dup
+ if format
+ @options[:format] ||= format.to_sym
+ else
+ @options[:format] ||= :arrow
+ end
+ unless @options.key?(:compression)
+ @options[:compression] = info[:compression]
+ end
+ end
+
+ def open_input_stream
+ if @input.is_a?(Buffer)
+ BufferInputStream.new(@input)
+ else
+ MemoryMappedInputStream.new(@input)
+ end
+ end
+
+ def load_raw(input, reader)
+ schema = reader.schema
+ record_batches = []
+ reader.each do |record_batch|
+ record_batches << record_batch
+ end
+ table = Table.new(schema, record_batches)
+ table.instance_variable_set(:@input, input)
+ table
+ end
+
+ def load_as_arrow
+ input = nil
+ reader = nil
+ error = nil
+ reader_class_candidates = [
+ RecordBatchFileReader,
+ RecordBatchStreamReader,
+ ]
+ reader_class_candidates.each do |reader_class_candidate|
+ input = open_input_stream
+ begin
+ reader = reader_class_candidate.new(input)
+ rescue Arrow::Error
+ error = $!
+ else
+ break
+ end
+ end
+ raise error if reader.nil?
+ load_raw(input, reader)
+ end
+
+ # @since 1.0.0
+ def load_as_arrow_file
+ input = open_input_stream
+ reader = RecordBatchFileReader.new(input)
+ load_raw(input, reader)
+ end
+
+ # @deprecated Use `format: :arrow_file` instead.
+ def load_as_batch
+ load_as_arrow_file
+ end
+
+ # @since 1.0.0
+ def load_as_arrow_streaming
+ input = open_input_stream
+ reader = RecordBatchStreamReader.new(input)
+ load_raw(input, reader)
+ end
+
+ # @deprecated Use `format: :arrow_streaming` instead.
+ def load_as_stream
+ load_as_arrow_streaming
+ end
+
+ if Arrow.const_defined?(:ORCFileReader)
+ def load_as_orc
+ input = open_input_stream
+ reader = ORCFileReader.new(input)
+ field_indexes = @options[:field_indexes]
+ reader.set_field_indexes(field_indexes) if field_indexes
+ table = reader.read_stripes
+ table.instance_variable_set(:@input, input)
+ table
+ end
+ end
+
+ def csv_load(options)
+ options.delete(:format)
+ if @input.is_a?(Buffer)
+ CSVLoader.load(@input.data.to_s, **options)
+ else
+ CSVLoader.load(Pathname.new(@input), **options)
+ end
+ end
+
+ def load_as_csv
+ csv_load(@options.dup)
+ end
+
+ def load_as_tsv
+ options = @options.dup
+ options[:delimiter] = "\t"
+ csv_load(options.dup)
+ end
+
+ def load_as_feather
+ input = open_input_stream
+ reader = FeatherFileReader.new(input)
+ table = reader.read
+ table.instance_variable_set(:@input, input)
+ table
+ end
+
+ def load_as_json
+ input = open_input_stream
+ reader = JSONReader.new(input)
+ table = reader.read
+ table.instance_variable_set(:@input, input)
+ table
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/table-saver.rb b/src/arrow/ruby/red-arrow/lib/arrow/table-saver.rb
new file mode 100644
index 000000000..207a10a82
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/table-saver.rb
@@ -0,0 +1,195 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class TableSaver
+ class << self
+ def save(table, output, options={})
+ new(table, output, options).save
+ end
+ end
+
+ def initialize(table, output, options={})
+ @table = table
+ output = output.to_path if output.respond_to?(:to_path) # use the path of path-like objects
+ @output = output
+ @options = options
+ fill_options # fills in :format and :compression unless both are given
+ end
+
+ def save
+ if @output.is_a?(URI)
+ custom_save_method = "save_to_uri"
+ else
+ custom_save_method = "save_to_file"
+ end
+ unless respond_to?(custom_save_method, true)
+ available_schemes = []
+ (methods(true) | private_methods(true)).each do |name|
+ match_data = /\Asave_to_/.match(name.to_s)
+ if match_data
+ available_schemes << match_data.post_match
+ end
+ end
+ message = "Arrow::Table save source must be one of ["
+ message << available_schemes.join(", ")
+ message << "]: #{@output.scheme.inspect}"
+ raise ArgumentError, message
+ end
+ __send__(custom_save_method)
+ end
+
+ private
+ def save_to_file
+ format = @options[:format]
+ custom_save_method = "save_as_#{format}"
+ unless respond_to?(custom_save_method, true)
+ available_formats = []
+ (methods(true) | private_methods(true)).each do |name|
+ match_data = /\Asave_as_/.match(name.to_s)
+ if match_data
+ available_formats << match_data.post_match
+ end
+ end
+ deprecated_formats = ["batch", "stream"]
+ available_formats -= deprecated_formats
+ message = "Arrow::Table save format must be one of ["
+ message << available_formats.join(", ")
+ message << "]: #{format.inspect}"
+ raise ArgumentError, message
+ end
+ if method(custom_save_method).arity.zero?
+ __send__(custom_save_method)
+ else
+ # For backward compatibility.
+ __send__(custom_save_method, @output)
+ end
+ end
+
+ def fill_options
+ if @options[:format] and @options.key?(:compression)
+ return
+ end
+
+ case @output
+ when Buffer
+ info = {}
+ when URI
+ extension = PathExtension.new(@output.path)
+ info = extension.extract
+ else
+ extension = PathExtension.new(@output)
+ info = extension.extract
+ end
+ format = info[:format]
+ @options = @options.dup
+ if format
+ @options[:format] ||= format.to_sym
+ else
+ @options[:format] ||= :arrow
+ end
+ unless @options.key?(:compression)
+ @options[:compression] = info[:compression]
+ end
+ end
+
+ def open_raw_output_stream(&block)
+ if @output.is_a?(Buffer)
+ BufferOutputStream.open(@output, &block)
+ else
+ FileOutputStream.open(@output, false, &block)
+ end
+ end
+
+ def open_output_stream(&block)
+ compression = @options[:compression]
+ if compression
+ codec = Codec.new(compression)
+ open_raw_output_stream do |raw_output|
+ CompressedOutputStream.open(codec, raw_output) do |output|
+ yield(output)
+ end
+ end
+ else
+ open_raw_output_stream(&block)
+ end
+ end
+
+ def save_raw(writer_class)
+ open_output_stream do |output|
+ writer_class.open(output, @table.schema) do |writer|
+ writer.write_table(@table)
+ end
+ end
+ end
+
+ def save_as_arrow # `format: :arrow` is written in the Arrow file format
+ save_as_arrow_file
+ end
+
+ # @since 1.0.0
+ def save_as_arrow_file
+ save_raw(RecordBatchFileWriter)
+ end
+
+ # @deprecated Use `format: :arrow_file` instead.
+ def save_as_batch
+ save_as_arrow_file
+ end
+
+ # @since 1.0.0
+ def save_as_arrow_streaming
+ save_raw(RecordBatchStreamWriter)
+ end
+
+ # @deprecated Use `format: :arrow_streaming` instead.
+ def save_as_stream
+ save_as_arrow_streaming
+ end
+
+ def csv_save(**options)
+ open_output_stream do |output|
+ csv = CSV.new(output, **options)
+ names = @table.schema.fields.collect(&:name)
+ csv << names
+ @table.raw_records.each do |record|
+ csv << record
+ end
+ end
+ end
+
+ def save_as_csv
+ csv_save
+ end
+
+ def save_as_tsv
+ csv_save(col_sep: "\t")
+ end
+
+ def save_as_feather
+ properties = FeatherWriteProperties.new
+ properties.class.properties.each do |name|
+ value = @options[name.to_sym]
+ next if value.nil?
+ properties.__send__("#{name}=", value)
+ end
+ open_raw_output_stream do |output|
+ @table.write_as_feather(output, properties)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/table-table-formatter.rb b/src/arrow/ruby/red-arrow/lib/arrow/table-table-formatter.rb
new file mode 100644
index 000000000..36121e1b6
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/table-table-formatter.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "time"
+
+module Arrow
+ # TODO: Almost codes should be implemented in Apache Arrow C++.
+ class TableTableFormatter < TableFormatter
+ private
+ def format_header(text, column_formatters)
+ column_formatters.each do |column_formatter|
+ text << "\t"
+ text << column_formatter.aligned_name
+ end
+ text << "\n"
+ end
+
+ def format_rows(text, column_formatters, rows, n_digits, start_offset)
+ rows.each_with_index do |row, nth_row|
+ text << ("%*d" % [n_digits, start_offset + nth_row])
+ row.each_with_index do |column_value, nth_column|
+ text << "\t"
+ column_formatter = column_formatters[nth_column]
+ aligned_name = column_formatter.aligned_name
+ text << column_formatter.format_value(column_value, aligned_name.size)
+ end
+ text << "\n"
+ end
+ end
+
+ def format_ellipsis(text)
+ text << "...\n"
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/table.rb b/src/arrow/ruby/red-arrow/lib/arrow/table.rb
new file mode 100644
index 000000000..e8aa39bac
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/table.rb
@@ -0,0 +1,519 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+require "arrow/raw-table-converter"
+
+module Arrow
+ class Table
+ include ColumnContainable
+ include GenericFilterable
+ include GenericTakeable
+ include RecordContainable
+
+ class << self
+ def load(path, options={})
+ TableLoader.load(path, options)
+ end
+ end
+
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::Table}.
+ #
+ # @overload initialize(columns)
+ #
+ # @param columns [::Array<Arrow::Column>] The columns of the table.
+ #
+ # @example Create a table from columns
+ # count_field = Arrow::Field.new("count", :uint32)
+ # count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
+ # count_column = Arrow::Column.new(count_field, count_array)
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
+ # visible_column = Arrow::Column.new(visible_field, visible_array)
+ # Arrow::Table.new([count_column, visible_column])
+ #
+ # @overload initialize(raw_table)
+ #
+ # @param raw_table [Hash<String, Arrow::Array>]
+ # The pairs of column name and values of the table. Column values is
+ # `Arrow::Array`.
+ #
+ # @example Create a table from column name and values
+ # Arrow::Table.new("count" => Arrow::UInt32Array.new([0, 2, nil, 4]),
+ # "visible" => Arrow::BooleanArray.new([true, nil, nil, false]))
+ #
+ # @overload initialize(raw_table)
+ #
+ # @param raw_table [Hash<String, Arrow::ChunkedArray>]
+ # The pairs of column name and values of the table. Column values is
+ # `Arrow::ChunkedArray`.
+ #
+ # @example Create a table from column name and values
+ # count_chunks = [
+ # Arrow::UInt32Array.new([0, 2]),
+ # Arrow::UInt32Array.new([nil, 4]),
+ # ]
+ # visible_chunks = [
+ # Arrow::BooleanArray.new([true]),
+ # Arrow::BooleanArray.new([nil, nil, false]),
+ # ]
+ # Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks),
+ # "visible" => Arrow::ChunkedArray.new(visible_chunks))
+ #
+ # @overload initialize(raw_table)
+ #
+ # @param raw_table [Hash<String, ::Array>]
+ # The pairs of column name and values of the table. Column values is
+ # `Array`.
+ #
+ # @example Create a table from column name and values
+ # Arrow::Table.new("count" => [0, 2, nil, 4],
+ # "visible" => [true, nil, nil, false])
+ #
+ # @overload initialize(schema, columns)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param columns [::Array<Arrow::Column>] The data of the table.
+ #
+ # @example Create a table from schema and columns
+ # count_field = Arrow::Field.new("count", :uint32)
+ # count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
+ # count_column = Arrow::Column.new(count_field, count_array)
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
+ # visible_column = Arrow::Column.new(visible_field, visible_array)
+ # Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
+ # [count_column, visible_column])
+ #
+ # @overload initialize(schema, arrays)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param arrays [::Array<Arrow::Array>] The data of the table.
+ #
+ # @example Create a table from schema and arrays
+ # count_field = Arrow::Field.new("count", :uint32)
+ # count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
+ # Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
+ # [count_array, visible_array])
+ #
+ # @overload initialize(schema, record_batches)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param record_batches [::Array<Arrow::RecordBatch>] The data of the table.
+ #
+ # @example Create a table from schema and record batches
+ # count_field = Arrow::Field.new("count", :uint32)
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # schema = Arrow::Schema.new([count_field, visible_field])
+ # record_batches = [
+ # Arrow::RecordBatch.new(schema, [[0, true], [2, nil], [nil, nil]]),
+ # Arrow::RecordBatch.new(schema, [[4, false]]),
+ # ]
+ # Arrow::Table.new(schema, record_batches)
+ #
+ # @overload initialize(schema, raw_records)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param raw_records [::Array<::Array>] The data of the table as primitive
+ # Ruby objects.
+ #
+ # @example Create a table from schema and raw records
+ # schema = {
+ # count: :uint32,
+ # visible: :boolean,
+ # }
+ # raw_records = [
+ # [0, true],
+ # [2, nil],
+ # [nil, nil],
+ # [4, false],
+ # ]
+ # Arrow::Table.new(schema, raw_records)
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ raw_table_converter = RawTableConverter.new(args[0])
+ schema = raw_table_converter.schema
+ values = raw_table_converter.values
+ when 2
+ schema = args[0]
+ schema = Schema.new(schema) unless schema.is_a?(Schema)
+ values = args[1]
+ case values[0]
+ when ::Array
+ values = [RecordBatch.new(schema, values)]
+ when Column
+ values = values.collect(&:data)
+ end
+ else
+ message = "wrong number of arguments (given #{n_args}, expected 1..2)"
+ raise ArgumentError, message
+ end
+ initialize_raw(schema, values)
+ end
+
+ def each_record_batch
+ return to_enum(__method__) unless block_given?
+
+ reader = TableBatchReader.new(self)
+ while record_batch = reader.read_next
+ yield(record_batch)
+ end
+ end
+
+ alias_method :size, :n_rows
+ alias_method :length, :n_rows
+
+ alias_method :slice_raw, :slice
+
+ # @overload slice(offset, length)
+ #
+ # @param offset [Integer] The offset of sub Arrow::Table.
+ # @param length [Integer] The length of sub Arrow::Table.
+ # @return [Arrow::Table]
+ # The sub `Arrow::Table` that covers only from
+ # `offset` to `offset + length` range.
+ #
+ # @overload slice(index)
+ #
+ # @param index [Integer] The index in this table.
+ # @return [Arrow::Record]
+ # The `Arrow::Record` corresponding to index of
+ # the table.
+ #
+ # @overload slice(booleans)
+ #
+ # @param booleans [::Array<Boolean>]
+ # The values indicating the target rows.
+ # @return [Arrow::Table]
+ # The sub `Arrow::Table` that covers only rows of indices
+ # the values of `booleans` is true.
+ #
+ # @overload slice(boolean_array)
+ #
+ # @param boolean_array [Arrow::BooleanArray]
+ # The values indicating the target rows.
+ # @return [Arrow::Table]
+ # The sub `Arrow::Table` that covers only rows of indices
+ # the values of `boolean_array` is true.
+ #
+ # @overload slice(range)
+ #
+ # @param range [Range] The range indicating the target rows.
+ # @return [Arrow::Table]
+ # The sub `Arrow::Table` that covers only rows of the range of indices.
+ #
+ # @overload slice(conditions)
+ #
+ # @param conditions [Hash] The conditions to select records.
+ # @return [Arrow::Table]
+ # The sub `Arrow::Table` that covers only rows matched by condition
+ #
+ # @overload slice
+ #
+ # @yield [slicer] Gives slicer that constructs condition to select records.
+ # @yieldparam slicer [Arrow::Slicer] The slicer that helps us to
+ # build condition.
+ # @yieldreturn [Arrow::Slicer::Condition, ::Array<Arrow::Slicer::Condition>]
+ # The condition to select records.
+ # @return [Arrow::Table]
+ # The sub `Arrow::Table` that covers only rows matched by condition
+ # specified by slicer.
+ def slice(*args)
+ slicers = [] # collected first, evaluated in the loop at the end
+ if block_given?
+ unless args.empty?
+ raise ArgumentError, "must not specify both arguments and block"
+ end
+ block_slicer = yield(Slicer.new(self))
+ case block_slicer
+ when ::Array
+ slicers.concat(block_slicer) # the block may return several conditions
+ else
+ slicers << block_slicer
+ end
+ else
+ expected_n_args = nil # set only when the argument count is wrong
+ case args.size
+ when 1
+ case args[0]
+ when Integer
+ index = args[0]
+ index += n_rows if index < 0 # negative index counts from the end
+ return nil if index < 0
+ return nil if index >= n_rows
+ return Record.new(self, index) # a single index returns a record, not a table
+ when Hash
+ condition_pairs = args[0]
+ slicer = Slicer.new(self)
+ conditions = []
+ condition_pairs.each do |key, value|
+ case value
+ when Range
+ # TODO: Optimize "begin <= key <= end" case by missing "between" kernel
+ # https://issues.apache.org/jira/browse/ARROW-9843
+ unless value.begin.nil? # beginless range: no lower bound condition
+ conditions << (slicer[key] >= value.begin)
+ end
+ unless value.end.nil? # endless range: no upper bound condition
+ if value.exclude_end?
+ conditions << (slicer[key] < value.end)
+ else
+ conditions << (slicer[key] <= value.end)
+ end
+ end
+ else
+ conditions << (slicer[key] == value)
+ end
+ end
+ slicers << conditions.inject(:&) # all conditions are combined with &
+ else
+ slicers << args[0]
+ end
+ when 2
+ offset, length = args
+ slicers << (offset...(offset + length)) # (offset, length) becomes an end-exclusive range
+ else
+ expected_n_args = "1..2"
+ end
+ if expected_n_args
+ message = "wrong number of arguments " +
+ "(given #{args.size}, expected #{expected_n_args})"
+ raise ArgumentError, message
+ end
+ end
+
+ filter_options = Arrow::FilterOptions.new
+ filter_options.null_selection_behavior = :emit_null
+ sliced_tables = []
+ slicers.each do |slicer|
+ slicer = slicer.evaluate if slicer.respond_to?(:evaluate) # e.g. conditions built with Slicer
+ case slicer
+ when Integer
+ slicer += n_rows if slicer < 0
+ sliced_tables << slice_by_range(slicer, n_rows - 1) # from the index through the last row
+ when Range
+ original_from = from = slicer.first
+ to = slicer.last
+ to -= 1 if slicer.exclude_end? # slice_by_range gets an inclusive end
+ from += n_rows if from < 0
+ if from < 0 or from >= n_rows
+ message =
+ "offset is out of range (-#{n_rows + 1},#{n_rows}): " +
+ "#{original_from}"
+ raise ArgumentError, message
+ end
+ to += n_rows if to < 0
+ sliced_tables << slice_by_range(from, to)
+ when ::Array, BooleanArray, ChunkedArray
+ sliced_tables << filter(slicer, filter_options)
+ else
+ message = "slicer must be Integer, Range, (from, to), " +
+ "Arrow::ChunkedArray of Arrow::BooleanArray, " +
+ "Arrow::BooleanArray or Arrow::Slicer::Condition: #{slicer.inspect}"
+ raise ArgumentError, message
+ end
+ end
+ if sliced_tables.size > 1
+ sliced_tables[0].concatenate(sliced_tables[1..-1]) # results of several slicers are concatenated in order
+ else
+ sliced_tables[0]
+ end
+ end
+
+ # TODO
+ #
+ # @return [Arrow::Table]
+ def merge(other)
+ added_columns = {}
+ removed_columns = {}
+
+ case other
+ when Hash
+ other.each do |name, value|
+ name = name.to_s
+ if value
+ added_columns[name] = ensure_raw_column(name, value)
+ else
+ removed_columns[name] = true
+ end
+ end
+ when Table
+ added_columns = {}
+ other.columns.each do |column|
+ name = column.name
+ added_columns[name] = ensure_raw_column(name, column)
+ end
+ else
+ message = "merge target must be Hash or Arrow::Table: " +
+ "<#{other.inspect}>: #{inspect}"
+ raise ArgumentError, message
+ end
+
+ new_columns = []
+ columns.each do |column|
+ column_name = column.name
+ new_column = added_columns.delete(column_name)
+ if new_column
+ new_columns << new_column
+ next
+ end
+ next if removed_columns.key?(column_name)
+ new_columns << ensure_raw_column(column_name, column)
+ end
+ added_columns.each do |name, new_column|
+ new_columns << new_column
+ end
+ new_fields = []
+ new_arrays = []
+ new_columns.each do |new_column|
+ new_fields << new_column[:field]
+ new_arrays << new_column[:data]
+ end
+ self.class.new(new_fields, new_arrays)
+ end
+
+ alias_method :remove_column_raw, :remove_column
+ def remove_column(name_or_index)
+ case name_or_index
+ when String, Symbol
+ name = name_or_index.to_s
+ index = columns.index {|column| column.name == name}
+ if index.nil?
+ message = "unknown column: #{name_or_index.inspect}: #{inspect}"
+ raise KeyError.new(message)
+ end
+ else
+ index = name_or_index
+ index += n_columns if index < 0
+ if index < 0 or index >= n_columns
+ message = "out of index (0..#{n_columns - 1}): " +
+ "#{name_or_index.inspect}: #{inspect}"
+ raise IndexError.new(message)
+ end
+ end
+ remove_column_raw(index)
+ end
+
+ # Experimental
+ def group(*keys)
+ Group.new(self, keys)
+ end
+
+ # Experimental
+ def window(size: nil)
+ RollingWindow.new(self, size)
+ end
+
+ def save(output, options={})
+ saver = TableSaver.new(self, output, options)
+ saver.save
+ end
+
+ def pack
+ packed_arrays = columns.collect do |column|
+ column.data.pack
+ end
+ self.class.new(schema, packed_arrays)
+ end
+
+ alias_method :to_s_raw, :to_s
+ def to_s(options={})
+ format = options[:format]
+ case format
+ when :column
+ return to_s_raw
+ when :list
+ formatter_class = TableListFormatter
+ when :table, nil
+ formatter_class = TableTableFormatter
+ else
+ message = ":format must be :column, :list, :table or nil"
+ raise ArgumentError, "#{message}: <#{format.inspect}>"
+ end
+ formatter = formatter_class.new(self, options)
+ formatter.format
+ end
+
+ alias_method :inspect_raw, :inspect
+ def inspect
+ "#{super}\n#{to_s}"
+ end
+
+ def respond_to_missing?(name, include_private)
+ return true if find_column(name)
+ super
+ end
+
+ def method_missing(name, *args, &block)
+ if args.empty?
+ column = find_column(name)
+ return column if column
+ end
+ super
+ end
+
+ private
+ def slice_by_range(from, to)
+ slice_raw(from, to - from + 1)
+ end
+
+ def ensure_raw_column(name, data)
+ case data
+ when Array
+ {
+ field: Field.new(name, data.value_data_type),
+ data: ChunkedArray.new([data]),
+ }
+ when ChunkedArray
+ {
+ field: Field.new(name, data.value_data_type),
+ data: data,
+ }
+ when Column
+ column = data
+ data = column.data
+ data = ChunkedArray.new([data]) unless data.is_a?(ChunkedArray)
+ {
+ field: column.field,
+ data: data,
+ }
+ else
+ message = "column must be Arrow::Array or Arrow::Column: " +
+ "<#{name}>: <#{data.inspect}>: #{inspect}"
+ raise ArgumentError, message
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/tensor.rb b/src/arrow/ruby/red-arrow/lib/arrow/tensor.rb
new file mode 100644
index 000000000..fdcc6c1ae
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/tensor.rb
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Tensor
+ def to_arrow
+ self
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/time.rb b/src/arrow/ruby/red-arrow/lib/arrow/time.rb
new file mode 100644
index 000000000..3d25a6403
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/time.rb
@@ -0,0 +1,159 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Time
+ attr_reader :unit
+ attr_reader :value
+ def initialize(unit, value)
+ @unit = unit
+ @value = value
+ @unconstructed = false
+ end
+
+ def ==(other)
+ other.is_a?(self.class) and
+ positive? == other.positive? and
+ hour == other.hour and
+ minute == other.minute and
+ second == other.second and
+ nano_second == other.nano_second
+ end
+
+ def cast(target_unit)
+ return self.class.new(@unit, @value) if @unit == target_unit
+
+ target_value = (hour * 60 * 60) + (minute * 60) + second
+ case target_unit
+ when TimeUnit::MILLI
+ target_value *= 1000
+ target_value += nano_second / 1000 / 1000
+ when TimeUnit::MICRO
+ target_value *= 1000 * 1000
+ target_value += nano_second / 1000
+ when TimeUnit::NANO
+ target_value *= 1000 * 1000 * 1000
+ target_value += nano_second
+ end
+ target_value = -target_value if negative?
+ self.class.new(target_unit, target_value)
+ end
+
+ def to_f
+ case @unit
+ when TimeUnit::SECOND
+ @value.to_f
+ when TimeUnit::MILLI
+ @value.to_f / 1000.0
+ when TimeUnit::MICRO
+ @value.to_f / 1000.0 / 1000.0
+ when TimeUnit::NANO
+ @value.to_f / 1000.0 / 1000.0 / 1000.0
+ end
+ end
+
+ def positive?
+ @value.positive?
+ end
+
+ def negative?
+ @value.negative?
+ end
+
+ def hour
+ unconstruct
+ @hour
+ end
+
+ def minute
+ unconstruct
+ @minute
+ end
+ alias_method :min, :minute
+
+ def second
+ unconstruct
+ @second
+ end
+ alias_method :sec, :second
+
+ def nano_second
+ unconstruct
+ @nano_second
+ end
+ alias_method :nsec, :nano_second
+
+ def to_s
+ unconstruct
+ if @nano_second.zero?
+ nano_second_string = ""
+ else
+ nano_second_string = (".%09d" % @nano_second).gsub(/0+\z/, "")
+ end
+ "%s%02d:%02d:%02d%s" % [
+ @value.negative? ? "-" : "",
+ @hour,
+ @minute,
+ @second,
+ nano_second_string,
+ ]
+ end
+
+ private
+ def unconstruct
+ return if @unconstructed
+ abs_value = @value.abs
+ case unit
+ when TimeUnit::SECOND
+ unconstruct_second(abs_value)
+ @nano_second = 0
+ when TimeUnit::MILLI
+ unconstruct_second(abs_value / 1000)
+ @nano_second = (abs_value % 1000) * 1000 * 1000
+ when TimeUnit::MICRO
+ unconstruct_second(abs_value / 1000 / 1000)
+ @nano_second = (abs_value % (1000 * 1000)) * 1000
+ when TimeUnit::NANO
+ unconstruct_second(abs_value / 1000 / 1000 / 1000)
+ @nano_second = abs_value % (1000 * 1000 * 1000)
+ else
+ raise ArgumentError, "invalid unit: #{@unit.inspect}"
+ end
+ @unconstructed = true
+ end
+
+ def unconstruct_second(abs_value_in_second)
+ if abs_value_in_second < 60
+ hour = 0
+ minute = 0
+ second = abs_value_in_second
+ elsif abs_value_in_second < (60 * 60)
+ hour = 0
+ minute = abs_value_in_second / 60
+ second = abs_value_in_second % 60
+ else
+ in_minute = abs_value_in_second / 60
+ hour = in_minute / 60
+ minute = in_minute % 60
+ second = abs_value_in_second % 60
+ end
+ @hour = hour
+ @minute = minute
+ @second = second
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/time32-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/time32-array-builder.rb
new file mode 100644
index 000000000..088f37c4e
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/time32-array-builder.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Time32ArrayBuilder
+ class << self
+ def build(unit_or_data_type, values)
+ builder = new(unit_or_data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :initialize_raw, :initialize
+ def initialize(unit_or_data_type)
+ case unit_or_data_type
+ when DataType
+ data_type = unit_or_data_type
+ else
+ unit = unit_or_data_type
+ data_type = Time32DataType.new(unit)
+ end
+ initialize_raw(data_type)
+ end
+
+ def unit
+ @unit ||= value_data_type.unit
+ end
+
+ private
+ def convert_to_arrow_value(value)
+ return value unless value.is_a?(Time)
+ value.cast(unit).value
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/time32-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/time32-array.rb
new file mode 100644
index 000000000..e01dd9732
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/time32-array.rb
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Time32Array
+ def get_value(i)
+ Time.new(unit, get_raw_value(i))
+ end
+
+ def unit
+ @unit ||= value_data_type.unit
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/time32-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/time32-data-type.rb
new file mode 100644
index 000000000..be1d04fa2
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/time32-data-type.rb
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Time32DataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::Time32DataType}.
+ #
+ # @overload initialize(unit)
+ #
+ # @param unit [Arrow::TimeUnit, Symbol] The unit of the
+ # time32 data type.
+ #
+ # The unit must be second or millisecond.
+ #
+ # @example Create a time32 data type with Arrow::TimeUnit
+ # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
+ #
+ # @example Create a time32 data type with Symbol
+ # Arrow::Time32DataType.new(:milli)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the time32 data
+ # type. It must have `:unit` value.
+ #
+ # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
+ # the time32 data type.
+ #
+ # The unit must be second or millisecond.
+ #
+ # @example Create a time32 data type with Arrow::TimeUnit
+ # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
+ #
+ # @example Create a time32 data type with Symbol
+ # Arrow::Time32DataType.new(unit: :milli)
+ def initialize(unit)
+ if unit.is_a?(Hash)
+ description = unit
+ unit = description[:unit]
+ end
+ initialize_raw(unit)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/time64-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/time64-array-builder.rb
new file mode 100644
index 000000000..dec15b8bf
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/time64-array-builder.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Time64ArrayBuilder
+ class << self
+ def build(unit_or_data_type, values)
+ builder = new(unit_or_data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :initialize_raw, :initialize
+ def initialize(unit_or_data_type)
+ case unit_or_data_type
+ when DataType
+ data_type = unit_or_data_type
+ else
+ unit = unit_or_data_type
+ data_type = Time64DataType.new(unit)
+ end
+ initialize_raw(data_type)
+ end
+
+ def unit
+ @unit ||= value_data_type.unit
+ end
+
+ private
+ def convert_to_arrow_value(value)
+ return value unless value.is_a?(Time)
+ value.cast(unit).value
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/time64-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/time64-array.rb
new file mode 100644
index 000000000..7fc2fd9ab
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/time64-array.rb
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Time64Array
+ def get_value(i)
+ Time.new(unit, get_raw_value(i))
+ end
+
+ def unit
+ @unit ||= value_data_type.unit
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/time64-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/time64-data-type.rb
new file mode 100644
index 000000000..13795aa83
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/time64-data-type.rb
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class Time64DataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::Time64DataType}.
+ #
+ # @overload initialize(unit)
+ #
+ # @param unit [Arrow::TimeUnit, Symbol] The unit of the
+ # time64 data type.
+ #
+ # The unit must be microsecond or nanosecond.
+ #
+ # @example Create a time64 data type with Arrow::TimeUnit
+ # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
+ #
+ # @example Create a time64 data type with Symbol
+ # Arrow::Time64DataType.new(:nano)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the time64 data
+ # type. It must have `:unit` value.
+ #
+ # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
+ # the time64 data type.
+ #
+ # The unit must be microsecond or nanosecond.
+ #
+ # @example Create a time64 data type with Arrow::TimeUnit
+ # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
+ #
+ # @example Create a time64 data type with Symbol
+ # Arrow::Time64DataType.new(unit: :nano)
+ def initialize(unit)
+ if unit.is_a?(Hash)
+ description = unit
+ unit = description[:unit]
+ end
+ initialize_raw(unit)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
new file mode 100644
index 000000000..68bcb0fec
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class TimestampArrayBuilder
+ class << self
+ def build(unit_or_data_type, values)
+ builder = new(unit_or_data_type)
+ builder.build(values)
+ end
+ end
+
+ alias_method :initialize_raw, :initialize
+ def initialize(unit_or_data_type)
+ case unit_or_data_type
+ when DataType
+ data_type = unit_or_data_type
+ else
+ unit = unit_or_data_type
+ data_type = TimestampDataType.new(unit)
+ end
+ initialize_raw(data_type)
+ end
+
+ private
+ def unit_id
+ @unit_id ||= value_data_type.unit.nick.to_sym
+ end
+
+ def convert_to_arrow_value(value)
+ if value.respond_to?(:to_time) and not value.is_a?(Time)
+ value = value.to_time
+ end
+
+ if value.is_a?(::Time)
+ case unit_id
+ when :second
+ value.to_i
+ when :milli
+ value.to_i * 1_000 + value.usec / 1_000
+ when :micro
+ value.to_i * 1_000_000 + value.usec
+ else
+ value.to_i * 1_000_000_000 + value.nsec
+ end
+ else
+ value
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/timestamp-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/timestamp-array.rb
new file mode 100644
index 000000000..011273487
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/timestamp-array.rb
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class TimestampArray
+ def get_value(i)
+ cast_to_time(get_raw_value(i))
+ end
+
+ def unit
+ @unit ||= value_data_type.unit
+ end
+
+ private
+ def cast_to_time(raw_value)
+ case unit
+ when TimeUnit::SECOND
+ ::Time.at(raw_value)
+ when TimeUnit::MILLI
+ ::Time.at(*raw_value.divmod(1_000))
+ when TimeUnit::MICRO
+ ::Time.at(*raw_value.divmod(1_000_000))
+ else
+ ::Time.at(raw_value / 1_000_000_000.0)
+ end
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/timestamp-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/timestamp-data-type.rb
new file mode 100644
index 000000000..cd91f567d
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/timestamp-data-type.rb
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ class TimestampDataType
+ alias_method :initialize_raw, :initialize
+ private :initialize_raw
+
+ # Creates a new {Arrow::TimestampDataType}.
+ #
+ # @overload initialize(unit)
+ #
+ # @param unit [Arrow::TimeUnit, Symbol] The unit of the
+ # timestamp data type.
+ #
+ # @example Create a timestamp data type with Arrow::TimeUnit
+ # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
+ #
+ # @example Create a timestamp data type with Symbol
+ # Arrow::TimestampDataType.new(:milli)
+ #
+ # @overload initialize(description)
+ #
+ # @param description [Hash] The description of the timestamp data
+ # type. It must have `:unit` value.
+ #
+ # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
+ # the timestamp data type.
+ #
+ # @example Create a timestamp data type with Arrow::TimeUnit
+ # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
+ #
+ # @example Create a timestamp data type with Symbol
+ # Arrow::TimestampDataType.new(unit: :milli)
+ def initialize(unit)
+ if unit.is_a?(Hash)
+ description = unit
+ unit = description[:unit]
+ end
+ initialize_raw(unit)
+ end
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/version.rb b/src/arrow/ruby/red-arrow/lib/arrow/version.rb
new file mode 100644
index 000000000..f830ff895
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/version.rb
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ VERSION = "6.0.1"
+
+ module Version
+ numbers, TAG = VERSION.split("-")
+ MAJOR, MINOR, MICRO = numbers.split(".").collect(&:to_i)
+ STRING = VERSION
+ end
+end
diff --git a/src/arrow/ruby/red-arrow/lib/arrow/writable.rb b/src/arrow/ruby/red-arrow/lib/arrow/writable.rb
new file mode 100644
index 000000000..02be9ddfc
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/lib/arrow/writable.rb
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+ module Writable
+ alias_method :<<, :write
+ end
+end