diff options
Diffstat (limited to 'src/arrow/ruby/red-arrow/lib')
95 files changed, 7680 insertions, 0 deletions
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow.rb b/src/arrow/ruby/red-arrow/lib/arrow.rb new file mode 100644 index 000000000..8fbc537bc --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow.rb @@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

require "extpp/setup"
require "gio2"

require "arrow/version"

require "arrow/loader"

# Entry point of the library: requiring this file loads the dependencies
# above and then runs the loader.
module Arrow
  # Library specific error class.
  class Error < StandardError
  end

  # Runs at require time. `Loader` comes from "arrow/loader" (not shown
  # here); presumably it populates this namespace -- confirm in the loader.
  Loader.load
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb new file mode 100644 index 000000000..f3a6ace58 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/aggregate-node-options.rb @@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class AggregateNodeOptions
    class << self
      # Builds an instance from a Hash description.
      #
      # @param value [Object] The candidate. Only a `Hash` that has a
      #   non-nil `:aggregations` entry is converted; `:keys` is optional.
      # @return [AggregateNodeOptions, nil] `nil` when `value` can't be
      #   converted.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        aggregations = value[:aggregations]
        return nil if aggregations.nil?

        new(aggregations, value[:keys])
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb b/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb new file mode 100644 index 000000000..9aac8239d --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/aggregation.rb @@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Aggregation
    class << self
      # Builds an instance from a Hash description.
      #
      # Recognized keys are `:function` (String or Symbol, required),
      # `:input` (required), `:options` and `:output`. The function name
      # gets a "hash_" prefix when it doesn't have one yet. A missing
      # `:output` defaults to "FUNCTION(INPUT)" where FUNCTION is the
      # name without the "hash_" prefix.
      #
      # @param value [Object] The candidate to be converted.
      # @return [Aggregation, nil] `nil` when `value` isn't a `Hash` or a
      #   required entry is missing or has an unsupported type.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        name = value[:function]
        name = name.to_s if name.is_a?(Symbol)
        # Also covers the missing (nil) function case.
        return nil unless name.is_a?(String)

        # TODO: Improve this when we have non hash based aggregate function
        name = "hash_#{name}" unless name.start_with?("hash_")

        input = value[:input]
        return nil if input.nil?

        output = value[:output]
        output = "#{name.delete_prefix("hash_")}(#{input})" if output.nil?

        new(name, value[:options], input, output)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb new file mode 100644 index 000000000..651aed962 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/array-builder.rb @@ -0,0 +1,214 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
require "date"

module Arrow
  class ArrayBuilder
    class << self
      # Builds an array from raw Ruby values.
      #
      # When called on a subclass, a new builder of that subclass is
      # used. When called on `ArrayBuilder` itself, the builder is
      # chosen by inspecting `values`; if nothing can be detected
      # (e.g. no non-nil value), `Arrow::StringArray.new(values)` is
      # used.
      #
      # @param values [#each] The raw values.
      # @return The result of the chosen builder's `build`.
      def build(values)
        if self != ArrayBuilder
          builder = new
          return builder.build(values)
        end

        builder_info = nil
        values.each do |value|
          builder_info = detect_builder_info(value, builder_info)
          # Stop scanning as soon as a value fixes the builder for good.
          break if builder_info and builder_info[:detected]
        end
        if builder_info
          builder = builder_info[:builder]
          builder.build(values)
        else
          Arrow::StringArray.new(values)
        end
      end

      # @param args [::Array] The arguments that would be given to `build`.
      # @return [Boolean] Whether the number of `args` matches the arity
      #   of this class' `build`.
      def buildable?(args)
        args.size == method(:build).arity
      end

      private
      # Chooses a builder for `value`.
      #
      # @param value [Object] The value to be inspected.
      # @param builder_info [Hash, nil] The information detected from
      #   the previous values.
      # @return [Hash, nil] A Hash that has `:builder` and, when no more
      #   values need to be inspected, `detected: true`. `nil` values
      #   keep the given `builder_info`.
      def detect_builder_info(value, builder_info)
        case value
        when nil
          builder_info
        when true, false
          {
            builder: BooleanArrayBuilder.new,
            detected: true,
          }
        when String
          {
            builder: StringArrayBuilder.new,
            detected: true,
          }
        when Symbol
          {
            builder: StringDictionaryArrayBuilder.new,
            detected: true,
          }
        when Float
          {
            builder: DoubleArrayBuilder.new,
            detected: true,
          }
        when Integer
          if value < 0
            {
              builder: IntArrayBuilder.new,
              detected: true,
            }
          else
            # Not marked as detected: later values are still inspected
            # and may replace this builder (e.g. a negative value).
            {
              builder: UIntArrayBuilder.new,
            }
          end
        # NOTE(review): `Time` is resolved inside `Arrow` here, so this is
        # presumably `Arrow::Time`; the core class is handled by the
        # `::Time` branch below -- confirm.
        when Time
          data_type = value.data_type
          case data_type.unit
          when TimeUnit::SECOND
            builder_info || {
              builder: Time32ArrayBuilder.new(data_type)
            }
          when TimeUnit::MILLI
            # Keep an already chosen 64-bit builder.
            if builder_info and builder_info[:builder].is_a?(Time64ArrayBuilder)
              builder_info
            else
              {
                builder: Time32ArrayBuilder.new(data_type),
              }
            end
          when TimeUnit::MICRO
            {
              builder: Time64ArrayBuilder.new(data_type),
            }
          when TimeUnit::NANO
            {
              builder: Time64ArrayBuilder.new(data_type),
              detected: true
            }
          end
        when ::Time
          data_type = TimestampDataType.new(:nano)
          {
            builder: TimestampArrayBuilder.new(data_type),
            detected: true,
          }
        # `DateTime` must be tested before `Date`: it is a `Date` subclass.
        when DateTime
          {
            builder: Date64ArrayBuilder.new,
            detected: true,
          }
        when Date
          {
            builder: Date32ArrayBuilder.new,
            detected: true,
          }
        when BigDecimal
          if value.to_arrow.is_a?(Decimal128)
            # Not marked as detected: later values are still inspected.
            {
              builder: Decimal128ArrayBuilder.new,
            }
          else
            {
              builder: Decimal256ArrayBuilder.new,
              detected: true,
            }
          end
        when ::Array
          # Detect the element builder from the nested values.
          sub_builder_info = nil
          value.each do |sub_value|
            sub_builder_info = detect_builder_info(sub_value, sub_builder_info)
            break if sub_builder_info and sub_builder_info[:detected]
          end
          if sub_builder_info and sub_builder_info[:detected]
            sub_value_data_type = sub_builder_info[:builder].value_data_type
            field = Field.new("item", sub_value_data_type)
            {
              builder: ListArrayBuilder.new(ListDataType.new(field)),
              detected: true,
            }
          else
            builder_info
          end
        else
          {
            builder: StringArrayBuilder.new,
            detected: true,
          }
        end
      end
    end

    # Appends all `values` and finishes the builder.
    #
    # @param values [::Array] The values to be appended.
    # @return The result of `finish`.
    def build(values)
      append(*values)
      finish
    end

    # Appends the given values. Each run of consecutive non-nil values
    # is passed to `append_values` at once and each run of consecutive
    # `nil`s is passed to `append_nulls` as a count. Non-nil values are
    # converted by `convert_to_arrow_value` when the builder provides it.
    #
    # @param values [::Array] The values to be appended.
    #
    # @since 0.12.0
    def append(*values)
      value_convertable = respond_to?(:convert_to_arrow_value, true)
      # `start_index...current_index` is the pending run; `status` tells
      # whether the pending run consists of values or nulls.
      start_index = 0
      current_index = 0
      status = :value

      values.each do |value|
        if value.nil?
          if status == :value
            # A value run ended: flush it if it isn't empty.
            if start_index != current_index
              target_values = values[start_index...current_index]
              if value_convertable
                target_values = target_values.collect do |v|
                  convert_to_arrow_value(v)
                end
              end
              append_values(target_values, nil)
              start_index = current_index
            end
            status = :null
          end
        else
          if status == :null
            # A null run ended: flush it.
            append_nulls(current_index - start_index)
            start_index = current_index
            status = :value
          end
        end
        current_index += 1
      end
      # Flush the last pending run.
      if start_index != current_index
        if status == :value
          if start_index == 0 and current_index == values.size
            # No nil at all: reuse `values` instead of slicing.
            target_values = values
          else
            target_values = values[start_index...current_index]
          end
          if value_convertable
            target_values = target_values.collect do |v|
              convert_to_arrow_value(v)
            end
          end
          append_values(target_values, nil)
        else
          append_nulls(current_index - start_index)
        end
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/array.rb b/src/arrow/ruby/red-arrow/lib/arrow/array.rb new file mode 100644 index 000000000..c6c0daaec --- /dev/null +++
# b/src/arrow/ruby/red-arrow/lib/arrow/array.rb @@ -0,0 +1,234 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Array
    include Enumerable
    include GenericFilterable
    include GenericTakeable

    class << self
      # Creates an array. When this class has a builder class whose
      # `build` accepts the given number of arguments, the array is
      # built by the builder. Otherwise the original constructor is used.
      def new(*args)
        _builder_class = builder_class
        return super if _builder_class.nil?
        return super unless _builder_class.buildable?(args)
        _builder_class.build(*args)
      end

      # @return [Class, nil] The class named "#{name}Builder" or `nil`
      #   when no such constant is defined.
      def builder_class
        builder_class_name = "#{name}Builder"
        return nil unless const_defined?(builder_class_name)
        const_get(builder_class_name)
      end
    end

    # @param i [Integer]
    #   The index of the value to be gotten.
    #
    #   You can specify negative index like for `::Array#[]`.
    #
    # @return [Object, nil]
    #   The `i`-th value.
    #
    #   `nil` for NULL value or out of range `i`.
    def [](i)
      i += length if i < 0
      return nil if i < 0 or i >= length
      if null?(i)
        nil
      else
        get_value(i)
      end
    end

    # @param other [Arrow::Array] The array to be compared.
    # @param options [Arrow::EqualOptions, Hash] (nil)
    #   The options to custom how to compare.
    #
    # @return [Boolean]
    #   `true` if both of them have the same data, `false` otherwise.
    #
    # @since 5.0.0
    def equal_array?(other, options=nil)
      equal_options(other, options)
    end

    # Yields each value from the first one to the last one.
    #
    # @return [Enumerator] When no block is given.
    def each
      return to_enum(__method__) unless block_given?

      length.times do |i|
        yield(self[i])
      end
    end

    # Yields each value from the last one to the first one.
    #
    # @return [Enumerator] When no block is given.
    def reverse_each
      return to_enum(__method__) unless block_given?

      (length - 1).downto(0) do |i|
        yield(self[i])
      end
    end

    # @return [self]
    def to_arrow
      self
    end

    alias_method :value_data_type_raw, :value_data_type
    # Memoized version of the original `value_data_type`.
    def value_data_type
      @value_data_type ||= value_data_type_raw
    end

    # @return The result of `values`.
    def to_a
      values
    end

    alias_method :is_in_raw, :is_in
    # Same as the original `is_in` but also accepts a raw Ruby Array
    # (converted to an array of this class first) and a
    # {Arrow::ChunkedArray}.
    def is_in(values)
      case values
      when ::Array
        if self.class.builder_class.buildable?([values])
          values = self.class.new(values)
        else
          values = self.class.new(value_data_type, values)
        end
        is_in_raw(values)
      when ChunkedArray
        is_in_chunked_array(values)
      else
        is_in_raw(values)
      end
    end

    # @api private
    alias_method :concatenate_raw, :concatenate
    # Concatenates the given other arrays to the array.
    #
    # @param other_arrays [::Array, Arrow::Array] The arrays to be
    #   concatenated.
    #
    #   Each other array is processed by {#resolve} before they're
    #   concatenated.
    #
    # @example Raw Ruby Array
    #   array = Arrow::Int32Array.new([1])
    #   array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
    #
    # @example Arrow::Array
    #   array = Arrow::Int32Array.new([1])
    #   array.concatenate(Arrow::Int32Array.new([2, 3]),
    #                     Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
    #
    # @since 4.0.0
    def concatenate(*other_arrays)
      other_arrays = other_arrays.collect do |other_array|
        resolve(other_array)
      end
      concatenate_raw(other_arrays)
    end

    # Concatenates the given other array to the array.
    #
    # If you have multiple arrays to be concatenated, you should use
    # {#concatenate} to concatenate multiple arrays at once.
    #
    # @param other_array [::Array, Arrow::Array] The array to be concatenated.
    #
    #   `other_array` is processed by {#resolve} before it's
    #   concatenated.
    #
    # @example Raw Ruby Array
    #   Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
    #
    # @example Arrow::Array
    #   Arrow::Int32Array.new([1]) +
    #     Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
    #
    # @since 4.0.0
    def +(other_array)
      concatenate(other_array)
    end

    # Ensures returning the same data type array from the given array.
    #
    # @return [Arrow::Array]
    #
    # @raise [ArgumentError] If this class has no builder class or
    #   the given object is neither a raw Ruby Array nor an object that
    #   has `value_data_type`.
    # @raise [NotImplementedError] If the builder class can't build an
    #   array from the raw Ruby Array.
    #
    # @overload resolve(other_raw_array)
    #
    #   @param other_raw_array [::Array] A raw Ruby Array. A new
    #     Arrow::Array is built by the builder class of `self.class`.
    #
    #   @example Raw Ruby Array
    #     int32_array = Arrow::Int32Array.new([1])
    #     other_array = int32_array.resolve([2, 3, 4])
    #     other_array # => Arrow::Int32Array.new([2, 3, 4])
    #
    # @overload resolve(other_array)
    #
    #   @param other_array [Arrow::Array] Another Arrow::Array.
    #
    #     If the given other array has the same data type as
    #     `self`, the given other array is returned as-is.
    #
    #     If the given other array doesn't have the same data type as
    #     `self`, the given other array is cast.
    #
    #   @example Same data type
    #     int32_array = Arrow::Int32Array.new([1])
    #     other_int32_array = Arrow::Int32Array.new([2, 3, 4])
    #     other_array = int32_array.resolve(other_int32_array)
    #     other_array.object_id == other_int32_array.object_id
    #
    #   @example Other data type
    #     int32_array = Arrow::Int32Array.new([1])
    #     other_int8_array = Arrow::Int8Array.new([2, 3, 4])
    #     other_array = int32_array.resolve(other_int8_array)
    #     other_array #=> Arrow::Int32Array.new([2, 3, 4])
    #
    # @since 4.0.0
    def resolve(other_array)
      if other_array.is_a?(::Array)
        builder_class = self.class.builder_class
        if builder_class.nil?
          message =
            "[array][resolve] can't build #{value_data_type} array " +
            "from raw Ruby Array"
          raise ArgumentError, message
        end
        if builder_class.buildable?([other_array])
          other_array = builder_class.build(other_array)
        elsif builder_class.buildable?([value_data_type, other_array])
          other_array = builder_class.build(value_data_type, other_array)
        else
          message =
            "[array][resolve] need to implement " +
            "a feature that building #{value_data_type} array " +
            "from raw Ruby Array"
          # The previous constant name was misspelled and undefined, so
          # this path raised NameError instead of the intended error.
          raise NotImplementedError, message
        end
        other_array
      elsif other_array.respond_to?(:value_data_type)
        return other_array if value_data_type == other_array.value_data_type
        other_array.cast(value_data_type)
      else
        message =
          "[array][resolve] can't build #{value_data_type} array: " +
          "#{other_array.inspect}"
        raise ArgumentError, message
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb b/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb new file mode 100644 index 000000000..338efe696 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/bigdecimal-extension.rb @@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
require "bigdecimal"

class BigDecimal
  # Converts this value to an Arrow decimal.
  #
  # @return [Arrow::Decimal128, Arrow::Decimal256] `Arrow::Decimal128`
  #   when the precision isn't larger than
  #   `Arrow::Decimal128DataType::MAX_PRECISION`, `Arrow::Decimal256`
  #   otherwise. The decimal is created from `to_s`.
  def to_arrow
    decimal_class =
      if precision <= Arrow::Decimal128DataType::MAX_PRECISION
        Arrow::Decimal128
      else
        Arrow::Decimal256
      end
    decimal_class.new(to_s)
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb new file mode 100644 index 000000000..6d05e2c41 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/binary-dictionary-array-builder.rb @@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class BinaryDictionaryArrayBuilder
    include SymbolValuesAppendable

    private

    # @return [BinaryArrayBuilder] A new builder. It's presumably the
    #   hook used by `SymbolValuesAppendable` (not shown here) -- confirm.
    def create_values_array_builder
      BinaryArrayBuilder.new
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb b/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb new file mode 100644 index 000000000..ec236bd15 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/block-closable.rb @@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  module BlockClosable
    # Creates an object by `new(*args)`.
    #
    # Without a block, the created object is returned and the caller
    # has to close it. With a block, the object is yielded and closed
    # when the block finishes (even by an exception). An object that
    # reports `closed?` as true isn't closed again.
    #
    # @param args [::Array] The arguments passed to `new`.
    # @yield [io] The created object.
    # @return [Object] The created object without a block, the block's
    #   result with a block.
    def open(*args, &block)
      io = new(*args)
      return io if block.nil?

      begin
        yield(io)
      ensure
        already_closed = io.respond_to?(:closed?) && io.closed?
        io.close unless already_closed
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb b/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb new file mode 100644 index 000000000..9f3a3f61b --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/buffer.rb @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  class Buffer
    class << self
      # @param value [Object] The candidate to be converted.
      # @return [Buffer, nil] A new buffer created from `value` when it's
      #   a `String`, `nil` otherwise.
      #
      # @api private
      def try_convert(value)
        case value
        when String
          new(value)
        else
          nil
        end
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb new file mode 100644 index 000000000..30dffa856 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/chunked-array.rb @@ -0,0 +1,91 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class ChunkedArray
    include Enumerable
    include GenericFilterable
    include GenericTakeable

    alias_method :size, :n_rows
    unless method_defined?(:length)
      alias_method :length, :n_rows
    end

    alias_method :chunks_raw, :chunks
    # Memoized version of the original `chunks`.
    def chunks
      @chunks ||= chunks_raw
    end

    # @param i [Integer] The index over all chunks.
    # @return [Boolean, nil] Whether the `i`-th value is null. `nil` when
    #   `i` is past the last chunk.
    def null?(i)
      chunks.each do |array|
        return array.null?(i) if i < array.length
        i -= array.length
      end
      nil
    end

    # @param i [Integer] The index over all chunks.
    # @return [Boolean, nil] Whether the `i`-th value is valid. `nil` when
    #   `i` is past the last chunk.
    def valid?(i)
      chunks.each do |array|
        return array.valid?(i) if i < array.length
        i -= array.length
      end
      nil
    end

    # @param i [Integer] The index over all chunks. A negative index
    #   counts from the end.
    # @return [Object, nil] The `i`-th value. `nil` when `i` is out of
    #   range.
    def [](i)
      i += length if i < 0
      # Still negative: out of range. Without this guard the negative
      # index reaches the first chunk, which wraps it again and returns
      # an unrelated value.
      return nil if i < 0
      chunks.each do |array|
        return array[i] if i < array.length
        i -= array.length
      end
      nil
    end

    # Yields each value of each chunk in order.
    #
    # @return [Enumerator] When no block is given.
    def each(&block)
      return to_enum(__method__) unless block_given?

      chunks.each do |array|
        array.each(&block)
      end
    end

    # Yields each value of each chunk in reverse order.
    #
    # @return [Enumerator] When no block is given.
    def reverse_each(&block)
      return to_enum(__method__) unless block_given?

      chunks.reverse_each do |array|
        array.reverse_each(&block)
      end
    end

    # Yields each chunk.
    def each_chunk(&block)
      chunks.each(&block)
    end

    # Builds one array that has all values of all chunks.
    #
    # The class and the data type of the first chunk are used for the
    # new array.
    def pack
      first_chunk = chunks.first
      data_type = first_chunk.value_data_type
      case data_type
      when TimestampDataType
        builder = TimestampArrayBuilder.new(data_type)
        builder.build(to_a)
      else
        first_chunk.class.new(to_a)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb b/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb new file mode 100644 index 000000000..7d7de66bd --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/column-containable.rb @@ -0,0 +1,147 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  module ColumnContainable
    # @return [::Array<Column>] All columns. The result is memoized.
    def columns
      @columns ||= schema.n_fields.times.collect do |i|
        Column.new(self, i)
      end
    end

    # Yields each column.
    def each_column(&block)
      columns.each(&block)
    end

    # @overload find_column(name)
    #   Find a column that has the given name.
    #
    #   @param name [String, Symbol] The column name to be found.
    #   @return [Column, nil] The found column. `nil` if no column has
    #     the name.
    #
    # @overload find_column(index)
    #   Find the `index`-th column.
    #
    #   @param index [Integer] The index to be found. A negative index
    #     counts from the end.
    #   @return [Column, nil] The found column. `nil` if `index` is out
    #     of range.
    #
    # @raise [ArgumentError] If the argument is none of `String`,
    #   `Symbol` and `Integer`.
    def find_column(name_or_index)
      case name_or_index
      when String, Symbol
        name = name_or_index.to_s
        index = schema.get_field_index(name)
        return nil if index == -1
        Column.new(self, index)
      when Integer
        index = name_or_index
        index += n_columns if index < 0
        return nil if index < 0 or index >= n_columns
        Column.new(self, index)
      else
        message =
          "column name or index must be String, Symbol or Integer: " +
          name_or_index.inspect
        raise ArgumentError, message
      end
    end

    # Selects columns that are selected by `selectors` and/or `block`
    # and creates a new container only with the selected columns.
    #
    # @param selectors [Array<String, Symbol, Integer, Range>]
    #   If a selector is `String`, `Symbol` or `Integer`, the selector
    #   selects a column by {#find_column}.
    #
    #   If a selector is `Range`, the selector selects columns by `::Array#[]`.
    # @yield [column] Gives a column to the block to select columns.
    #   This uses `::Array#select`.
    # @yieldparam column [Column] A target column.
    # @yieldreturn [Boolean] Whether the given column is selected or not.
    # @return [self.class] The newly created container that only has selected
    #   columns.
    # @raise [KeyError] If a `String` or `Symbol` selector doesn't match
    #   any column.
    # @raise [IndexError] If an `Integer` selector is out of range.
    def select_columns(*selectors, &block)
      if selectors.empty?
        return to_enum(__method__) unless block_given?
        selected_columns = columns.select(&block)
      else
        selected_columns = []
        selectors.each do |selector|
          case selector
          when Range
            selected_columns.concat(columns[selector])
          else
            column = find_column(selector)
            if column.nil?
              case selector
              when String, Symbol
                message = "unknown column: #{selector.inspect}: #{inspect}"
                raise KeyError.new(message)
              else
                message =
                  "out of index (0..#{n_columns - 1}): " +
                  "#{selector.inspect}: #{inspect}"
                raise IndexError.new(message)
              end
            end
            selected_columns << column
          end
        end
        selected_columns = selected_columns.select(&block) if block_given?
      end
      self.class.new(selected_columns)
    end

    # @overload [](name)
    #   Find a column that has the given name.
    #
    #   @param name [String, Symbol] The column name to be found.
    #   @return [Column] The found column.
    #   @see #find_column
    #
    # @overload [](index)
    #   Find the `index`-th column.
    #
    #   @param index [Integer] The index to be found.
    #   @return [Column] The found column.
    #   @see #find_column
    #
    # @overload [](range)
    #   Selects columns that are in `range` and creates a new container
    #   only with the selected columns.
    #
    #   @param range [Range] The range to be selected.
    #   @return [self.class] The newly created container that only has selected
    #     columns.
    #   @see #select_columns
    #
    # @overload [](selectors)
    #   Selects columns that are selected by `selectors` and creates a
    #   new container only with the selected columns.
    #
    #   @param selectors [Array] The selectors that are used to select columns.
    #   @return [self.class] The newly created container that only has selected
    #     columns.
    #   @see #select_columns
    def [](selector)
      case selector
      when ::Array
        select_columns(*selector)
      when Range
        select_columns(selector)
      else
        find_column(selector)
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/column.rb b/src/arrow/ruby/red-arrow/lib/arrow/column.rb new file mode 100644 index 000000000..06f3dbdc0 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/column.rb @@ -0,0 +1,76 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  # A view of one column in a container: the field comes from
  # `container.schema[index]` and the data from
  # `container.get_column_data(index)`. Value related methods are
  # delegated to the data.
  class Column
    include Enumerable

    attr_reader :container, :field, :data

    # @param container [Object] The owner that responds to `schema` and
    #   `get_column_data`.
    # @param index [Integer] The position of the column in `container`.
    def initialize(container, index)
      @container = container
      @index = index
      @field = container.schema[index]
      @data = container.get_column_data(index)
    end

    # @return The name of the field.
    def name
      field.name
    end

    # @return The data type of the field.
    def data_type
      field.data_type
    end

    # @return Whether the `i`-th value of the data is null.
    def null?(i)
      data.null?(i)
    end

    # @return Whether the `i`-th value of the data is valid.
    def valid?(i)
      data.valid?(i)
    end

    # @return The `i`-th value of the data.
    def [](i)
      data[i]
    end

    # Iterates values by the data's `each`.
    def each(&block)
      data.each(&block)
    end

    # Iterates values by the data's `reverse_each`.
    def reverse_each(&block)
      data.reverse_each(&block)
    end

    # @return The number of rows of the data.
    def n_rows
      data.n_rows
    end
    alias size n_rows
    alias length n_rows

    # @return The number of nulls of the data.
    def n_nulls
      data.n_nulls
    end

    # @return [Boolean] `true` if `other` is a column of the same class
    #   that has the equal field and the equal data.
    def ==(other)
      other.is_a?(self.class) &&
        field == other.field &&
        data == other.data
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/compression-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/compression-type.rb new file mode 100644 index 000000000..b913e48ff --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/compression-type.rb @@ -0,0 +1,37 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class CompressionType
    # Maps an extension String to a compression type value. It's filled
    # once when this file is loaded: "gz" for `GZIP`, `value.nick` for
    # the other values.
    EXTENSIONS = {}
    values.each do |value|
      case value
      when UNCOMPRESSED
        # No extension is registered for `UNCOMPRESSED`.
      when GZIP
        EXTENSIONS["gz"] = value
      else
        EXTENSIONS[value.nick] = value
      end
    end

    class << self
      # @param extension [#to_s] The extension to be looked up in
      #   `EXTENSIONS`.
      # @return The registered compression type value or `nil` if the
      #   extension isn't registered.
      def resolve_extension(extension)
        EXTENSIONS[extension.to_s]
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb b/src/arrow/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb new file mode 100644 index 000000000..16669be93 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/constructor-arguments-gc-guardable.rb @@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  # Mix-in that remembers the arguments an object was constructed with.
  #
  # NOTE(review): judging by the module name, the reference is kept so
  # that the arguments are not garbage collected while the object is
  # alive -- confirm against the classes that include this module.
  module ConstructorArgumentsGCGuardable
    # Runs the original constructor with the given arguments, then keeps
    # a reference to them in `@arguments`.
    def initialize(*arguments)
      super
      @arguments = arguments
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/csv-loader.rb b/src/arrow/ruby/red-arrow/lib/arrow/csv-loader.rb
# new file mode 100644 index 000000000..f82263e46 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/csv-loader.rb @@ -0,0 +1,384 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

require "csv"
require "pathname"
require "time"

module Arrow
  # Loads CSV from a path or from in-memory data into a table.
  #
  # Arrow's own CSV reader is tried first. When the given options can't
  # be expressed as reader options, or when the reader raises
  # `Arrow::Error::Invalid` or `Gio::Error`, loading falls back to
  # Ruby's `CSV` library with header and column type detection.
  class CSVLoader
    class << self
      # Shortcut for `new(path_or_data, **options).load`.
      def load(path_or_data, **options)
        new(path_or_data, **options).load
      end
    end

    # @param path_or_data [Pathname, String] The path of a CSV file or
    #   the CSV data itself.
    # @param options [Hash] Options for the readers. `:delimiter` is
    #   stored as `:col_sep` and `:compression` is taken out of the
    #   options and kept separately.
    def initialize(path_or_data, **options)
      @path_or_data = path_or_data
      @options = options
      if @options.key?(:delimiter)
        @options[:col_sep] = @options.delete(:delimiter)
      end
      @compression = @options.delete(:compression)
    end

    # Loads the CSV.
    #
    # A `Pathname` and a single line string that ends with `.csv`
    # (case-insensitive) are treated as a path. Anything else is
    # treated as CSV data.
    def load
      case @path_or_data
      when Pathname
        load_from_path(@path_or_data.to_path)
      when /\A.+\.csv\z/i
        load_from_path(@path_or_data)
      else
        load_data(@path_or_data)
      end
    end

    private
    # Opens the file at `path` with Ruby's CSV and yields the parser.
    def open_csv(path, **options)
      CSV.open(path, **options) do |csv|
        yield(csv)
      end
    end

    # Yields a Ruby CSV parser for `data` and closes it afterwards.
    def parse_csv_data(data, **options)
      csv = CSV.new(data, **options)
      begin
        yield(csv)
      ensure
        csv.close
      end
    end

    # Returns the field values of a parsed row as a plain array,
    # whether the parser produced a `CSV::Row` or an array.
    def row_values(row)
      if row.is_a?(CSV::Row)
        row.collect(&:last)
      else
        row
      end
    end

    # Builds a table from all rows of a Ruby CSV parser.
    #
    # Values are collected per column. Rows may have different numbers
    # of fields when no header is used; missing fields become `nil` so
    # that every column has one entry per row and values stay on the
    # row they were read from. Rows without any field are skipped.
    #
    # @return The table, or `nil` when no value was read at all.
    def read_csv(csv)
      values_set = []
      n_rows = 0
      csv.each do |row|
        fields = row_values(row)
        next if fields.empty?

        fields.each_with_index do |value, i|
          # A column that first appears on a later row starts with a
          # nil for each row that was already read.
          values = (values_set[i] ||= ::Array.new(n_rows))
          values << value
        end
        n_rows += 1
        # Columns that got no field from this row are one entry short.
        values_set.each do |values|
          values << nil if values.size < n_rows
        end
      end
      return nil if values_set.empty?

      arrays = values_set.collect do |values|
        ArrayBuilder.build(values)
      end
      names = csv.headers || arrays.size.times.collect(&:to_s)
      raw_table = {}
      names.each_with_index do |name, i|
        raw_table[name] = arrays[i]
      end
      Table.new(raw_table)
    end

    # Converts the loader options to `CSVReadOptions`.
    #
    # @return [CSVReadOptions, nil] `nil` when an option can't be
    #   expressed as a reader option. The caller falls back to Ruby's
    #   CSV in that case.
    def reader_options
      options = CSVReadOptions.new
      @options.each do |key, value|
        case key
        when :headers
          case value
          when ::Array
            options.column_names = value
          when String
            return nil
          else
            # Names are generated only when there is no header row.
            options.generate_column_names = !value
          end
        when :column_types
          value.each do |name, type|
            options.add_column_type(name, type)
          end
        when :schema
          options.add_schema(value)
        when :encoding
          # process encoding on opening input
        when :col_sep
          options.delimiter = value
        else
          setter = "#{key}="
          if options.respond_to?(setter)
            options.__send__(setter, value)
          else
            return nil
          end
        end
      end
      options
    end

    # Yields a decompressing stream over `raw_input` when a compression
    # was given, `raw_input` itself otherwise.
    def open_decompress_input(raw_input)
      if @compression
        codec = Codec.new(@compression)
        CompressedInputStream.open(codec, raw_input) do |input|
          yield(input)
        end
      else
        yield(raw_input)
      end
    end

    # Yields a stream that converts from the `:encoding` option to
    # UTF-8 when the option was given, `raw_input` itself otherwise.
    def open_encoding_convert_stream(raw_input, &block)
      encoding = @options[:encoding]
      if encoding
        converter = Gio::CharsetConverter.new("UTF-8", encoding)
        convert_input_stream =
          Gio::ConverterInputStream.new(raw_input, converter)
        GIOInputStream.open(convert_input_stream, &block)
      else
        yield(raw_input)
      end
    end

    # Applies decompression and then encoding conversion to `raw_input`.
    def wrap_input(raw_input)
      open_decompress_input(raw_input) do |input_|
        open_encoding_convert_stream(input_) do |input__|
          yield(input__)
        end
      end
    end

    # Loads the CSV file at `path`.
    def load_from_path(path)
      options = reader_options
      if options
        begin
          MemoryMappedInputStream.open(path) do |raw_input|
            wrap_input(raw_input) do |input|
              return CSVReader.new(input, options).read
            end
          end
        rescue Arrow::Error::Invalid, Gio::Error
          # Best effort: fall through to Ruby's CSV below.
        end
      end

      options = update_csv_parse_options(@options, :open_csv, path)
      open_csv(path, **options) do |csv|
        read_csv(csv)
      end
    end

    # Loads CSV from the in-memory `data`.
    def load_data(data)
      options = reader_options
      if options
        begin
          BufferInputStream.open(Buffer.new(data)) do |raw_input|
            wrap_input(raw_input) do |input|
              return CSVReader.new(input, options).read
            end
          end
        rescue Arrow::Error::Invalid, Gio::Error
          # Best effort: fall through to Ruby's CSV below.
        end
      end

      options = update_csv_parse_options(@options, :parse_csv_data, data)
      parse_csv_data(data, **options) do |csv|
        read_csv(csv)
      end
    end

    # Wraps the given block as a Ruby CSV converter that is applied
    # only to the field at `target_index` (to every field when
    # `target_index` is `nil`).
    def selective_converter(target_index)
      lambda do |field, field_info|
        if target_index.nil? or field_info.index == target_index
          yield(field)
        else
          field
        end
      end
    end

    # Converts "true" and "false" to booleans and leaves other fields
    # untouched.
    BOOLEAN_CONVERTER = lambda do |field|
      begin
        encoded_field = field.encode(CSV::ConverterEncoding)
      rescue EncodingError
        field
      else
        case encoded_field
        when "true"
          true
        when "false"
          false
        else
          field
        end
      end
    end

    # Converts ISO 8601 formatted fields to `::Time` and leaves other
    # fields untouched.
    ISO8601_CONVERTER = lambda do |field|
      begin
        encoded_field = field.encode(CSV::ConverterEncoding)
      rescue EncodingError
        field
      else
        begin
          ::Time.iso8601(encoded_field)
        rescue ArgumentError
          field
        end
      end
    end

    # The keyword arguments that `CSV#initialize` accepts.
    AVAILABLE_CSV_PARSE_OPTIONS = {}
    CSV.instance_method(:initialize).parameters.each do |type, name|
      AVAILABLE_CSV_PARSE_OPTIONS[name] = true if type == :key
    end

    # Builds the options for Ruby's CSV from the loader options.
    #
    # Options that `CSV#initialize` doesn't accept are dropped. When
    # `:headers` or `:converters` isn't given, the input is parsed an
    # extra time for each of them to detect a suitable value.
    #
    # @param options [Hash] The loader options.
    # @param create_csv [Symbol] The name of the method that yields a
    #   parser: `:open_csv` or `:parse_csv_data`.
    # @param args The arguments for `create_csv`.
    def update_csv_parse_options(options, create_csv, *args)
      if options.key?(:converters)
        new_options = options.dup
      else
        converters = [:all, BOOLEAN_CONVERTER, ISO8601_CONVERTER]
        new_options = options.merge(converters: converters)
      end

      # TODO: Support :schema and :column_types

      unless AVAILABLE_CSV_PARSE_OPTIONS.empty?
        new_options.select! do |key, value|
          AVAILABLE_CSV_PARSE_OPTIONS.key?(key)
        end
      end

      unless options.key?(:headers)
        __send__(create_csv, *args, **new_options) do |csv|
          new_options[:headers] = have_header?(csv)
        end
      end
      unless options.key?(:converters)
        __send__(create_csv, *args, **new_options) do |csv|
          new_options[:converters] = detect_robust_converters(csv)
        end
      end

      new_options
    end

    # Guesses from the first two rows whether the first row is a header.
    #
    # @return [Boolean, nil] `nil` when it can't be decided.
    def have_header?(csv)
      if @options.key?(:headers)
        return @options[:headers]
      end

      row1 = csv.shift
      return false if row1.nil?
      return false if row1.any?(&:nil?)

      row2 = csv.shift
      return nil if row2.nil?
      return true if row2.any?(&:nil?)

      return false if row1.any? {|value| not value.is_a?(String)}

      if row1.collect(&:class) != row2.collect(&:class)
        return true
      end

      nil
    end

    # Scans all rows to find one type per column and returns converters
    # that apply only the conversion for that type to each column.
    #
    # A column that has values of different types is treated as a
    # string column and gets no converter. Integer and float values in
    # the same column make it a float column.
    def detect_robust_converters(csv)
      column_types = []
      csv.each do |row|
        row_values(row).each_with_index do |value, i|
          current_column_type = column_types[i]
          next if current_column_type == :string

          candidate_type = nil
          case value
          when nil
            next
          when "true", "false", true, false
            candidate_type = :boolean
          when Integer
            candidate_type = :integer
            if current_column_type == :float
              candidate_type = :float
            end
          when Float
            candidate_type = :float
            if current_column_type == :integer
              column_types[i] = candidate_type
            end
          when ::Time
            candidate_type = :time
          when DateTime
            candidate_type = :date_time
          when Date
            candidate_type = :date
          when String
            next if value.empty?
            candidate_type = :string
          else
            candidate_type = :string
          end

          column_types[i] ||= candidate_type
          if column_types[i] != candidate_type
            column_types[i] = :string
          end
        end
      end

      converters = []
      column_types.each_with_index do |type, i|
        case type
        when :boolean
          converters << selective_converter(i, &BOOLEAN_CONVERTER)
        when :integer
          converters << selective_converter(i) do |field|
            if field.nil? or field.empty?
              nil
            else
              CSV::Converters[:integer].call(field)
            end
          end
        when :float
          converters << selective_converter(i) do |field|
            if field.nil? or field.empty?
              nil
            else
              CSV::Converters[:float].call(field)
            end
          end
        when :time
          converters << selective_converter(i, &ISO8601_CONVERTER)
        when :date_time
          converters << selective_converter(i, &CSV::Converters[:date_time])
        when :date
          converters << selective_converter(i, &CSV::Converters[:date])
        end
      end
      converters
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/csv-read-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/csv-read-options.rb
# new file mode 100644 index 000000000..dec3dec95 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/csv-read-options.rb @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class CSVReadOptions
    alias_method :add_column_type_raw, :add_column_type
    # Registers the type of a column. `type` may be anything that
    # {Arrow::DataType.resolve} accepts.
    def add_column_type(name, type)
      add_column_type_raw(name, DataType.resolve(type))
    end

    alias_method :delimiter_raw, :delimiter
    # @return [String] The delimiter as a one character string.
    def delimiter
      delimiter_raw.chr
    end

    alias_method :delimiter_raw=, :delimiter=
    # Sets the delimiter.
    #
    # @param delimiter [String, Object] A one byte string, which is
    #   converted with `String#ord`, or a value that is passed to the
    #   raw setter as is.
    #
    # @raise [ArgumentError] When a string isn't exactly one byte.
    def delimiter=(delimiter)
      if delimiter.is_a?(String)
        unless delimiter.bytesize == 1
          message = "delimiter must be 1 byte character: #{delimiter.inspect}"
          raise ArgumentError, message
        end
        delimiter = delimiter.ord
      end
      self.delimiter_raw = delimiter
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/data-type.rb
# new file mode 100644 index 000000000..07b452521 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/data-type.rb @@ -0,0 +1,198 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class DataType
    class << self
      # Ensures that a suitable {Arrow::DataType} is returned.
      #
      # @overload resolve(data_type)
      #
      #   Returns the given data type itself. This makes the method
      #   convenient to use as an {Arrow::DataType} converter.
      #
      #   @param data_type [Arrow::DataType] The data type.
      #
      #   @return [Arrow::DataType] The given data type itself.
      #
      # @overload resolve(name)
      #
      #   Creates a suitable data type from the given type name. For
      #   example, you can create {Arrow::BooleanDataType} from
      #   `:boolean`.
      #
      #   @param name [String, Symbol] The type name of the data type.
      #
      #   @return [Arrow::DataType] A new suitable data type.
      #
      #   @example Create a boolean data type
      #     Arrow::DataType.resolve(:boolean)
      #
      # @overload resolve(name_with_arguments)
      #
      #   Creates a new suitable data type from the given type name
      #   with arguments.
      #
      #   @param name_with_arguments [::Array<String, ...>]
      #     The type name of the data type as the first element.
      #
      #     The remaining elements are additional information of the
      #     data type.
      #
      #     For example, {Arrow::TimestampDataType} needs unit as
      #     additional information.
      #
      #   @return [Arrow::DataType] A new suitable data type.
      #
      #   @example Create a boolean data type
      #     Arrow::DataType.resolve([:boolean])
      #
      #   @example Create a milliseconds unit timestamp data type
      #     Arrow::DataType.resolve([:timestamp, :milli])
      #
      # @overload resolve(description)
      #
      #   Creates a new suitable data type from the given data type
      #   description.
      #
      #   Data type description is a raw `Hash`. Data type description
      #   must have `:type` value. `:type` is the type of the data type.
      #
      #   If the type needs additional information, you need to
      #   specify it. See the constructor document for what
      #   information is needed. For example,
      #   {Arrow::ListDataType#initialize} needs `:field` value.
      #
      #   @param description [Hash] The description of the data type.
      #
      #   @option description [String, Symbol] :type The type name of
      #     the data type.
      #
      #   @return [Arrow::DataType] A new suitable data type.
      #
      #   @example Create a boolean data type
      #     Arrow::DataType.resolve(type: :boolean)
      #
      #   @example Create a list data type
      #     Arrow::DataType.resolve(type: :list,
      #                             field: {name: "visible", type: :boolean})
      #
      # @raise [ArgumentError] When the value is none of the above, the
      #   description has no `:type`, the type name is unknown or the
      #   type is abstract.
      def resolve(data_type)
        case data_type
        when DataType
          data_type
        when String, Symbol
          resolve_class(data_type).new
        when ::Array
          type, *arguments = data_type
          resolve_class(type).new(*arguments)
        when Hash
          type = nil
          description = {}
          data_type.each do |key, value|
            key = key.to_sym
            case key
            when :type
              type = value
            else
              description[key] = value
            end
          end
          if type.nil?
            message =
              "data type description must have :type value: #{data_type.inspect}"
            raise ArgumentError, message
          end
          data_type_class = resolve_class(type)
          if description.empty?
            data_type_class.new
          else
            data_type_class.new(description)
          end
        else
          message =
            "data type must be " +
            "Arrow::DataType, String, Symbol, [String, ...], [Symbol, ...], " +
            "{type: String, ...} or {type: Symbol, ...}: #{data_type.inspect}"
          raise ArgumentError, message
        end
      end

      # @return [::Array<Class>] All direct and indirect sub classes
      #   found through the GType children, without duplicates.
      def sub_types
        types = {}
        gtype.children.each do |child|
          sub_type = child.to_class
          types[sub_type] = true
          sub_type.sub_types.each do |sub_sub_type|
            types[sub_sub_type] = true
          end
        end
        types.keys
      end

      # Same as {resolve} but returns `nil` instead of raising
      # `ArgumentError`.
      def try_convert(value)
        begin
          resolve(value)
        rescue ArgumentError
          nil
        end
      end

      private
      # Finds the concrete `Arrow::XXXDataType` class for a type name
      # such as `:boolean` or `"uint8"`.
      #
      # @raise [ArgumentError] When no such class exists or the class
      #   is abstract.
      def resolve_class(data_type)
        components = data_type.to_s.split("_").collect(&:capitalize)
        data_type_name = components.join.gsub(/\AUint/, "UInt")
        data_type_class_name = "#{data_type_name}DataType"
        begin
          defined = Arrow.const_defined?(data_type_class_name)
        rescue NameError
          # The name can't be a constant name at all (e.g. "my-type").
          # Report it as an unknown type like any other missing class.
          defined = false
        end
        unless defined
          available_types = []
          Arrow.constants.each do |name|
            name = name.to_s
            next if name == "DataType"
            next unless name.end_with?("DataType")
            name = name.gsub(/DataType\z/, "")
            name_components = name.scan(/(UInt[0-9]+|[A-Z][a-z\d]+)/).flatten
            available_types << name_components.collect(&:downcase).join("_").to_sym
          end
          message =
            "unknown type: <#{data_type.inspect}>: " +
            "available types: #{available_types.inspect}"
          raise ArgumentError, message
        end
        data_type_class = Arrow.const_get(data_type_class_name)
        if data_type_class.gtype.abstract?
          not_abstract_types = data_type_class.sub_types.find_all do |sub_type|
            not sub_type.gtype.abstract?
          end
          not_abstract_types = not_abstract_types.sort_by do |type|
            type.name
          end
          message =
            "abstract type: <#{data_type.inspect}>: " +
            "use one of not abstract type: #{not_abstract_types.inspect}"
          raise ArgumentError, message
        end
        data_type_class
      end
    end

    # Builds an array of this data type from the given values with the
    # array builder class that is named after this data type class.
    #
    # The data type itself is passed to the builder only when the
    # builder reports that it can't be built from the values alone.
    def build_array(values)
      base_name = self.class.name.gsub(/DataType\z/, "")
      builder_class = self.class.const_get("#{base_name}ArrayBuilder")
      args = [values]
      args.unshift(self) unless builder_class.buildable?(args)
      builder_class.build(*args)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date32-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/date32-array-builder.rb
# new file mode 100644 index 000000000..dedbba85e --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/date32-array-builder.rb @@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Date32ArrayBuilder
    private
    UNIX_EPOCH = Date.new(1970, 1, 1)

    # Converts a value to the number of days since the UNIX epoch.
    #
    # Values that respond to `to_date` are converted to a date first.
    # Values that aren't dates after that are returned as is.
    def convert_to_arrow_value(value)
      date = value.respond_to?(:to_date) ? value.to_date : value
      return date unless date.is_a?(Date)

      (date - UNIX_EPOCH).to_i
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date32-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/date32-array.rb
# new file mode 100644 index 000000000..121dbcb55 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/date32-array.rb @@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Date32Array
    # @return [Date] The i-th raw value converted to a date.
    def get_value(i)
      to_date(get_raw_value(i))
    end

    private
    # The Julian day number of 1970-01-01.
    UNIX_EPOCH = 2440588

    # Converts a number of days since the UNIX epoch to a date.
    def to_date(raw_value)
      julian_day = UNIX_EPOCH + raw_value
      Date.jd(julian_day)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date64-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/date64-array-builder.rb
# new file mode 100644 index 000000000..658118122 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/date64-array-builder.rb @@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Date64ArrayBuilder
    private
    # Converts a value to the number of milliseconds since the UNIX
    # epoch.
    #
    # Values that respond to `to_time` are converted to `::Time`
    # first. Values that aren't `::Time` after that are returned as is.
    def convert_to_arrow_value(value)
      time = value
      if !value.is_a?(::Time) && value.respond_to?(:to_time)
        time = value.to_time
      end
      return time unless time.is_a?(::Time)

      time.to_i * 1_000 + time.usec / 1_000
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/date64-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/date64-array.rb
# new file mode 100644 index 000000000..9b8a92476 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/date64-array.rb @@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Date64Array
    # @return [DateTime] The i-th raw value converted to a date time.
    def get_value(i)
      to_datetime(get_raw_value(i))
    end

    private
    # Converts a number of milliseconds since the UNIX epoch to a
    # `DateTime`.
    #
    # The remainder of the division is in milliseconds, so the unit
    # must be given explicitly: `Time.at` treats a second positional
    # argument as microseconds by default.
    def to_datetime(raw_value)
      seconds, milliseconds = raw_value.divmod(1_000)
      ::Time.at(seconds, milliseconds, :millisecond).to_datetime
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/datum.rb b/src/arrow/ruby/red-arrow/lib/arrow/datum.rb
# new file mode 100644 index 000000000..196a18f54 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/datum.rb @@ -0,0 +1,100 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Datum
    class << self
      # Wraps a value in the matching datum.
      #
      # Tables, arrays, chunked arrays and scalars are wrapped
      # directly. A Ruby array is built into an Arrow array first. Any
      # other supported Ruby value is converted to a scalar and that
      # scalar is converted again.
      #
      # @return The datum, or `nil` when the value isn't supported.
      #
      # @api private
      def try_convert(value)
        case value
        when Table
          TableDatum.new(value)
        when Array
          ArrayDatum.new(value)
        when ChunkedArray
          ChunkedArrayDatum.new(value)
        when Scalar
          ScalarDatum.new(value)
        when ::Array
          ArrayDatum.new(ArrayBuilder.build(value))
        else
          scalar = build_scalar(value)
          return nil if scalar.nil?
          try_convert(scalar)
        end
      end

      private
      # Builds the scalar for a plain Ruby value.
      #
      # @return The scalar, or `nil` when the value isn't supported.
      def build_scalar(value)
        case value
        when Integer
          scalar_class = integer_scalar_class(value)
          return nil if scalar_class.nil?
          scalar_class.new(value)
        when Float
          DoubleScalar.new(value)
        when true, false
          BooleanScalar.new(value)
        when String
          # Less than 2 ** 31 bytes fit into the not large variants.
          large = value.bytesize > ((2 ** 31) - 1)
          if value.ascii_only? or value.encoding == Encoding::UTF_8
            (large ? LargeStringScalar : StringScalar).new(value)
          else
            (large ? LargeBinaryScalar : BinaryScalar).new(value)
          end
        when Date
          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
          Date32Scalar.new(date32_value)
        when Time
          case value.unit
          when TimeUnit::SECOND, TimeUnit::MILLI
            data_type = Time32DataType.new(value.unit)
            scalar_class = Time32Scalar
          else
            data_type = Time64DataType.new(value.unit)
            scalar_class = Time64Scalar
          end
          scalar_class.new(data_type, value.value)
        when ::Time
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          TimestampScalar.new(data_type, timestamp_value)
        when Decimal128
          # NOTE(review): this branch looks like a copy of the ::Time
          # branch above: it builds a timestamp data type and calls
          # #nsec on the decimal. It presumably should use a decimal
          # data type and the decimal itself, but precision and scale
          # aren't available here -- confirm the intended behavior.
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          Decimal128Scalar.new(data_type, timestamp_value)
        else
          nil
        end
      end

      # Finds the smallest integer scalar class that can hold the
      # value. The unsigned class is preferred over the signed class of
      # the same width.
      #
      # @return [Class, nil] `nil` when the value needs more than 64 bits.
      def integer_scalar_class(value)
        [8, 16, 32, 64].each do |n_bits|
          if value.between?(0, (2 ** n_bits) - 1)
            return Arrow.const_get("UInt#{n_bits}Scalar")
          end
          if value.between?(-(2 ** (n_bits - 1)), (2 ** (n_bits - 1)) - 1)
            return Arrow.const_get("Int#{n_bits}Scalar")
          end
        end
        nil
      end
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb
# new file mode 100644 index 000000000..d380ce070 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb @@ -0,0 +1,58 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal128ArrayBuilder
    class << self
      # Builds an array of the given decimal data type from values.
      def build(data_type, values)
        new(data_type).build(values)
      end
    end

    alias_method :append_value_raw, :append_value
    # Appends a value. Strings, floats and big decimals are converted
    # to {Arrow::Decimal128} first.
    def append_value(value)
      append_value_raw(normalize_value(value))
    end

    alias_method :append_values_raw, :append_values
    # Appends values. The elements of a Ruby array are normalized like
    # in {#append_value}; anything else is appended as packed values.
    def append_values(values, is_valids=nil)
      unless values.is_a?(::Array)
        return append_values_packed(values, is_valids)
      end

      normalized_values = values.collect {|value| normalize_value(value)}
      append_values_raw(normalized_values, is_valids)
    end

    private
    # Converts strings, floats and big decimals to {Arrow::Decimal128},
    # the latter two through their string representation. Other values
    # are returned as is.
    def normalize_value(value)
      case value
      when String
        Decimal128.new(value)
      when Float, BigDecimal
        Decimal128.new(value.to_s)
      else
        value
      end
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array.rb
# new file mode 100644 index 000000000..a5ee53be7 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-array.rb @@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal128Array
    # @return [BigDecimal] The i-th value, created from its formatted
    #   string representation.
    def get_value(i)
      BigDecimal(format_value(i))
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-data-type.rb
# new file mode 100644 index 000000000..4b5583896 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128-data-type.rb @@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal128DataType
    MAX_PRECISION = max_precision

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Decimal128DataType}.
    #
    # @overload initialize(precision, scale)
    #
    #   @param precision [Integer] The precision of the decimal data
    #     type. It's the number of digits including the number of
    #     digits after the decimal point.
    #
    #   @param scale [Integer] The scale of the decimal data
    #     type. It's the number of digits after the decimal point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal128DataType.new(8, 2)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the decimal data
    #     type. It must have `:precision` and `:scale` values.
    #
    #   @option description [Integer] :precision The precision of the
    #     decimal data type. It's the number of digits including the
    #     number of digits after the decimal point.
    #
    #   @option description [Integer] :scale The scale of the decimal
    #     data type. It's the number of digits after the decimal
    #     point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal128DataType.new(precision: 8,
    #                                   scale: 2)
    #
    # @raise [ArgumentError] When neither one nor two arguments are
    #   given.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        precision = description[:precision]
        scale = description[:scale]
      when 2
        precision, scale = args
      else
        # Same wording as Ruby's own arity errors.
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      initialize_raw(precision, scale)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal128.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal128.rb
# new file mode 100644 index 000000000..bf853ae7f --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal128.rb @@ -0,0 +1,60 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal128
    alias_method :to_s_raw, :to_s

    # @overload to_s
    #
    #   @return [String]
    #     The string representation of the decimal.
    #
    # @overload to_s(scale)
    #
    #   @param scale [Integer] The scale of the decimal.
    #   @return [String]
    #      The string representation of the decimal including the scale.
    #
    # @since 0.13.0
    def to_s(scale=nil)
      scale ? to_string_scale(scale) : to_s_raw
    end

    # The original method is kept as the bang variant. The plain name
    # is redefined below to work on a copy.
    alias_method :abs!, :abs

    # @return [Arrow::Decimal128] A copy on which `abs!` was called.
    #
    # @since 3.0.0
    def abs
      dup.tap(&:abs!)
    end

    # The original method is kept as the bang variant. The plain name
    # is redefined below to work on a copy.
    alias_method :negate!, :negate

    # @return [Arrow::Decimal128] A copy on which `negate!` was called.
    #
    # @since 3.0.0
    def negate
      dup.tap(&:negate!)
    end
  end
end

# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb
# new file mode 100644 index 000000000..fb89ff00b --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array-builder.rb @@ -0,0 +1,61 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal256ArrayBuilder
    class << self
      # Creates a builder for `data_type` and calls `build(values)`
      # on it.
      #
      # @since 3.0.0
      def build(data_type, values)
        new(data_type).build(values)
      end
    end

    alias_method :append_value_raw, :append_value
    # Appends one value, first passing it through #normalize_value.
    #
    # @since 3.0.0
    def append_value(value)
      append_value_raw(normalize_value(value))
    end

    alias_method :append_values_raw, :append_values
    # Appends several values at once.
    #
    # A Ruby `::Array` is normalized element by element and handed to
    # the original `append_values`; anything else is forwarded to
    # `append_values_packed` unchanged.
    #
    # @since 3.0.0
    def append_values(values, is_valids=nil)
      unless values.is_a?(::Array)
        return append_values_packed(values, is_valids)
      end

      normalized = values.map { |value| normalize_value(value) }
      append_values_raw(normalized, is_valids)
    end

    private
    # Wraps String, Float and BigDecimal values in a Decimal256 (Float
    # and BigDecimal go through their #to_s); other values are
    # returned as is.
    def normalize_value(value)
      case value
      when String
        Decimal256.new(value)
      when Float, BigDecimal
        Decimal256.new(value.to_s)
      else
        value
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array.rb new file mode 100644 index 000000000..8c2306dfe --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-array.rb @@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal256Array
    # Returns the value at index `i` as a BigDecimal built from
    # `format_value(i)`.
    #
    # @since 3.0.0
    def get_value(i)
      BigDecimal(format_value(i))
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-data-type.rb new file mode 100644 index 000000000..8264e388e --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256-data-type.rb @@ -0,0 +1,73 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal256DataType
    MAX_PRECISION = max_precision

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Decimal256DataType}.
    #
    # @overload initialize(precision, scale)
    #
    #   @param precision [Integer] The precision of the decimal data
    #     type. It's the number of digits including the number of
    #     digits after the decimal point.
    #
    #   @param scale [Integer] The scale of the decimal data
    #     type. It's the number of digits after the decimal point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal256DataType.new(8, 2)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the decimal data
    #     type. It must have `:precision` and `:scale` values.
    #
    #   @option description [Integer] :precision The precision of the
    #     decimal data type. It's the number of digits including the
    #     number of digits after the decimal point.
    #
    #   @option description [Integer] :scale The scale of the decimal
    #     data type. It's the number of digits after the decimal
    #     point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal256DataType.new(precision: 8,
    #                                   scale: 2)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    #
    # @since 3.0.0
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        precision = description[:precision]
        scale = description[:scale]
      when 2
        precision, scale = args
      else
        # Same wording as Ruby's own arity errors ("given N, expected ...").
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      initialize_raw(precision, scale)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/decimal256.rb b/src/arrow/ruby/red-arrow/lib/arrow/decimal256.rb new file mode 100644 index 000000000..1a7097a4d --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/decimal256.rb @@ -0,0 +1,60 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Decimal256
    # Keep the original #to_s reachable so the scale-aware wrapper
    # below can fall back to it.
    alias_method :to_s_raw, :to_s

    # Formats this decimal as a string.
    #
    # @overload to_s
    #
    #   @return [String]
    #     The string representation of the decimal.
    #
    # @overload to_s(scale)
    #
    #   @param scale [Integer] The scale of the decimal.
    #   @return [String]
    #     The string representation of the decimal including the scale.
    #
    # @since 3.0.0
    def to_s(scale=nil)
      return to_string_scale(scale) if scale

      to_s_raw
    end

    # The original #abs stays available as #abs!.
    alias_method :abs!, :abs

    # Calls {#abs!} on a duplicate of the receiver and returns that
    # duplicate.
    #
    # @return [Arrow::Decimal256] The duplicate.
    #
    # @since 3.0.0
    def abs
      dup.tap(&:abs!)
    end

    # The original #negate stays available as #negate!.
    alias_method :negate!, :negate

    # Calls {#negate!} on a duplicate of the receiver and returns that
    # duplicate.
    #
    # @return [Arrow::Decimal256] The duplicate.
    #
    # @since 3.0.0
    def negate
      dup.tap(&:negate!)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/dense-union-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/dense-union-data-type.rb new file mode 100644 index 000000000..6d2bf5e70 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/dense-union-data-type.rb @@ -0,0 +1,90 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class DenseUnionDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::DenseUnionDataType}.
    #
    # @overload initialize(fields, type_codes)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     dense union data type. You can mix {Arrow::Field} and field
    #     description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @param type_codes [::Array<Integer>] The IDs that indicate
    #     corresponding fields.
    #
    #   @example Create a dense union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::DenseUnionDataType.new(fields, [2, 9])
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the dense union
    #     data type. It must have `:fields` and `:type_codes` values.
    #
    #   @option description [::Array<Arrow::Field, Hash>] :fields The
    #     fields of the dense union data type. You can mix
    #     {Arrow::Field} and field description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @option description [::Array<Integer>] :type_codes The IDs
    #     that indicate corresponding fields.
    #
    #   @example Create a dense union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::DenseUnionDataType.new(fields: fields,
    #                                   type_codes: [2, 9])
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        fields = description[:fields]
        type_codes = description[:type_codes]
      when 2
        fields, type_codes = args
      else
        # Same wording as Ruby's own arity errors ("given N, expected ...").
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      # Field descriptions are turned into Arrow::Field; existing
      # Arrow::Field objects are passed through untouched.
      fields = fields.collect do |field|
        field.is_a?(Field) ? field : Field.new(field)
      end
      initialize_raw(fields, type_codes)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/dictionary-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-array.rb new file mode 100644 index 000000000..70591ab7c --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-array.rb @@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class DictionaryArray
    # Returns the dictionary entry for row `i`: the index stored at
    # `indices[i]` is used to look up `dictionary`.
    def get_value(i)
      dictionary[indices[i]]
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/dictionary-data-type.rb b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-data-type.rb new file mode 100644 index 000000000..8396e311c --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/dictionary-data-type.rb @@ -0,0 +1,117 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class DictionaryDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::DictionaryDataType}.
    #
    # @overload initialize(index_data_type, value_data_type, ordered)
    #
    #   @param index_data_type [Arrow::DataType, Hash, String, Symbol]
    #     The index data type of the dictionary data type. It must be
    #     signed integer data types. Here are available signed integer
    #     data types:
    #
    #       * Arrow::Int8DataType
    #       * Arrow::Int16DataType
    #       * Arrow::Int32DataType
    #       * Arrow::Int64DataType
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @param value_data_type [Arrow::DataType, Hash, String, Symbol]
    #     The value data type of the dictionary data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @param ordered [Boolean] Whether dictionary contents are
    #     ordered or not.
    #
    #   @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
    #     index_data_type = :int8
    #     value_data_type = :string
    #     ordered = true
    #     Arrow::DictionaryDataType.new(index_data_type,
    #                                   value_data_type,
    #                                   ordered)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the dictionary
    #     data type. It must have `:index_data_type`,
    #     `:value_data_type` and `:ordered` values.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :index_data_type The index data type of the dictionary data
    #     type. It must be signed integer data types. Here are
    #     available signed integer data types:
    #
    #       * Arrow::Int8DataType
    #       * Arrow::Int16DataType
    #       * Arrow::Int32DataType
    #       * Arrow::Int64DataType
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :value_data_type
    #     The value data type of the dictionary data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @option description [Boolean] :ordered Whether dictionary
    #     contents are ordered or not.
    #
    #   @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
    #     Arrow::DictionaryDataType.new(index_data_type: :int8,
    #                                   value_data_type: :string,
    #                                   ordered: true)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 3.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        index_data_type = description[:index_data_type]
        value_data_type = description[:value_data_type]
        ordered = description[:ordered]
      when 3
        index_data_type, value_data_type, ordered = args
      else
        # Same wording as Ruby's own arity errors ("given N, expected ...").
        message = "wrong number of arguments (given #{n_args}, expected 1 or 3)"
        raise ArgumentError, message
      end
      index_data_type = DataType.resolve(index_data_type)
      value_data_type = DataType.resolve(value_data_type)
      initialize_raw(index_data_type, value_data_type, ordered)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/equal-options.rb b/src/arrow/ruby/red-arrow/lib/arrow/equal-options.rb new file mode 100644 index 000000000..4eb9964ad --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/equal-options.rb @@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class EqualOptions
    class << self
      # Builds an {Arrow::EqualOptions} from a Hash by calling the
      # `key=` setter for every pair.
      #
      # @param value [Object] The candidate to convert.
      # @return [Arrow::EqualOptions, nil] The options, or `nil` when
      #   `value` is not a Hash or one of its keys has no matching
      #   setter.
      #
      # @api private
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |k, v|
            setter = :"#{k}="
            # An unknown key makes the whole conversion fail.
            return unless options.respond_to?(setter)
            options.__send__(setter, v)
          end
          options
        else
          nil
        end
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/expression.rb b/src/arrow/ruby/red-arrow/lib/arrow/expression.rb new file mode 100644 index 000000000..a33cc53c2 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/expression.rb @@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Expression
    class << self
      # Converts a Ruby value into an expression object.
      #
      # * Symbol: a `FieldExpression` for the symbol's name.
      # * `::Array`: a `CallExpression`. The first element is the
      #   function name (String or Symbol), the rest are arguments,
      #   and a trailing `FunctionOptions` is taken as the options.
      # * Anything else: a `LiteralExpression` when
      #   `Datum.try_convert` accepts the value.
      #
      # @param value [Object] The candidate to convert.
      # @return [Arrow::Expression, nil] The expression, or `nil` when
      #   the value cannot be converted.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol
          FieldExpression.new(value.to_s)
        when ::Array
          function_name, *arguments = value
          case function_name
          when String, Symbol
            function_name = function_name.to_s
          else
            return nil
          end
          # `arguments` is a fresh array from the splat, so popping
          # does not modify the caller's array.
          if arguments.last.is_a?(FunctionOptions)
            options = arguments.pop
          else
            options = nil
          end
          CallExpression.new(function_name, arguments, options)
        else
          datum = Datum.try_convert(value)
          return nil if datum.nil?
          LiteralExpression.new(datum)
        end
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/field-containable.rb b/src/arrow/ruby/red-arrow/lib/arrow/field-containable.rb new file mode 100644 index 000000000..e4dbf4ec2 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/field-containable.rb @@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  module FieldContainable
    # Looks up a field by name or by position.
    #
    # The including class must provide `get_field_by_name`,
    # `get_field` and `n_fields`.
    #
    # @param name_or_index [String, Symbol, Integer] The field name,
    #   or its index. A negative index counts from the end
    #   (`-1` is the last field).
    # @return [Object, nil] Whatever `get_field_by_name` or
    #   `get_field` returns; `nil` when the index is out of range.
    # @raise [ArgumentError] If `name_or_index` is not a String,
    #   Symbol or Integer.
    def find_field(name_or_index)
      case name_or_index
      when String, Symbol
        get_field_by_name(name_or_index)
      when Integer
        index = name_or_index
        # Wrap negative indexes around, like ::Array#[] does.
        index += n_fields if index < 0
        return nil if index < 0 || index >= n_fields
        get_field(index)
      else
        message = "field name or index must be String, Symbol or Integer" +
                  ": <#{name_or_index.inspect}>"
        raise ArgumentError, message
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/field.rb b/src/arrow/ruby/red-arrow/lib/arrow/field.rb new file mode 100644 index 000000000..e439cb960 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/field.rb @@ -0,0 +1,118 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Field
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Field}.
    #
    # @overload initialize(name, data_type)
    #
    #   @param name [String, Symbol] The name of the field.
    #
    #   @param data_type [Arrow::DataType, Hash, String, Symbol] The
    #     data type of the field.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a field with {Arrow::DataType}s
    #     Arrow::Field.new("visible", Arrow::BooleanDataType.new)
    #
    #   @example Create a field with data type description
    #     Arrow::Field.new("visible", :boolean)
    #
    #   @example Create a field with name as `Symbol`
    #     Arrow::Field.new(:visible, :boolean)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the field.
    #
    #     Field description is a raw `Hash`. Field description must
    #     have `:name` and `:data_type` values. `:name` is the name of
    #     the field. `:data_type` is the data type of the field. You
    #     can use {Arrow::DataType} or data type description as
    #     `:data_type` value.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #     There is a shortcut for convenience. If field description
    #     doesn't have `:data_type`, all keys except `:name` are
    #     processed as data type description. For example, the
    #     following field descriptions are the same:
    #
    #     ```ruby
    #     {name: "visible", data_type: {type: :boolean}}
    #     {name: "visible", type: :boolean} # Shortcut version
    #     ```
    #
    #   @option description [String, Symbol] :name The name of the field.
    #
    #   @option description [Arrow::DataType, Hash] :data_type The
    #     data type of the field. You can specify data type description
    #     by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a field with {Arrow::DataType}s
    #     Arrow::Field.new(name: "visible",
    #                      data_type: Arrow::BooleanDataType.new)
    #
    #   @example Create a field with data type description
    #     Arrow::Field.new(name: "visible", data_type: {type: :boolean})
    #
    #   @example Create a field with shortcut form
    #     Arrow::Field.new(name: "visible", type: :boolean)
    def initialize(*args)
      n_args = args.size
      unless n_args == 1 || n_args == 2
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end

      if n_args == 2
        name = args[0]
        data_type = DataType.resolve(args[1])
      else
        name = nil
        data_type = nil
        # Keys other than :name and :data_type are collected here and
        # used as the data type description when :data_type is absent.
        shortcut_description = {}
        args[0].each do |raw_key, value|
          key = raw_key.to_sym
          if key == :name
            name = value
          elsif key == :data_type
            data_type = DataType.resolve(value)
          else
            shortcut_description[key] = value
          end
        end
        data_type ||= DataType.resolve(shortcut_description)
      end

      initialize_raw(name, data_type)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/file-output-stream.rb b/src/arrow/ruby/red-arrow/lib/arrow/file-output-stream.rb new file mode 100644 index 000000000..f39ad14ca --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/file-output-stream.rb @@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license
# agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class FileOutputStream
    alias_method :initialize_raw, :initialize
    private :initialize_raw
    # Creates a stream for `path`.
    #
    # @param path [String] The path handed to the original
    #   constructor.
    # @param options [Boolean, Hash] Either the append flag itself, or
    #   a Hash whose `:append` value is the flag. Any other value, or
    #   a missing `:append`, means `false`.
    def initialize(path, options={})
      append =
        case options
        when true, false
          options
        when Hash
          options[:append]
        end
      initialize_raw(path, append.nil? ? false : append)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/file-system.rb b/src/arrow/ruby/red-arrow/lib/arrow/file-system.rb new file mode 100644 index 000000000..7d105b42a --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/file-system.rb @@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.
# See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class FileSystem
    alias_method :open_output_stream_raw, :open_output_stream
    # Opens an output stream for `path`.
    #
    # Without a block the stream is returned. With a block the stream
    # is yielded, closed when the block finishes (even if it raises),
    # and the block's value is returned.
    def open_output_stream(path)
      stream = open_output_stream_raw(path)
      return stream unless block_given?

      begin
        yield(stream)
      ensure
        stream.close
      end
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array-builder.rb b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array-builder.rb new file mode 100644 index 000000000..516d8143d --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array-builder.rb @@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class FixedSizeBinaryArrayBuilder
    class << self
      # Creates a builder for `data_type` and calls `build(values)`
      # on it.
      #
      # @since 3.0.0
      def build(data_type, values)
        new(data_type).build(values)
      end
    end

    alias_method :append_values_raw, :append_values
    # Appends several values at once: a Ruby `::Array` goes to the
    # original `append_values`, anything else to
    # `append_values_packed`.
    #
    # @since 3.0.0
    def append_values(values, is_valids=nil)
      return append_values_raw(values, is_valids) if values.is_a?(::Array)

      append_values_packed(values, is_valids)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array.rb b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array.rb new file mode 100644 index 000000000..37c121d8e --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/fixed-size-binary-array.rb @@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class FixedSizeBinaryArray
    alias_method :get_value_raw, :get_value
    # Returns the value at index `i` converted with `to_s`.
    #
    # @since 3.0.0
    def get_value(i)
      raw_value = get_value_raw(i)
      raw_value.to_s
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/generic-filterable.rb b/src/arrow/ruby/red-arrow/lib/arrow/generic-filterable.rb new file mode 100644 index 000000000..50a79142a --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/generic-filterable.rb @@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  module GenericFilterable
    # When included, keeps the includer's own #filter as #filter_raw
    # and makes #filter point at #filter_generic.
    def self.included(base)
      base.class_eval do
        alias_method :filter_raw, :filter
        alias_method :filter, :filter_generic
      end
    end

    # Filters with several kinds of filter object.
    #
    # * `::Array`: wrapped in a BooleanArray first.
    # * ChunkedArray: passed to #filter_chunked_array when the
    #   receiver has it, otherwise packed and passed to #filter_raw.
    # * Anything else: passed to #filter_raw as is.
    def filter_generic(filter, options=nil)
      case filter
      when ::Array
        return filter_raw(BooleanArray.new(filter), options)
      when ChunkedArray
        if respond_to?(:filter_chunked_array)
          return filter_chunked_array(filter, options)
        end
        # TODO: Implement this in C++
        filter = filter.pack
      end
      filter_raw(filter, options)
    end
  end
end
# diff --git a/src/arrow/ruby/red-arrow/lib/arrow/generic-takeable.rb b/src/arrow/ruby/red-arrow/lib/arrow/generic-takeable.rb new file mode 100644 index 000000000..f32b43f22 --- /dev/null +++ b/src/arrow/ruby/red-arrow/lib/arrow/generic-takeable.rb @@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  # Mixin that wraps the including class's `#take` so that it also accepts
  # a plain Ruby Array and an {Arrow::ChunkedArray} as the indices.
  module GenericTakeable
    # Hook run when the module is included: the existing `#take` is kept as
    # `#take_raw` and `#take` is re-pointed to {#take_generic}.
    def self.included(base)
      base.__send__(:alias_method, :take_raw, :take)
      base.__send__(:alias_method, :take, :take_generic)
    end

    # Dispatches on the class of `indices`.
    #
    # @param indices [::Array, ChunkedArray, Object] The indices. An Array
    #   is converted with `IntArrayBuilder.build`; a `ChunkedArray` goes to
    #   `#take_chunked_array`; anything else is passed to `#take_raw`
    #   untouched.
    #
    # @return Whatever the selected delegate returns.
    def take_generic(indices)
      case indices
      when ::Array
        index_array = IntArrayBuilder.build(indices)
        take_raw(index_array)
      when ChunkedArray
        take_chunked_array(indices)
      else
        take_raw(indices)
      end
    end
  end
end
module Arrow
  # Runs grouped ("hash_*") aggregations over a table for a set of keys.
  class Group
    # @param table The table to aggregate. This class reads `table.columns`.
    # @param keys The grouping keys. Each key is compared with column names
    #   through `#to_s`.
    def initialize(table, keys)
      @table = table
      @keys = keys
    end

    # Each shortcut below builds aggregation descriptions for one function
    # name and passes them to {#aggregate}.

    def count(*target_names)
      aggregations = build_aggregations("hash_count", target_names)
      aggregate(*aggregations)
    end

    def sum(*target_names)
      aggregations = build_aggregations("hash_sum", target_names)
      aggregate(*aggregations)
    end

    def product(*target_names)
      aggregations = build_aggregations("hash_product", target_names)
      aggregate(*aggregations)
    end

    def mean(*target_names)
      aggregations = build_aggregations("hash_mean", target_names)
      aggregate(*aggregations)
    end

    def min(*target_names)
      aggregations = build_aggregations("hash_min", target_names)
      aggregate(*aggregations)
    end

    def max(*target_names)
      aggregations = build_aggregations("hash_max", target_names)
      aggregate(*aggregations)
    end

    def stddev(*target_names)
      aggregations = build_aggregations("hash_stddev", target_names)
      aggregate(*aggregations)
    end

    def variance(*target_names)
      aggregations = build_aggregations("hash_variance", target_names)
      aggregate(*aggregations)
    end

    # Normalizes the given aggregations, runs them through an execute plan
    # (source node -> aggregate node -> sink node) and reads the whole
    # result from the sink's reader.
    #
    # @param aggregation [Symbol, String, Object] The first aggregation.
    #   See #normalize_aggregations for the accepted forms.
    # @param more_aggregations [::Array] Further aggregations.
    #
    # @return The value returned by the sink reader's `read_all`.
    def aggregate(aggregation, *more_aggregations)
      requested = [aggregation] + more_aggregations
      node_options = {
        aggregations: normalize_aggregations(requested),
        keys: @keys
      }
      plan = ExecutePlan.new
      source_node = plan.build_source_node(@table)
      aggregate_node = plan.build_aggregate_node(source_node, node_options)
      sink_node_options = SinkNodeOptions.new
      plan.build_sink_node(aggregate_node, sink_node_options)
      plan.validate
      plan.start
      plan.wait
      reader = sink_node_options.get_reader(aggregate_node.output_schema)
      reader.read_all
    end

    private
    # Returns `[function_name]` when no target is named, otherwise one
    # `"function_name(target)"` string per target.
    def build_aggregations(function_name, target_names)
      return [function_name] if target_names.empty?

      target_names.collect { |name| "#{function_name}(#{name})" }
    end

    # Expands every requested aggregation into the final list:
    #
    # * `:all` expands to every function name from #all_function_names.
    # * `"function(input)"` becomes `{function:, input:}`.
    # * A bare count function targets every non-key column.
    # * A bare any/all function targets every boolean non-key column.
    # * Any other String targets every numeric non-key column.
    # * Everything else is passed through unchanged.
    def normalize_aggregations(aggregations)
      aggregations.each_with_object([]) do |aggregation, normalized|
        case aggregation
        when :all
          normalized.concat(normalize_aggregations(all_function_names))
        # NOTE(review): `[a-zA-Z0-9_].+?` needs at least two characters
        # before "(" -- possibly meant `[a-zA-Z0-9_]+?`; confirm before
        # changing.
        when /\A([a-zA-Z0-9_].+?)\((.+?)\)\z/
          normalized << {function: $1, input: $2.strip}
        when "count", "hash_count"
          normalized.concat(expand_aggregation(aggregation, target_columns))
        when "any", "hash_any", "all", "hash_all"
          normalized.concat(expand_aggregation(aggregation,
                                               boolean_target_columns))
        when String
          normalized.concat(expand_aggregation(aggregation,
                                               numeric_target_columns))
        else
          normalized << aggregation
        end
      end
    end

    # The function names that `:all` expands to, in expansion order.
    def all_function_names
      [
        "hash_count",
        "hash_sum",
        "hash_product",
        "hash_mean",
        "hash_stddev",
        "hash_variance",
        # "hash_tdigest",
        "hash_min",
        "hash_max",
        "hash_any",
        "hash_all",
      ]
    end

    # Builds one `{function:, input:}` description per given column.
    def expand_aggregation(function, columns)
      columns.collect do |column|
        {function: function, input: column.name}
      end
    end

    # Memoized list of the columns that are not grouping keys.
    def target_columns
      @target_columns ||= find_target_columns
    end

    def find_target_columns
      key_names = @keys.collect(&:to_s)
      @table.columns.reject do |column|
        key_names.include?(column.name)
      end
    end

    # Memoized list of the non-key columns with a boolean data type.
    def boolean_target_columns
      @boolean_target_columns ||= find_boolean_target_columns
    end

    def find_boolean_target_columns
      target_columns.select do |column|
        column.data_type.is_a?(BooleanDataType)
      end
    end

    # Memoized list of the non-key columns with a numeric data type.
    def numeric_target_columns
      @numeric_target_columns ||= find_numeric_target_columns
    end

    def find_numeric_target_columns
      target_columns.select do |column|
        column.data_type.is_a?(NumericDataType)
      end
    end
  end
end
module Arrow
  class ListArrayBuilder
    # Creates a builder for `data_type` and returns the result of its
    # `#build` for `values`.
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts appending a list record. You also need to append list
    #   value by {#value_builder}.
    #
    # @overload append_value(list)
    #
    #   Appends a list record including list value.
    #
    #   @param value [nil, ::Array] The list value of the record.
    #
    #     If this is `nil`, the list record is null.
    #
    #     If this is `Array`, it's the list value of the record.
    #
    # @raise [ArgumentError] If more than one argument is given or the
    #   argument is neither `nil` nor an `Array`.
    #
    # @since 0.12.0
    def append_value(*args)
      n_args = args.size
      if n_args > 1
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end
      return append_value_raw if n_args.zero?

      value = args[0]
      case value
      when nil
        append_null
      when ::Array
        # Open the list record first, then fill it via the value builder.
        append_value_raw
        @value_builder ||= value_builder
        @value_builder.append(*value)
      else
        message = "list value must be nil or Array: #{value.inspect}"
        raise ArgumentError, message
      end
    end

    # Appends every list in `lists`. When `is_valids` is given, it drives
    # the iteration: a falsy flag appends a null record instead of the list
    # at the same index.
    def append_values(lists, is_valids=nil)
      return lists.each { |list| append_value(list) } unless is_valids

      is_valids.each_with_index do |is_valid, i|
        is_valid ? append_value(lists[i]) : append_null
      end
    end

    # @since 0.12.0
    def append(*values)
      # For backward compatibility
      return append_value if values.empty?

      super
    end
  end
end
module Arrow
  class ListDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::ListDataType}.
    #
    # The single argument is interpreted in this order: first as a data
    # type (see the `initialize(data_type)` overload), and only when that
    # yields nothing as a field or a description holding a field.
    #
    # @overload initialize(field)
    #
    #   @param field [Arrow::Field, Hash] The field of the list data
    #     type. A `Hash` is treated as a field description.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(name: "visible", type: :boolean)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the list data
    #     type. It must have `:field` value.
    #
    #   @option description [Arrow::Field, Hash] :field The field of
    #     the list data type. A `Hash` is treated as a field description.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(field: visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
    #
    # @overload initialize(data_type)
    #
    #   @param data_type [Arrow::DataType, String, Symbol,
    #     ::Array<String>, ::Array<Symbol>, Hash] The element data
    #     type of the list data type. A field named `"item"` is created
    #     from the data type automatically.
    #
    #     See {Arrow::DataType.resolve} how to specify data type.
    #
    #   @example Create a list data type with {Arrow::DataType}
    #     Arrow::ListDataType.new(Arrow::BooleanDataType.new)
    #
    #   @example Create a list data type with data type name as String
    #     Arrow::ListDataType.new("boolean")
    #
    #   @example Create a list data type with data type name as Symbol
    #     Arrow::ListDataType.new(:boolean)
    #
    #   @example Create a list data type with data type as Array
    #     Arrow::ListDataType.new([:time32, :milli])
    def initialize(arg)
      data_type = resolve_data_type(arg)
      field = data_type ? Field.new(default_field_name, data_type) : resolve_field(arg)
      initialize_raw(field)
    end

    private
    # Returns the resolved data type when `arg` describes a data type,
    # `nil` otherwise.
    def resolve_data_type(arg)
      case arg
      when DataType, String, Symbol, ::Array
        DataType.resolve(arg)
      when Hash
        # A Hash carrying :name, or lacking :type, is not resolved here; the
        # caller then falls back to #resolve_field.
        return nil if arg[:name] or not arg[:type]

        DataType.resolve(arg)
      end
    end

    # The field name used when only a data type is given.
    def default_field_name
      "item"
    end

    # Unwraps `{field: ...}` descriptions, builds a Field from a Hash
    # description and returns anything else as-is.
    def resolve_field(arg)
      arg = arg[:field] if arg.is_a?(Hash) and arg.key?(:field)
      return Field.new(arg) if arg.is_a?(Hash)

      arg
    end
  end
end
require "arrow/block-closable"

module Arrow
  # GObject Introspection loader for the "Arrow" namespace. After the
  # generated bindings are loaded it requires the Ruby-side extensions and
  # adjusts a few generated method names.
  class Loader < GObjectIntrospection::Loader
    # Loads the "Arrow" namespace into the {Arrow} module.
    def self.load
      super("Arrow", Arrow)
    end

    private
    # Runs the Ruby-side setup steps once the bindings are loaded.
    def post_load(repository, namespace)
      require_libraries
      require_extension_library
      gc_guard
    end

    # Requires the Ruby-side libraries: the mixins first, then the rest in
    # the order listed.
    def require_libraries
      require "arrow/column-containable"
      require "arrow/field-containable"
      require "arrow/generic-filterable"
      require "arrow/generic-takeable"
      require "arrow/record-containable"
      require "arrow/symbol-values-appendable"

      require "arrow/aggregate-node-options"
      require "arrow/aggregation"
      require "arrow/array"
      require "arrow/array-builder"
      require "arrow/bigdecimal-extension"
      require "arrow/binary-dictionary-array-builder"
      require "arrow/buffer"
      require "arrow/chunked-array"
      require "arrow/column"
      require "arrow/compression-type"
      require "arrow/csv-loader"
      require "arrow/csv-read-options"
      require "arrow/data-type"
      require "arrow/date32-array"
      require "arrow/date32-array-builder"
      require "arrow/date64-array"
      require "arrow/date64-array-builder"
      require "arrow/datum"
      require "arrow/decimal128"
      require "arrow/decimal128-array"
      require "arrow/decimal128-array-builder"
      require "arrow/decimal128-data-type"
      require "arrow/decimal256"
      require "arrow/decimal256-array"
      require "arrow/decimal256-array-builder"
      require "arrow/decimal256-data-type"
      require "arrow/dense-union-data-type"
      require "arrow/dictionary-array"
      require "arrow/dictionary-data-type"
      require "arrow/equal-options"
      require "arrow/expression"
      require "arrow/field"
      require "arrow/file-output-stream"
      require "arrow/file-system"
      require "arrow/fixed-size-binary-array"
      require "arrow/fixed-size-binary-array-builder"
      require "arrow/group"
      require "arrow/list-array-builder"
      require "arrow/list-data-type"
      require "arrow/map-array"
      require "arrow/map-array-builder"
      require "arrow/map-data-type"
      require "arrow/null-array"
      require "arrow/null-array-builder"
      require "arrow/path-extension"
      require "arrow/record"
      require "arrow/record-batch"
      require "arrow/record-batch-builder"
      require "arrow/record-batch-file-reader"
      require "arrow/record-batch-iterator"
      require "arrow/record-batch-reader"
      require "arrow/record-batch-stream-reader"
      require "arrow/rolling-window"
      require "arrow/scalar"
      require "arrow/schema"
      require "arrow/slicer"
      require "arrow/sort-key"
      require "arrow/sort-options"
      require "arrow/source-node-options"
      require "arrow/sparse-union-data-type"
      require "arrow/string-dictionary-array-builder"
      require "arrow/struct-array"
      require "arrow/struct-array-builder"
      require "arrow/struct-data-type"
      require "arrow/table"
      require "arrow/table-concatenate-options"
      require "arrow/table-formatter"
      require "arrow/table-list-formatter"
      require "arrow/table-table-formatter"
      require "arrow/table-loader"
      require "arrow/table-saver"
      require "arrow/tensor"
      require "arrow/time"
      require "arrow/time32-array"
      require "arrow/time32-array-builder"
      require "arrow/time32-data-type"
      require "arrow/time64-array"
      require "arrow/time64-array-builder"
      require "arrow/time64-data-type"
      require "arrow/timestamp-array"
      require "arrow/timestamp-array-builder"
      require "arrow/timestamp-data-type"
      require "arrow/writable"
    end

    # Requires the compiled extension.
    def require_extension_library
      require "arrow.so"
    end

    # Prepends ConstructorArgumentsGCGuardable to the classes listed below.
    def gc_guard
      require "arrow/constructor-arguments-gc-guardable"

      guarded_classes = [
        @base_module::BinaryScalar,
        @base_module::Buffer,
        @base_module::DenseUnionScalar,
        @base_module::FixedSizeBinaryScalar,
        @base_module::LargeBinaryScalar,
        @base_module::LargeListScalar,
        @base_module::LargeStringScalar,
        @base_module::ListScalar,
        @base_module::MapScalar,
        @base_module::SparseUnionScalar,
        @base_module::StringScalar,
        @base_module::StructScalar,
      ]
      guarded_classes.each do |guarded_class|
        guarded_class.prepend(ConstructorArgumentsGCGuardable)
      end
    end

    # Loads the object as usual, then extends every class that defines
    # `#close` with BlockClosable.
    def load_object_info(info)
      super

      klass = @base_module.const_get(rubyish_class_name(info))
      klass.extend(BlockClosable) if klass.method_defined?(:close)
    end

    # Renames or skips selected generated methods before delegating to the
    # default loader. The renames are keyed by the class name.
    def load_method_info(info, klass, method_name)
      class_name = klass.name

      # Every class whose name ends with "Array" exposes the generated
      # "values" as "values_raw".
      case class_name
      when /Array\z/
        method_name = "values_raw" if method_name == "values"
      end

      case class_name
      when /Builder\z/
        # The generated "append" is not loaded for builder classes.
        return if method_name == "append"
      when "Arrow::StringArray"
        case method_name
        when "get_value"
          method_name = "get_raw_value"
        when "get_string"
          method_name = "get_value"
        end
      when "Arrow::Date32Array",
           "Arrow::Date64Array",
           "Arrow::Decimal128Array",
           "Arrow::Decimal256Array",
           "Arrow::Time32Array",
           "Arrow::Time64Array",
           "Arrow::TimestampArray"
        method_name = "get_raw_value" if method_name == "get_value"
      when "Arrow::Decimal128", "Arrow::Decimal256"
        method_name = "dup" if method_name == "copy"
      when "Arrow::BooleanScalar"
        method_name = "value" if method_name == "value?"
      end

      super(info, klass, method_name)
    end
  end
end

module Arrow
  class MapArrayBuilder
    # Creates a builder for `data_type` and returns the result of its
    # `#build` for `values`.
    def self.build(data_type, values)
      new(data_type).build(values)
    end

    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts appending a map record. You need to append
    #   values of map by {#key_builder} and {#item_builder}.
    #
    # @overload append_value(value)
    #
    #   Appends a map record including key and item values.
    #
    #   @param value [nil, #each] The map record.
    #
    #     If this is `nil`, the map record is null.
    #
    #     If this is an `Object` that has `#each`, each value is a pair of
    #     key and item.
    #
    # @raise [ArgumentError] If more than one argument is given or the
    #   argument is neither `nil` nor responds to `#each`.
    #
    # @since 6.0.0
    def append_value(*args)
      n_args = args.size
      if n_args > 1
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end
      return append_value_raw if n_args.zero?

      value = args[0]
      return append_null if value.nil?

      unless value.respond_to?(:each)
        message = "map value must be nil, Hash or Object that has #each: #{value.inspect}"
        raise ArgumentError, message
      end

      # Open the map record first, then fill keys and items separately.
      append_value_raw
      @key_builder ||= key_builder
      @item_builder ||= item_builder
      if value.is_a?(Hash)
        keys = value.keys
        items = value.values
      else
        keys = []
        items = []
        value.each do |key, item|
          keys << key
          items << item
        end
      end
      @key_builder.append(*keys)
      @item_builder.append(*items)
    end

    alias_method :append_values_raw, :append_values

    # Appends many map records. When the first value is an Integer the
    # call is handed to the raw implementation unchanged; otherwise every
    # value goes through {#append_value}, with `is_valids` (when given)
    # selecting null records.
    def append_values(values, is_valids=nil)
      return append_values_raw(values, is_valids) if values[0].is_a?(Integer)
      return values.each { |map| append_value(map) } unless is_valids

      is_valids.each_with_index do |is_valid, i|
        is_valid ? append_value(values[i]) : append_null
      end
    end
  end
end
module Arrow
  class MapArray
    # Converts the entries returned by the inherited `#get_value` into a
    # Hash, using each entry's `"key"` and `"value"` members.
    #
    # @param i The index handed to the inherited `#get_value`.
    #
    # @return [Hash] The key to item mapping of the i-th map.
    def get_value(i)
      super.each_with_object({}) do |item, result|
        result[item["key"]] = item["value"]
      end
    end
  end
end

module Arrow
  class MapDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::MapDataType}.
    #
    # @overload initialize(key, item)
    #
    #   @param key [Arrow::DataType, Hash, String, Symbol]
    #     The key data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @param item [Arrow::DataType, Hash, String, Symbol]
    #     The item data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a map data type for `{0: "Hello", 1: "World"}`
    #     key = :int8
    #     item = :string
    #     Arrow::MapDataType.new(key, item)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the map data
    #     type. It must have `:key`, `:item` values.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :key The key data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :item The item data type of the map data type.
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a map data type for `{0: "Hello", 1: "World"}`
    #     Arrow::MapDataType.new(key: :int8, item: :string)
    #
    # @raise [ArgumentError] If the number of arguments is not 1 or 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        key = description[:key]
        item = description[:item]
      when 2
        key, item = args
      else
        # Same wording as Ruby's own arity errors ("given N, expected ...").
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      key = DataType.resolve(key)
      item = DataType.resolve(item)
      initialize_raw(key, item)
    end
  end
end
module Arrow
  class NullArrayBuilder
    class << self
      # Narrows the inherited check: a single Integer argument is never
      # treated as buildable values.
      #
      # @param args [::Array] The candidate arguments.
      #
      # @return The inherited result when it is falsy; otherwise `false`
      #   for exactly one Integer argument and `true` for anything else.
      def buildable?(args)
        inherited = super
        return inherited unless inherited

        args.size != 1 || !args[0].is_a?(Integer)
      end
    end
  end
end

module Arrow
  class NullArray
    # Every element of a null array is null, so the index is not consulted.
    #
    # @return [nil] Always `nil`.
    def get_value(i)
      nil
    end
  end
end
module Arrow
  # Derives a format name, and optionally a compression, from the
  # extension(s) of a path.
  class PathExtension
    # @param path The path to inspect. It is passed to `File.basename`.
    def initialize(path)
      @path = path
    end

    # Splits the base name on "." and interprets the trailing components.
    #
    # @return [Hash] `{}` when the base name has fewer than two components.
    #   `{format:}` with the downcased last component otherwise, unless
    #   there are more than two components and
    #   `CompressionType.resolve_extension` recognizes the last one: then
    #   `{format:, compression:}` with the downcased second-to-last
    #   component as the format.
    def extract
      components = ::File.basename(@path).split(".")
      return {} if components.size < 2

      extension = components.last.downcase
      return {format: extension} if components.size == 2

      compression = CompressionType.resolve_extension(extension)
      return {format: extension} unless compression

      {
        format: components[-2].downcase,
        compression: compression,
      }
    end
  end
end
module Arrow
  # Converts a "raw table" into the schema, column values and row count
  # needed to build a table.
  #
  # Two input shapes are handled:
  #
  # * An `Array` whose first element is a `Column`: fields and data are
  #   taken from the columns.
  # * Anything else that yields `name, array` pairs from `#each`: a plain
  #   Ruby `Array` value is built with `ArrayBuilder.build` and a field is
  #   created per pair from the name and the array's `value_data_type`.
  class RawTableConverter
    # @return [Integer] The length of the first column's values, or `0`
    #   when there is no column.
    attr_reader :n_rows
    # @return The schema built from the collected fields.
    attr_reader :schema
    # @return [::Array] The per-column values.
    attr_reader :values

    # @param raw_table [::Array, #each] The raw table to convert. The
    #   conversion runs immediately.
    def initialize(raw_table)
      @raw_table = raw_table
      convert
    end

    private
    def convert
      if @raw_table.is_a?(::Array) and @raw_table[0].is_a?(Column)
        fields = @raw_table.collect(&:field)
        @schema = Schema.new(fields)
        @values = @raw_table.collect(&:data)
      else
        fields = []
        @values = []
        @raw_table.each do |name, array|
          array = ArrayBuilder.build(array) if array.is_a?(::Array)
          fields << Field.new(name.to_s, array.value_data_type)
          @values << array
        end
        @schema = Schema.new(fields)
      end
      # An empty raw table has no first column to measure; it has 0 rows.
      @n_rows = @values.empty? ? 0 : @values[0].length
    end
  end
end
module Arrow
  class RecordBatchBuilder
    # Builds in one step: creates a builder for `schema`, appends `data`
    # and returns the result of `#flush`.
    #
    # @since 0.12.0
    def self.build(schema, data)
      builder = new(schema)
      builder.append(data)
      builder.flush
    end

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # @param schema [Arrow::Schema, Object] The schema. Anything that is
    #   not a `Schema` is passed to `Schema.new` first.
    def initialize(schema)
      schema = Schema.new(schema) unless schema.is_a?(Schema)
      initialize_raw(schema)
      # Field name to column index, used to resolve names in #[] and
      # #append_records.
      @name_to_index = {}
      schema.fields.each_with_index do |field, index|
        @name_to_index[field.name] = index
      end
    end

    # Returns a column builder selected by name (String or Symbol) or by
    # index.
    #
    # @since 0.12.0
    def [](name_or_index)
      case name_or_index
      when String, Symbol
        self[resolve_name(name_or_index)]
      else
        column_builders[name_or_index]
      end
    end

    # Appends each value: a Hash is treated as columns, anything else as
    # records.
    #
    # @since 0.12.0
    def append(*values)
      values.each do |value|
        case value
        when Hash then append_columns(value)
        else append_records(value)
        end
      end
    end

    # Appends records row by row. A record is `nil`, a Hash keyed by column
    # name (unknown names are ignored), or an object yielding values in
    # column order. Columns that a record leaves unset receive `nil`.
    #
    # @since 0.12.0
    def append_records(records)
      columns = n_columns.times.map { [] }
      records.each_with_index do |record, nth_record|
        case record
        when nil
          # Nothing to collect; every column is padded below.
        when Hash
          record.each do |name, value|
            nth_column = resolve_name(name)
            columns[nth_column] << value unless nth_column.nil?
          end
        else
          record.each_with_index do |value, nth_column|
            columns[nth_column] << value
          end
        end
        # Pad every column that did not get a value for this record.
        expected_size = nth_record + 1
        columns.each do |column|
          column << nil if column.size != expected_size
        end
      end
      columns.each_with_index do |column, index|
        self[index].append(*column)
      end
    end

    # Appends values column by column from `name => values` pairs.
    #
    # @since 0.12.0
    def append_columns(columns)
      columns.each do |name, values|
        builder = self[name]
        builder.append(*values)
      end
    end

    # Memoized list of the builders of all columns.
    #
    # @since 0.13.0
    def column_builders
      @column_builders ||= n_columns.times.map do |index|
        get_column_builder(index)
      end
    end

    private
    # Maps a column name (compared as String) to its index, or `nil` for an
    # unknown name.
    def resolve_name(name)
      @name_to_index[name.to_s]
    end
  end
end
# File: red-arrow/lib/arrow/record-batch-file-reader.rb
module Arrow
  class RecordBatchFileReader
    include Enumerable

    # Iterates over the record batches by index, from `0` up to
    # `n_record_batches - 1`.
    #
    # @yield [record_batch] Each value returned by `get_record_batch(i)`.
    # @return [Enumerator] When no block is given; its size is
    #   `n_record_batches`.
    def each
      # Without a block the bare `yield` below would raise
      # LocalJumpError, so hand back an enumerator instead.
      return to_enum(__method__) { n_record_batches } unless block_given?

      n_record_batches.times do |i|
        yield(get_record_batch(i))
      end
    end
  end
end

# File: red-arrow/lib/arrow/record-batch-iterator.rb
module Arrow
  class RecordBatchIterator
    # `to_a` is simply another name for `to_list`.
    alias_method :to_a, :to_list
  end
end
# File: red-arrow/lib/arrow/record-batch-reader.rb
module Arrow
  class RecordBatchReader
    class << self
      # Tries to build a reader from `value`.
      #
      # * {Arrow::RecordBatch}: wrapped into a one-element list.
      # * non-empty `::Array` made only of {Arrow::RecordBatch}: used
      #   as-is.
      # * {Arrow::Table}: converted with `TableBatchReader.new`.
      # * anything else (including an empty or mixed array): `nil`.
      #
      # @api private
      def try_convert(value)
        record_batches =
          case value
          when ::Array
            value
          when RecordBatch
            [value]
          when Table
            return TableBatchReader.new(value)
          else
            return nil
          end
        return nil if record_batches.empty?
        return nil unless record_batches.all? {|v| v.is_a?(RecordBatch)}
        new(record_batches)
      end
    end
  end
end
# File: red-arrow/lib/arrow/record-batch-stream-reader.rb
module Arrow
  class RecordBatchStreamReader
    include Enumerable

    # Yields record batches obtained from `next_record_batch` until it
    # returns `nil`.
    #
    # @yield [record_batch] Each non-nil value of `next_record_batch`.
    # @return [Enumerator] When no block is given.
    def each
      # Without a block the bare `yield` below would raise
      # LocalJumpError, so hand back an enumerator instead.
      return to_enum(__method__) unless block_given?

      loop do
        record_batch = next_record_batch
        break if record_batch.nil?
        yield(record_batch)
      end
    end
  end
end
# File: red-arrow/lib/arrow/record-batch.rb

require "arrow/raw-table-converter"

module Arrow
  class RecordBatch
    include ColumnContainable
    include RecordContainable
    include Enumerable

    class << self
      # Dispatches on the number of arguments:
      #
      # * 1: a raw table, converted with {Arrow::RawTableConverter};
      # * 2: `(schema, data)`, built with `RecordBatchBuilder.build`;
      # * 3: passed through unchanged to the underlying constructor.
      #
      # @raise [ArgumentError] For any other number of arguments.
      def new(*args)
        case args.size
        when 1
          converter = RawTableConverter.new(args[0])
          super(converter.schema, converter.n_rows, converter.values)
        when 2
          RecordBatchBuilder.build(*args)
        when 3
          super
        else
          raise ArgumentError,
                "wrong number of arguments (given #{args.size}, expected 1..3)"
        end
      end
    end

    alias_method :each, :each_record

    alias_method :size, :n_rows
    alias_method :length, :n_rows

    # Converts the record batch to {Arrow::Table}.
    #
    # @return [Arrow::Table]
    #
    # @since 0.12.0
    def to_table
      Table.new(schema, [self])
    end

    # A name that `find_column` resolves counts as a supported method.
    def respond_to_missing?(name, include_private)
      find_column(name) ? true : super
    end

    # Argument-less calls whose name matches a column return that column;
    # everything else goes through the default handling.
    def method_missing(name, *args, &block)
      return super unless args.empty?
      find_column(name) || super
    end
  end
end
# File: red-arrow/lib/arrow/record-containable.rb
module Arrow
  module RecordContainable
    # Yields one {Arrow::Record} per row index (`0...n_rows`).
    #
    # @param reuse_record [Boolean] When true, a single record object is
    #   created once and only its `index` is updated before each yield;
    #   otherwise a fresh record is created for every row.
    # @return [Enumerator] When no block is given.
    def each_record(reuse_record: false)
      return to_enum(__method__, reuse_record: reuse_record) unless block_given?

      shared_record = reuse_record ? Record.new(self, nil) : nil
      n_rows.times do |row_index|
        if shared_record
          shared_record.index = row_index
          yield(shared_record)
        else
          yield(Record.new(self, row_index))
        end
      end
    end
  end
end
# File: red-arrow/lib/arrow/record.rb
module Arrow
  # One row of a column container, addressed by `index`.
  class Record
    # @return [Object] The container whose columns are read.
    attr_reader :container
    # @return [Integer, nil] The row position inside each column.
    attr_accessor :index

    def initialize(container, index)
      @container = container
      @index = index
    end

    # @param column_name_or_column_index [Object] Passed to the
    #   container's `find_column`.
    # @return [Object, nil] The value at `index` in that column, or `nil`
    #   when no column is found.
    def [](column_name_or_column_index)
      column = @container.find_column(column_name_or_column_index)
      return nil if column.nil?
      column[@index]
    end

    # @return [::Array] The value of every column at `index`.
    def to_a
      @container.columns.collect do |column|
        column[@index]
      end
    end

    # @return [Hash] Column name => value at `index`.
    def to_h
      attributes = {}
      @container.columns.each do |column|
        attributes[column.name] = column[@index]
      end
      attributes
    end

    def respond_to_missing?(name, include_private)
      return true if @container.find_column(name)
      super
    end

    # Argument-less calls whose name matches a column return the value of
    # that column at `index`.
    def method_missing(name, *args, &block)
      if args.empty?
        column = @container.find_column(name)
        return column[@index] if column
      end
      super
    end
  end
end

# File: red-arrow/lib/arrow/rolling-window.rb
module Arrow
  # Experimental
  #
  # TODO: Most of this code should be implemented in Apache Arrow C++.
  class RollingWindow
    # @param table [#[]] The source of columns (`table[key]`).
    # @param size [Integer, nil] When set, the column is cut with
    #   `each_slice(size)` before differences are computed.
    def initialize(table, size)
      @table = table
      @size = size
    end

    # Computes, for each position `i >= diff`, the element at `i` minus
    # the element at `i - diff`. The first `diff` results are `nil`, and
    # so is any result where either operand is `nil`.
    #
    # @param key [Object] The column key.
    # @param diff [Integer] The distance between the compared elements.
    # @return [Object] What `ArrayBuilder.build` returns for the results.
    def lag(key, diff: 1)
      column = @table[key]
      if @size
        # NOTE(review): with a size, the elements compared below are the
        # slices themselves, not single values -- confirm this is intended.
        windows = column.each_slice(@size)
      else
        windows = column
      end
      lag_values = [nil] * diff
      windows.each_cons(diff + 1) do |values|
        target = values[0]
        # Each window holds diff + 1 elements, so the element `diff`
        # steps after the target is the last one, not values[1].
        current = values[-1]
        if target.nil? || current.nil?
          lag_values << nil
        else
          lag_values << current - target
        end
      end
      ArrayBuilder.build(lag_values)
    end
  end
end

# File: red-arrow/lib/arrow/scalar.rb
module Arrow
  class Scalar
    # @param other [Arrow::Scalar] The scalar to be compared.
    # @param options [Arrow::EqualOptions, Hash] (nil)
    #   The options to customize how to compare.
    #
    # @return [Boolean]
    #   `true` if both of them have the same data, `false` otherwise.
    #
    # @since 5.0.0
    def equal_scalar?(other, options=nil)
      equal_options(other, options)
    end
  end
end

# File: red-arrow/lib/arrow/schema.rb
module Arrow
  class Schema
    include FieldContainable

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Schema}.
    #
    # @overload initialize(fields)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     schema. You can mix {Arrow::Field} and field description in
    #     the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a schema with {Arrow::Field}s
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::Schema.new([visible_field])
    #
    #   @example Create a schema with field descriptions
    #      visible_field_description = {
    #        name: "visible",
    #        data_type: :boolean,
    #      }
    #      Arrow::Schema.new([visible_field_description])
    #
    #   @example Create a schema with {Arrow::Field}s and field descriptions
    #      fields = [
    #        Arrow::Field.new("visible", :boolean),
    #        {
    #          name: "count",
    #          type: :int32,
    #        },
    #      ]
    #      Arrow::Schema.new(fields)
    #
    # @overload initialize(fields)
    #
    #   @param fields [Hash{String, Symbol => Arrow::DataType, Hash}]
    #     The pairs of field name and field data type of the schema.
    #     You can mix {Arrow::DataType} and data description for field
    #     data type.
    #
    #     See {Arrow::DataType.new} how to specify data type description.
    #
    #   @example Create a schema with fields
    #      fields = {
    #        "visible" => Arrow::BooleanDataType.new,
    #        :count => :int32,
    #        :tags => {
    #          type: :list,
    #          field: {
    #            name: "tag",
    #            type: :string,
    #          },
    #        },
    #      }
    #      Arrow::Schema.new(fields)
    def initialize(fields)
      case fields
      when ::Array
        # Field descriptions are converted; existing fields are kept.
        fields = fields.collect do |field|
          field = Field.new(field) unless field.is_a?(Field)
          field
        end
      when Hash
        fields = fields.collect do |name, data_type|
          Field.new(name, data_type)
        end
      end
      initialize_raw(fields)
    end

    alias_method :[], :find_field

    alias_method :to_s_raw, :to_s
    # @param show_metadata [Boolean] Forwarded to `to_string_metadata`.
    def to_s(show_metadata: false)
      to_string_metadata(show_metadata)
    end
  end
end
# File: red-arrow/lib/arrow/slicer.rb
module Arrow
  # Builds condition objects for the columns of a table. Conditions are
  # only descriptions; the work happens when `#evaluate` is called on them.
  class Slicer
    def initialize(table)
      @table = table
    end

    # @return [ColumnCondition, nil] A condition wrapping
    #   `table[column_name]`, or `nil` when the table returns `nil`.
    def [](column_name)
      column = @table[column_name]
      return nil if column.nil?
      ColumnCondition.new(column)
    end

    def respond_to_missing?(name, include_private)
      return true if self[name]
      super
    end

    # Argument-less calls whose name resolves through `#[]` return the
    # column condition.
    def method_missing(name, *args, &block)
      if args.empty?
        column_condition = self[name]
        return column_condition if column_condition
      end
      super
    end

    module Helper
      class << self
        # Returns `column.data` as-is for a boolean column, otherwise
        # the result of running the "cast" function to boolean on it.
        def ensure_boolean(column)
          case column.data_type
          when Arrow::BooleanDataType
            column.data
          else
            options = CastOptions.new
            options.to_data_type = Arrow::BooleanDataType.new
            Function.find("cast").execute([column.data], options).value
          end
        end
      end
    end

    # Base class of all conditions. Subclasses must define `#evaluate`;
    # `&`, `|` and `^` combine two conditions.
    class Condition
      def evaluate
        message = "Slicer::Condition must define \#evaluate: #{inspect}"
        raise NotImplementedError.new(message)
      end

      def &(condition)
        AndCondition.new(self, condition)
      end

      def |(condition)
        OrCondition.new(self, condition)
      end

      def ^(condition)
        XorCondition.new(self, condition)
      end
    end

    # Combines the evaluated results of two conditions with the function
    # supplied by the subclass' private `#function`.
    class LogicalCondition < Condition
      def initialize(condition1, condition2)
        @condition1 = condition1
        @condition2 = condition2
      end

      def evaluate
        function.execute([@condition1.evaluate, @condition2.evaluate]).value
      end
    end

    class AndCondition < LogicalCondition
      private
      def function
        Function.find("and")
      end
    end

    class OrCondition < LogicalCondition
      private
      def function
        Function.find("or")
      end
    end

    class XorCondition < LogicalCondition
      private
      def function
        Function.find("xor")
      end
    end

    # Condition on a single column. The comparison operators are
    # overloaded to return condition objects instead of booleans.
    class ColumnCondition < Condition
      def initialize(column)
        @column = column
      end

      def evaluate
        Helper.ensure_boolean(@column)
      end

      def !@
        NotColumnCondition.new(@column)
      end

      # Goes through the overloaded `==` below on purpose: the result is
      # an EqualCondition with a nil value, not a boolean.
      def null?
        self == nil
      end

      # Goes through the overloaded `!=` below on purpose: the result is
      # a NotEqualCondition with a nil value, not a boolean.
      def valid?
        self != nil
      end

      def ==(value)
        EqualCondition.new(@column, value)
      end

      def !=(value)
        NotEqualCondition.new(@column, value)
      end

      def <(value)
        LessCondition.new(@column, value)
      end

      def <=(value)
        LessEqualCondition.new(@column, value)
      end

      def >(value)
        GreaterCondition.new(@column, value)
      end

      def >=(value)
        GreaterEqualCondition.new(@column, value)
      end

      def in?(values)
        InCondition.new(@column, values)
      end

      def select(&block)
        SelectCondition.new(@column, block)
      end

      def reject(&block)
        RejectCondition.new(@column, block)
      end
    end

    # Negation of ColumnCondition: the boolean data is passed through
    # the "invert" function.
    class NotColumnCondition < Condition
      def initialize(column)
        @column = column
      end

      def evaluate
        data = Helper.ensure_boolean(@column)
        Function.find("invert").execute([data]).value
      end

      def !@
        ColumnCondition.new(@column)
      end
    end

    # `column == value`; a nil value uses the "is_null" function.
    class EqualCondition < Condition
      def initialize(column, value)
        @column = column
        @value = value
      end

      def !@
        NotEqualCondition.new(@column, @value)
      end

      def evaluate
        if @value.nil?
          Function.find("is_null").execute([@column.data]).value
        else
          Function.find("equal").execute([@column.data, @value]).value
        end
      end
    end

    # `column != value`; a nil value uses the "is_valid" function.
    class NotEqualCondition < Condition
      def initialize(column, value)
        @column = column
        @value = value
      end

      def !@
        EqualCondition.new(@column, @value)
      end

      def evaluate
        if @value.nil?
          Function.find("is_valid").execute([@column.data]).value
        else
          Function.find("not_equal").execute([@column.data, @value]).value
        end
      end
    end

    # `column < value`; negates to GreaterEqualCondition.
    class LessCondition < Condition
      def initialize(column, value)
        @column = column
        @value = value
      end

      def !@
        GreaterEqualCondition.new(@column, @value)
      end

      def evaluate
        Function.find("less").execute([@column.data, @value]).value
      end
    end

    # `column <= value`; negates to GreaterCondition.
    class LessEqualCondition < Condition
      def initialize(column, value)
        @column = column
        @value = value
      end

      def !@
        GreaterCondition.new(@column, @value)
      end

      def evaluate
        Function.find("less_equal").execute([@column.data, @value]).value
      end
    end

    # `column > value`; negates to LessEqualCondition.
    class GreaterCondition < Condition
      def initialize(column, value)
        @column = column
        @value = value
      end

      def !@
        LessEqualCondition.new(@column, @value)
      end

      def evaluate
        Function.find("greater").execute([@column.data, @value]).value
      end
    end

    # `column >= value`; negates to LessCondition.
    class GreaterEqualCondition < Condition
      def initialize(column, value)
        @column = column
        @value = value
      end

      def !@
        LessCondition.new(@column, @value)
      end

      def evaluate
        Function.find("greater_equal").execute([@column.data, @value]).value
      end
    end

    # Membership test using the "is_in" function.
    class InCondition < Condition
      def initialize(column, values)
        @column = column
        @values = values
      end

      def !@
        NotInCondition.new(@column, @values)
      end

      def evaluate
        values = @values
        # NOTE(review): unqualified `Array` presumably resolves to
        # Arrow::Array inside this namespace (other files write `::Array`
        # for Ruby's own Array) -- confirm.
        values = Array.new(values) unless values.is_a?(Array)
        options = SetLookupOptions.new(values)
        Function.find("is_in").execute([@column.data], options).value
      end
    end

    # Inverted membership test: "is_in" followed by "invert".
    class NotInCondition < Condition
      def initialize(column, values)
        @column = column
        @values = values
      end

      def !@
        InCondition.new(@column, @values)
      end

      def evaluate
        values = @values
        # NOTE(review): same constant lookup as in InCondition#evaluate.
        values = Array.new(values) unless values.is_a?(Array)
        options = SetLookupOptions.new(values)
        booleans = Function.find("is_in").execute([@column.data], options).value
        Function.find("invert").execute([booleans]).value
      end
    end

    # Evaluates the block for every column value in Ruby and wraps the
    # results in a BooleanArray.
    class SelectCondition < Condition
      def initialize(column, block)
        @column = column
        @block = block
      end

      def !@
        RejectCondition.new(@column, @block)
      end

      def evaluate
        BooleanArray.new(@column.collect(&@block))
      end
    end

    # Like SelectCondition with the block result negated; a nil block
    # result stays nil instead of becoming true.
    class RejectCondition < Condition
      def initialize(column, block)
        @column = column
        @block = block
      end

      def !@
        SelectCondition.new(@column, @block)
      end

      def evaluate
        raw_array = @column.collect do |value|
          evaluated_value = @block.call(value)
          if evaluated_value.nil?
            nil
          else
            not evaluated_value
          end
        end
        BooleanArray.new(raw_array)
      end
    end
  end
end

# File: red-arrow/lib/arrow/sort-key.rb
module Arrow
  class SortKey
    class << self
      # Ensure returning suitable {Arrow::SortKey}.
      #
      # @overload resolve(sort_key)
      #
      #   Returns the given sort key itself. This is convenient to use
      #   this method as {Arrow::SortKey} converter.
      #
      #   @param sort_key [Arrow::SortKey] The sort key.
      #
      #   @return [Arrow::SortKey] The given sort key itself.
      #
      # @overload resolve(name)
      #
      #   Creates a new suitable sort key from column name with
      #   leading order mark. See {#initialize} for details about
      #   order mark.
      #
      #   @return [Arrow::SortKey] A new suitable sort key.
      #
      # @overload resolve(name, order)
      #
      #   Creates a new suitable sort key from column name without
      #   leading order mark and order. See {#initialize} for details.
      #
      #   @return [Arrow::SortKey] A new suitable sort key.
      #
      # @since 4.0.0
      def resolve(name, order=nil)
        return name if name.is_a?(self)
        new(name, order)
      end

      # Converts a Symbol or String into an ascending sort key. The
      # order is given explicitly, so the name is used as-is (no leading
      # order mark processing). Other values give `nil`.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol, String
          new(value.to_s, :ascending)
        else
          nil
        end
      end
    end

    alias_method :initialize_raw, :initialize
    private :initialize_raw
    # Creates a new {Arrow::SortKey}.
    #
    # @overload initialize(name)
    #
    #   @param name [Symbol, String] The name of the sort column.
    #
    #     If `name` is a String, the first character may be processed
    #     as the "leading order mark". If the first character is `"+"`
    #     or `"-"`, it is processed as a leading order mark. If the
    #     first character is processed as a leading order mark, the
    #     first character is removed from the sort column name and the
    #     corresponding order is used. `"+"` uses ascending order and
    #     `"-"` uses descending order.
    #
    #     If `name` is not a String or `name` doesn't start with the
    #     leading order mark, sort column name is `name` as-is and
    #     ascending order is used.
    #
    #   @example String without the leading order mark
    #     key = Arrow::SortKey.new("count")
    #     key.name  # => "count"
    #     key.order # => Arrow::SortOrder::ASCENDING
    #
    #   @example String with the "+" leading order mark
    #     key = Arrow::SortKey.new("+count")
    #     key.name  # => "count"
    #     key.order # => Arrow::SortOrder::ASCENDING
    #
    #   @example String with the "-" leading order mark
    #     key = Arrow::SortKey.new("-count")
    #     key.name  # => "count"
    #     key.order # => Arrow::SortOrder::DESCENDING
    #
    #   @example Symbol that starts with "-"
    #     key = Arrow::SortKey.new(:"-count")
    #     key.name  # => "-count"
    #     key.order # => Arrow::SortOrder::ASCENDING
    #
    # @overload initialize(name, order)
    #
    #   @param name [Symbol, String] The name of the sort column.
    #
    #     No leading order mark processing. The given `name` is used
    #     as-is.
    #
    #   @param order [Symbol, String, Arrow::SortOrder] How to order
    #     by this sort key.
    #
    #     If this is a Symbol or String, this must be `:ascending`,
    #     `"ascending"`, `:asc`, `"asc"`, `:descending`,
    #     `"descending"`, `:desc` or `"desc"`.
    #
    #   @example No leading order mark processing
    #     key = Arrow::SortKey.new("-count", :ascending)
    #     key.name  # => "-count"
    #     key.order # => Arrow::SortOrder::ASCENDING
    #
    #   @example Order by abbreviated name with Symbol
    #     key = Arrow::SortKey.new("count", :desc)
    #     key.name  # => "count"
    #     key.order # => Arrow::SortOrder::DESCENDING
    #
    #   @example Order by String
    #     key = Arrow::SortKey.new("count", "descending")
    #     key.name  # => "count"
    #     key.order # => Arrow::SortOrder::DESCENDING
    #
    #   @example Order by Arrow::SortOrder
    #     key = Arrow::SortKey.new("count", Arrow::SortOrder::DESCENDING)
    #     key.name  # => "count"
    #     key.order # => Arrow::SortOrder::DESCENDING
    #
    # @since 4.0.0
    def initialize(name, order=nil)
      name, order = normalize_name(name, order)
      order = normalize_order(order) || :ascending
      initialize_raw(name, order)
    end

    # @return [String] The string representation of this sort key. You
    #   can recreate {Arrow::SortKey} by
    #   `Arrow::SortKey.new(key.to_s)`.
    #
    # @example Recreate Arrow::SortKey
    #   key = Arrow::SortKey.new("-count")
    #   key.to_s # => "-count"
    #   key == Arrow::SortKey.new(key.to_s) # => true
    #
    # @since 4.0.0
    def to_s
      if order == SortOrder::ASCENDING
        "+#{name}"
      else
        "-#{name}"
      end
    end

    private
    # Returns `[name, order]`. Symbols become Strings without any mark
    # processing. A String loses its leading "+"/"-" mark only when no
    # explicit order was given.
    def normalize_name(name, order)
      case name
      when Symbol
        return name.to_s, order
      when String
        return name, order if order
        if name.start_with?("-")
          return name[1..-1], order || :descending
        elsif name.start_with?("+")
          return name[1..-1], order || :ascending
        else
          return name, order
        end
      else
        return name, order
      end
    end

    # Expands the abbreviated orders; anything else is returned as-is.
    def normalize_order(order)
      case order
      when :asc, "asc"
        :ascending
      when :desc, "desc"
        :descending
      else
        order
      end
    end
  end
end

# File: red-arrow/lib/arrow/sort-options.rb
module Arrow
  class SortOptions
    class << self
      # A Symbol or String becomes options with that single sort key; an
      # `::Array` is splatted into the sort keys. Other values give `nil`.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol, String
          new(value)
        when ::Array
          new(*value)
        else
          nil
        end
      end
    end

    alias_method :initialize_raw, :initialize
    private :initialize_raw
    # @param sort_keys [::Array<String, Symbol, Arrow::SortKey>] The
    #   sort keys to be used. See {Arrow::SortKey.resolve} how to
    #   resolve each sort key in `sort_keys`.
    #
    #   You can add more sort keys by {#add_sort_key} later.
    #
    # @example No initial sort keys
    #   options = Arrow::SortOptions.new
    #   options.sort_keys # => []
    #
    # @example String sort keys
    #   options = Arrow::SortOptions.new("count", "-age")
    #   options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
    #
    # @example Symbol sort keys
    #   options = Arrow::SortOptions.new(:count, :age)
    #   options.sort_keys.collect(&:to_s) # => ["+count", "+age"]
    #
    # @example Mixed sort keys
    #   options = Arrow::SortOptions.new(:count, "-age")
    #   options.sort_keys.collect(&:to_s) # => ["+count", "-age"]
    #
    # @since 4.0.0
    def initialize(*sort_keys)
      initialize_raw
      sort_keys.each do |sort_key|
        add_sort_key(sort_key)
      end
    end

    # @api private
    alias_method :add_sort_key_raw, :add_sort_key
    # Add a sort key.
    #
    # @return [void]
    #
    # @overload add_sort_key(key)
    #
    #   @param key [Arrow::SortKey] The sort key to be added.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key(Arrow::SortKey.new(:price, :descending))
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @overload add_sort_key(name)
    #
    #   @param name [Symbol, String] The sort key name to be
    #     added. See also {Arrow::SortKey#initialize} for the leading
    #     order mark for String name.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key("-price")
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @overload add_sort_key(name, order)
    #
    #   @param name [Symbol, String] The sort key name.
    #
    #   @param order [Symbol, String, Arrow::SortOrder] The sort
    #     order. See {Arrow::SortKey#initialize} for details.
    #
    #   @example Add a key to sort by "price" column in descending order
    #     options = Arrow::SortOptions.new
    #     options.add_sort_key("price", :desc)
    #     options.sort_keys.collect(&:to_s) # => ["-price"]
    #
    # @since 4.0.0
    def add_sort_key(name, order=nil)
      add_sort_key_raw(SortKey.resolve(name, order))
    end
  end
end
module Arrow
  class SourceNodeOptions
    class << self
      # Wraps a record batch source into {Arrow::SourceNodeOptions}.
      #
      # @param value [Arrow::RecordBatchReader, Arrow::RecordBatch,
      #   Arrow::Table, Object] The value to be converted.
      #
      # @return [Arrow::SourceNodeOptions, nil] New options for the
      #   three supported source classes, `nil` for anything else.
      #
      # @api private
      def try_convert(value)
        case value
        when RecordBatchReader, RecordBatch, Table
          new(value)
        else
          nil
        end
      end
    end
  end
end

module Arrow
  class SparseUnionDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::SparseUnionDataType}.
    #
    # @overload initialize(fields, type_codes)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     sparse union data type. You can mix {Arrow::Field} and field
    #     description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @param type_codes [::Array<Integer>] The IDs that indicate
    #     corresponding fields.
    #
    #   @example Create a sparse union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::SparseUnionDataType.new(fields, [2, 9])
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the sparse union
    #     data type. It must have `:fields` and `:type_codes` values.
    #
    #   @option description [::Array<Arrow::Field, Hash>] :fields The
    #     fields of the sparse union data type. You can mix
    #     {Arrow::Field} and field description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @option description [::Array<Integer>] :type_codes The IDs
    #     that indicate corresponding fields.
    #
    #   @example Create a sparse union data type for `{2: visible, 9: count}`
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::SparseUnionDataType.new(fields: fields,
    #                                    type_codes: [2, 9])
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        fields = description[:fields]
        type_codes = description[:type_codes]
      when 2
        fields, type_codes = args
      else
        # Same wording as Ruby's own arity errors (no comma after
        # "given").
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      # Field descriptions are converted; Arrow::Field instances are
      # passed through as is.
      fields = fields.collect do |field|
        field = Field.new(field) unless field.is_a?(Field)
        field
      end
      initialize_raw(fields, type_codes)
    end
  end
end
module Arrow
  class StringDictionaryArrayBuilder
    # Provides #append_values, which relies on
    # #create_values_array_builder below.
    include SymbolValuesAppendable

    # @return [Arrow::StringArrayBuilder] A fresh builder used to
    #   build the values that are appended to this builder.
    def create_values_array_builder
      StringArrayBuilder.new
    end
    private :create_values_array_builder
  end
end
module Arrow
  class StructArrayBuilder
    class << self
      # Builds an array from `values` with a new builder for
      # `data_type`.
      #
      # @param data_type [Arrow::StructDataType] The data type passed
      #   to the constructor.
      # @param values [::Array] The values passed to `#build`.
      # @return The result of `#build`.
      def build(data_type, values)
        new(data_type).build(values)
      end
    end

    # Shortcut of {#find_field_builder}.
    def [](index_or_name)
      find_field_builder(index_or_name)
    end

    # @param index_or_name [String, Symbol, Integer] The field name
    #   or the field index.
    # @return The builder of the found field, or `nil` when nothing
    #   is found.
    def find_field_builder(index_or_name)
      case index_or_name
      when String, Symbol
        cached_name_to_builder[index_or_name.to_s]
      else
        cached_field_builders[index_or_name]
      end
    end

    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts appending a struct record. You need to append values of
    #   fields.
    #
    # @overload append_value(value)
    #
    #   Appends a struct record including values of fields.
    #
    #   @param value [nil, ::Array, Hash] The struct record value.
    #
    #     If this is `nil`, the struct record is null.
    #
    #     If this is `Array` or `Hash`, they are values of fields.
    #
    # @since 0.12.0
    def append_value(*args)
      n_args = args.size
      if n_args > 1
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end
      return append_value_raw if n_args.zero?

      value = args[0]
      case value
      when nil
        append_null
      when ::Array
        append_value_raw
        # Values are matched with field builders by position.
        cached_field_builders.zip(value) do |builder, sub_value|
          builder.append(sub_value)
        end
      when Hash
        append_value_raw
        # Builders are removed from this copy as their values are
        # appended; whatever is left gets a null.
        remaining_builders = cached_name_to_builder.dup
        value.each do |name, sub_value|
          remaining_builders.delete(name.to_s).append(sub_value)
        end
        remaining_builders.each do |_, builder|
          builder.append_null
        end
      else
        message =
          "struct value must be nil, Array or Hash: #{value.inspect}"
        raise ArgumentError, message
      end
    end

    # Appends each of `values` by {#append_value}. When `is_valids`
    # is given, it drives the iteration and a null is appended for
    # each falsy entry.
    def append_values(values, is_valids=nil)
      if is_valids
        is_valids.each_with_index do |is_valid, i|
          is_valid ? append_value(values[i]) : append_null
        end
      else
        values.each { |value| append_value(value) }
      end
    end

    alias_method :append_null_raw, :append_null
    def append_null
      append_null_raw
    end

    # @since 0.12.0
    def append(*values)
      # Calling without values is kept for backward compatibility.
      return append_value_raw if values.empty?

      super
    end

    private
    def cached_field_builders
      @field_builders ||= field_builders
    end

    # Maps each field name of the value data type to the field
    # builder at the same position.
    def build_name_to_builder
      builders = cached_field_builders
      fields = value_data_type.fields
      fields.each_with_index.each_with_object({}) do |(field, i), name_to_builder|
        name_to_builder[field.name] = builders[i]
      end
    end

    def cached_name_to_builder
      @name_to_builder ||= build_name_to_builder
    end
  end
end

module Arrow
  class StructArray
    # @param i [Integer]
    #   The index of the value to be gotten. You must specify the value index.
    #
    #   You can use {Arrow::Array#[]} for convenient value access.
    #
    # @return [Hash] The `i`-th struct.
    def get_value(i)
      pairs = value_data_type.fields.zip(fields)
      pairs.each_with_object({}) do |(field, field_array), value|
        value[field.name] = field_array[i]
      end
    end

    # @overload find_field(index)
    #   @param index [Integer] The index of the field to be found.
    #   @return [Arrow::Array, nil]
    #      The `index`-th field or `nil` for out of range.
    #
    # @overload find_field(name)
    #   @param name [String, Symbol] The name of the field to be found.
    #   @return [Arrow::Array, nil]
    #      The field that has `name` or `nil` for nonexistent name.
    def find_field(index_or_name)
      case index_or_name
      when String, Symbol
        @name_to_field ||= build_name_to_field
        @name_to_field[index_or_name.to_s]
      else
        fields[index_or_name]
      end
    end

    alias_method :fields_raw, :fields
    # Memoized version of the raw `fields` reader.
    def fields
      @fields ||= fields_raw
    end

    private
    # Maps each field name of the value data type to the field array
    # at the same position.
    def build_name_to_field
      pairs = value_data_type.fields.zip(fields)
      pairs.each_with_object({}) do |(field, field_array), name_to_field|
        name_to_field[field.name] = field_array
      end
    end
  end
end
module Arrow
  class StructDataType
    include FieldContainable

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::StructDataType}.
    #
    # @overload initialize(fields)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     struct data type. You can also specify field description as
    #     a field. You can mix {Arrow::Field} and field description.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a struct data type with {Arrow::Field}s
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     count_field = Arrow::Field.new("count", :int32)
    #     Arrow::StructDataType.new([visible_field, count_field])
    #
    #   @example Create a struct data type with field descriptions
    #     field_descriptions = [
    #       {name: "visible", type: :boolean},
    #       {name: "count", type: :int32},
    #     ]
    #     Arrow::StructDataType.new(field_descriptions)
    #
    #   @example Create a struct data type with {Arrow::Field} and field description
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {name: "count", type: :int32},
    #     ]
    #     Arrow::StructDataType.new(fields)
    #
    # @overload initialize(fields)
    #
    #   @param fields [Hash{String, Symbol => Arrow::DataType, Hash}]
    #     The pairs of field name and field data type of the struct
    #     data type. You can also specify data type description by
    #     `Hash`. You can mix {Arrow::DataType} and data type description.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @example Create a struct data type with {Arrow::DataType}s
    #     fields = {
    #       "visible" => Arrow::BooleanDataType.new,
    #       "count" => Arrow::Int32DataType.new,
    #     }
    #     Arrow::StructDataType.new(fields)
    #
    #   @example Create a struct data type with data type descriptions
    #     fields = {
    #       "visible" => :boolean,
    #       "count" => {type: :int32},
    #     }
    #     Arrow::StructDataType.new(fields)
    #
    #   @example Create a struct data type with {Arrow::DataType} and data type description
    #     fields = {
    #       "visible" => Arrow::BooleanDataType.new,
    #       "count" => {type: :int32},
    #     }
    #     Arrow::StructDataType.new(fields)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the struct data
    #     type. It must have `:fields` value.
    #
    #   @option description
    #     [::Array<Arrow::Field, Hash>,
    #      Hash{String, Symbol => Arrow::DataType, Hash, String, Symbol}]
    #     :fields The fields of the struct data type.
    #
    #   @example Create a struct data type with {Arrow::Field} and field description
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {name: "count", type: :int32},
    #     ]
    #     Arrow::StructDataType.new(fields: fields)
    #
    #   @example Create a struct data type with {Arrow::DataType} and data type description
    #     fields = {
    #       "visible" => Arrow::BooleanDataType.new,
    #       "count" => {type: :int32},
    #     }
    #     Arrow::StructDataType.new(fields: fields)
    def initialize(fields)
      # A Hash that has the :fields key is a description; unwrap it.
      fields = fields[:fields] if fields.is_a?(Hash) && fields.key?(:fields)

      resolved_fields =
        if fields.is_a?(Hash)
          # {name => data type} pairs.
          fields.collect { |name, data_type| Field.new(name, data_type) }
        else
          # Mixed list of Arrow::Field and field descriptions.
          fields.collect do |field|
            field.is_a?(Field) ? field : Field.new(field)
          end
        end
      initialize_raw(resolved_fields)
    end

    alias_method :[], :find_field
  end
end
module Arrow
  # Mixin for builders that accept Symbol values as Strings.
  #
  # The including class must provide `create_values_array_builder`
  # and `append_array`.
  module SymbolValuesAppendable
    # Builds a values array from `values`, with every Symbol
    # converted by `to_s`, and appends it by `append_array`.
    #
    # @param values [::Array] The values to be appended. Values that
    #   aren't Symbol are passed through as is.
    # @param is_valids [::Array<Boolean>, nil] Passed through to the
    #   values array builder.
    # @return The result of `append_array`.
    def append_values(values, is_valids=nil)
      values_builder = create_values_array_builder
      normalized_values = values.map do |value|
        case value
        when Symbol then value.to_s
        else value
        end
      end
      values_builder.append_values(normalized_values, is_valids)
      append_array(values_builder.finish)
    end
  end
end
module Arrow
  class TableConcatenateOptions
    class << self
      # Builds {Arrow::TableConcatenateOptions} from a Hash by
      # calling the `#{key}=` writer for each pair.
      #
      # @param value [Hash, Object] The value to be converted.
      #
      # @return [Arrow::TableConcatenateOptions, nil] New options
      #   for a Hash, `nil` for anything else.
      #
      # @api private
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |k, v|
            # Assign the value of this key, not the whole Hash.
            options.public_send("#{k}=", v)
          end
          options
        else
          nil
        end
      end
    end
  end
end

module Arrow
  # Builds the text representation of a table. Subclasses provide
  # `format_header`, `format_rows` and `format_ellipsis`.
  #
  # TODO: Almost codes should be implemented in Apache Arrow C++.
  class TableFormatter
    # Formats the values of one column and computes the width the
    # column needs from the sampled head and tail values.
    #
    # @private
    class ColumnFormatter
      attr_reader :column
      attr_reader :head_values
      attr_reader :tail_values
      attr_reader :sample_values
      def initialize(column, head_values, tail_values)
        @column = column
        @head_values = head_values
        @tail_values = tail_values
        @sample_values = head_values + tail_values
        # Cache of computed widths keyed by struct field.
        @field_value_widths = {}
      end

      def data_type
        @data_type ||= @column.data_type
      end

      def name
        @name ||= @column.name
      end

      # The column name padded to the width computed from the data
      # type and the sample values.
      def aligned_name
        @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
      end

      FLOAT_N_DIGITS = 10
      FORMATTED_NULL = "(null)"

      # @param value The value to be formatted.
      # @param width [Integer] The minimum width of the result.
      # @return [String] The formatted value. Numbers and nulls are
      #   right aligned, Hash and other values are left aligned and
      #   Time is formatted by `iso8601` without padding.
      def format_value(value, width=0)
        case value
        when ::Time
          value.iso8601
        when Float
          "%*f" % [[width, FLOAT_N_DIGITS].max, value]
        when Integer
          "%*d" % [width, value]
        when Hash
          # Struct value: format each field in data type order.
          formatted_values = data_type.fields.collect do |field|
            field_name = field.name
            field_value_width = compute_field_value_width(field, @sample_values)
            formatted_name = format_value(field_name, 0)
            formatted_value = format_value(value[field_name], field_value_width)
            "#{formatted_name}: #{formatted_value}"
          end
          formatted = "{"
          formatted << formatted_values.join(", ")
          formatted << "}"
          "%-*s" % [width, formatted]
        when nil
          "%*s" % [width, FORMATTED_NULL]
        else
          "%-*s" % [width, value.to_s]
        end
      end

      private
      # Computes (and caches) the width needed by the values of
      # `field` in the sampled struct values.
      def compute_field_value_width(field, sample_values)
        unless @field_value_widths.key?(field)
          field_name = field.name
          # A null struct contributes a null field value.
          field_sample_values = sample_values.collect do |v|
            (v || {})[field_name]
          end
          field_aligned_name = format_aligned_name("",
                                                   field.data_type,
                                                   field_sample_values)
          @field_value_widths[field] = field_aligned_name.size
        end
        @field_value_widths[field]
      end

      # Right aligns `name` to the width the values of `data_type`
      # need. Data types without a width rule return `name` as is.
      def format_aligned_name(name, data_type, sample_values)
        case data_type
        when TimestampDataType
          "%*s" % [::Time.now.iso8601.size, name]
        when IntegerDataType
          have_null = false
          have_negative = false
          max_value = nil
          sample_values.each do |value|
            if value.nil?
              have_null = true
            else
              if max_value.nil?
                max_value = value.abs
              else
                max_value = [value.abs, max_value].max
              end
              have_negative = true if value.negative?
            end
          end
          if max_value.nil?
            width = 0
          elsif max_value.zero?
            width = 1
          else
            # The number of decimal digits of the largest magnitude.
            width = (Math.log10(max_value) + 1).truncate
          end
          width += 1 if have_negative # Need "-"
          width = [width, FORMATTED_NULL.size].max if have_null
          "%*s" % [width, name]
        when FloatDataType, DoubleDataType
          "%*s" % [FLOAT_N_DIGITS, name]
        when StructDataType
          field_widths = data_type.fields.collect do |field|
            field_value_width = compute_field_value_width(field, sample_values)
            field.name.size + ": ".size + field_value_width
          end
          width = "{}".size + field_widths.sum
          if field_widths.size > 0
            width += (", ".size * (field_widths.size - 1))
          end
          "%*s" % [width, name]
        else
          name
        end
      end
    end

    # @param table The table to be formatted. `n_rows` and `columns`
    #   are used.
    # @param options [Hash] The format options.
    # @option options [Integer] :border (10) The maximum number of
    #   rows shown at each of the head and the tail.
    def initialize(table, options={})
      @table = table
      @options = options
    end

    # @return [String] The formatted table: the header, up to
    #   `:border` head rows and, when the table has more rows than
    #   `:border`, an ellipsis (only when rows are skipped) followed
    #   by the tail rows.
    def format
      text = ""
      n_rows = @table.n_rows
      border = @options[:border] || 10

      head_limit = [border, n_rows].min

      # The tail never overlaps with the head.
      tail_start = [border, n_rows - border].max
      tail_limit = n_rows - tail_start

      column_formatters = @table.columns.collect do |column|
        head_values = column.each.take(head_limit)
        if tail_limit > 0
          tail_values = column.reverse_each.take(tail_limit).reverse
        else
          tail_values = []
        end
        ColumnFormatter.new(column, head_values, tail_values)
      end

      format_header(text, column_formatters)
      return text if n_rows.zero?

      n_digits = (Math.log10(n_rows) + 1).truncate
      format_rows(text,
                  column_formatters,
                  column_formatters.collect(&:head_values).transpose,
                  n_digits,
                  0)
      return text if n_rows <= border

      if head_limit != tail_start
        format_ellipsis(text)
      end

      format_rows(text,
                  column_formatters,
                  column_formatters.collect(&:tail_values).transpose,
                  n_digits,
                  tail_start)

      text
    end
  end
end

module Arrow
  # Formats each row as a list of `name: value` lines under a
  # separator line that has the row index.
  #
  # TODO: Almost codes should be implemented in Apache Arrow C++.
  class TableListFormatter < TableFormatter
    private
    # The list format has no header.
    def format_header(text, columns)
    end

    def format_rows(text, column_formatters, rows, n_digits, start_offset)
      rows.each_with_index do |row, nth_row|
        text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
        row.each_with_index do |column_value, nth_column|
          column_formatter = column_formatters[nth_column]
          formatted_name = column_formatter.name
          formatted_value = column_formatter.format_value(column_value)
          text << "#{formatted_name}: #{formatted_value}\n"
        end
      end
    end

    def format_ellipsis(text)
      text << "...\n"
    end
  end
end
require "uri"

module Arrow
  # Loads an Arrow::Table from an input. The loader method is chosen
  # by the input kind (`load_from_*`) and then by the `:format`
  # option (`load_as_*`).
  class TableLoader
    class << self
      # Shortcut of `new(input, options).load`.
      def load(input, options={})
        new(input, options).load
      end
    end

    # @param input The load source. An object that responds to
    #   `to_path` is replaced by its path.
    # @param options [Hash] The load options. `:format` and
    #   `:compression` are filled from the input when missing.
    def initialize(input, options={})
      input = input.to_path if input.respond_to?(:to_path)
      @input = input
      @options = options
      fill_options
    end

    # @return The loaded table.
    # @raise [ArgumentError] If there is no loader for the kind of
    #   the input.
    def load
      custom_load_method =
        if @input.is_a?(URI)
          "load_from_uri"
        elsif @input.is_a?(String) && ::File.directory?(@input)
          "load_from_directory"
        else
          "load_from_file"
        end
      unless respond_to?(custom_load_method, true)
        available_schemes = method_suffixes_for("load_from_")
        message = "Arrow::Table load source must be one of " +
                  "[#{available_schemes.join(", ")}]: #{@input.inspect}"
        raise ArgumentError, message
      end
      __send__(custom_load_method)
    end

    private
    # Collects the names of the public and private methods that
    # start with `prefix`, with the prefix removed.
    def method_suffixes_for(prefix)
      names = (methods(true) | private_methods(true)).collect(&:to_s)
      names.select {|name| name.start_with?(prefix)}
           .collect {|name| name[prefix.size..-1]}
    end

    # Dispatches to `load_as_#{format}`.
    def load_from_file
      format = @options[:format]
      custom_load_method = "load_as_#{format}"
      unless respond_to?(custom_load_method, true)
        deprecated_formats = ["batch", "stream"]
        available_formats = method_suffixes_for("load_as_") - deprecated_formats
        message = "Arrow::Table load format must be one of " +
                  "[#{available_formats.join(", ")}]: #{format.inspect}"
        raise ArgumentError, message
      end
      if method(custom_load_method).arity.zero?
        __send__(custom_load_method)
      else
        # For backward compatibility.
        __send__(custom_load_method, @input)
      end
    end

    # Fills `:format` and `:compression` from the extension of the
    # input path unless both are already given. The given options
    # Hash isn't modified; a copy is used.
    def fill_options
      return if @options[:format] && @options.key?(:compression)

      info =
        case @input
        when Buffer
          {}
        when URI
          PathExtension.new(@input.path).extract
        else
          PathExtension.new(@input).extract
        end
      format = info[:format]
      @options = @options.dup
      @options[:format] ||= (format ? format.to_sym : :arrow)
      unless @options.key?(:compression)
        @options[:compression] = info[:compression]
      end
    end

    def open_input_stream
      return BufferInputStream.new(@input) if @input.is_a?(Buffer)

      MemoryMappedInputStream.new(@input)
    end

    # Reads all record batches of `reader` into a table.
    def load_raw(input, reader)
      schema = reader.schema
      record_batches = []
      reader.each {|record_batch| record_batches << record_batch}
      table = Table.new(schema, record_batches)
      # NOTE(review): presumably keeps the input referenced for as
      # long as the table lives - confirm.
      table.instance_variable_set(:@input, input)
      table
    end

    # Tries the file format first and then the streaming format. The
    # last error is raised when no reader can be created.
    def load_as_arrow
      input = nil
      reader = nil
      error = nil
      [RecordBatchFileReader, RecordBatchStreamReader].each do |reader_class|
        input = open_input_stream
        begin
          reader = reader_class.new(input)
        rescue Arrow::Error => reader_error
          error = reader_error
        else
          break
        end
      end
      raise error if reader.nil?
      load_raw(input, reader)
    end

    # @since 1.0.0
    def load_as_arrow_file
      input = open_input_stream
      load_raw(input, RecordBatchFileReader.new(input))
    end

    # @deprecated Use `format: :arrow_file` instead.
    def load_as_batch
      load_as_arrow_file
    end

    # @since 1.0.0
    def load_as_arrow_streaming
      input = open_input_stream
      load_raw(input, RecordBatchStreamReader.new(input))
    end

    # @deprecated Use `format: :arrow_streaming` instead.
    def load_as_stream
      load_as_arrow_streaming
    end

    # Defined only when the ORC reader is available.
    if Arrow.const_defined?(:ORCFileReader)
      def load_as_orc
        input = open_input_stream
        reader = ORCFileReader.new(input)
        field_indexes = @options[:field_indexes]
        reader.set_field_indexes(field_indexes) if field_indexes
        table = reader.read_stripes
        table.instance_variable_set(:@input, input)
        table
      end
    end

    # @param options [Hash] The CSV load options. `:format` is
    #   removed from this Hash before it's passed to the CSV loader.
    def csv_load(options)
      options.delete(:format)
      source = @input.is_a?(Buffer) ? @input.data.to_s : Pathname.new(@input)
      CSVLoader.load(source, **options)
    end

    def load_as_csv
      csv_load(@options.dup)
    end

    def load_as_tsv
      csv_load(@options.merge(delimiter: "\t"))
    end

    def load_as_feather
      input = open_input_stream
      table = FeatherFileReader.new(input).read
      table.instance_variable_set(:@input, input)
      table
    end

    def load_as_json
      input = open_input_stream
      table = JSONReader.new(input).read
      table.instance_variable_set(:@input, input)
      table
    end
  end
end
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  # Writes a table to an output (a path-like object, a URI or a Buffer).
  #
  # The output format is chosen by the :format option, or detected from
  # the output path's extension, and dispatched to the matching private
  # `save_as_<format>` method.
  class TableSaver
    class << self
      # Shortcut for `new(table, output, options).save`.
      def save(table, output, options={})
        new(table, output, options).save
      end
    end

    # @param table The table to be saved.
    # @param output The destination. Objects responding to `to_path` are
    #   converted to their path first.
    # @param options [Hash] Save options such as :format and :compression.
    #   Missing ones are completed by {#fill_options}.
    def initialize(table, output, options={})
      @table = table
      output = output.to_path if output.respond_to?(:to_path)
      @output = output
      @options = options
      fill_options
    end

    # Saves the table.
    #
    # A URI output is dispatched to `save_to_uri`, anything else to
    # `save_to_file`. This class defines only `save_to_file`; a
    # `save_to_uri` implementation has to come from elsewhere.
    #
    # @raise [ArgumentError] If the required `save_to_*` method is missing.
    def save
      custom_save_method = @output.is_a?(URI) ? "save_to_uri" : "save_to_file"
      unless respond_to?(custom_save_method, true)
        available_schemes = method_suffixes("save_to_")
        raise ArgumentError,
              "Arrow::Table save source must be one of " +
              "[#{available_schemes.join(", ")}]: #{@output.scheme.inspect}"
      end
      __send__(custom_save_method)
    end

    private
    # Dispatches to the `save_as_<format>` method for the :format option.
    #
    # @raise [ArgumentError] If no such method exists. The message lists
    #   the supported formats, leaving out the deprecated aliases.
    def save_to_file
      format = @options[:format]
      custom_save_method = "save_as_#{format}"
      unless respond_to?(custom_save_method, true)
        deprecated_formats = ["batch", "stream"]
        available_formats = method_suffixes("save_as_") - deprecated_formats
        raise ArgumentError,
              "Arrow::Table save format must be one of " +
              "[#{available_formats.join(", ")}]: #{format.inspect}"
      end
      if method(custom_save_method).arity.zero?
        __send__(custom_save_method)
      else
        # For backward compatibility.
        __send__(custom_save_method, @output)
      end
    end

    # Returns the part after +prefix+ of the name of every public or
    # private method of this object whose name starts with +prefix+
    # (e.g. "save_as_" yields "csv" for `save_as_csv`).
    def method_suffixes(prefix)
      (methods(true) | private_methods(true))
        .map(&:to_s)
        .select { |name| name.start_with?(prefix) }
        .map { |name| name[prefix.size..-1] }
    end

    # Fills in the :format and :compression options the caller left out.
    #
    # Nothing is inferred for a Buffer output. For a URI the URI's path is
    # examined, otherwise the output itself, using PathExtension#extract.
    # When no format can be detected the format defaults to :arrow.
    def fill_options
      return if @options[:format] && @options.key?(:compression)

      case @output
      when Buffer
        info = {}
      when URI
        info = PathExtension.new(@output.path).extract
      else
        info = PathExtension.new(@output).extract
      end
      format = info[:format]
      # Work on a copy so the hash passed in by the caller is not modified.
      @options = @options.dup
      @options[:format] ||= format ? format.to_sym : :arrow
      unless @options.key?(:compression)
        @options[:compression] = info[:compression]
      end
    end

    # Opens the uncompressed output stream and passes it to the block.
    def open_raw_output_stream(&block)
      if @output.is_a?(Buffer)
        BufferOutputStream.open(@output, &block)
      else
        FileOutputStream.open(@output, false, &block)
      end
    end

    # Opens the output stream and passes it to the block. When the
    # :compression option is set, the raw stream is wrapped in a
    # CompressedOutputStream using that codec.
    def open_output_stream(&block)
      compression = @options[:compression]
      return open_raw_output_stream(&block) unless compression

      codec = Codec.new(compression)
      open_raw_output_stream do |raw_output|
        CompressedOutputStream.open(codec, raw_output) do |output|
          yield(output)
        end
      end
    end

    # Writes the whole table with the given record batch writer class.
    def save_raw(writer_class)
      open_output_stream do |output|
        writer_class.open(output, @table.schema) do |writer|
          writer.write_table(@table)
        end
      end
    end

    # The :arrow format is saved in the Arrow file format.
    def save_as_arrow
      save_as_arrow_file
    end

    # @since 1.0.0
    def save_as_arrow_file
      save_raw(RecordBatchFileWriter)
    end

    # @deprecated Use `format: :arrow_file` instead.
    def save_as_batch
      save_as_arrow_file
    end

    # @since 1.0.0
    def save_as_arrow_streaming
      save_raw(RecordBatchStreamWriter)
    end

    # @deprecated Use `format: :arrow_streaming` instead.
    def save_as_stream
      save_as_arrow_streaming
    end

    # Writes a header row with the field names followed by one row per
    # record. +options+ are passed to CSV.new.
    def csv_save(**options)
      open_output_stream do |output|
        csv = CSV.new(output, **options)
        csv << @table.schema.fields.collect(&:name)
        @table.raw_records.each do |record|
          csv << record
        end
      end
    end

    def save_as_csv
      csv_save
    end

    def save_as_tsv
      csv_save(col_sep: "\t")
    end

    # Saves as Feather. Every FeatherWriteProperties property that has a
    # non-nil option of the same name is copied from @options. The raw
    # output stream is used, so the :compression codec wrapper of
    # {#open_output_stream} is not applied here.
    def save_as_feather
      properties = FeatherWriteProperties.new
      properties.class.properties.each do |name|
        value = @options[name.to_sym]
        next if value.nil?
        properties.__send__("#{name}=", value)
      end
      open_raw_output_stream do |output|
        @table.write_as_feather(output, properties)
      end
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/table-table-formatter.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

require "time"

module Arrow
  # Formats a table as tab separated columns, one record per line.
  #
  # TODO: Most of this code should be implemented in Apache Arrow C++.
  class TableTableFormatter < TableFormatter
    private
    # Appends the header line: a tab and the aligned name per column.
    def format_header(text, column_formatters)
      column_formatters.each do |column_formatter|
        text << "\t" << column_formatter.aligned_name
      end
      text << "\n"
    end

    # Appends one line per row: the row number right-aligned to
    # +n_digits+, then each value formatted to the width of its column's
    # aligned name and preceded by a tab.
    def format_rows(text, column_formatters, rows, n_digits, start_offset)
      rows.each_with_index do |row, nth_row|
        text << ("%*d" % [n_digits, start_offset + nth_row])
        row.each_with_index do |column_value, nth_column|
          column_formatter = column_formatters[nth_column]
          width = column_formatter.aligned_name.size
          text << "\t"
          text << column_formatter.format_value(column_value, width)
        end
        text << "\n"
      end
    end

    # Appends the line that stands for omitted rows.
    def format_ellipsis(text)
      text << "...\n"
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/table.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

require "arrow/raw-table-converter"

module Arrow
  class Table
    include ColumnContainable
    include GenericFilterable
    include GenericTakeable
    include RecordContainable

    class << self
      # Loads a table from +path+. All the work is delegated to
      # `TableLoader.load`.
      def load(path, options={})
        TableLoader.load(path, options)
      end
    end

    # Keep the original constructor reachable (privately) for #initialize.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Table}.
    #
    # @overload initialize(columns)
    #
    #   @param columns [::Array<Arrow::Column>] The columns of the table.
    #
    #   @example Create a table from columns
    #     count_field = Arrow::Field.new("count", :uint32)
    #     count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
    #     count_column = Arrow::Column.new(count_field, count_array)
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
    #     visible_column = Arrow::Column.new(visible_field, visible_array)
    #     Arrow::Table.new([count_column, visible_column])
    #
    # @overload initialize(raw_table)
    #
    #   @param raw_table [Hash<String, Arrow::Array>]
    #     The pairs of column name and values of the table. Column values
    #     are `Arrow::Array`.
    #
    #   @example Create a table from column name and values
    #     Arrow::Table.new("count" => Arrow::UInt32Array.new([0, 2, nil, 4]),
    #                      "visible" => Arrow::BooleanArray.new([true, nil, nil, false]))
    #
    # @overload initialize(raw_table)
    #
    #   @param raw_table [Hash<String, Arrow::ChunkedArray>]
    #     The pairs of column name and values of the table. Column values
    #     are `Arrow::ChunkedArray`.
    #
    #   @example Create a table from column name and values
    #     count_chunks = [
    #       Arrow::UInt32Array.new([0, 2]),
    #       Arrow::UInt32Array.new([nil, 4]),
    #     ]
    #     visible_chunks = [
    #       Arrow::BooleanArray.new([true]),
    #       Arrow::BooleanArray.new([nil, nil, false]),
    #     ]
    #     Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks),
    #                      "visible" => Arrow::ChunkedArray.new(visible_chunks))
    #
    # @overload initialize(raw_table)
    #
    #   @param raw_table [Hash<String, ::Array>]
    #     The pairs of column name and values of the table. Column values
    #     are `Array`.
    #
    #   @example Create a table from column name and values
    #     Arrow::Table.new("count" => [0, 2, nil, 4],
    #                      "visible" => [true, nil, nil, false])
    #
    # @overload initialize(schema, columns)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param columns [::Array<Arrow::Column>] The data of the table.
    #
    #   @example Create a table from schema and columns
    #     count_field = Arrow::Field.new("count", :uint32)
    #     count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
    #     count_column = Arrow::Column.new(count_field, count_array)
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
    #     visible_column = Arrow::Column.new(visible_field, visible_array)
    #     Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
    #                      [count_column, visible_column])
    #
    # @overload initialize(schema, arrays)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param arrays [::Array<Arrow::Array>] The data of the table.
    #
    #   @example Create a table from schema and arrays
    #     count_field = Arrow::Field.new("count", :uint32)
    #     count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
    #     Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
    #                      [count_array, visible_array])
    #
    # @overload initialize(schema, record_batches)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param record_batches [::Array<Arrow::RecordBatch>] The data of
    #     the table.
    #
    #   @example Create a table from schema and record batches
    #     count_field = Arrow::Field.new("count", :uint32)
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     schema = Arrow::Schema.new([count_field, visible_field])
    #     record_batches = [
    #       Arrow::RecordBatch.new(schema, [[0, true], [2, nil], [nil, nil]]),
    #       Arrow::RecordBatch.new(schema, [[4, false]]),
    #     ]
    #     Arrow::Table.new(schema, record_batches)
    #
    # @overload initialize(schema, raw_records)
    #
    #   @param schema [Arrow::Schema] The schema of the table.
    #     You can also specify schema as primitive Ruby objects.
    #     See {Arrow::Schema#initialize} for details.
    #
    #   @param raw_records [::Array<::Array>] The data of the table as
    #     primitive Ruby objects.
    #
    #   @example Create a table from schema and raw records
    #     schema = {
    #       count: :uint32,
    #       visible: :boolean,
    #     }
    #     raw_records = [
    #       [0, true],
    #       [2, nil],
    #       [nil, nil],
    #       [4, false],
    #     ]
    #     Arrow::Table.new(schema, raw_records)
    #
    # @raise [ArgumentError] If the number of arguments is not 1 or 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        # A single argument is converted to a schema and values by
        # RawTableConverter.
        raw_table_converter = RawTableConverter.new(args[0])
        schema = raw_table_converter.schema
        values = raw_table_converter.values
      when 2
        schema = args[0]
        schema = Schema.new(schema) unless schema.is_a?(Schema)
        values = args[1]
        # The kind of data is decided from the first element only.
        case values[0]
        when ::Array
          # Raw records: wrap all of them in one record batch.
          values = [RecordBatch.new(schema, values)]
        when Column
          values = values.collect(&:data)
        end
      else
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      initialize_raw(schema, values)
    end

    # Yields each record batch read from this table by a TableBatchReader.
    # Returns an Enumerator when no block is given.
    def each_record_batch
      return to_enum(__method__) unless block_given?

      reader = TableBatchReader.new(self)
      while record_batch = reader.read_next
        yield(record_batch)
      end
    end

    alias_method :size, :n_rows
    alias_method :length, :n_rows

    # Keep the original (offset, length) slice reachable for #slice_by_range.
    alias_method :slice_raw, :slice

    # @overload slice(offset, length)
    #
    #   @param offset [Integer] The offset of sub Arrow::Table.
    #   @param length [Integer] The length of sub Arrow::Table.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only from
    #     `offset` to `offset + length` range.
    #
    # @overload slice(index)
    #
    #   @param index [Integer] The index in this table. A negative index
    #     counts from the end of the table.
    #   @return [Arrow::Record, nil]
    #     The `Arrow::Record` corresponding to index of
    #     the table, or `nil` when the index is out of range.
    #
    # @overload slice(booleans)
    #
    #   @param booleans [::Array<Boolean>]
    #     The values indicating the target rows.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows of indices
    #     the values of `booleans` is true.
    #
    # @overload slice(boolean_array)
    #
    #   @param boolean_array [Arrow::BooleanArray]
    #     The values indicating the target rows.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows of indices
    #     the values of `boolean_array` is true.
    #
    # @overload slice(range)
    #
    #   @param range [Range] The range indicating the target rows.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows of the range of indices.
    #
    # @overload slice(conditions)
    #
    #   @param conditions [Hash] The conditions to select records.
    #     A `Range` value selects rows whose column value is inside the
    #     range; any other value selects rows whose column value equals it.
    #     All conditions are combined with `&`.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows matched by condition
    #
    # @overload slice
    #
    #   @yield [slicer] Gives slicer that constructs condition to select records.
    #   @yieldparam slicer [Arrow::Slicer] The slicer that helps us to
    #     build condition.
    #   @yieldreturn [Arrow::Slicer::Condition, ::Array<Arrow::Slicer::Condition>]
    #     The condition to select records.
    #   @return [Arrow::Table]
    #     The sub `Arrow::Table` that covers only rows matched by condition
    #     specified by slicer.
    #
    # @raise [ArgumentError] If both arguments and a block are given, if the
    #   number of arguments is not 1 or 2, if a range starts outside the
    #   table, or if a slicer has an unsupported type.
    def slice(*args)
      # Phase 1: turn the arguments or the block result into "slicers".
      slicers = []
      if block_given?
        unless args.empty?
          raise ArgumentError, "must not specify both arguments and block"
        end
        block_slicer = yield(Slicer.new(self))
        case block_slicer
        when ::Array
          slicers.concat(block_slicer)
        else
          slicers << block_slicer
        end
      else
        expected_n_args = nil
        case args.size
        when 1
          case args[0]
          when Integer
            # A single index returns one record right away.
            index = args[0]
            index += n_rows if index < 0
            return nil if index < 0
            return nil if index >= n_rows
            return Record.new(self, index)
          when Hash
            condition_pairs = args[0]
            slicer = Slicer.new(self)
            conditions = []
            condition_pairs.each do |key, value|
              case value
              when Range
                # TODO: Optimize "begin <= key <= end" case by missing "between" kernel
                # https://issues.apache.org/jira/browse/ARROW-9843
                unless value.begin.nil?
                  conditions << (slicer[key] >= value.begin)
                end
                unless value.end.nil?
                  if value.exclude_end?
                    conditions << (slicer[key] < value.end)
                  else
                    conditions << (slicer[key] <= value.end)
                  end
                end
              else
                conditions << (slicer[key] == value)
              end
            end
            slicers << conditions.inject(:&)
          else
            slicers << args[0]
          end
        when 2
          offset, length = args
          slicers << (offset...(offset + length))
        else
          expected_n_args = "1..2"
        end
        if expected_n_args
          message = "wrong number of arguments " +
            "(given #{args.size}, expected #{expected_n_args})"
          raise ArgumentError, message
        end
      end

      # Phase 2: apply each slicer and collect the resulting tables.
      filter_options = Arrow::FilterOptions.new
      filter_options.null_selection_behavior = :emit_null
      sliced_tables = []
      slicers.each do |slicer|
        slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
        case slicer
        when Integer
          # An Integer slicer selects from that row to the end of the table.
          slicer += n_rows if slicer < 0
          sliced_tables << slice_by_range(slicer, n_rows - 1)
        when Range
          original_from = from = slicer.first
          to = slicer.last
          to -= 1 if slicer.exclude_end?
          # Negative positions count from the end of the table.
          from += n_rows if from < 0
          if from < 0 or from >= n_rows
            message =
              "offset is out of range (-#{n_rows + 1},#{n_rows}): " +
              "#{original_from}"
            raise ArgumentError, message
          end
          to += n_rows if to < 0
          sliced_tables << slice_by_range(from, to)
        when ::Array, BooleanArray, ChunkedArray
          sliced_tables << filter(slicer, filter_options)
        else
          message = "slicer must be Integer, Range, (from, to), " +
            "Arrow::ChunkedArray of Arrow::BooleanArray, " +
            "Arrow::BooleanArray or Arrow::Slicer::Condition: #{slicer.inspect}"
          raise ArgumentError, message
        end
      end
      # Several slicers produce several tables: join them in order.
      if sliced_tables.size > 1
        sliced_tables[0].concatenate(sliced_tables[1..-1])
      else
        sliced_tables[0]
      end
    end

    # Creates a new table from this table with columns added, replaced or
    # removed.
    #
    # Columns of this table keep their position. A column that is replaced
    # takes the position of the column it replaces. Columns with new names
    # are appended after the existing ones.
    #
    # @param other [Hash, Arrow::Table]
    #   For a `Hash`, each key is a column name (converted with `to_s`).
    #   A truthy value adds the column or replaces the column of the same
    #   name; a `nil` or `false` value removes the column of that name.
    #   For an `Arrow::Table`, all of its columns are added or replace the
    #   columns of the same name.
    #
    # @return [Arrow::Table]
    #
    # @raise [ArgumentError] If `other` is neither `Hash` nor `Arrow::Table`.
    def merge(other)
      added_columns = {}
      removed_columns = {}

      case other
      when Hash
        other.each do |name, value|
          name = name.to_s
          if value
            added_columns[name] = ensure_raw_column(name, value)
          else
            removed_columns[name] = true
          end
        end
      when Table
        added_columns = {}
        other.columns.each do |column|
          name = column.name
          added_columns[name] = ensure_raw_column(name, column)
        end
      else
        message = "merge target must be Hash or Arrow::Table: " +
          "<#{other.inspect}>: #{inspect}"
        raise ArgumentError, message
      end

      new_columns = []
      columns.each do |column|
        column_name = column.name
        # delete: whatever is left in added_columns afterwards is new.
        new_column = added_columns.delete(column_name)
        if new_column
          new_columns << new_column
          next
        end
        next if removed_columns.key?(column_name)
        new_columns << ensure_raw_column(column_name, column)
      end
      added_columns.each do |name, new_column|
        new_columns << new_column
      end
      new_fields = []
      new_arrays = []
      new_columns.each do |new_column|
        new_fields << new_column[:field]
        new_arrays << new_column[:data]
      end
      self.class.new(new_fields, new_arrays)
    end

    alias_method :remove_column_raw, :remove_column
    # Removes a column by name or by index.
    #
    # @param name_or_index [String, Symbol, Integer] The column name, or
    #   the column index. A negative index counts from the last column.
    #
    # @raise [KeyError] If there is no column of the given name.
    # @raise [IndexError] If the index is out of range.
    def remove_column(name_or_index)
      case name_or_index
      when String, Symbol
        name = name_or_index.to_s
        index = columns.index {|column| column.name == name}
        if index.nil?
          message = "unknown column: #{name_or_index.inspect}: #{inspect}"
          raise KeyError.new(message)
        end
      else
        index = name_or_index
        index += n_columns if index < 0
        if index < 0 or index >= n_columns
          message = "out of index (0..#{n_columns - 1}): " +
            "#{name_or_index.inspect}: #{inspect}"
          raise IndexError.new(message)
        end
      end
      remove_column_raw(index)
    end

    # Experimental
    #
    # Creates a Group of this table for the given keys.
    def group(*keys)
      Group.new(self, keys)
    end

    # Experimental
    #
    # Creates a RollingWindow of this table with the given size.
    def window(size: nil)
      RollingWindow.new(self, size)
    end

    # Saves this table to +output+ with TableSaver.
    def save(output, options={})
      saver = TableSaver.new(self, output, options)
      saver.save
    end

    # Creates a new table with the same schema whose column data are the
    # results of calling `pack` on each column's data.
    def pack
      packed_arrays = columns.collect do |column|
        column.data.pack
      end
      self.class.new(schema, packed_arrays)
    end

    alias_method :to_s_raw, :to_s
    # Formats this table as text.
    #
    # @param options [Hash] The options; they are also passed to the
    #   formatter.
    # @option options [:column, :list, :table, nil] :format
    #   `:column` uses the original `to_s`, `:list` uses
    #   TableListFormatter, `:table` or `nil` uses TableTableFormatter.
    #
    # @raise [ArgumentError] If `:format` has any other value.
    def to_s(options={})
      format = options[:format]
      case format
      when :column
        return to_s_raw
      when :list
        formatter_class = TableListFormatter
      when :table, nil
        formatter_class = TableTableFormatter
      else
        message = ":format must be :column, :list, :table or nil"
        raise ArgumentError, "#{message}: <#{format.inspect}>"
      end
      formatter = formatter_class.new(self, options)
      formatter.format
    end

    alias_method :inspect_raw, :inspect
    # The original inspect result followed by the formatted table.
    def inspect
      "#{super}\n#{to_s}"
    end

    # Column names are answered like methods, see #method_missing.
    def respond_to_missing?(name, include_private)
      return true if find_column(name)
      super
    end

    # A call without arguments whose name matches a column returns
    # that column.
    def method_missing(name, *args, &block)
      if args.empty?
        column = find_column(name)
        return column if column
      end
      super
    end

    private
    # Slices the rows from +from+ to +to+, both inclusive.
    def slice_by_range(from, to)
      slice_raw(from, to - from + 1)
    end

    # Converts +data+ to a Hash with the :field and the :data (always a
    # ChunkedArray) of a column named +name+. Inside this class `Array`
    # is `Arrow::Array`.
    #
    # @raise [ArgumentError] If +data+ is not an Arrow::Array, an
    #   Arrow::ChunkedArray or an Arrow::Column.
    def ensure_raw_column(name, data)
      case data
      when Array
        {
          field: Field.new(name, data.value_data_type),
          data: ChunkedArray.new([data]),
        }
      when ChunkedArray
        {
          field: Field.new(name, data.value_data_type),
          data: data,
        }
      when Column
        column = data
        data = column.data
        data = ChunkedArray.new([data]) unless data.is_a?(ChunkedArray)
        {
          field: column.field,
          data: data,
        }
      else
        message = "column must be Arrow::Array or Arrow::Column: " +
          "<#{name}>: <#{data.inspect}>: #{inspect}"
        raise ArgumentError, message
      end
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/tensor.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Tensor
    # A tensor converts to itself.
    def to_arrow
      self
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/time.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  # A signed time value kept as a raw count (+value+) of +unit+.
  #
  # The hour, minute, second and nano second parts are computed from the
  # absolute value on first use and then cached.
  class Time
    attr_reader :unit, :value

    # @param unit The unit of +value+ (one of the TimeUnit values).
    # @param value The raw count in +unit+. It may be negative.
    def initialize(unit, value)
      @unit = unit
      @value = value
      @unconstructed = false
    end

    # Two times are equal when they have the same sign and the same
    # hour, minute, second and nano second parts. The units may differ.
    def ==(other)
      return false unless other.is_a?(self.class)

      positive? == other.positive? &&
        hour == other.hour &&
        minute == other.minute &&
        second == other.second &&
        nano_second == other.nano_second
    end

    # Returns a new time that holds this time in +target_unit+.
    # Precision below +target_unit+ is dropped.
    def cast(target_unit)
      return self.class.new(@unit, @value) if @unit == target_unit

      whole_seconds = (hour * 60 * 60) + (minute * 60) + second
      magnitude =
        case target_unit
        when TimeUnit::MILLI
          (whole_seconds * 1000) + (nano_second / 1000 / 1000)
        when TimeUnit::MICRO
          (whole_seconds * 1000 * 1000) + (nano_second / 1000)
        when TimeUnit::NANO
          (whole_seconds * 1000 * 1000 * 1000) + nano_second
        else
          whole_seconds
        end
      self.class.new(target_unit, negative? ? -magnitude : magnitude)
    end

    # The value in seconds as a Float. nil for an unknown unit.
    def to_f
      case @unit
      when TimeUnit::SECOND then @value.to_f
      when TimeUnit::MILLI then @value.to_f / 1000.0
      when TimeUnit::MICRO then @value.to_f / 1000.0 / 1000.0
      when TimeUnit::NANO then @value.to_f / 1000.0 / 1000.0 / 1000.0
      end
    end

    def positive?
      @value.positive?
    end

    def negative?
      @value.negative?
    end

    # The hour part of the absolute value. It is not limited to 0..23.
    def hour
      unconstruct
      @hour
    end

    # The minute part (0..59) of the absolute value.
    def minute
      unconstruct
      @minute
    end
    alias_method :min, :minute

    # The second part (0..59) of the absolute value.
    def second
      unconstruct
      @second
    end
    alias_method :sec, :second

    # The sub-second part of the absolute value in nano seconds.
    def nano_second
      unconstruct
      @nano_second
    end
    alias_method :nsec, :nano_second

    # Formats as "[-]HH:MM:SS[.fraction]". The fraction has no trailing
    # zeros and is left out completely when it is zero.
    def to_s
      unconstruct
      fraction = ""
      unless @nano_second.zero?
        fraction = (".%09d" % @nano_second).gsub(/0+\z/, "")
      end
      sign = @value.negative? ? "-" : ""
      format("%s%02d:%02d:%02d%s", sign, @hour, @minute, @second, fraction)
    end

    private
    # Splits the absolute value into its parts, once.
    #
    # @raise [ArgumentError] If the unit is not one of the TimeUnit values
    #   handled here.
    def unconstruct
      return if @unconstructed

      abs_value = @value.abs
      whole_seconds, nano_seconds =
        case unit
        when TimeUnit::SECOND
          [abs_value, 0]
        when TimeUnit::MILLI
          [abs_value / 1000, (abs_value % 1000) * 1000 * 1000]
        when TimeUnit::MICRO
          [abs_value / 1000 / 1000, (abs_value % (1000 * 1000)) * 1000]
        when TimeUnit::NANO
          [abs_value / 1000 / 1000 / 1000, abs_value % (1000 * 1000 * 1000)]
        else
          raise ArgumentError, "invalid unit: #{@unit.inspect}"
        end
      unconstruct_second(whole_seconds)
      @nano_second = nano_seconds
      @unconstructed = true
    end

    # Splits a non-negative number of seconds into hour, minute and second.
    def unconstruct_second(abs_value_in_second)
      @hour = 0
      @minute = 0
      @second = abs_value_in_second
      return if abs_value_in_second < 60

      in_minute = abs_value_in_second / 60
      @second = abs_value_in_second % 60
      if abs_value_in_second < (60 * 60)
        @minute = in_minute
      else
        @hour = in_minute / 60
        @minute = in_minute % 60
      end
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/time32-array-builder.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Time32ArrayBuilder
    # Builds an array from +values+ with a builder created for
    # +unit_or_data_type+.
    def self.build(unit_or_data_type, values)
      new(unit_or_data_type).build(values)
    end

    alias_method :initialize_raw, :initialize
    # @param unit_or_data_type A DataType, which is used as is, or a unit,
    #   from which a Time32DataType is created.
    def initialize(unit_or_data_type)
      data_type =
        if unit_or_data_type.is_a?(DataType)
          unit_or_data_type
        else
          Time32DataType.new(unit_or_data_type)
        end
      initialize_raw(data_type)
    end

    # The unit of the value data type. It is looked up once and cached.
    def unit
      @unit ||= value_data_type.unit
    end

    private
    # An Arrow::Time is cast to the unit of this builder and replaced by
    # its raw value. Any other value is returned unchanged.
    def convert_to_arrow_value(value)
      value.is_a?(Time) ? value.cast(unit).value : value
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/time32-array.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Time32Array
    # Returns the i-th value as an Arrow::Time in the unit of this array.
    def get_value(i)
      time_unit = unit
      Time.new(time_unit, get_raw_value(i))
    end

    # The unit of the value data type. It is looked up once and cached.
    def unit
      return @unit if @unit

      @unit = value_data_type.unit
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/time32-data-type.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Time32DataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Time32DataType}.
    #
    # @overload initialize(unit)
    #
    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
    #     time32 data type.
    #
    #     The unit must be second or millisecond.
    #
    #   @example Create a time32 data type with Arrow::TimeUnit
    #     Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
    #
    #   @example Create a time32 data type with Symbol
    #     Arrow::Time32DataType.new(:milli)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the time32 data
    #     type. It must have `:unit` value.
    #
    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
    #     the time32 data type.
    #
    #     The unit must be second or millisecond.
    #
    #   @example Create a time32 data type with Arrow::TimeUnit
    #     Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
    #
    #   @example Create a time32 data type with Symbol
    #     Arrow::Time32DataType.new(unit: :milli)
    def initialize(unit)
      # A Hash is a description: the unit is its :unit value.
      unit = unit[:unit] if unit.is_a?(Hash)
      initialize_raw(unit)
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/time64-array-builder.rb
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  class Time64ArrayBuilder
    class << self
      # Creates a builder for +unit_or_data_type+ and returns the
      # result of its instance-level +build+ for +values+.
      #
      # @param unit_or_data_type A unit or a {DataType}; see {#initialize}.
      # @param values The values handed to the builder's +build+.
      def build(unit_or_data_type, values)
        new(unit_or_data_type).build(values)
      end
    end

    # Keep the original constructor reachable under another name so
    # that the wrapper below can delegate to it.
    alias_method :initialize_raw, :initialize

    # @param unit_or_data_type A {DataType} is used as is. Anything
    #   else is treated as a unit and wrapped in a new {Time64DataType}.
    def initialize(unit_or_data_type)
      data_type =
        if unit_or_data_type.is_a?(DataType)
          unit_or_data_type
        else
          Time64DataType.new(unit_or_data_type)
        end
      initialize_raw(data_type)
    end

    # @return The unit of this builder's value data type. It is read
    #   once and cached in +@unit+ afterwards.
    def unit
      @unit ||= value_data_type.unit
    end

    private

    # Values that are +Time+ are cast to this builder's unit and
    # reduced to their +value+; every other value is passed through
    # untouched.
    #
    # NOTE(review): +Time+ is resolved lexically from inside +Arrow+, so
    # this is presumably Arrow::Time rather than ::Time -- confirm.
    def convert_to_arrow_value(value)
      if value.is_a?(Time)
        value.cast(unit).value
      else
        value
      end
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/time64-array.rb (new file, index 7fc2fd9ab)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  class Time64Array
    # Returns the element at +i+ wrapped in a +Time+ that is built from
    # this array's unit and the raw value stored at that position.
    #
    # NOTE(review): +Time+ is resolved lexically from inside +Arrow+, so
    # this is presumably Arrow::Time rather than ::Time -- confirm.
    #
    # @param i The position handed to +get_raw_value+.
    # @return [Time] The wrapped value.
    def get_value(i)
      time_unit = unit
      Time.new(time_unit, get_raw_value(i))
    end

    # @return The unit of this array's value data type. It is read once
    #   and cached in +@unit+ afterwards.
    def unit
      @unit ||= value_data_type.unit
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/time64-data-type.rb (new file, index 13795aa83)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class Time64DataType
    # Keep the original constructor reachable so that the
    # Hash-accepting wrapper below can delegate to it.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Time64DataType}.
    #
    # @overload initialize(unit)
    #
    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
    #     time64 data type.
    #
    #     The unit must be microsecond or nanosecond.
    #
    #   @example Create a time64 data type with Arrow::TimeUnit
    #     Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
    #
    #   @example Create a time64 data type with Symbol
    #     Arrow::Time64DataType.new(:nano)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the time64 data
    #     type. It must have `:unit` value.
    #
    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
    #     the time64 data type.
    #
    #     The unit must be microsecond or nanosecond.
    #
    #   @example Create a time64 data type with Arrow::TimeUnit
    #     Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
    #
    #   @example Create a time64 data type with Symbol
    #     Arrow::Time64DataType.new(unit: :nano)
    def initialize(unit)
      # A Hash is treated as a description; only its :unit entry is used.
      unit = unit[:unit] if unit.is_a?(Hash)
      initialize_raw(unit)
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb (new file, index 68bcb0fec)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  class TimestampArrayBuilder
    class << self
      # Creates a builder for +unit_or_data_type+ and returns the
      # result of its instance-level +build+ for +values+.
      #
      # @param unit_or_data_type A unit or a {DataType}; see {#initialize}.
      # @param values The values handed to the builder's +build+.
      def build(unit_or_data_type, values)
        new(unit_or_data_type).build(values)
      end
    end

    # Keep the original constructor reachable under another name so
    # that the wrapper below can delegate to it.
    alias_method :initialize_raw, :initialize

    # @param unit_or_data_type A {DataType} is used as is. Anything
    #   else is treated as a unit and wrapped in a new
    #   {TimestampDataType}.
    def initialize(unit_or_data_type)
      data_type =
        if unit_or_data_type.is_a?(DataType)
          unit_or_data_type
        else
          TimestampDataType.new(unit_or_data_type)
        end
      initialize_raw(data_type)
    end

    private

    # @return [Symbol] The +nick+ of the value data type's unit as a
    #   Symbol. It is computed once and cached in +@unit_id+.
    def unit_id
      @unit_id ||= value_data_type.unit.nick.to_sym
    end

    # Turns time-like values into an Integer count of this builder's
    # unit (+:second+, +:milli+, +:micro+, anything else is handled as
    # nanoseconds). Values that are not +::Time+ after the optional
    # +to_time+ conversion are returned untouched.
    #
    # NOTE(review): the unqualified +Time+ in the first check is
    # resolved lexically from inside +Arrow+, so it may be Arrow::Time
    # rather than ::Time -- confirm the intent.
    def convert_to_arrow_value(value)
      value = value.to_time if value.respond_to?(:to_time) && !value.is_a?(Time)
      return value unless value.is_a?(::Time)

      case unit_id
      when :second then value.to_i
      when :milli  then value.to_i * 1_000 + value.usec / 1_000
      when :micro  then value.to_i * 1_000_000 + value.usec
      else              value.to_i * 1_000_000_000 + value.nsec
      end
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/timestamp-array.rb (new file, index 011273487)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  class TimestampArray
    # Returns the element at +i+ converted to a +::Time+.
    #
    # @param i The position handed to +get_raw_value+.
    # @return [::Time] The converted value.
    def get_value(i)
      cast_to_time(get_raw_value(i))
    end

    # @return The unit of this array's value data type. It is read once
    #   and cached in +@unit+ afterwards.
    def unit
      @unit ||= value_data_type.unit
    end

    private

    # Converts a raw Integer count of this array's unit since the epoch
    # to a +::Time+.
    #
    # The second argument of +::Time.at+ is interpreted as
    # microseconds, so every sub-second remainder is converted to
    # microseconds before it is passed on:
    #
    # * milliseconds are multiplied by 1_000 (passing them as is would
    #   turn 1500 ms into 1.0005 s instead of 1.5 s);
    # * nanoseconds are passed as an exact Rational number of
    #   microseconds instead of going through a Float division, which
    #   cannot represent present-day epoch nanoseconds exactly.
    #
    # +divmod+ floors, so negative raw values produce a negative second
    # count and a non-negative remainder.
    #
    # @param raw_value [Integer] The raw value in this array's unit.
    # @return [::Time] The point in time that +raw_value+ denotes.
    def cast_to_time(raw_value)
      case unit
      when TimeUnit::SECOND
        ::Time.at(raw_value)
      when TimeUnit::MILLI
        seconds, milliseconds = raw_value.divmod(1_000)
        ::Time.at(seconds, milliseconds * 1_000)
      when TimeUnit::MICRO
        ::Time.at(*raw_value.divmod(1_000_000))
      else
        # Every remaining unit is handled as nanoseconds.
        seconds, nanoseconds = raw_value.divmod(1_000_000_000)
        ::Time.at(seconds, Rational(nanoseconds, 1_000))
      end
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/timestamp-data-type.rb (new file, index cd91f567d)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  class TimestampDataType
    # Keep the original constructor reachable so that the
    # Hash-accepting wrapper below can delegate to it.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::TimestampDataType}.
    #
    # @overload initialize(unit)
    #
    #   @param unit [Arrow::TimeUnit, Symbol] The unit of the
    #     timestamp data type.
    #
    #   @example Create a timestamp data type with Arrow::TimeUnit
    #     Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
    #
    #   @example Create a timestamp data type with Symbol
    #     Arrow::TimestampDataType.new(:milli)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the timestamp data
    #     type. It must have `:unit` value.
    #
    #   @option description [Arrow::TimeUnit, Symbol] :unit The unit of
    #     the timestamp data type.
    #
    #   @example Create a timestamp data type with Arrow::TimeUnit
    #     Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
    #
    #   @example Create a timestamp data type with Symbol
    #     Arrow::TimestampDataType.new(unit: :milli)
    def initialize(unit)
      # A Hash is treated as a description; only its :unit entry is used.
      unit = unit[:unit] if unit.is_a?(Hash)
      initialize_raw(unit)
    end
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/version.rb (new file, index f830ff895)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
module Arrow
  # The full version string of this package.
  VERSION = "6.0.1"

  # The components of {VERSION}.
  module Version
    # Everything after the first "-" separator of VERSION becomes TAG;
    # TAG is nil when VERSION has no "-".
    number_part, TAG = VERSION.split("-")
    # The dot separated numbers in front of the tag, as Integers.
    MAJOR, MINOR, MICRO = number_part.split(".").map(&:to_i)
    # The same object as VERSION.
    STRING = VERSION
  end
end

# File: src/arrow/ruby/red-arrow/lib/arrow/writable.rb (new file, index 02be9ddfc)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

module Arrow
  module Writable
    # Makes +<<+ another name for +write+. +write+ has to be defined
    # on this module before this file is loaded.
    alias_method :<<, :write
  end
end