summaryrefslogtreecommitdiffstats
path: root/src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml75
1 files changed, 75 insertions, 0 deletions
diff --git a/src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml b/src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml
new file mode 100644
index 000000000..151bb412f
--- /dev/null
+++ b/src/arrow/ruby/red-arrow/benchmark/raw-records/dictionary.yml
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+contexts:
+ - name: master
+ prelude: |
+ $LOAD_PATH.unshift(File.expand_path("ext/arrow"))
+ $LOAD_PATH.unshift(File.expand_path("lib"))
+prelude: |-
+ require "arrow"
+ require "faker"
+
+ state = ENV.fetch("FAKER_RANDOM_SEED", 17).to_i
+ Faker::Config.random = Random.new(state)
+
+ n_rows = 1000
+ n_columns = 10
+ type = Arrow::DictionaryDataType.new(:int8, :string, true)
+
+ fields = n_columns.times.map {|i| ["column_#{i}".to_sym, type] }.to_h
+ schema = Arrow::Schema.new(**fields)
+ dictionary = Arrow::StringArray.new(
+ 100.times.map { Faker::Book.genre }.uniq.sort
+ )
+ indices = Arrow::Int8Array.new(
+ n_rows.times.map {
+ Faker::Number.within(range: 0 ... dictionary.length)
+ }
+ )
+ arrays = n_columns.times.map do
+ Arrow::DictionaryArray.new(
+ type,
+ indices,
+ dictionary,
+ )
+ end
+ record_batch = Arrow::RecordBatch.new(schema, n_rows, arrays)
+
+ def pure_ruby_raw_records(record_batch)
+ n_rows = record_batch.n_rows
+ n_columns = record_batch.n_columns
+ columns = record_batch.columns
+ records = []
+ i = 0
+ while i < n_rows
+ record = []
+ j = 0
+ while j < n_columns
+ record << columns[j].data.indices[i]
+ j += 1
+ end
+ records << record
+ i += 1
+ end
+ records
+ end
+benchmark:
+ pure_ruby: |-
+ pure_ruby_raw_records(record_batch)
+ raw_records: |-
+ record_batch.raw_records