summary refs log tree commit diff stats
path: root/src/arrow/c_glib/test/test-csv-reader.rb
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/c_glib/test/test-csv-reader.rb')
-rw-r--r-- src/arrow/c_glib/test/test-csv-reader.rb 241
1 files changed, 241 insertions, 0 deletions
diff --git a/src/arrow/c_glib/test/test-csv-reader.rb b/src/arrow/c_glib/test/test-csv-reader.rb
new file mode 100644
index 000000000..018f062ac
--- /dev/null
+++ b/src/arrow/c_glib/test/test-csv-reader.rb
@@ -0,0 +1,241 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestCSVReader < Test::Unit::TestCase # Tests for Arrow::CSVReader#read and the Arrow::CSVReadOptions accessors.
+ include Helper::Buildable # presumably supplies the build_*_array / build_table helpers used below -- TODO confirm
+ include Helper::Omittable # presumably supplies require_gi_bindings -- TODO confirm
+
+ sub_test_case("#read") do
+ def open_input(csv) # Wraps the CSV text in an Arrow::Buffer and returns an Arrow::BufferInputStream over it.
+ buffer = Arrow::Buffer.new(csv)
+ Arrow::BufferInputStream.new(buffer)
+ end
+
+ def test_default # No options: "message" is expected as a string column and "count" as an int64 column.
+ table = Arrow::CSVReader.new(open_input(<<-CSV))
+message,count
+"Start",2
+"Shutdown",9
+ CSV
+ columns = {
+ "message" => build_string_array(["Start", "Shutdown"]),
+ "count" => build_int64_array([2, 9]),
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ sub_test_case("options") do
+ def test_add_column_type # Types registered per column with add_column_type are expected in the table that is read.
+ options = Arrow::CSVReadOptions.new
+ options.add_column_type("count", Arrow::UInt8DataType.new)
+ options.add_column_type("valid", Arrow::BooleanDataType.new)
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+count,valid
+2,1
+9,0
+ CSV
+ columns = {
+ "count" => build_uint8_array([2, 9]),
+ "valid" => build_boolean_array([true, false]), # the cells 1 and 0 are expected back as true and false
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ def test_add_schema # Same input and expectation as test_add_column_type, but the types come from an Arrow::Schema.
+ options = Arrow::CSVReadOptions.new
+ fields = [
+ Arrow::Field.new("count", Arrow::UInt8DataType.new),
+ Arrow::Field.new("valid", Arrow::BooleanDataType.new),
+ ]
+ schema = Arrow::Schema.new(fields)
+ options.add_schema(schema)
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+count,valid
+2,1
+9,0
+ CSV
+ columns = {
+ "count" => build_uint8_array([2, 9]),
+ "valid" => build_boolean_array([true, false]),
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ def test_column_types # column_types is expected to return the registered types as a name => data type Hash.
+ require_gi_bindings(3, 3, 1) # NOTE(review): looks like a minimum bindings version guard (3.3.1) -- confirm in the helper
+ options = Arrow::CSVReadOptions.new
+ options.add_column_type("count", Arrow::UInt8DataType.new)
+ options.add_column_type("valid", Arrow::BooleanDataType.new)
+ assert_equal({
+ "count" => Arrow::UInt8DataType.new,
+ "valid" => Arrow::BooleanDataType.new,
+ },
+ options.column_types)
+ end
+
+ def test_null_values # null_values round-trips through its accessor, and matching "count" cells are expected as nil.
+ options = Arrow::CSVReadOptions.new
+ null_values = ["2", "5"]
+ options.null_values = null_values
+ assert_equal(null_values, options.null_values)
+
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message,count
+"Start",2
+"Shutdown",9
+"Restart",5
+ CSV
+ columns = {
+ "message" => build_string_array(["Start", "Shutdown", "Restart"]),
+ "count" => build_int64_array([nil, 9, nil]), # 2 and 5 are listed in null_values above
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ def test_add_null_value # add_null_value is expected to append to the list assigned through null_values=.
+ options = Arrow::CSVReadOptions.new
+ null_values = ["2", "5"]
+ options.null_values = null_values
+ options.add_null_value("9")
+ assert_equal(null_values + ["9"], options.null_values)
+ end
+
+ def test_boolean_values # true_values / false_values round-trip, and "message" is then expected as a boolean column.
+ options = Arrow::CSVReadOptions.new
+ true_values = ["Start", "Restart"]
+ options.true_values = true_values
+ assert_equal(true_values, options.true_values)
+
+ false_values = ["Shutdown"]
+ options.false_values = false_values
+ assert_equal(false_values, options.false_values)
+
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message,count
+"Start",2
+"Shutdown",9
+"Restart",5
+ CSV
+ columns = {
+ "message" => build_boolean_array([true, false, true]),
+ "count" => build_int64_array([2, 9, 5]),
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ def test_add_true_value # add_true_value is expected to append to the list assigned through true_values=.
+ options = Arrow::CSVReadOptions.new
+ true_values = ["Start", "Restart"]
+ options.true_values = true_values
+ options.add_true_value("Shutdown")
+ assert_equal(true_values + ["Shutdown"], options.true_values)
+ end
+
+ def test_add_false_value # add_false_value is expected to append to the list assigned through false_values=.
+ options = Arrow::CSVReadOptions.new
+ false_values = ["Start", "Restart"]
+ options.false_values = false_values
+ options.add_false_value("Shutdown")
+ assert_equal(false_values + ["Shutdown"], options.false_values)
+ end
+
+ def test_allow_null_strings # With allow_null_strings set, null_values matches in the string column are expected as nil.
+ options = Arrow::CSVReadOptions.new
+ options.null_values = ["Start", "Restart"]
+ options.allow_null_strings = true
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message,count
+"Start",2
+"Shutdown",9
+"Restart",5
+ CSV
+ columns = {
+ "message" => build_string_array([nil, "Shutdown", nil]),
+ "count" => build_int64_array([2, 9, 5]),
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ def test_n_skip_rows # With n_skip_rows = 1 the expected column names come from the second input line.
+ options = Arrow::CSVReadOptions.new
+ options.n_skip_rows = 1
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+message1,message2
+"Start1","Start2"
+"Shutdown1","Shutdown2"
+"Reboot1","Reboot2"
+ CSV
+ columns = {
+ "Start1" => build_string_array(["Shutdown1", "Reboot1"]),
+ "Start2" => build_string_array(["Shutdown2", "Reboot2"]),
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ def test_column_names # column_names round-trips; the input has no header line and every line is expected as data.
+ options = Arrow::CSVReadOptions.new
+ column_names = ["message", "count"]
+ options.column_names = column_names
+ assert_equal(column_names, options.column_names)
+
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+"Start",2
+"Shutdown",9
+"Reboot",5
+ CSV
+ columns = {
+ "message" => build_string_array(["Start", "Shutdown", "Reboot"]),
+ "count" => build_int64_array([2, 9, 5]),
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+
+ def test_add_column_name # add_column_name is expected to append to the list assigned through column_names=.
+ options = Arrow::CSVReadOptions.new
+ column_names = ["message", "count"]
+ options.column_names = column_names
+ options.add_column_name("score")
+ assert_equal(column_names + ["score"], options.column_names)
+ end
+
+ def test_generate_column_names # With generate_column_names set, headerless input is expected to get names "f0" and "f1".
+ options = Arrow::CSVReadOptions.new
+ options.generate_column_names = true
+
+ table = Arrow::CSVReader.new(open_input(<<-CSV), options)
+"Start",2
+"Shutdown",9
+"Reboot",5
+ CSV
+ columns = {
+ "f0" => build_string_array(["Start", "Shutdown", "Reboot"]),
+ "f1" => build_int64_array([2, 9, 5]),
+ }
+ assert_equal(build_table(columns),
+ table.read)
+ end
+ end
+ end
+end