diff options
Diffstat (limited to '')
-rw-r--r-- | src/arrow/ruby/red-arrow/test/test-csv-loader.rb | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/src/arrow/ruby/red-arrow/test/test-csv-loader.rb b/src/arrow/ruby/red-arrow/test/test-csv-loader.rb new file mode 100644 index 000000000..7f7f23498 --- /dev/null +++ b/src/arrow/ruby/red-arrow/test/test-csv-loader.rb @@ -0,0 +1,250 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class CSVLoaderTest < Test::Unit::TestCase + include Helper::Fixture + + def load_csv(input) + Arrow::CSVLoader.load(input, skip_lines: /^#/) + end + + sub_test_case(".load") do + test("String: data: with header") do + data = fixture_path("with-header-float.csv").read + assert_equal(<<-TABLE, load_csv(data).to_s) + name score +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("String: data: without header") do + data = fixture_path("without-header-float.csv").read + assert_equal(<<-TABLE, load_csv(data).to_s) + 0 1 +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("String: path: with header") do + path = fixture_path("with-header-float.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("String: path: without header") do + path = fixture_path("without-header-float.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + 0 1 +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("Pathname: with header") do + path = fixture_path("with-header-float.csv") + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("Pathname: without header") do + path = fixture_path("without-header-float.csv") + assert_equal(<<-TABLE, load_csv(path).to_s) + 0 1 +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 + TABLE + end + + test("null: with double quote") do + path = fixture_path("null-with-double-quote.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10 +1 bob (null) +2 chris -1 + TABLE + end + + test("null: without double quote") do + path = fixture_path("null-without-double-quote.csv").to_s + assert_equal(<<-TABLE, load_csv(path).to_s) + name score +0 alice 10 +1 bob (null) +2 chris -1 + TABLE + end + + test("number: float, integer") do + path = fixture_path("float-integer.csv").to_s + assert_equal([2.9, 10, -1.1], + load_csv(path)[:score].to_a) + end + + test("number: integer, float") do + path = fixture_path("integer-float.csv").to_s + assert_equal([10.0, 2.9, -1.1], + load_csv(path)[:score].to_a) + end + end + + sub_test_case("CSVReader") do + def load_csv(data, **options) + Arrow::CSVLoader.load(data, **options) + end + + sub_test_case(":headers") do + test("true") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: true)) +value +a +b +c + CSV + end + + test(":first_line") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: :first_line)) +value +a +b +c + CSV + end + + test("truthy") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(value: values), + load_csv(<<-CSV, headers: 0)) +value +a +b +c + CSV + end + + test("Array of column names") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(column: values), + load_csv(<<-CSV, headers: ["column"])) +a +b +c + CSV + end + + test("false") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(f0: values), + load_csv(<<-CSV, headers: false)) +a +b +c + CSV + end + + test("nil") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(f0: values), + load_csv(<<-CSV, headers: nil)) +a +b +c + CSV + end + + test("string") do + values = Arrow::StringArray.new(["a", "b", "c"]) + assert_equal(Arrow::Table.new(column: values), + load_csv(<<-CSV, headers: "column")) +a +b +c + CSV + end + end + + test(":column_types") do + assert_equal(Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])), + load_csv(<<-CSV, column_types: {count: :uint16})) +count +1 +2 +4 + CSV + end + + test(":schema") do + table = Arrow::Table.new(:count => Arrow::UInt16Array.new([1, 2, 4])) + assert_equal(table, + load_csv(<<-CSV, schema: table.schema)) +count +1 +2 +4 + CSV + end + + test(":encoding") do + messages = [ + "\u3042", # U+3042 HIRAGANA LETTER A + "\u3044", # U+3044 HIRAGANA LETTER I + "\u3046", # U+3046 HIRAGANA LETTER U + ] + table = Arrow::Table.new(:message => Arrow::StringArray.new(messages)) + encoding = "cp932" + assert_equal(table, + load_csv((["message"] + messages).join("\n").encode(encoding), + schema: table.schema, + encoding: encoding)) + end + + test(":encoding and :compression") do + messages = [ + "\u3042", # U+3042 HIRAGANA LETTER A + "\u3044", # U+3044 HIRAGANA LETTER I + "\u3046", # U+3046 HIRAGANA LETTER U + ] + table = Arrow::Table.new(:message => Arrow::StringArray.new(messages)) + encoding = "cp932" + csv = (["message"] + messages).join("\n").encode(encoding) + assert_equal(table, + load_csv(Zlib::Deflate.deflate(csv), + schema: table.schema, + encoding: encoding, + compression: :gzip)) + end + end +end |