summaryrefslogtreecommitdiffstats
path: root/src/arrow/testing/data/avro
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/testing/data/avro
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/testing/data/avro')
-rw-r--r--src/arrow/testing/data/avro/README.md37
-rw-r--r--src/arrow/testing/data/avro/alltypes_dictionary.avrobin0 -> 765 bytes
-rw-r--r--src/arrow/testing/data/avro/alltypes_plain.avrobin0 -> 868 bytes
-rw-r--r--src/arrow/testing/data/avro/alltypes_plain.snappy.avrobin0 -> 766 bytes
-rw-r--r--src/arrow/testing/data/avro/binary.avrobin0 -> 236 bytes
-rw-r--r--src/arrow/testing/data/avro/datapage_v2.snappy.avrobin0 -> 456 bytes
-rw-r--r--src/arrow/testing/data/avro/dict-page-offset-zero.avrobin0 -> 213 bytes
-rw-r--r--src/arrow/testing/data/avro/fixed_length_decimal.avrobin0 -> 436 bytes
-rw-r--r--src/arrow/testing/data/avro/fixed_length_decimal_legacy.avrobin0 -> 433 bytes
-rw-r--r--src/arrow/testing/data/avro/int32_decimal.avrobin0 -> 392 bytes
-rw-r--r--src/arrow/testing/data/avro/int64_decimal.avrobin0 -> 431 bytes
-rw-r--r--src/arrow/testing/data/avro/list_columns.avrobin0 -> 373 bytes
-rw-r--r--src/arrow/testing/data/avro/nested_lists.snappy.avrobin0 -> 407 bytes
-rw-r--r--src/arrow/testing/data/avro/nonnullable.impala.avrobin0 -> 1570 bytes
-rw-r--r--src/arrow/testing/data/avro/nullable.impala.avrobin0 -> 1812 bytes
-rw-r--r--src/arrow/testing/data/avro/nulls.snappy.avrobin0 -> 330 bytes
-rw-r--r--src/arrow/testing/data/avro/repeated_no_annotation.avrobin0 -> 627 bytes
-rw-r--r--src/arrow/testing/data/avro/single_nan.avrobin0 -> 204 bytes
18 files changed, 37 insertions, 0 deletions
diff --git a/src/arrow/testing/data/avro/README.md b/src/arrow/testing/data/avro/README.md
new file mode 100644
index 000000000..2707e1297
--- /dev/null
+++ b/src/arrow/testing/data/avro/README.md
@@ -0,0 +1,37 @@
+This directory contains Avro files corresponding to the Parquet testing files at https://github.com/apache/parquet-testing/blob/master/data/
+
+These files were created with Spark, using the commands from https://gist.github.com/Igosuki/324b011f40185269d3fc552350d21744
+
+Roughly:
+```scala
+import com.github.mrpowers.spark.daria.sql.DariaWriters
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.conf.Configuration
+import org.apache.commons.io.FilenameUtils
+
+val fileGlobs = sc.getConf.get("spark.driver.globs")
+val dest = sc.getConf.get("spark.driver.out")
+
+val fs = FileSystem.get(new Configuration(true));
+val status = fs.globStatus(new Path(fileGlobs))
+for (fileStatus <- status) {
+ val path = fileStatus.getPath().toString()
+ try {
+ val dfin = spark.read.format("parquet").load(path)
+ val fileName = fileStatus.getPath().getName();
+ val fileNameWithOutExt = FilenameUtils.removeExtension(fileName);
+ val destination = s"${dest}/${fileNameWithOutExt}.avro"
+ println(s"Converting $path to avro at $destination")
+ DariaWriters.writeSingleFile(
+ df = dfin,
+ format = "avro",
+ sc = spark.sparkContext,
+ tmpFolder = s"/tmp/dw/${fileName}",
+ filename = destination
+ )
+ } catch {
+ case e: Throwable => println(s"failed to convert $path : ${e.getMessage}")
+ }
+}
+```
diff --git a/src/arrow/testing/data/avro/alltypes_dictionary.avro b/src/arrow/testing/data/avro/alltypes_dictionary.avro
new file mode 100644
index 000000000..1fdd79e8a
--- /dev/null
+++ b/src/arrow/testing/data/avro/alltypes_dictionary.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/alltypes_plain.avro b/src/arrow/testing/data/avro/alltypes_plain.avro
new file mode 100644
index 000000000..d60c62822
--- /dev/null
+++ b/src/arrow/testing/data/avro/alltypes_plain.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/alltypes_plain.snappy.avro b/src/arrow/testing/data/avro/alltypes_plain.snappy.avro
new file mode 100644
index 000000000..d818ab554
--- /dev/null
+++ b/src/arrow/testing/data/avro/alltypes_plain.snappy.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/binary.avro b/src/arrow/testing/data/avro/binary.avro
new file mode 100644
index 000000000..48081f93b
--- /dev/null
+++ b/src/arrow/testing/data/avro/binary.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/datapage_v2.snappy.avro b/src/arrow/testing/data/avro/datapage_v2.snappy.avro
new file mode 100644
index 000000000..ccd590076
--- /dev/null
+++ b/src/arrow/testing/data/avro/datapage_v2.snappy.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/dict-page-offset-zero.avro b/src/arrow/testing/data/avro/dict-page-offset-zero.avro
new file mode 100644
index 000000000..388541dfc
--- /dev/null
+++ b/src/arrow/testing/data/avro/dict-page-offset-zero.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/fixed_length_decimal.avro b/src/arrow/testing/data/avro/fixed_length_decimal.avro
new file mode 100644
index 000000000..1c9f195ab
--- /dev/null
+++ b/src/arrow/testing/data/avro/fixed_length_decimal.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/fixed_length_decimal_legacy.avro b/src/arrow/testing/data/avro/fixed_length_decimal_legacy.avro
new file mode 100644
index 000000000..e840d486a
--- /dev/null
+++ b/src/arrow/testing/data/avro/fixed_length_decimal_legacy.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/int32_decimal.avro b/src/arrow/testing/data/avro/int32_decimal.avro
new file mode 100644
index 000000000..0623a288a
--- /dev/null
+++ b/src/arrow/testing/data/avro/int32_decimal.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/int64_decimal.avro b/src/arrow/testing/data/avro/int64_decimal.avro
new file mode 100644
index 000000000..182d9b7b1
--- /dev/null
+++ b/src/arrow/testing/data/avro/int64_decimal.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/list_columns.avro b/src/arrow/testing/data/avro/list_columns.avro
new file mode 100644
index 000000000..0d2dd2354
--- /dev/null
+++ b/src/arrow/testing/data/avro/list_columns.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/nested_lists.snappy.avro b/src/arrow/testing/data/avro/nested_lists.snappy.avro
new file mode 100644
index 000000000..6cbff8961
--- /dev/null
+++ b/src/arrow/testing/data/avro/nested_lists.snappy.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/nonnullable.impala.avro b/src/arrow/testing/data/avro/nonnullable.impala.avro
new file mode 100644
index 000000000..7ff8f3b7a
--- /dev/null
+++ b/src/arrow/testing/data/avro/nonnullable.impala.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/nullable.impala.avro b/src/arrow/testing/data/avro/nullable.impala.avro
new file mode 100644
index 000000000..28f118b6b
--- /dev/null
+++ b/src/arrow/testing/data/avro/nullable.impala.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/nulls.snappy.avro b/src/arrow/testing/data/avro/nulls.snappy.avro
new file mode 100644
index 000000000..8be5bec85
--- /dev/null
+++ b/src/arrow/testing/data/avro/nulls.snappy.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/repeated_no_annotation.avro b/src/arrow/testing/data/avro/repeated_no_annotation.avro
new file mode 100644
index 000000000..44edb8e10
--- /dev/null
+++ b/src/arrow/testing/data/avro/repeated_no_annotation.avro
Binary files differ
diff --git a/src/arrow/testing/data/avro/single_nan.avro b/src/arrow/testing/data/avro/single_nan.avro
new file mode 100644
index 000000000..ccf93e54d
--- /dev/null
+++ b/src/arrow/testing/data/avro/single_nan.avro
Binary files differ