diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/c_glib/example | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/c_glib/example')
-rw-r--r-- | src/arrow/c_glib/example/README.md | 48 | ||||
-rw-r--r-- | src/arrow/c_glib/example/build.c | 77 | ||||
-rw-r--r-- | src/arrow/c_glib/example/extension-type.c | 381 | ||||
-rw-r--r-- | src/arrow/c_glib/example/lua/README.md | 50 | ||||
-rw-r--r-- | src/arrow/c_glib/example/lua/meson.build | 28 | ||||
-rw-r--r-- | src/arrow/c_glib/example/lua/read-batch.lua | 44 | ||||
-rw-r--r-- | src/arrow/c_glib/example/lua/read-stream.lua | 51 | ||||
-rw-r--r-- | src/arrow/c_glib/example/lua/write-batch.lua | 74 | ||||
-rw-r--r-- | src/arrow/c_glib/example/lua/write-stream.lua | 74 | ||||
-rw-r--r-- | src/arrow/c_glib/example/meson.build | 36 | ||||
-rw-r--r-- | src/arrow/c_glib/example/read-batch.c | 145 | ||||
-rw-r--r-- | src/arrow/c_glib/example/read-stream.c | 144 |
12 files changed, 1152 insertions, 0 deletions
diff --git a/src/arrow/c_glib/example/README.md b/src/arrow/c_glib/example/README.md new file mode 100644 index 000000000..b69145d68 --- /dev/null +++ b/src/arrow/c_glib/example/README.md @@ -0,0 +1,48 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Arrow GLib example + +This directory contains example code. + +C example code lives in this directory. Example code for language +bindings lives in subdirectories. For example, Lua example code is +in the `lua/` subdirectory. + +## C example code + +Here are the examples in this directory: + + * `build.c`: It shows how to create an array with an array builder. + +<!--- + * `write-batch.c`: It shows how to write an Arrow array to a file in + batch mode. +--> + + * `read-batch.c`: It shows how to read an Arrow array from a file in + batch mode. + +<!--- + * `write-stream.c`: It shows how to write an Arrow array to a file in + stream mode. +--> + + * `read-stream.c`: It shows how to read an Arrow array from a file in + stream mode. 
diff --git a/src/arrow/c_glib/example/build.c b/src/arrow/c_glib/example/build.c new file mode 100644 index 000000000..9b2d58d2b --- /dev/null +++ b/src/arrow/c_glib/example/build.c @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +int +main(int argc, char **argv) +{ + GArrowArray *array; + + { + GArrowInt32ArrayBuilder *builder; + gboolean success = TRUE; + GError *error = NULL; + + builder = garrow_int32_array_builder_new(); + if (success) { + success = garrow_int32_array_builder_append_value(builder, 29, &error); + } + if (success) { + success = garrow_int32_array_builder_append_value(builder, 2929, &error); + } + if (success) { + success = garrow_int32_array_builder_append_value(builder, 292929, &error); + } + if (!success) { + g_print("failed to append: %s\n", error->message); + g_error_free(error); + g_object_unref(builder); + return EXIT_FAILURE; + } + array = garrow_array_builder_finish(GARROW_ARRAY_BUILDER(builder), &error); + if (!array) { + g_print("failed to finish: %s\n", error->message); + g_error_free(error); + g_object_unref(builder); + return EXIT_FAILURE; + } + g_object_unref(builder); + } + + { + gint64 i, n; + + n = garrow_array_get_length(array); + g_print("length: %" G_GINT64_FORMAT "\n", n); + for (i = 0; i < n; i++) { + gint32 value; + + value = garrow_int32_array_get_value(GARROW_INT32_ARRAY(array), i); + g_print("array[%" G_GINT64_FORMAT "] = %d\n", + i, value); + } + } + + g_object_unref(array); + + return EXIT_SUCCESS; +} diff --git a/src/arrow/c_glib/example/extension-type.c b/src/arrow/c_glib/example/extension-type.c new file mode 100644 index 000000000..a23fa427d --- /dev/null +++ b/src/arrow/c_glib/example/extension-type.c @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +#define EXAMPLE_TYPE_UUID_ARRAY (example_uuid_array_get_type()) +G_DECLARE_DERIVABLE_TYPE(ExampleUUIDArray, + example_uuid_array, + EXAMPLE, + UUID_ARRAY, + GArrowExtensionArray) +struct _ExampleUUIDArrayClass +{ + GArrowExtensionArrayClass parent_class; +}; + +G_DEFINE_TYPE(ExampleUUIDArray, + example_uuid_array, + GARROW_TYPE_EXTENSION_ARRAY) + +static void +example_uuid_array_init(ExampleUUIDArray *object) +{ +} + +static void +example_uuid_array_class_init(ExampleUUIDArrayClass *klass) +{ +} + + +#define EXAMPLE_TYPE_UUID_DATA_TYPE (example_uuid_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(ExampleUUIDDataType, + example_uuid_data_type, + EXAMPLE, + UUID_DATA_TYPE, + GArrowExtensionDataType) +struct _ExampleUUIDDataTypeClass +{ + GArrowExtensionDataTypeClass parent_class; +}; + + +G_DEFINE_TYPE(ExampleUUIDDataType, + example_uuid_data_type, + GARROW_TYPE_EXTENSION_DATA_TYPE) + +static gchar * +example_uuid_data_type_get_extension_name(GArrowExtensionDataType *data_type) +{ + return g_strdup("uuid"); +} + +static gboolean +example_uuid_data_type_equal(GArrowExtensionDataType *data_type, + GArrowExtensionDataType *other_data_type) +{ + /* Compare parameters if they exists. 
*/ + return TRUE; +} + +static const gchar *example_uuid_data_type_serialize_id = "uuid-serialized"; +static ExampleUUIDDataType *example_uuid_data_type_new(void); + +static GArrowDataType * +example_uuid_data_type_deserialize(GArrowExtensionDataType *data_type, + GArrowDataType *storage_data_type, + GBytes *serialized_data, + GError **error) +{ + gsize raw_data_size; + gconstpointer raw_data = g_bytes_get_data(serialized_data, &raw_data_size); + if (!(raw_data_size == strlen(example_uuid_data_type_serialize_id) && + strncmp(raw_data, + example_uuid_data_type_serialize_id, + raw_data_size) == 0)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[uuid-data-type][deserialize] " + "identifier must be <%s>: <%.*s>", + example_uuid_data_type_serialize_id, + (gint)raw_data_size, + (const gchar *)raw_data); + return NULL; + } + + GArrowDataType *expected_storage_data_type; + g_object_get(data_type, + "storage-data-type", &expected_storage_data_type, + NULL); + if (!garrow_data_type_equal(storage_data_type, + expected_storage_data_type)) { + gchar *expected = garrow_data_type_to_string(expected_storage_data_type); + gchar *actual = garrow_data_type_to_string(storage_data_type); + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[uuid-data-type][deserialize] " + "storage data type must be <%s>: <%s>", + expected, + actual); + g_free(actual); + g_free(expected); + return NULL; + } + + return GARROW_DATA_TYPE(example_uuid_data_type_new()); +} + +static GBytes * +example_uuid_data_type_serialize(GArrowExtensionDataType *data_type) +{ + return g_bytes_new_static(example_uuid_data_type_serialize_id, + strlen(example_uuid_data_type_serialize_id)); +} + +static GType +example_uuid_data_type_get_array_gtype(GArrowExtensionDataType *data_type) +{ + return EXAMPLE_TYPE_UUID_ARRAY; +} + +static void +example_uuid_data_type_init(ExampleUUIDDataType *object) +{ +} + +static void +example_uuid_data_type_class_init(ExampleUUIDDataTypeClass *klass) +{ + 
GArrowExtensionDataTypeClass *extension_klass = + GARROW_EXTENSION_DATA_TYPE_CLASS(klass); + extension_klass->get_extension_name = + example_uuid_data_type_get_extension_name; + extension_klass->equal = example_uuid_data_type_equal; + extension_klass->deserialize = example_uuid_data_type_deserialize; + extension_klass->serialize = example_uuid_data_type_serialize; + extension_klass->get_array_gtype = example_uuid_data_type_get_array_gtype; +} + +static ExampleUUIDDataType * +example_uuid_data_type_new(void) +{ + GArrowFixedSizeBinaryDataType *storage_data_type = + garrow_fixed_size_binary_data_type_new(16); + return g_object_new(EXAMPLE_TYPE_UUID_DATA_TYPE, + "storage-data-type", storage_data_type, + NULL); +} + + +int +main(int argc, char **argv) +{ + GArrowExtensionDataTypeRegistry *registry = + garrow_extension_data_type_registry_default(); + + /* Create UUID extension data type. */ + ExampleUUIDDataType *uuid_data_type = example_uuid_data_type_new(); + GArrowExtensionDataType *extension_data_type = + GARROW_EXTENSION_DATA_TYPE(uuid_data_type); + /* Register the created UUID extension data type. */ + GError *error = NULL; + if (!garrow_extension_data_type_registry_register(registry, + extension_data_type, + &error)) { + g_print("failed to register: %s\n", error->message); + g_error_free(error); + g_object_unref(registry); + return EXIT_FAILURE; + } + + { + /* Build storage data for the created UUID extension data type. 
*/ + GArrowFixedSizeBinaryDataType *storage_data_type; + g_object_get(extension_data_type, + "storage-data-type", &storage_data_type, + NULL); + GArrowFixedSizeBinaryArrayBuilder *builder = + garrow_fixed_size_binary_array_builder_new(storage_data_type); + g_object_unref(storage_data_type); + garrow_fixed_size_binary_array_builder_append_value( + builder, + (const guint8 *)"0123456789012345", + 16, + &error); + if (!error) { + garrow_array_builder_append_null(GARROW_ARRAY_BUILDER(builder), &error); + } + if (!error) { + garrow_fixed_size_binary_array_builder_append_value( + builder, + (const guint8 *)"abcdefghijklmnop", + 16, + &error); + } + if (error) { + g_print("failed to append elements: %s\n", error->message); + g_error_free(error); + g_object_unref(builder); + goto exit; + } + GArrowArray *storage = + garrow_array_builder_finish(GARROW_ARRAY_BUILDER(builder), &error); + g_object_unref(builder); + if (error) { + g_print("failed to build storage: %s\n", error->message); + g_error_free(error); + goto exit; + } + + /* Wrap the created storage data as the created UUID extension array. */ + GArrowExtensionArray *extension_array = + garrow_extension_data_type_wrap_array(extension_data_type, + storage); + g_object_unref(storage); + gint64 n_rows = garrow_array_get_length(GARROW_ARRAY(extension_array)); + + /* Create a record batch to serialize the created UUID extension array. 
*/ + GList *fields = NULL; + fields = g_list_append(fields, + garrow_field_new("uuid", + GARROW_DATA_TYPE(uuid_data_type))); + GArrowSchema *schema = garrow_schema_new(fields); + g_list_free_full(fields, g_object_unref); + GList *columns = NULL; + columns = g_list_append(columns, extension_array); + GArrowRecordBatch *record_batch = + garrow_record_batch_new(schema, n_rows, columns, &error); + g_list_free_full(columns, g_object_unref); + if (error) { + g_print("failed to create record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(schema); + goto exit; + } + + /* Serialize the created record batch. */ + GArrowResizableBuffer *buffer = garrow_resizable_buffer_new(0, &error); + if (error) { + g_print("failed to create buffer: %s\n", error->message); + g_error_free(error); + g_object_unref(schema); + g_object_unref(record_batch); + goto exit; + } + { + GArrowBufferOutputStream *output = + garrow_buffer_output_stream_new(buffer); + GArrowRecordBatchStreamWriter *writer = + garrow_record_batch_stream_writer_new(GARROW_OUTPUT_STREAM(output), + schema, + &error); + if (error) { + g_print("failed to create writer: %s\n", error->message); + g_error_free(error); + g_object_unref(output); + g_object_unref(buffer); + g_object_unref(schema); + g_object_unref(record_batch); + goto exit; + } + garrow_record_batch_writer_write_record_batch( + GARROW_RECORD_BATCH_WRITER(writer), + record_batch, + &error); + if (error) { + g_print("failed to write record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(writer); + g_object_unref(output); + g_object_unref(buffer); + g_object_unref(schema); + g_object_unref(record_batch); + goto exit; + } + g_object_unref(schema); + g_object_unref(record_batch); + garrow_record_batch_writer_close(GARROW_RECORD_BATCH_WRITER(writer), + &error); + g_object_unref(writer); + g_object_unref(output); + if (error) { + g_print("failed to close writer: %s\n", error->message); + g_error_free(error); + 
g_object_unref(buffer); + goto exit; + } + } + + /* Deserialize the serialized record batch. */ + { + GArrowBufferInputStream *input = + garrow_buffer_input_stream_new(GARROW_BUFFER(buffer)); + GArrowRecordBatchStreamReader *reader = + garrow_record_batch_stream_reader_new(GARROW_INPUT_STREAM(input), + &error); + if (error) { + g_print("failed to create reader: %s\n", error->message); + g_error_free(error); + g_object_unref(input); + g_object_unref(buffer); + goto exit; + } + record_batch = + garrow_record_batch_reader_read_next(GARROW_RECORD_BATCH_READER(reader), + &error); + if (error) { + g_print("failed to read record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(reader); + g_object_unref(input); + g_object_unref(buffer); + goto exit; + } + /* Show the deserialize record batch. */ + gchar *record_batch_content = + garrow_record_batch_to_string(record_batch, + &error); + if (error) { + g_print("failed to dump record batch content: %s\n", error->message); + g_error_free(error); + error = NULL; + } else { + g_print("record batch:\n%s\n", record_batch_content); + } + /* Get the deserialize UUID extension array. */ + GArrowArray *deserialized_array = + garrow_record_batch_get_column_data(record_batch, 0); + g_print("array: %s\n", G_OBJECT_TYPE_NAME(deserialized_array)); + g_object_unref(deserialized_array); + + g_object_unref(record_batch); + g_object_unref(reader); + g_object_unref(input); + } + + g_object_unref(buffer); + } + +exit: + /* Unregister the created UUID extension data type. 
*/ + { + gchar *data_type_name = + garrow_extension_data_type_get_extension_name(extension_data_type); + gboolean success = + garrow_extension_data_type_registry_unregister(registry, + data_type_name, + &error); + g_free(data_type_name); + if (!success) { + g_print("failed to unregister: %s\n", error->message); + g_error_free(error); + g_object_unref(registry); + return EXIT_FAILURE; + } + } + + g_object_unref(registry); + + return EXIT_SUCCESS; +} diff --git a/src/arrow/c_glib/example/lua/README.md b/src/arrow/c_glib/example/lua/README.md new file mode 100644 index 000000000..7d388d46a --- /dev/null +++ b/src/arrow/c_glib/example/lua/README.md @@ -0,0 +1,50 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Arrow Lua example + +There are Lua example codes in this directory. + +## How to run + +All example codes use [LGI](https://github.com/pavouk/lgi) to use +Arrow GLib based bindings. + +Here are command lines to install LGI on Debian GNU/Linux and Ubuntu: + +```text +% sudo apt install -y luarocks +% sudo luarocks install lgi +``` + +## Lua example codes + +Here are example codes in this directory: + + * `write-batch.lua`: It shows how to write Arrow array to file in + batch mode. 
+ + * `read-batch.lua`: It shows how to read Arrow array from file in + batch mode. + + * `write-stream.lua`: It shows how to write Arrow array to file in + stream mode. + + * `read-stream.lua`: It shows how to read Arrow array from file in + stream mode. diff --git a/src/arrow/c_glib/example/lua/meson.build b/src/arrow/c_glib/example/lua/meson.build new file mode 100644 index 000000000..8fe3e5f23 --- /dev/null +++ b/src/arrow/c_glib/example/lua/meson.build @@ -0,0 +1,28 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +install_data('README.md', + 'read-batch.lua', + 'read-stream.lua', + 'write-batch.lua', + 'write-stream.lua', + install_dir: join_paths(data_dir, + meson.project_name(), + 'example', + 'lua')) diff --git a/src/arrow/c_glib/example/lua/read-batch.lua b/src/arrow/c_glib/example/lua/read-batch.lua new file mode 100644 index 000000000..a4c86763f --- /dev/null +++ b/src/arrow/c_glib/example/lua/read-batch.lua @@ -0,0 +1,44 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. 
The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +local lgi = require 'lgi' +local Arrow = lgi.Arrow + +local input_path = arg[1] or "/tmp/batch.arrow"; + +local input = Arrow.MemoryMappedInputStream.new(input_path) +local reader = Arrow.RecordBatchFileReader.new(input) + +for i = 0, reader:get_n_record_batches() - 1 do + local record_batch = reader:read_record_batch(i) + print(string.rep("=", 40)) + print("record-batch["..i.."]:") + for j = 0, record_batch:get_n_columns() - 1 do + local column_name = record_batch:get_column_name(j) + local column_data = record_batch:get_column_data(j) + io.write(" "..column_name..": [") + for k = 0, record_batch:get_n_rows() - 1 do + if k > 0 then + io.write(", ") + end + io.write(column_data:get_value(k)) + end + print("]") + end +end + +input:close() diff --git a/src/arrow/c_glib/example/lua/read-stream.lua b/src/arrow/c_glib/example/lua/read-stream.lua new file mode 100644 index 000000000..7bf1083e2 --- /dev/null +++ b/src/arrow/c_glib/example/lua/read-stream.lua @@ -0,0 +1,51 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +local lgi = require 'lgi' +local Arrow = lgi.Arrow + +local input_path = arg[1] or "/tmp/stream.arrow"; + +local input = Arrow.MemoryMappedInputStream.new(input_path) +local reader = Arrow.RecordBatchStreamReader.new(input) + +local i = 0 +while true do + local record_batch = reader:read_next() + if not record_batch then + break + end + + print(string.rep("=", 40)) + print("record-batch["..i.."]:") + for j = 0, record_batch:get_n_columns() - 1 do + local column_name = record_batch:get_column_name(j) + local column_data = record_batch:get_column_data(j) + io.write(" "..column_name..": [") + for k = 0, record_batch:get_n_rows() - 1 do + if k > 0 then + io.write(", ") + end + io.write(column_data:get_value(k)) + end + print("]") + end + + i = i + 1 +end + +input:close() diff --git a/src/arrow/c_glib/example/lua/write-batch.lua b/src/arrow/c_glib/example/lua/write-batch.lua new file mode 100644 index 000000000..26acc1dff --- /dev/null +++ b/src/arrow/c_glib/example/lua/write-batch.lua @@ -0,0 +1,74 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +local lgi = require 'lgi' +local Arrow = lgi.Arrow + +local output_path = arg[1] or "/tmp/batch.arrow"; + +local fields = { + Arrow.Field.new("uint8", Arrow.UInt8DataType.new()), + Arrow.Field.new("uint16", Arrow.UInt16DataType.new()), + Arrow.Field.new("uint32", Arrow.UInt32DataType.new()), + Arrow.Field.new("uint64", Arrow.UInt64DataType.new()), + Arrow.Field.new("int8", Arrow.Int8DataType.new()), + Arrow.Field.new("int16", Arrow.Int16DataType.new()), + Arrow.Field.new("int32", Arrow.Int32DataType.new()), + Arrow.Field.new("int64", Arrow.Int64DataType.new()), + Arrow.Field.new("float", Arrow.FloatDataType.new()), + Arrow.Field.new("double", Arrow.DoubleDataType.new()), +} +local schema = Arrow.Schema.new(fields) + +local output = Arrow.FileOutputStream.new(output_path, false) +local writer = Arrow.RecordBatchFileWriter.new(output, schema) + +function build_array(builder, values) + for _, value in pairs(values) do + builder:append(value) + end + return builder:finish() +end + +local uints = {1, 2, 4, 8} +local ints = {1, -2, 4, -8} +local floats = {1.1, -2.2, 4.4, -8.8} +local columns = { + build_array(Arrow.UInt8ArrayBuilder.new(), uints), + build_array(Arrow.UInt16ArrayBuilder.new(), uints), + build_array(Arrow.UInt32ArrayBuilder.new(), uints), + build_array(Arrow.UInt64ArrayBuilder.new(), uints), + build_array(Arrow.Int8ArrayBuilder.new(), ints), + build_array(Arrow.Int16ArrayBuilder.new(), ints), + build_array(Arrow.Int32ArrayBuilder.new(), ints), + build_array(Arrow.Int64ArrayBuilder.new(), ints), + 
build_array(Arrow.FloatArrayBuilder.new(), floats), + build_array(Arrow.DoubleArrayBuilder.new(), floats), +} + +local record_batch = Arrow.RecordBatch.new(schema, 4, columns) +writer:write_record_batch(record_batch) + +local sliced_columns = {} +for i, column in pairs(columns) do + sliced_columns[i] = column:slice(1, 3) +end +record_batch = Arrow.RecordBatch.new(schema, 3, sliced_columns) +writer:write_record_batch(record_batch) + +writer:close() +output:close() diff --git a/src/arrow/c_glib/example/lua/write-stream.lua b/src/arrow/c_glib/example/lua/write-stream.lua new file mode 100644 index 000000000..07bbd79af --- /dev/null +++ b/src/arrow/c_glib/example/lua/write-stream.lua @@ -0,0 +1,74 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+ +local lgi = require 'lgi' +local Arrow = lgi.Arrow + +local output_path = arg[1] or "/tmp/stream.arrow"; + +local fields = { + Arrow.Field.new("uint8", Arrow.UInt8DataType.new()), + Arrow.Field.new("uint16", Arrow.UInt16DataType.new()), + Arrow.Field.new("uint32", Arrow.UInt32DataType.new()), + Arrow.Field.new("uint64", Arrow.UInt64DataType.new()), + Arrow.Field.new("int8", Arrow.Int8DataType.new()), + Arrow.Field.new("int16", Arrow.Int16DataType.new()), + Arrow.Field.new("int32", Arrow.Int32DataType.new()), + Arrow.Field.new("int64", Arrow.Int64DataType.new()), + Arrow.Field.new("float", Arrow.FloatDataType.new()), + Arrow.Field.new("double", Arrow.DoubleDataType.new()), +} +local schema = Arrow.Schema.new(fields) + +local output = Arrow.FileOutputStream.new(output_path, false) +local writer = Arrow.RecordBatchStreamWriter.new(output, schema) + +function build_array(builder, values) + for _, value in pairs(values) do + builder:append(value) + end + return builder:finish() +end + +local uints = {1, 2, 4, 8} +local ints = {1, -2, 4, -8} +local floats = {1.1, -2.2, 4.4, -8.8} +local columns = { + build_array(Arrow.UInt8ArrayBuilder.new(), uints), + build_array(Arrow.UInt16ArrayBuilder.new(), uints), + build_array(Arrow.UInt32ArrayBuilder.new(), uints), + build_array(Arrow.UInt64ArrayBuilder.new(), uints), + build_array(Arrow.Int8ArrayBuilder.new(), ints), + build_array(Arrow.Int16ArrayBuilder.new(), ints), + build_array(Arrow.Int32ArrayBuilder.new(), ints), + build_array(Arrow.Int64ArrayBuilder.new(), ints), + build_array(Arrow.FloatArrayBuilder.new(), floats), + build_array(Arrow.DoubleArrayBuilder.new(), floats), +} + +local record_batch = Arrow.RecordBatch.new(schema, 4, columns) +writer:write_record_batch(record_batch) + +local sliced_columns = {} +for i, column in pairs(columns) do + sliced_columns[i] = column:slice(1, 3) +end +record_batch = Arrow.RecordBatch.new(schema, 3, sliced_columns) +writer:write_record_batch(record_batch) + +writer:close() 
+output:close() diff --git a/src/arrow/c_glib/example/meson.build b/src/arrow/c_glib/example/meson.build new file mode 100644 index 000000000..9a9bef1bd --- /dev/null +++ b/src/arrow/c_glib/example/meson.build @@ -0,0 +1,36 @@ +# -*- indent-tabs-mode: nil -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +executable('build', 'build.c', + dependencies: [arrow_glib]) +executable('extension-type', 'extension-type.c', + dependencies: [arrow_glib]) +executable('read-batch', 'read-batch.c', + dependencies: [arrow_glib]) +executable('read-stream', 'read-stream.c', + dependencies: [arrow_glib]) + +install_data('README.md', + 'build.c', + 'extension-type.c', + 'read-batch.c', + 'read-stream.c', + install_dir: join_paths(data_dir, meson.project_name(), 'example')) + +subdir('lua') diff --git a/src/arrow/c_glib/example/read-batch.c b/src/arrow/c_glib/example/read-batch.c new file mode 100644 index 000000000..273dc70ff --- /dev/null +++ b/src/arrow/c_glib/example/read-batch.c @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +static void +print_array(GArrowArray *array) +{ + GArrowType value_type; + gint64 i, n; + + value_type = garrow_array_get_value_type(array); + + g_print("["); + n = garrow_array_get_length(array); + +#define ARRAY_CASE(type, Type, TYPE, format) \ + case GARROW_TYPE_ ## TYPE: \ + { \ + GArrow ## Type ## Array *real_array; \ + real_array = GARROW_ ## TYPE ## _ARRAY(array); \ + for (i = 0; i < n; i++) { \ + if (i > 0) { \ + g_print(", "); \ + } \ + g_print(format, \ + garrow_ ## type ## _array_get_value(real_array, i)); \ + } \ + } \ + break + + switch (value_type) { + ARRAY_CASE(uint8, UInt8, UINT8, "%hhu"); + ARRAY_CASE(uint16, UInt16, UINT16, "%" G_GUINT16_FORMAT); + ARRAY_CASE(uint32, UInt32, UINT32, "%" G_GUINT32_FORMAT); + ARRAY_CASE(uint64, UInt64, UINT64, "%" G_GUINT64_FORMAT); + ARRAY_CASE( int8, Int8, INT8, "%hhd"); + ARRAY_CASE( int16, Int16, INT16, "%" G_GINT16_FORMAT); + ARRAY_CASE( int32, Int32, INT32, "%" G_GINT32_FORMAT); + ARRAY_CASE( int64, Int64, INT64, "%" G_GINT64_FORMAT); + ARRAY_CASE( float, Float, FLOAT, "%g"); + ARRAY_CASE(double, Double, DOUBLE, "%g"); + default: + break; + } +#undef ARRAY_CASE + + g_print("]\n"); +} + +static void +print_record_batch(GArrowRecordBatch *record_batch) +{ + guint nth_column, n_columns; + + n_columns = garrow_record_batch_get_n_columns(record_batch); + for 
(nth_column = 0; nth_column < n_columns; nth_column++) { + GArrowArray *array; + + g_print("columns[%u](%s): ", + nth_column, + garrow_record_batch_get_column_name(record_batch, nth_column)); + array = garrow_record_batch_get_column_data(record_batch, nth_column); + print_array(array); + g_object_unref(array); + } +} + +int +main(int argc, char **argv) +{ + const char *input_path = "/tmp/batch.arrow"; + GArrowMemoryMappedInputStream *input; + GError *error = NULL; + + if (argc > 1) + input_path = argv[1]; + input = garrow_memory_mapped_input_stream_new(input_path, + &error); + if (!input) { + g_print("failed to open file: %s\n", error->message); + g_error_free(error); + return EXIT_FAILURE; + } + + { + GArrowRecordBatchFileReader *reader; + + reader = + garrow_record_batch_file_reader_new(GARROW_SEEKABLE_INPUT_STREAM(input), + &error); + if (!reader) { + g_print("failed to open file reader: %s\n", error->message); + g_error_free(error); + g_object_unref(input); + return EXIT_FAILURE; + } + + { + guint i, n; + + n = garrow_record_batch_file_reader_get_n_record_batches(reader); + for (i = 0; i < n; i++) { + GArrowRecordBatch *record_batch; + + record_batch = + garrow_record_batch_file_reader_read_record_batch(reader, i, &error); + if (!record_batch) { + g_print("failed to open file reader: %s\n", error->message); + g_error_free(error); + g_object_unref(reader); + g_object_unref(input); + return EXIT_FAILURE; + } + + print_record_batch(record_batch); + g_object_unref(record_batch); + } + } + + g_object_unref(reader); + } + + g_object_unref(input); + + return EXIT_SUCCESS; +} diff --git a/src/arrow/c_glib/example/read-stream.c b/src/arrow/c_glib/example/read-stream.c new file mode 100644 index 000000000..133418faa --- /dev/null +++ b/src/arrow/c_glib/example/read-stream.c @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <stdlib.h> + +#include <arrow-glib/arrow-glib.h> + +static void +print_array(GArrowArray *array) +{ + GArrowType value_type; + gint64 i, n; + + value_type = garrow_array_get_value_type(array); + + g_print("["); + n = garrow_array_get_length(array); + +#define ARRAY_CASE(type, Type, TYPE, format) \ + case GARROW_TYPE_ ## TYPE: \ + { \ + GArrow ## Type ## Array *real_array; \ + real_array = GARROW_ ## TYPE ## _ARRAY(array); \ + for (i = 0; i < n; i++) { \ + if (i > 0) { \ + g_print(", "); \ + } \ + g_print(format, \ + garrow_ ## type ## _array_get_value(real_array, i)); \ + } \ + } \ + break + + switch (value_type) { + ARRAY_CASE(uint8, UInt8, UINT8, "%hhu"); + ARRAY_CASE(uint16, UInt16, UINT16, "%" G_GUINT16_FORMAT); + ARRAY_CASE(uint32, UInt32, UINT32, "%" G_GUINT32_FORMAT); + ARRAY_CASE(uint64, UInt64, UINT64, "%" G_GUINT64_FORMAT); + ARRAY_CASE( int8, Int8, INT8, "%hhd"); + ARRAY_CASE( int16, Int16, INT16, "%" G_GINT16_FORMAT); + ARRAY_CASE( int32, Int32, INT32, "%" G_GINT32_FORMAT); + ARRAY_CASE( int64, Int64, INT64, "%" G_GINT64_FORMAT); + ARRAY_CASE( float, Float, FLOAT, "%g"); + ARRAY_CASE(double, Double, DOUBLE, "%g"); + default: + break; + } +#undef ARRAY_CASE + + g_print("]\n"); +} + +static void +print_record_batch(GArrowRecordBatch *record_batch) 
+{ + guint nth_column, n_columns; + + n_columns = garrow_record_batch_get_n_columns(record_batch); + for (nth_column = 0; nth_column < n_columns; nth_column++) { + GArrowArray *array; + + g_print("columns[%u](%s): ", + nth_column, + garrow_record_batch_get_column_name(record_batch, nth_column)); + array = garrow_record_batch_get_column_data(record_batch, nth_column); + print_array(array); + g_object_unref(array); + } +} + +int +main(int argc, char **argv) +{ + const char *input_path = "/tmp/stream.arrow"; + GArrowMemoryMappedInputStream *input; + GError *error = NULL; + + if (argc > 1) + input_path = argv[1]; + input = garrow_memory_mapped_input_stream_new(input_path, &error); + if (!input) { + g_print("failed to open file: %s\n", error->message); + g_error_free(error); + return EXIT_FAILURE; + } + + { + GArrowRecordBatchReader *reader; + GArrowRecordBatchStreamReader *stream_reader; + + stream_reader = + garrow_record_batch_stream_reader_new(GARROW_INPUT_STREAM(input), + &error); + if (!stream_reader) { + g_print("failed to open stream reader: %s\n", error->message); + g_error_free(error); + g_object_unref(input); + return EXIT_FAILURE; + } + + reader = GARROW_RECORD_BATCH_READER(stream_reader); + while (TRUE) { + GArrowRecordBatch *record_batch; + + record_batch = garrow_record_batch_reader_read_next(reader, &error); + if (error) { + g_print("failed to read the next record batch: %s\n", error->message); + g_error_free(error); + g_object_unref(reader); + g_object_unref(input); + return EXIT_FAILURE; + } + + if (!record_batch) { + break; + } + + print_record_batch(record_batch); + g_object_unref(record_batch); + } + + g_object_unref(reader); + } + + g_object_unref(input); + + return EXIT_SUCCESS; +} |