diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/matlab | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/matlab')
31 files changed, 2901 insertions, 0 deletions
diff --git a/src/arrow/matlab/.gitignore b/src/arrow/matlab/.gitignore new file mode 100644 index 000000000..e89b1b9af --- /dev/null +++ b/src/arrow/matlab/.gitignore @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# CMake files +CMakeFiles/* +CMakeCache.txt + +# MEX files +*.mex* diff --git a/src/arrow/matlab/CMakeLists.txt b/src/arrow/matlab/CMakeLists.txt new file mode 100644 index 000000000..cfc74a266 --- /dev/null +++ b/src/arrow/matlab/CMakeLists.txt @@ -0,0 +1,282 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 3.20) + +# Build the Arrow C++ libraries. +# +# Usage: build_arrow([BUILD_GTEST]) +# +# Registers the arrow_ep ExternalProject, which configures the Arrow C++ sources +# in ${CMAKE_SOURCE_DIR}/../cpp with CMAKE_INSTALL_PREFIX set to +# ${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix, then defines a SHARED IMPORTED +# target that points at the Arrow shared library in that prefix and depends on +# arrow_ep. +# +# BUILD_GTEST: also pass -DARROW_BUILD_TESTS=ON to the Arrow build (via +# enable_gtest) and define the GTest::gtest / GTest::gtest_main imported targets +# (via build_gtest). +# +# Any other argument is reported with message(SEND_ERROR). +function(build_arrow) + set(options BUILD_GTEST) + set(one_value_args) + set(multi_value_args) + cmake_parse_arguments(ARG + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN}) + if(ARG_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") + endif() + + # On Windows the imported target is described by its import library + # (IMPORTED_IMPLIB); on other platforms by the shared library itself + # (IMPORTED_LOCATION). + if(WIN32) + set(ARROW_IMPORTED_TYPE IMPORTED_IMPLIB) + set(ARROW_LIBRARY_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX}) + else() + set(ARROW_IMPORTED_TYPE IMPORTED_LOCATION) + set(ARROW_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() + + set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix") + set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include") + set(ARROW_LIBRARY_DIR "${ARROW_PREFIX}/lib") + set(ARROW_SHARED_LIB + "${ARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${ARROW_LIBRARY_SUFFIX}") + set(ARROW_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-build") + set(ARROW_CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}" + "-DCMAKE_INSTALL_LIBDIR=lib" "-DARROW_BUILD_STATIC=OFF") + set(ARROW_BUILD_BYPRODUCTS "${ARROW_SHARED_LIB}") + + # Building the Arrow C++ libraries and bundled GoogleTest binaries requires ExternalProject. + include(ExternalProject) + + # enable_gtest is a macro, so the entries it appends to ARROW_CMAKE_ARGS and + # ARROW_BUILD_BYPRODUCTS are visible to externalproject_add below. It must + # therefore run before the external project is declared. + if(ARG_BUILD_GTEST) + enable_gtest() + endif() + + externalproject_add(arrow_ep + SOURCE_DIR "${CMAKE_SOURCE_DIR}/../cpp" + BINARY_DIR "${ARROW_BINARY_DIR}" + CMAKE_ARGS ${ARROW_CMAKE_ARGS} + BUILD_BYPRODUCTS ${ARROW_BUILD_BYPRODUCTS}) + + set(ARROW_LIBRARY_TARGET arrow_shared) + + # If find_package has already found a valid Arrow installation, then + # we don't want to link against the newly built arrow_shared library. + # However, we still need to create a library target to trigger building + # of the arrow_ep target, which will ultimately build the bundled + # GoogleTest binaries. 
+ if(Arrow_FOUND) + set(ARROW_LIBRARY_TARGET arrow_shared_for_gtest) + endif() + + # The include directory is created at configure time, before arrow_ep has + # installed anything into it. + # NOTE(review): presumably needed because CMake rejects an imported target + # whose INTERFACE_INCLUDE_DIRECTORIES does not exist yet - confirm. + file(MAKE_DIRECTORY "${ARROW_INCLUDE_DIR}") + add_library(${ARROW_LIBRARY_TARGET} SHARED IMPORTED) + set_target_properties(${ARROW_LIBRARY_TARGET} + PROPERTIES ${ARROW_IMPORTED_TYPE} ${ARROW_SHARED_LIB} + INTERFACE_INCLUDE_DIRECTORIES ${ARROW_INCLUDE_DIR}) + + add_dependencies(${ARROW_LIBRARY_TARGET} arrow_ep) + + if(ARG_BUILD_GTEST) + build_gtest() + endif() + +endfunction() + +# Describe where the gtest and gtest_main libraries are expected under +# ${ARROW_BINARY_DIR}/googletest_ep-prefix, and extend ARROW_CMAKE_ARGS and +# ARROW_BUILD_BYPRODUCTS so that the arrow_ep build is configured with +# -DARROW_BUILD_TESTS=ON and lists those libraries as byproducts. +# +# This is a macro, so every variable is set in the caller's scope. It reads +# ARROW_BINARY_DIR and must therefore be invoked after that variable is set. +macro(enable_gtest) + if(WIN32) + set(ARROW_GTEST_IMPORTED_TYPE IMPORTED_IMPLIB) + set(ARROW_GTEST_MAIN_IMPORTED_TYPE IMPORTED_IMPLIB) + + set(ARROW_GTEST_LIBRARY_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX}) + set(ARROW_GTEST_MAIN_LIBRARY_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX}) + else() + set(ARROW_GTEST_IMPORTED_TYPE IMPORTED_LOCATION) + set(ARROW_GTEST_MAIN_IMPORTED_TYPE IMPORTED_LOCATION) + + set(ARROW_GTEST_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_GTEST_MAIN_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() + + set(ARROW_GTEST_PREFIX "${ARROW_BINARY_DIR}/googletest_ep-prefix") + set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include") + set(ARROW_GTEST_LIBRARY_DIR "${ARROW_GTEST_PREFIX}/lib") + set(ARROW_GTEST_SHARED_LIB + "${ARROW_GTEST_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${ARROW_GTEST_LIBRARY_SUFFIX}" + ) + + # gtest_main uses the same prefix as gtest; it is tracked in its own set of + # variables. + set(ARROW_GTEST_MAIN_PREFIX "${ARROW_BINARY_DIR}/googletest_ep-prefix") + set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include") + set(ARROW_GTEST_MAIN_LIBRARY_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib") + set(ARROW_GTEST_MAIN_SHARED_LIB + "${ARROW_GTEST_MAIN_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${ARROW_GTEST_MAIN_LIBRARY_SUFFIX}" + ) + + list(APPEND ARROW_CMAKE_ARGS "-DARROW_BUILD_TESTS=ON") + list(APPEND ARROW_BUILD_BYPRODUCTS "${ARROW_GTEST_SHARED_LIB}" + "${ARROW_GTEST_MAIN_SHARED_LIB}") +endmacro() + +# Build the GoogleTest binaries that are bundled with the Arrow C++ libraries. 
+# +# Defines the GTest::gtest and GTest::gtest_main SHARED IMPORTED targets on top +# of the libraries produced by the arrow_ep external project. The macro reads +# the ARROW_GTEST_* variables set by enable_gtest and attaches a step to +# arrow_ep, so it must be called after enable_gtest and after arrow_ep has been +# declared. +macro(build_gtest) + set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include") + set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include") + + file(MAKE_DIRECTORY "${ARROW_GTEST_INCLUDE_DIR}") + + if(WIN32) + set(ARROW_GTEST_RUNTIME_DIR "${ARROW_GTEST_PREFIX}/bin") + set(ARROW_GTEST_MAIN_RUNTIME_DIR "${ARROW_GTEST_MAIN_PREFIX}/bin") + set(ARROW_GTEST_RUNTIME_SUFFIX "${CMAKE_SHARED_LIBRARY_SUFFIX}") + set(ARROW_GTEST_MAIN_RUNTIME_SUFFIX "${CMAKE_SHARED_LIBRARY_SUFFIX}") + set(ARROW_GTEST_RUNTIME_LIB + "${ARROW_GTEST_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest${ARROW_GTEST_RUNTIME_SUFFIX}" + ) + set(ARROW_GTEST_MAIN_RUNTIME_LIB + "${ARROW_GTEST_MAIN_RUNTIME_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_main${ARROW_GTEST_MAIN_RUNTIME_SUFFIX}" + ) + + # Multi-Configuration generators (e.g. Visual Studio or XCode) place their build artifacts + # in a subdirectory named ${CMAKE_BUILD_TYPE} by default, where ${CMAKE_BUILD_TYPE} varies + # depending on the chosen build configuration (e.g. Release or Debug). + get_property(GENERATOR_IS_MULTI_CONFIG_VALUE GLOBAL + PROPERTY GENERATOR_IS_MULTI_CONFIG) + if(GENERATOR_IS_MULTI_CONFIG_VALUE) + set(MATLAB_TESTS_DIR "${CMAKE_BINARY_DIR}/$<CONFIG>") + else() + set(MATLAB_TESTS_DIR "${CMAKE_BINARY_DIR}") + endif() + + # We need to copy the gtest and gtest_main runtime DLLs into the directory where the + # MATLAB C++ tests reside, since Windows requires that runtime DLLs are in the same + # directory as the executables that depend on them (or on the %PATH%). 
+ # The copy step is ordered after the install step of arrow_ep (DEPENDEES + # install). + externalproject_add_step(arrow_ep copy + COMMAND ${CMAKE_COMMAND} -E make_directory + ${MATLAB_TESTS_DIR} + COMMAND ${CMAKE_COMMAND} -E copy ${ARROW_GTEST_RUNTIME_LIB} + ${MATLAB_TESTS_DIR} + COMMAND ${CMAKE_COMMAND} -E copy + ${ARROW_GTEST_MAIN_RUNTIME_LIB} ${MATLAB_TESTS_DIR} + DEPENDEES install) + endif() + + add_library(GTest::gtest SHARED IMPORTED) + set_target_properties(GTest::gtest + PROPERTIES ${ARROW_GTEST_IMPORTED_TYPE} ${ARROW_GTEST_SHARED_LIB} + INTERFACE_INCLUDE_DIRECTORIES + ${ARROW_GTEST_INCLUDE_DIR}) + + add_library(GTest::gtest_main SHARED IMPORTED) + set_target_properties(GTest::gtest_main + PROPERTIES ${ARROW_GTEST_MAIN_IMPORTED_TYPE} + ${ARROW_GTEST_MAIN_SHARED_LIB} + INTERFACE_INCLUDE_DIRECTORIES + ${ARROW_GTEST_MAIN_INCLUDE_DIR}) + + # The imported libraries only exist once arrow_ep has been built, so both + # imported targets depend on it. + add_dependencies(GTest::gtest arrow_ep) + add_dependencies(GTest::gtest_main arrow_ep) +endmacro() + +set(CMAKE_CXX_STANDARD 11) + +# MLARROW_BASE_VERSION keeps only the leading MAJOR.MINOR.PATCH part of +# MLARROW_VERSION; that is the value handed to project(VERSION). +set(MLARROW_VERSION "6.0.1") +string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") + +project(mlarrow VERSION "${MLARROW_BASE_VERSION}") + +option(MATLAB_BUILD_TESTS "Build the C++ tests for the MATLAB interface" OFF) + +# Grab CMAKE Modules from the CPP interface +set(CPP_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules") +if(EXISTS "${CPP_CMAKE_MODULES}") + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CPP_CMAKE_MODULES}) +endif() + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake_modules) + +# Only build the MATLAB interface C++ tests if MATLAB_BUILD_TESTS=ON. +if(MATLAB_BUILD_TESTS) + # find_package(GTest) supports custom GTEST_ROOT as well as package managers. + find_package(GTest) + if(NOT GTest_FOUND) + # find_package(Arrow) supports custom ARROW_HOME as well as package + # managers. + find_package(Arrow) + # Trigger an automatic build of the Arrow C++ libraries and bundled + # GoogleTest binaries. 
If a valid Arrow installation was not already + # found by find_package, then build_arrow will use the Arrow + # C++ libraries that are built from source. + build_arrow(BUILD_GTEST) + else() + find_package(Arrow) + if(NOT Arrow_FOUND) + # Trigger an automatic build of the Arrow C++ libraries. + build_arrow() + endif() + endif() +else() + find_package(Arrow) + if(NOT Arrow_FOUND) + build_arrow() + endif() +endif() + +# The MEX targets below cannot be defined without a MATLAB installation. +find_package(Matlab REQUIRED) + +# Absolute paths of the sources that make up the featherreadmex MEX binary. +set(featherread_sources + ${CMAKE_SOURCE_DIR}/src/featherreadmex.cc + ${CMAKE_SOURCE_DIR}/src/feather_reader.cc + ${CMAKE_SOURCE_DIR}/src/util/handle_status.cc + ${CMAKE_SOURCE_DIR}/src/util/unicode_conversion.cc) + +# featherreadmex MEX binary, linked against the Arrow shared library. +matlab_add_mex(R2018a + NAME featherreadmex + SRC ${featherread_sources} + LINK_TO arrow_shared) + +# Absolute paths of the sources that make up the featherwritemex MEX binary. +set(featherwrite_sources + ${CMAKE_SOURCE_DIR}/src/featherwritemex.cc + ${CMAKE_SOURCE_DIR}/src/feather_writer.cc + ${CMAKE_SOURCE_DIR}/src/util/handle_status.cc + ${CMAKE_SOURCE_DIR}/src/util/unicode_conversion.cc) + +# featherwritemex MEX binary, linked against the Arrow shared library. +matlab_add_mex(R2018a + NAME featherwritemex + SRC ${featherwrite_sources} + LINK_TO arrow_shared) + +# Place both MEX binaries in the src directory on all platforms. Windows uses +# RUNTIME_OUTPUT_DIRECTORY, every other platform LIBRARY_OUTPUT_DIRECTORY. +# Wrapping the path in the always-true $<1:...> generator expression keeps +# multi-config generators from appending a per-configuration subdirectory. +if(WIN32) + set(mex_output_dir_property RUNTIME_OUTPUT_DIRECTORY) +else() + set(mex_output_dir_property LIBRARY_OUTPUT_DIRECTORY) +endif() +set_target_properties(featherreadmex featherwritemex + PROPERTIES ${mex_output_dir_property} $<1:${CMAKE_SOURCE_DIR}/src>) + +# ############################################################################## +# C++ Tests +# ############################################################################## +# Only build the C++ tests if 
MATLAB_BUILD_TESTS=ON. +if(MATLAB_BUILD_TESTS) + enable_testing() + + # Define a test executable target. TODO: Remove the placeholder test. This is + # just for testing GoogleTest integration. + add_executable(placeholder_test ${CMAKE_SOURCE_DIR}/src/placeholder_test.cc) + # Link against the GTest::gtest and GTest::gtest_main IMPORTED targets. + # PRIVATE: the libraries are only needed to build this executable, and the + # explicit keyword avoids the legacy keyword-less signature. + target_link_libraries(placeholder_test PRIVATE GTest::gtest GTest::gtest_main) + + # Register the test with CTest. The NAME/COMMAND signature resolves the + # placeholder_test target to the path of the built executable, including the + # per-configuration directory used by multi-config generators (run CTest with + # -C <config> there). + add_test(NAME PlaceholderTestTarget COMMAND placeholder_test) +endif() diff --git a/src/arrow/matlab/README.md b/src/arrow/matlab/README.md new file mode 100644 index 000000000..edf991e87 --- /dev/null +++ b/src/arrow/matlab/README.md @@ -0,0 +1,112 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +## MATLAB Library for Apache Arrow + +## Status + +This is a very early stage MATLAB interface to the Apache Arrow C++ libraries. + +The current code only supports reading/writing numeric types from/to Feather files. + +## Building from source + +### Get Arrow and build Arrow CPP + +See: [Arrow CPP README](../cpp/README.md) + +### Build MATLAB interface to Apache Arrow using MATLAB R2018a: + + cd arrow/matlab + mkdir build + cd build + cmake .. 
+ make + +#### Non-standard MATLAB and Arrow installations + +To specify a non-standard MATLAB install location, use the Matlab_ROOT_DIR CMake flag: + + cmake .. -DMatlab_ROOT_DIR=/<PATH_TO_MATLAB_INSTALL> + +To specify a non-standard Arrow install location, use the ARROW_HOME CMake flag: + + cmake .. -DARROW_HOME=/<PATH_TO_ARROW_INSTALL> + +### Build MATLAB interface to Arrow using MATLAB R2018b or later: + +This may be preferred if you are using MATLAB R2018b or later and have encountered [linker errors](https://gitlab.kitware.com/cmake/cmake/issues/18391) when using CMake. + +Prerequisite: Ensure that the Arrow C++ library is already installed and the `ARROW_HOME` environment variable is set to the installation root. + +To verify this, you can run: + +``` matlab +>> getenv ARROW_HOME +``` + +This should print a path that contains `include` and `lib` directories with Arrow C++ headers and libraries. + +Navigate to the `build_support` subfolder and run the `compile` function to build the necessary MEX files: + +``` matlab +>> cd build_support +>> compile +``` + +Run the `test` function to execute the unit tests: + +``` matlab +>> test +``` + +## Try it out + +### Add the src and build directories to your MATLAB path + +``` matlab +>> cd(fullfile('arrow', 'matlab')); +>> addpath src; +>> addpath build; +``` + +### Write a MATLAB table to a Feather file + +``` matlab +>> t = array2table(rand(10, 10)); +>> filename = 'table.feather'; +>> featherwrite(filename,t); +``` + +### Read a Feather file into a MATLAB table + +``` matlab +>> filename = 'table.feather'; +>> t = featherread(filename); +``` + +## Running the tests + +``` matlab +>> cd(fullfile('arrow', 'matlab')); +>> addpath src; +>> addpath build; +>> cd test; +>> runtests .; +``` diff --git a/src/arrow/matlab/build_support/common_vars.m b/src/arrow/matlab/build_support/common_vars.m new file mode 100644 index 000000000..a7c9d6a32 --- /dev/null +++ b/src/arrow/matlab/build_support/common_vars.m @@ -0,0 +1,24 @@ 
+function vars = common_vars() +% Licensed to the Apache Software Foundation (ASF) under one +% or more contributor license agreements. See the NOTICE file +% distributed with this work for additional information +% regarding copyright ownership. The ASF licenses this file +% to you under the Apache License, Version 2.0 (the +% "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +% KIND, either express or implied. See the License for the +% specific language governing permissions and limitations +% under the License. + +% Returns a struct with the directories shared by the build scripts: srcDir, +% testDir and buildDir. Each is the sibling "src", "test" or "build" folder of +% the folder that contains this file, expressed through a ".." component. +supportDir = fileparts(mfilename("fullpath")); +parentDir = fullfile(supportDir, ".."); + +vars = struct( ... + "srcDir", fullfile(parentDir, "src"), ... + "testDir", fullfile(parentDir, "test"), ... + "buildDir", fullfile(parentDir, "build")); +end
\ No newline at end of file diff --git a/src/arrow/matlab/build_support/compile.m b/src/arrow/matlab/build_support/compile.m new file mode 100644 index 000000000..d436dadfe --- /dev/null +++ b/src/arrow/matlab/build_support/compile.m @@ -0,0 +1,41 @@ +function compile() +% Licensed to the Apache Software Foundation (ASF) under one +% or more contributor license agreements. See the NOTICE file +% distributed with this work for additional information +% regarding copyright ownership. The ASF licenses this file +% to you under the Apache License, Version 2.0 (the +% "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +% KIND, either express or implied. See the License for the +% specific language governing permissions and limitations +% under the License. + +% Builds the featherreadmex MEX binary into the build directory with the mex +% command, compiling and linking against the Arrow installation that the +% ARROW_HOME environment variable points to. Raises an error when ARROW_HOME +% is not set. + +vars = common_vars(); + +% Create the output directory only when it is missing, so that repeated builds +% do not trigger the warning mkdir issues for an existing directory. +if ~isfolder(vars.buildDir) + mkdir(vars.buildDir); +end + +% ARROW_HOME supplies the -L and -I paths on every platform, so resolve and +% validate it before the Unix-only rpath handling. It was previously assigned +% only inside the isunix branch, which left arrowHome undefined elsewhere. +arrowHome = getenv("ARROW_HOME"); +if isempty(arrowHome) + error("The ARROW_HOME environment variable must be set."); +end + +ldflags = string.empty; +if isunix + % Record the Arrow library directory as an rpath in the MEX binary. + ldflags(end+1) = "-Wl"; + ldflags(end+1) = "-rpath '" + fullfile(arrowHome, "lib") + "'"; +end + +mexArgs = { ... + fullfile(vars.srcDir, "featherreadmex.cc"), ... + fullfile(vars.srcDir, "feather_reader.cc"), ... + fullfile(vars.srcDir, "util", "handle_status.cc"), ... + "-L" + fullfile(arrowHome, "lib"), "-larrow", ... + "-I" + fullfile(arrowHome, "include"), ... + "-outdir", vars.buildDir, ... 
+ "-R2018a", "-v"}; + +% Only override LDFLAGS when there is something to add (Unix). +if ~isempty(ldflags) + mexArgs{end+1} = "LDFLAGS=""\$LDFLAGS " + strjoin(ldflags, ",") + """"; +end + +mex(mexArgs{:}); +end diff --git a/src/arrow/matlab/build_support/test.m b/src/arrow/matlab/build_support/test.m new file mode 100644 index 000000000..990549e3b --- /dev/null +++ b/src/arrow/matlab/build_support/test.m @@ -0,0 +1,28 @@ +function test() +% Licensed to the Apache Software Foundation (ASF) under one +% or more contributor license agreements. See the NOTICE file +% distributed with this work for additional information +% regarding copyright ownership. The ASF licenses this file +% to you under the Apache License, Version 2.0 (the +% "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +% KIND, either express or implied. See the License for the +% specific language governing permissions and limitations +% under the License. + +% Compiles the MEX binary, then runs every test under the test directory +% (including subfolders) and asserts that no test failed. + +vars = common_vars(); + +compile(); + +% Put the src and build directories on the MATLAB path for the duration of the +% run; the onCleanup object restores the previous path when this function exits. +originalPath = addpath(vars.srcDir, vars.buildDir); +restoreOriginalPath = onCleanup(@()path(originalPath)); + +results = runtests(vars.testDir, "IncludeSubfolders", true, "OutputDetail", 3); +assert(all(~[results.Failed])); +end diff --git a/src/arrow/matlab/doc/matlab_interface_for_apache_arrow_design.md b/src/arrow/matlab/doc/matlab_interface_for_apache_arrow_design.md new file mode 100644 index 000000000..5d64c8e85 --- /dev/null +++ b/src/arrow/matlab/doc/matlab_interface_for_apache_arrow_design.md @@ -0,0 +1,366 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# MATLAB Interface for Apache Arrow + +## Overview +This document outlines a high-level roadmap for development of a [MATLAB] Interface for Apache Arrow, which enables interfacing with [Arrow] memory. + +## Use Cases +Apache Arrow is designed to enable a variety of high-performance columnar analytics use cases. + +This design document focuses on a subset of use cases that we feel will help to lay the foundation for more advanced [use cases] in the future. + +1. **UC1**: Ability to create, access, and delete Arrow memory using MATLAB code. +2. **UC2**: Ability to serialize and deserialize Arrow memory using MATLAB code to/from file formats like Parquet, Feather, JSON, and CSV. +3. **UC3**: Ability to move in-memory tabular data, represented as a MATLAB table, to other languages, like Python, R, and Rust, with minimal overhead (ideally, zero copy). + +## Design +We envision a set of packaged (`arrow.*`) classes and functions allowing users to interact with key functionality from the Arrow C++ libraries using MATLAB code. + +Included below is a list of example MATLAB and C++ APIs that would be exposed by the MATLAB Interface for Apache Arrow. + +### MATLAB APIs +- `arrow.Buffer` +- `arrow.Array` +- `arrow.RecordBatch` +- `arrow.Table` +- `arrow.Field` +- `arrow.Schema` +- `arrow.type.DataType` + - `arrow.type.Float64` + - `arrow.type.String` + - `arrow.type.Date` + - `arrow.type.Time` + - ... +- `arrow.memory.getTotalBytesAllocated` +- `arrow.memory.allocateBuffer` +- ... 
+ +### C++ APIs +In order to enable interaction with the Arrow C++ libraries, the MATLAB Interface for Apache Arrow must expose associated C++ APIs for wrapping/unwrapping MATLAB [`mxArray`] data to/from appropriate Arrow C++ types. + +The list below provides a few brief examples of what these C++ APIs might look like (intended to be consistent with the rest of the Arrow ecosystem). + +- `arrow::matlab::is_array` +- `arrow::matlab::is_record_batch` +- `arrow::matlab::is_table` +- `arrow::matlab::unwrap_array` +- `arrow::matlab::wrap_array` +- `arrow::matlab::unwrap_record_batch` +- `arrow::matlab::wrap_record_batch` +- `arrow::matlab::unwrap_table` +- `arrow::matlab::wrap_table` +- ... + +## Design Cases + +### Use Case: UC1 +A MATLAB developer could create an `arrow.Array` from an "ordinary" MATLAB array (e.g. a numeric row vector of type `double`). +They could then operate on this array in a variety of different ways (e.g. indexing/slicing, getting its type/class, clearing it from the workspace, etc.). +The `arrow.array` “factory function” returns a type-specific, concrete subclass of the abstract `arrow.Array` class based on the MATLAB type of the input array. For example, passing a double array to the `arrow.array` function will return a corresponding `arrow.Float64Array`. + +**Note**: MATLAB [`missing` values] (e.g. `NaN`, `NaT`, `<undefined>`) are automatically converted into Arrow `NULL` values upon construction of an `arrow.Array` subclass instance. + +###### Example Code: +``` matlab +>> A = randi(100, 1, 5) +A = + 82 91 13 92 64 + +>> class(A) +ans = + 'double' + +>> A(4) = NaN; % Set the fourth element to NaN. + +>> AA = arrow.array(A); % Create an arrow.Array from A. + +>> class(AA) +ans = + 'arrow.Float64Array' + +>> AA(3:5) % Extract elements at indices 3 to 5 from AA. +ans = + 13 <NULL> 64 + +>> clear AA; % Clear AA from workspace and release Arrow C++ memory. 
+``` + +### Use Case: UC2 + +#### Developer Workflow for Writing a MATLAB Table to a Feather File + +To serialize MATLAB data to a file on disk (e.g. Feather, Parquet), a MATLAB developer could start by constructing an `arrow.Table` using one of several different approaches. + +They could individually compose the table from a set of `arrow.Array` objects (one for each table variable). + +###### Example Code: +``` matlab +>> Var1 = arrow.array(["foo"; "bar"; "baz"]); + +>> Var2 = arrow.array([today; today + 1; today + 2]); + +>> Var3 = arrow.array([10; 20; 30]); + +>> AT = arrow.Table(Var1, Var2, Var3); +``` +Alternatively, they could convert an existing MATLAB `table` directly to an `arrow.Table` using a function like `arrow.matlab2arrow`. + +###### Example Code: +``` matlab +>> Weight = [10; 24; 10; 12; 18]; + +>> Radius = [80; 135; 65; 70; 150]; + +>> Density = [10.2; 20.5; 11.2; 13.7; 17.8]; + +>> T = table(Weight, Radius, Density); % Create a MATLAB table + +>> AT = arrow.matlab2arrow(T); % Create an arrow.Table +``` +To serialize the `arrow.Table`, `AT`, to a file (e.g. Feather) on disk, the user could then instantiate an `arrow.FeatherTableWriter`. + +###### Example Code: +``` matlab +>> featherTableWriter = arrow.FeatherTableWriter(); + +>> featherTableWriter.write(AT, "data.feather"); +``` +The Feather file could then be read and operated on by an external process like Rust or Go. To read it back into MATLAB after modification by another process, the user could instantiate an `arrow.FeatherTableReader`. 
+ +###### Example Code: +``` matlab +>> featherTableReader = arrow.FeatherTableReader("data.feather"); + +>> AT = featherTableReader.read(); +``` +#### Advanced MATLAB User Workflow for Implementing Support for Writing to Feather Files + +To add support for writing to Feather files, an advanced MATLAB user could use the MATLAB and C++ APIs offered by the MATLAB Interface for Apache Arrow to create `arrow.FeatherTableWriter`. + +They would need to author a [MEX function] (e.g. `featherwriteMEX`), which can be called directly by MATLAB code. Within their MEX function, they could use `arrow::matlab::unwrap_table` to convert between the MATLAB representation of the Arrow memory (`arrow.Table`) and the equivalent C++ representation (`arrow::Table`). Once the `arrow.Table` has been "unwrapped" into a C++ `arrow::Table`, it can be passed to the appropriate Arrow C++ library API for writing to a Feather file (`arrow::ipc::feather::WriteTable`). + +An analogous workflow could be followed to create `arrow.FeatherTableReader` to enable reading from Feather files. + +#### Enabling High-Level Workflows + +Ultimately, many of the APIs exposed by the MATLAB Interface for Apache Arrow are targeted at advanced MATLAB users. By leveraging these building blocks, advanced MATLAB users can create high-level interfaces, which are useful to everyday MATLAB users. An example of such a high-level interface would be `featherwrite`, intended to make it easy to write Feather files. A diagram summarizing the overall workflow and specific pieces an advanced user would need to author to create such a high-level interface is included below. + +![Code flow diagram](https://github.com/mathworks/matlab-arrow-support-files/raw/main/images/design_doc_code_flow_diagram.svg) + +### Use Case: UC3 + +Arrow supports several approaches to sharing memory locally. + +Roughly speaking, local memory sharing workflows can be divided into two categories: +1. In-Process Memory Sharing +2. 
Out-of-Process Memory Sharing + +#### In-Process Memory Sharing + +[MATLAB supports running Python code within the MATLAB process]. In theory, because MATLAB and Python can share the same virtual address space, users should be able to share Arrow memory efficiently between MATLAB and PyArrow code. The [Apache Arrow C Data Interface] defines a lightweight C API for sharing Arrow data and metadata between multiple languages running within the same virtual address space. + +To share a MATLAB `arrow.Array` with PyArrow efficiently, a user could use the `exportToCDataInterface` method to export the Arrow memory wrapped by an `arrow.Array` to the C Data Interface format, consisting of two C-style structs, [`ArrowArray`] and [`ArrowSchema`], which represent the Arrow data and associated metadata. + +Memory addresses to the `ArrowArray` and `ArrowSchema` structs are returned by the call to `exportToCDataInterface`. These addresses can be passed to Python directly, without having to make any copies of the underlying Arrow data structures that they refer to. A user can then wrap the underlying data pointed to by the `ArrowArray` struct (which is already in the [Arrow Columnar Format]), as well as extract the necessary metadata from the `ArrowSchema` struct, to create a `pyarrow.Array` by using the static method `py.pyarrow.Array._import_from_c`. + +###### Example Code: +``` matlab +% Create a MATLAB arrow.Array. +>> AA = arrow.array([1, 2, 3, 4, 5]); + +% Export the MATLAB arrow.Array to the C Data Interface format, returning the +% memory addresses of the required ArrowArray and ArrowSchema C-style structs. +>> [arrayMemoryAddress, schemaMemoryAddress] = AA.exportToCDataInterface(); + +% Import the memory addresses of the C Data Interface format structs to create a pyarrow.Array. +>> PA = py.pyarrow.Array._import_from_c(arrayMemoryAddress, schemaMemoryAddress); +``` +Conversely, a user can create an Arrow array using PyArrow and share it with MATLAB. 
To do this, they can call the method `_export_to_c` to export a `pyarrow.Array` to the C Data Interface format. + +The memory addresses of the `ArrowArray` and `ArrowSchema` structs populated by the call to `_export_to_c` can be passed to the static method `arrow.Array.importFromCDataInterface` to construct a MATLAB `arrow.Array` with zero copies. + +The example code below is adapted from the [`test_cffi.py` test cases for PyArrow]. + +###### Example Code: +``` matlab +% Make a pyarrow.Array. +>> PA = py.pyarrow.array([1, 2, 3, 4, 5]); + +% Create ArrowArray and ArrowSchema C-style structs adhering to the Arrow C Data Interface format. +>> array = py.pyarrow.cffi.ffi.new("struct ArrowArray*") + +>> arrayMemoryAddress = py.int(py.pyarrow.cffi.ffi.cast("uintptr_t", array)); + +>> schema = py.pyarrow.cffi.ffi.new("struct ArrowSchema*") + +>> schemaMemoryAddress = py.int(py.pyarrow.cffi.ffi.cast("uintptr_t", schema)); + +% Export the pyarrow.Array to the C Data Interface format, populating the required ArrowArray and ArrowSchema structs. +>> PA._export_to_c(arrayMemoryAddress, schemaMemoryAddress) + +% Import the C Data Interface structs to create a MATLAB arrow.Array. +>> AA = arrow.Array.importFromCDataInterface(arrayMemoryAddress, schemaMemoryAddress); +``` + +#### Out-of-Process Memory Sharing + +[MATLAB supports running Python code in a separate process]. A user could leverage the MATLAB Interface for Apache Arrow to share Arrow memory between MATLAB and PyArrow running within a separate Python process using one of the approaches described below. + +##### Memory-Mapped IPC File + +For large tables used in a multi-process "data processing pipeline", a user could serialize their `arrow.Table` to the Arrow IPC File Format. Then, this file could be memory-mapped (zero-copy) by PyArrow running in a separate process to read the data in with minimal overhead. 
The fact that the Arrow IPC File Format is a 1:1 mapping of the in-memory Arrow format on disk, makes the memory-mapping highly performant as no custom deserialization/conversion is required to construct a `pyarrow.Table`. + +###### Example Code: +``` matlab +% Create a MATLAB arrow.Table. +>> Var1 = arrow.array(["foo", "bar", "baz"]); + +>> Var2 = arrow.array([today, today + 1, today + 2]); + +>> Var3 = arrow.array([10, 20, 30]); + +>> AT = arrow.Table(Var1, Var2, Var3); + +% Write the MATLAB arrow.Table to the Arrow IPC File Format on disk. +>> arrow.ipcwrite(AT, "data.arrow"); + +% Run Python in a separate process. +>> pyenv("ExecutionMode", "OutOfProcess"); + +% Memory map the Arrow IPC File. +>> memoryMappedFile = py.pyarrow.memory_map("data.arrow"); + +% Construct pyarrow.ipc.RecordBatchFileReader to read the Arrow IPC File. +>> recordBatchFileReader = py.pyarrow.ipc.open_file(memoryMappedFile); + +% Read all record batches from the Arrow IPC File in one-shot and return a pyarrow.Table. +>> PAT = recordBatchFileReader.read_all() +``` + +##### Plasma Object Store +_**Note**: Plasma is informally deprecated. It may not make sense to support it._ + +Users could also share Arrow memory across process boundaries by using the [Plasma Object Store]. + +The code examples below assume a Plasma Object Store process is already running at `/tmp/plasma`. + +A MATLAB user could connect to the running Plasma Object Store process from MATLAB and share an `arrow.Array`, with an ID `"123"`. + +###### Example Code: +``` matlab +>> AA = arrow.array([1, 2, 3]); + +>> ID = 123; + +>> plasmaClientMATLAB = arrow.plasma.PlasmaClient("/tmp/plasma"); + +>> plasmaClientMATLAB.put(AA, ID); + +>> plasmaClientMATLAB.seal(ID); + ``` + +To consume the Arrow object shared from MATLAB in another process (for example, a C++ process), a user could connect to the same Plasma Object Store process using the Arrow C++ Libraries (example code based on this [tutorial]). 
+ +###### Example Code: +``` c++ +#include <plasma/client.h> + +using namespace plasma; + +int main(int argc, char** argv) { + // Start up and connect a Plasma client. + PlasmaClient client; + + ARROW_CHECK_OK(client.Connect("/tmp/plasma")); + + // Get from the Plasma store by Object ID. + ObjectBuffer object_buffer; + + client.Get(123, 1, -1, &object_buffer); + ... +} +``` + +## Testing +To ensure code quality, we would like to include the following testing infrastructure, at a minimum: +1. C++ APIs + - GoogleTest C++ Unit Tests + - Integration with CI workflows +2. MATLAB APIs + - [MATLAB Class-Based Unit Tests] + - Integration with CI workflows +3. [Integration Testing] + +## Documentation +To ensure usability, discoverability, and accessibility, we would like to include high quality documentation for the MATLAB Interface for Apache Arrow. + +Specific areas of documentation would include: +1. [MATLAB Help Text] for MATLAB APIs +2. MATLAB API reference +3. Usage examples of MATLAB and C++ APIs +4. README for building and installation +5. Build system documentation +6. CI integration documentation + +## Installation +We would ideally like to make it as easy as possible for MATLAB users to install the MATLAB Interface for Apache Arrow without the need to compile [MEX] functions or perform any other manual configuration steps. + +In MATLAB, users normally install optional software packages via the [Add-On Explorer]. This workflow is analogous to the way a [JavaScript user] would install the [`apache-arrow` package via the `npm` package manager] or the way a [Rust user] would install the [`arrow` crate via the `cargo` package manager]. + +In the short term, in the absence of an easily installable MATLAB Add-On, we plan to maintain up-to-date, clearly explained, build and installation instructions for recent versions of MATLAB on GitHub. + +In addition, we'd like to include pre-built MEX functions for Windows, Mac, and Linux that get built regularly via CI workflows. 
This would allow users to try out the latest functionality without having to manually build the MEX interfaces from scratch. + +## Roadmap +The table below provides a high-level roadmap for the development of specific capabilities in the MATLAB Interface for Apache Arrow. + +| Capability | Use Case | Timeframe | +|----------------------------------|----------|-----------| +| Arrow Memory Interaction | UC1 | Near Term | +| File Reading/Writing | UC2 | Near Term | +| In/Out-of-Process Memory Sharing | UC3 | Mid Term | + +<!-- Links --> +[MATLAB]: https://www.mathworks.com/products/matlab.html +[Arrow]: https://arrow.apache.org/ +[use cases]: https://arrow.apache.org/use_cases/ +[`mxArray`]: https://www.mathworks.com/help/matlab/matlab_external/matlab-data.html +['missing' values]: https://www.mathworks.com/help/matlab/data_analysis/missing-data-in-matlab.html +[MEX function]: https://www.mathworks.com/help/matlab/call-mex-file-functions.html +[several approaches to sharing memory locally]: https://arrow.apache.org/use_cases/#sharing-memory-locally +[MATLAB supports running Python code within the MATLAB process]: https://www.mathworks.com/help/matlab/matlab_external/create-object-from-python-class.html +[Apache Arrow C Data Interface]: https://arrow.apache.org/docs/format/CDataInterface.html +[`ArrowArray`]: https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure +[`ArrowSchema`]: https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowschema-structure +[Arrow Columnar Format]: https://arrow.apache.org/docs/format/Columnar.html +[`test_cffi.py` test cases for PyArrow]: https://github.com/apache/arrow/blob/97879eb970bac52d93d2247200b9ca7acf6f3f93/python/pyarrow/tests/test_cffi.py#L109 +[MATLAB supports running Python code in a separate process]: https://www.mathworks.com/help/matlab/matlab_external/out-of-process-execution-of-python-functionality.html +[Plasma Object Store]: https://arrow.apache.org/docs/python/plasma.html 
+[tutorial]: https://github.com/apache/arrow/blob/master/cpp/apidoc/tutorials/plasma.md#getting-an-object +[MATLAB Class-Based Unit Tests]: https://www.mathworks.com/help/matlab/class-based-unit-tests.html +[Integration Testing]: https://arrow.apache.org/docs/format/Integration.html +[MATLAB Help Text]: https://www.mathworks.com/help/matlab/matlab_prog/add-help-for-your-program.html +[MEX]: https://www.mathworks.com/help/matlab/call-mex-files-1.html +[Add-On Explorer]: https://www.mathworks.com/help/matlab/matlab_env/get-add-ons.html +[JavaScript user]: https://github.com/apache/arrow/tree/master/js +[`apache-arrow` package via the `npm` package manager]: https://www.npmjs.com/package/apache-arrow +[Rust user]: https://github.com/apache/arrow-rs +[`arrow` crate via the `cargo` package manager]: https://crates.io/crates/arrow diff --git a/src/arrow/matlab/src/+mlarrow/+util/createMetadataStruct.m b/src/arrow/matlab/src/+mlarrow/+util/createMetadataStruct.m new file mode 100644 index 000000000..b1b8bc7ed --- /dev/null +++ b/src/arrow/matlab/src/+mlarrow/+util/createMetadataStruct.m @@ -0,0 +1,23 @@ +function metadata = createMetadataStruct(numRows, numVariables) +% CREATEMETADATASTRUCT Helper function for creating Feather MEX metadata +% struct. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. 
See the License for the specific language governing +% permissions and limitations under the License. + +metadata = struct('NumRows', numRows, ... + 'NumVariables', numVariables); +end + diff --git a/src/arrow/matlab/src/+mlarrow/+util/createVariableStruct.m b/src/arrow/matlab/src/+mlarrow/+util/createVariableStruct.m new file mode 100644 index 000000000..99f52d89b --- /dev/null +++ b/src/arrow/matlab/src/+mlarrow/+util/createVariableStruct.m @@ -0,0 +1,24 @@ +function variable = createVariableStruct(type, data, valid, name) +% CREATEVARIABLESTRUCT Helper function for creating Feather MEX variable +% struct. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +variable = struct('Type', type, ... + 'Data', data, ... + 'Valid', valid, ... + 'Name', name); +end
\ No newline at end of file diff --git a/src/arrow/matlab/src/+mlarrow/+util/makeValidMATLABTableVariableNames.m b/src/arrow/matlab/src/+mlarrow/+util/makeValidMATLABTableVariableNames.m new file mode 100644 index 000000000..ba5e072dc --- /dev/null +++ b/src/arrow/matlab/src/+mlarrow/+util/makeValidMATLABTableVariableNames.m @@ -0,0 +1,42 @@ +function [variableNames, variableDescriptions] = makeValidMATLABTableVariableNames(columnNames) +% makeValidMATLABTableVariableNames Makes valid MATLAB table variable names +% from a set of Feather table column names. +% +% [variableNames, variableDescriptions] = makeValidMATLABTableVariableNames(columnNames) +% Modifies the input Feather table columnNames to be valid MATLAB table +% variable names if they are not already. If any of the Feather table columnNames +% are invalid MATLAB table variable names, then the original columnNames are returned +% in variableDescriptions to be stored in the table.Properties.VariableDescriptions +% property. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ + variableNames = string(columnNames); + variableDescriptions = strings(0, 0); + + validVariableNames = false(1, length(variableNames)); + for ii = 1:length(variableNames) + validVariableNames(ii) = isvarname(variableNames(ii)); + end + + if ~all(validVariableNames) + variableDescriptions = strings(1, length(columnNames)); + variableDescriptions(validVariableNames) = ""; + variableDescriptions(~validVariableNames) = compose("Original variable name: '%s'", ... + variableNames(~validVariableNames)); + variableNames(~validVariableNames) = matlab.lang.makeValidName(variableNames(~validVariableNames)); + end +end diff --git a/src/arrow/matlab/src/+mlarrow/+util/table2mlarrow.m b/src/arrow/matlab/src/+mlarrow/+util/table2mlarrow.m new file mode 100644 index 000000000..36e4d1d15 --- /dev/null +++ b/src/arrow/matlab/src/+mlarrow/+util/table2mlarrow.m @@ -0,0 +1,82 @@ +function [variables, metadata] = table2mlarrow(t) +%TABLE2MLARROW Converts a MATLAB table into a form +% suitable for passing to the mlarrow C++ MEX layer. +% +% [VARIABLES, METADATA] = TABLE2MLARROW(T) +% Takes a MATLAB table T and returns struct array equivalents +% which are suitable for passing to the mlarrow C++ MEX layer. +% +% VARIABLES is an 1xN struct array representing the the table variables. +% +% VARIABLES contains the following fields: +% +% Field Name Class Description +% ------------ ------- ---------------------------------------------- +% Name char Variable's name +% Type char Variable's MATLAB datatype +% Data numeric Variable's data +% Valid logical 0 = invalid (null), 1 = valid (non-null) value +% +% METADATA is a 1x1 struct array with the following fields: +% +% METADATA contains the following fields: +% +% Field Name Class Description +% ------------ ------- ---------------------------------------------- +% NumRows double Number of table rows (height(T)) +% NumVariables double Number of table variables (width(T)) +% +% See also FEATHERREAD, FEATHERWRITE. 
+ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +import mlarrow.util.*; + +% Struct array representing the underlying data of each variable +% in the given table. +variables = repmat(createVariableStruct('', [], [], ''), 1, width(t)); + +% Struct representing table-level metadata. +metadata = createMetadataStruct(height(t), width(t)); + +% Iterate over each variable in the given table, +% extracting the underlying array data. +for ii = 1:width(t) + data = t.(ii); + % Multi-column table variables are unsupported. + if ~isvector(data) + error('MATLAB:arrow:MultiColumnVariablesUnsupported', ... + 'Multi-column table variables are unsupported by featherwrite.'); + end + % Get the datatype of the current variable's underlying array. + variables(ii).Type = class(data); + % Break the datatype down into its constituent components, if appropriate. + switch variables(ii).Type + % For numeric variables, the underlying array data can + % be passed to the C++ layer directly. + case {'uint8', 'uint16', 'uint32', 'uint64', ... + 'int8', 'int16', 'int32', 'int64', ... + 'single', 'double'} + variables(ii).Data = data; + otherwise + error('MATLAB:arrow:UnsupportedVariableType', ... 
+ ['Type ' variables(ii).Type ' is unsupported by featherwrite.']); + end + variables(ii).Valid = ~ismissing(data); + variables(ii).Name = t.Properties.VariableNames{ii}; +end + +end diff --git a/src/arrow/matlab/src/feather_reader.cc b/src/arrow/matlab/src/feather_reader.cc new file mode 100644 index 000000000..1cbb50541 --- /dev/null +++ b/src/arrow/matlab/src/feather_reader.cc @@ -0,0 +1,277 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <algorithm> +#include <cmath> + +#include "feather_reader.h" + +#include <arrow/array/array_base.h> +#include <arrow/array/builder_base.h> +#include <arrow/array/builder_primitive.h> +#include <arrow/io/file.h> +#include <arrow/ipc/feather.h> +#include <arrow/result.h> +#include <arrow/status.h> +#include <arrow/table.h> +#include <arrow/type.h> +#include <arrow/type_traits.h> +#include <arrow/util/bitmap_visit.h> +#include <mex.h> + +#include "matlab_traits.h" +#include "util/handle_status.h" +#include "util/unicode_conversion.h" + +namespace arrow { +namespace matlab { +namespace internal { + +// Read the name of variable i from the Feather file as a mxArray*. 
+mxArray* ReadVariableName(const std::string& column_name) { + return matlab::util::ConvertUTF8StringToUTF16CharMatrix(column_name); +} + +template <typename ArrowDataType> +mxArray* ReadNumericVariableData(const std::shared_ptr<Array>& column) { + using MatlabType = typename MatlabTraits<ArrowDataType>::MatlabType; + using ArrowArrayType = typename TypeTraits<ArrowDataType>::ArrayType; + + const mxClassID matlab_class_id = MatlabTraits<ArrowDataType>::matlab_class_id; + // Allocate a numeric mxArray* with the correct mxClassID based on the type of the + // arrow::Array. + mxArray* variable_data = + mxCreateNumericMatrix(column->length(), 1, matlab_class_id, mxREAL); + + auto arrow_numeric_array = + std::static_pointer_cast<ArrowArrayType>(column); + + // Get a raw pointer to the Arrow array data. + const MatlabType* source = arrow_numeric_array->raw_values(); + + // Get a mutable pointer to the MATLAB array data and std::copy the + // Arrow array data into it. + MatlabType* destination = MatlabTraits<ArrowDataType>::GetData(variable_data); + std::copy(source, source + column->length(), destination); + + return variable_data; +} + +// Read the data of variable i from the Feather file as a mxArray*. 
+mxArray* ReadVariableData(const std::shared_ptr<Array>& column, + const std::string& column_name) { + std::shared_ptr<DataType> type = column->type(); + + switch (type->id()) { + case Type::FLOAT: + return ReadNumericVariableData<FloatType>(column); + case Type::DOUBLE: + return ReadNumericVariableData<DoubleType>(column); + case Type::UINT8: + return ReadNumericVariableData<UInt8Type>(column); + case Type::UINT16: + return ReadNumericVariableData<UInt16Type>(column); + case Type::UINT32: + return ReadNumericVariableData<UInt32Type>(column); + case Type::UINT64: + return ReadNumericVariableData<UInt64Type>(column); + case Type::INT8: + return ReadNumericVariableData<Int8Type>(column); + case Type::INT16: + return ReadNumericVariableData<Int16Type>(column); + case Type::INT32: + return ReadNumericVariableData<Int32Type>(column); + case Type::INT64: + return ReadNumericVariableData<Int64Type>(column); + default: { + mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedArrowType", + "Unsupported arrow::Type '%s' for variable '%s'", + type->name().c_str(), column_name.c_str()); + break; + } + } + + return nullptr; +} + +// arrow::Buffers are bit-packed, while mxLogical arrays aren't. This utility +// uses an Arrow utility to copy each bit of an arrow::Buffer into each byte +// of an mxLogical array. +void BitUnpackBuffer(const std::shared_ptr<Buffer>& source, int64_t length, + mxLogical* destination) { + const uint8_t* source_data = source->data(); + + // Call into an Arrow utility to visit each bit in the bitmap. + auto visitFcn = [&](mxLogical is_valid) { *destination++ = is_valid; }; + + const int64_t start_offset = 0; + arrow::internal::VisitBitsUnrolled(source_data, start_offset, length, visitFcn); +} + +// Populates the validity bitmap from an arrow::Array. +// writes to a zero-initialized destination buffer. +// Implements a fast path for the fully-valid and fully-invalid cases. +// Returns true if the destination buffer was successfully populated. 
+bool TryBitUnpackFastPath(const std::shared_ptr<Array>& array, mxLogical* destination) { + const int64_t null_count = array->null_count(); + const int64_t length = array->length(); + + if (null_count == length) { + // The source array is filled with invalid values. Since mxCreateLogicalMatrix + // zero-initializes the destination buffer, we can return without changing anything + // in the destination buffer. + return true; + } else if (null_count == 0) { + // The source array contains only valid values. Fill the destination buffer + // with 'true'. + std::fill(destination, destination + length, true); + return true; + } + + // Return false to indicate that we couldn't fill the entire validity bitmap. + return false; +} + +// Read the validity (null) bitmap of variable i from the Feather +// file as an mxArray*. +mxArray* ReadVariableValidityBitmap(const std::shared_ptr<Array>& column) { + // Allocate an mxLogical array to store the validity (null) bitmap values. + // Note: All Arrow arrays can have an associated validity (null) bitmap. + // The Apache Arrow specification defines 0 (false) to represent an + // invalid (null) array entry and 1 (true) to represent a valid + // (non-null) array entry. + mxArray* validity_bitmap = mxCreateLogicalMatrix(column->length(), 1); + mxLogical* validity_bitmap_unpacked = mxGetLogicals(validity_bitmap); + + if (!TryBitUnpackFastPath(column, validity_bitmap_unpacked)) { + // Couldn't fill the full validity bitmap at once. Call an optimized loop-unrolled + // implementation instead that goes byte-by-byte and populates the validity bitmap. + BitUnpackBuffer(column->null_bitmap(), column->length(), validity_bitmap_unpacked); + } + + return validity_bitmap; +} + +// Read the type name of an arrow::Array as an mxChar array. +mxArray* ReadVariableType(const std::shared_ptr<Array>& column) { + return util::ConvertUTF8StringToUTF16CharMatrix(column->type()->name()); +} + +// MATLAB arrays cannot be larger than 2^48 elements. 
+static constexpr uint64_t MAX_MATLAB_SIZE = static_cast<uint64_t>(0x01) << 48; + +} // namespace internal + +Status FeatherReader::Open(const std::string& filename, + std::shared_ptr<FeatherReader>* feather_reader) { + *feather_reader = std::shared_ptr<FeatherReader>(new FeatherReader()); + + // Open file with given filename as a ReadableFile. + ARROW_ASSIGN_OR_RAISE(auto readable_file, io::ReadableFile::Open(filename)); + + // Open the Feather file for reading with a TableReader. + ARROW_ASSIGN_OR_RAISE(auto reader, ipc::feather::Reader::Open(readable_file)); + + // Set the internal reader_ object. + (*feather_reader)->reader_ = reader; + + // Check the feather file version + auto version = reader->version(); + if (version == ipc::feather::kFeatherV2Version) { + return Status::NotImplemented("Support for Feather V2 has not been implemented."); + } else if (version != ipc::feather::kFeatherV1Version) { + return Status::Invalid("Unknown Feather format version."); + } + + // read the table metadata from the Feather file + (*feather_reader)->num_variables_ = reader->schema()->num_fields(); + return Status::OK(); +} + +// Read the table metadata from the Feather file as a mxArray*. +mxArray* FeatherReader::ReadMetadata() const { + const int32_t num_metadata_fields = 3; + const char* fieldnames[] = {"NumRows", "NumVariables", "Description"}; + + // Create a mxArray struct array containing the table metadata to be passed back to + // MATLAB. + mxArray* metadata = mxCreateStructMatrix(1, 1, num_metadata_fields, fieldnames); + + // Returning double values to MATLAB since that is the default type. + + // Set the number of rows. + mxSetField(metadata, 0, "NumRows", + mxCreateDoubleScalar(static_cast<double>(num_rows_))); + + // Set the number of variables. + mxSetField(metadata, 0, "NumVariables", + mxCreateDoubleScalar(static_cast<double>(num_variables_))); + + return metadata; +} + +// Read the table variables from the Feather file as a mxArray*. 
+mxArray* FeatherReader::ReadVariables() { + const int32_t num_variable_fields = 4; + const char* fieldnames[] = {"Name", "Type", "Data", "Valid"}; + + // Create an mxArray* struct array containing the table variables to be passed back to + // MATLAB. + mxArray* variables = + mxCreateStructMatrix(1, num_variables_, num_variable_fields, fieldnames); + + std::shared_ptr<arrow::Table> table; + auto status = reader_->Read(&table); + if (!status.ok()) { + mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::FailedToReadTable", + "Failed to read arrow::Table from Feather file. Reason: %s", + status.message().c_str()); + } + + // Set the number of rows + num_rows_ = table->num_rows(); + + if (num_rows_ > internal::MAX_MATLAB_SIZE || + num_variables_ > internal::MAX_MATLAB_SIZE) { + mexErrMsgIdAndTxt("MATLAB:arrow:SizeTooLarge", + "The table size exceeds MATLAB limits: %u x %u", num_rows_, + num_variables_); + } + + auto column_names = table->ColumnNames(); + + for (int64_t i = 0; i < num_variables_; ++i) { + auto column = table->column(i); + if (column->num_chunks() != 1) { + mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::ReadVariables", + "Chunked columns not yet supported"); + } + std::shared_ptr<Array> chunk = column->chunk(0); + const std::string column_name = column_names[i]; + + // set the struct fields data + mxSetField(variables, i, "Name", internal::ReadVariableName(column_name)); + mxSetField(variables, i, "Type", internal::ReadVariableType(chunk)); + mxSetField(variables, i, "Data", internal::ReadVariableData(chunk, column_name)); + mxSetField(variables, i, "Valid", internal::ReadVariableValidityBitmap(chunk)); + } + + return variables; +} + +} // namespace matlab +} // namespace arrow diff --git a/src/arrow/matlab/src/feather_reader.h b/src/arrow/matlab/src/feather_reader.h new file mode 100644 index 000000000..197e470bf --- /dev/null +++ b/src/arrow/matlab/src/feather_reader.h @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or 
more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <memory> +#include <string> + +#include <arrow/ipc/feather.h> +#include <arrow/status.h> +#include <arrow/type.h> +#include <matrix.h> + +namespace arrow { +namespace matlab { + +class FeatherReader { + public: + ~FeatherReader() = default; + + /// \brief Read the table metadata as an mxArray* struct from the given + /// Feather file. + /// The returned mxArray* struct contains the following fields: + /// - "Description" :: Nx1 mxChar array, table-level description + /// - "NumRows" :: scalar mxDouble array, number of rows in the + /// table + /// - "NumVariables" :: scalar mxDouble array, number of variables in + /// the table + /// Clients are responsible for freeing the returned mxArray memory + /// when it is no longer needed, or passing it to MATLAB to be managed. + /// \return metadata mxArray* scalar struct containing table level metadata + mxArray* ReadMetadata() const; + + /// \brief Read the table variable data as an mxArray* struct array from the + /// given Feather file. 
+ /// The returned mxArray* struct array has the following fields: + /// - "Name" :: Nx1 mxChar array, name of the variable + /// - "Type" :: Nx1 mxChar array, the variable's Arrow datatype + /// - "Data" :: Nx1 mxArray, data for the variable + /// - "Valid" :: Nx1 mxLogical array, validity (null) bitmap + /// Clients are responsible for freeing the returned mxArray memory + /// when it is no longer needed, or passing it to MATLAB to be managed. + /// \return variables mxArray* struct array containing table variable data + mxArray* ReadVariables(); + + /// \brief Initialize a FeatherReader object from a given Feather file. + /// \param[in] filename path to a Feather file + /// \param[out] feather_reader uninitialized FeatherReader object + static Status Open(const std::string& filename, + std::shared_ptr<FeatherReader>* feather_reader); + + private: + FeatherReader() = default; + std::shared_ptr<ipc::feather::Reader> reader_; + int64_t num_rows_; + int64_t num_variables_; + std::string description_; +}; + +} // namespace matlab +} // namespace arrow diff --git a/src/arrow/matlab/src/feather_writer.cc b/src/arrow/matlab/src/feather_writer.cc new file mode 100644 index 000000000..1a76ada19 --- /dev/null +++ b/src/arrow/matlab/src/feather_writer.cc @@ -0,0 +1,366 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cmath> +#include <functional> /* for std::multiplies */ +#include <numeric> /* for std::accumulate */ + +#include "feather_writer.h" + +#include <arrow/array.h> +#include <arrow/buffer.h> +#include <arrow/io/file.h> +#include <arrow/ipc/feather.h> +#include <arrow/status.h> +#include <arrow/table.h> +#include <arrow/type.h> +#include <arrow/util/bit_util.h> +#include <arrow/util/bitmap_generate.h> +#include <arrow/util/key_value_metadata.h> +#include <mex.h> + +#include "matlab_traits.h" +#include "util/handle_status.h" + +namespace arrow { +namespace matlab { +namespace internal { + +// Returns the arrow::DataType that corresponds to the input type string +std::shared_ptr<arrow::DataType> ConvertMatlabTypeStringToArrowDataType( + const std::string& t) { + if (t == "double") { + return arrow::float64(); + } else if (t == "single") { + return arrow::float32(); + } else if (t == "uint64") { + return arrow::uint64(); + } else if (t == "uint32") { + return arrow::uint32(); + } else if (t == "uint16") { + return arrow::uint16(); + } else if (t == "uint8") { + return arrow::uint8(); + } else if (t == "int64") { + return arrow::int64(); + } else if (t == "int32") { + return arrow::int32(); + } else if (t == "int16") { + return arrow::int16(); + } else if (t == "int8") { + return arrow::int8(); + } + mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedMatlabTypeString", + "Unsupported MATLAB type string: '%s'", t.c_str()); + + // mexErrMsgIdAndTxt throws unconditionally so we should never reach this line + return nullptr; +} + +// Utility that helps verify the input mxArray struct field name and type. +// Returns void since any errors will throw and terminate MEX execution. +void ValidateMxStructField(const mxArray* struct_array, const char* fieldname, + mxClassID expected_class_id, bool can_be_empty) { + // Check that the input mxArray is a struct array. 
+ if (!mxIsStruct(struct_array)) { + mexErrMsgIdAndTxt("MATLAB:arrow:IncorrectDimensionsOrType", + "Input needs to be a struct array"); + } + + // Return early if an empty table is provided as input. + if (mxIsEmpty(struct_array)) { + return; + } + + mxArray* field = mxGetField(struct_array, 0, fieldname); + + if (!field) { + mexErrMsgIdAndTxt("MATLAB:arrow:MissingStructField", + "Missing field '%s' in input struct array", fieldname); + } + + mxClassID actual_class_id = mxGetClassID(field); + + // Avoid type check if an mxUNKNOWN_CLASS is provided since the UNKNOWN type is used to + // signify genericity in the input type. + if (expected_class_id != mxUNKNOWN_CLASS) { + if (expected_class_id != actual_class_id) { + mexErrMsgIdAndTxt("MATLAB:arrow:MissingStructField", + "Incorrect type '%s' for struct array field '%s'", + mxGetClassName(field), fieldname); + } + } + // Some struct fields (like Data) can be empty, while others + // (like NumRows) should never be empty. This conditional helps account for both cases. + if (!can_be_empty) { + // Ensure that individual mxStructArray fields are non-empty. + // We can call mxGetData after this without needing another null check. + if (mxIsEmpty(field)) { + mexErrMsgIdAndTxt("MATLAB:arrow:EmptyStructField", + "Struct array field '%s' cannot be empty", fieldname); + } + } +} + +// Utility function to convert mxChar mxArray* to std::string while preserving +// Unicode code points. +std::string MxArrayToString(const mxArray* array) { + // Return empty std::string if a mxChar array is not passed in. + if (!mxIsChar(array)) { + return std::string(); + } + + // Convert mxArray first to a C-style char array, then copy into a std::string. + char* utf8_array = mxArrayToUTF8String(array); + std::string output(utf8_array); + + // Free the allocated char* from the MEX runtime. + mxFree(utf8_array); + + return output; +} + +// Compare number of columns and exit out to the MATLAB layer if incorrect. 
+void ValidateNumColumns(int64_t actual, int64_t expected) { + if (actual != expected) { + mexErrMsgIdAndTxt("MATLAB:arrow:IncorrectNumberOfColumns", + "Received only '%d' columns but expected '%d' columns", actual, + expected); + } +} + +// Compare number of rows and exit out to the MATLAB layer if incorrect. +void ValidateNumRows(int64_t actual, int64_t expected) { + if (actual != expected) { + mexErrMsgIdAndTxt("MATLAB:arrow:IncorrectNumberOfRows", + "Received only '%d' rows but expected '%d' rows", actual, expected); + } +} + +// Calculate the number of bytes required in the bit-packed validity buffer. +int64_t BitPackedLength(int64_t num_elements) { + // Since mxLogicalArray encodes [0, 1] in a full byte, we can compress that byte + // down to a bit...therefore dividing the mxLogicalArray length by 8 here. + return static_cast<int64_t>(std::ceil(num_elements / 8.0)); +} + +// Calculate the total number of elements in an mxArray +// We have to do this separately since mxGetNumberOfElements only works in numeric arrays +size_t GetNumberOfElements(const mxArray* array) { + // Get the dimensions and the total number of dimensions from the mxArray*. + const size_t num_dimensions = mxGetNumberOfDimensions(array); + const size_t* dimensions = mxGetDimensions(array); + + // Iterate over the dimensions array and accumulate the total number of elements. + return std::accumulate(dimensions, dimensions + num_dimensions, size_t{1}, + std::multiplies<size_t>()); +} + +// Write an mxLogicalArray* into a bit-packed arrow::MutableBuffer +void BitPackBuffer(const mxArray* logical_array, + std::shared_ptr<MutableBuffer> packed_buffer) { + // Error out if the incorrect type is passed in. 
+ if (!mxIsLogical(logical_array)) { + mexErrMsgIdAndTxt( + "MATLAB:arrow:IncorrectType", + "Expected mxLogical array as input but received mxArray of class '%s'", + mxGetClassName(logical_array)); + } + + // Validate that the input arrow::Buffer has sufficient size to store a full bit-packed + // representation of the input mxLogicalArray + int64_t unpacked_buffer_length = GetNumberOfElements(logical_array); + if (BitPackedLength(unpacked_buffer_length) > packed_buffer->capacity()) { + mexErrMsgIdAndTxt("MATLAB:arrow:BufferSizeExceeded", + "Buffer of size %d bytes cannot store %d bytes of data", + packed_buffer->capacity(), BitPackedLength(unpacked_buffer_length)); + } + + // Get pointers to the internal uint8_t arrays behind arrow::Buffer and mxArray + uint8_t* packed_buffer_ptr = packed_buffer->mutable_data(); + mxLogical* unpacked_buffer_ptr = mxGetLogicals(logical_array); + + // Iterate over the mxLogical array and write bit-packed bools to the arrow::Buffer. + // Call into a loop-unrolled Arrow utility for better performance when bit-packing. + auto generator = [&]() -> bool { return *(unpacked_buffer_ptr++); }; + const int64_t start_offset = 0; + arrow::internal::GenerateBitsUnrolled(packed_buffer_ptr, start_offset, + unpacked_buffer_length, generator); +} + +// Write numeric datatypes to the Feather file. +template <typename ArrowDataType> +std::unique_ptr<Array> WriteNumericData(const mxArray* data, + const std::shared_ptr<Buffer> validity_bitmap) { + // Alias the type name for the underlying MATLAB type. + using MatlabType = typename MatlabTraits<ArrowDataType>::MatlabType; + + // Get a pointer to the underlying mxArray data. + // We need to (temporarily) cast away const here since the mxGet* functions do not + // accept a const input parameter for compatibility reasons. + const MatlabType* dt = MatlabTraits<ArrowDataType>::GetData(const_cast<mxArray*>(data)); + + // Construct an arrow::Buffer that points to the underlying mxArray without copying. 
+ // - The lifetime of the mxArray buffer exceeds that of the arrow::Buffer here since + // MATLAB should only free this region on garbage-collection after the MEX function + // is executed. Therefore it is safe for arrow::Buffer to point to this location. + // - However arrow::Buffer must not free this region by itself, since that could cause + // segfaults if the input array is used later in MATLAB. + // - The Doxygen doc for arrow::Buffer's constructor implies that it is not an RAII + // type, so this should be safe from possible double-free here. + std::shared_ptr<Buffer> buffer = + std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(dt), + mxGetElementSize(data) * mxGetNumberOfElements(data)); + + // Construct arrow::NumericArray specialization using arrow::Buffer. + // Pass in nulls information...we could compute and provide the number of nulls here + // too, but passing -1 for now so that Arrow recomputes it if necessary. + return std::unique_ptr<Array>(new NumericArray<ArrowDataType>( + mxGetNumberOfElements(data), buffer, validity_bitmap, -1)); +} + +// Dispatch MATLAB column data to the correct arrow::Array converter. +std::unique_ptr<Array> WriteVariableData(const mxArray* data, const std::string& type, + const std::shared_ptr<Buffer> validity_bitmap) { + // Get the underlying type of the mxArray data. 
+ const mxClassID mxclass = mxGetClassID(data); + + switch (mxclass) { + case mxSINGLE_CLASS: + return WriteNumericData<FloatType>(data, validity_bitmap); + case mxDOUBLE_CLASS: + return WriteNumericData<DoubleType>(data, validity_bitmap); + case mxUINT8_CLASS: + return WriteNumericData<UInt8Type>(data, validity_bitmap); + case mxUINT16_CLASS: + return WriteNumericData<UInt16Type>(data, validity_bitmap); + case mxUINT32_CLASS: + return WriteNumericData<UInt32Type>(data, validity_bitmap); + case mxUINT64_CLASS: + return WriteNumericData<UInt64Type>(data, validity_bitmap); + case mxINT8_CLASS: + return WriteNumericData<Int8Type>(data, validity_bitmap); + case mxINT16_CLASS: + return WriteNumericData<Int16Type>(data, validity_bitmap); + case mxINT32_CLASS: + return WriteNumericData<Int32Type>(data, validity_bitmap); + case mxINT64_CLASS: + return WriteNumericData<Int64Type>(data, validity_bitmap); + default: { + mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedArrowType", + "Unsupported arrow::Type '%s' for variable '%s'", + mxGetClassName(data), type.c_str()); + } + } + + // We shouldn't ever reach this branch, but if we do, return nullptr. + return nullptr; +} + +} // namespace internal + +Status FeatherWriter::Open(const std::string& filename, + std::shared_ptr<FeatherWriter>* feather_writer) { + // Allocate shared_ptr out parameter. + *feather_writer = std::shared_ptr<FeatherWriter>(new FeatherWriter()); + + // Open a FileOutputStream corresponding to the provided filename. + ARROW_ASSIGN_OR_RAISE((*feather_writer)->file_output_stream_, + io::FileOutputStream::Open(filename, &((*feather_writer)->file_output_stream_))); + return Status::OK(); +} + +// Write mxArrays from MATLAB into a Feather file. +Status FeatherWriter::WriteVariables(const mxArray* variables, const mxArray* metadata) { + // Verify that all required fieldnames are provided. 
+ internal::ValidateMxStructField(variables, "Name", mxCHAR_CLASS, true); + internal::ValidateMxStructField(variables, "Type", mxCHAR_CLASS, false); + internal::ValidateMxStructField(variables, "Data", mxUNKNOWN_CLASS, true); + internal::ValidateMxStructField(variables, "Valid", mxLOGICAL_CLASS, true); + + // Verify that all required fieldnames are provided. + internal::ValidateMxStructField(metadata, "NumRows", mxDOUBLE_CLASS, false); + internal::ValidateMxStructField(metadata, "NumVariables", mxDOUBLE_CLASS, false); + + // Get the number of columns in the struct array. + size_t num_columns = internal::GetNumberOfElements(variables); + + // Get the NumRows field in the struct array and set on TableWriter. + num_rows_ = static_cast<int64_t>(mxGetScalar(mxGetField(metadata, 0, "NumRows"))); + // Get the total number of variables. This is checked later for consistency with + // the provided number of columns before finishing the file write. + num_variables_ = + static_cast<int64_t>(mxGetScalar(mxGetField(metadata, 0, "NumVariables"))); + + // Verify that we have all the columns required for writing + // Currently we need all columns to be passed in together in the WriteVariables method. + internal::ValidateNumColumns(static_cast<int64_t>(num_columns), num_variables_); + + arrow::SchemaBuilder schema_builder; + std::vector<std::shared_ptr<arrow::Array>> table_columns; + + const int64_t bitpacked_length = internal::BitPackedLength(num_rows_); + + // Iterate over the input columns and generate arrow arrays. + for (int idx = 0; idx < num_columns; ++idx) { + // Unwrap constituent mxArray*s from the mxStructArray*. This is safe since we + // already checked for existence and non-nullness of these types. 
+ const mxArray* name = mxGetField(variables, idx, "Name"); + const mxArray* data = mxGetField(variables, idx, "Data"); + const mxArray* type = mxGetField(variables, idx, "Type"); + const mxArray* valid = mxGetField(variables, idx, "Valid"); + + // Convert column and type name to a std::string from mxArray*. + std::string name_str = internal::MxArrayToString(name); + std::string type_str = internal::MxArrayToString(type); + + auto datatype = internal::ConvertMatlabTypeStringToArrowDataType(type_str); + auto field = std::make_shared<arrow::Field>(name_str, datatype); + + ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> validity_bitmap, + arrow::AllocateResizableBuffer(internal::BitPackedLength(num_rows_))); + + // Populate bit-packed arrow::Buffer using validity data in the mxArray*. + internal::BitPackBuffer(valid, validity_bitmap); + + // Wrap mxArray data in an arrow::Array of the equivalent type. + auto array = + internal::WriteVariableData(data, type_str, validity_bitmap); + + // Verify that the arrow::Array has the right number of elements. + internal::ValidateNumRows(array->length(), num_rows_); + + // Append the field to the schema builder + RETURN_NOT_OK(schema_builder.AddField(field)); + + // Store the table column + table_columns.push_back(std::move(array)); + } + // Create the table schema + ARROW_ASSIGN_OR_RAISE(auto table_schema, schema_builder.Finish()); + + // Specify the feather file format version as V1 + arrow::ipc::feather::WriteProperties write_props; + write_props.version = arrow::ipc::feather::kFeatherV1Version; + + std::shared_ptr<arrow::Table> table = arrow::Table::Make(table_schema, table_columns); + // Write the Feather file metadata to the end of the file. 
+ return ipc::feather::WriteTable(*table, file_output_stream_.get(), write_props); +} + +} // namespace matlab +} // namespace arrow diff --git a/src/arrow/matlab/src/feather_writer.h b/src/arrow/matlab/src/feather_writer.h new file mode 100644 index 000000000..a35b14343 --- /dev/null +++ b/src/arrow/matlab/src/feather_writer.h @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <memory> +#include <string> + +#include <arrow/ipc/feather.h> +#include <arrow/status.h> +#include <arrow/type.h> +#include <matrix.h> + +namespace arrow { +namespace matlab { + +class FeatherWriter { + public: + ~FeatherWriter() = default; + + /// \brief Write mxArrays to a Feather file. 
The first input must be a N-by-1 mxStruct + /// array with the following fields: + /// - "Name" :: Nx1 mxChar array, name of the column + /// - "Type" :: Nx1 mxChar array, the variable's MATLAB datatype + /// - "Data" :: Nx1 mxArray, data for this variable + /// - "Valid" :: Nx1 mxLogical array, 0 represents invalid (null) values and + /// 1 represents valid (non-null) values + /// The second input must be a scalar mxStruct with the following + /// fields: + /// - "NumRows" :: scalar mxDouble array, number of rows in table + /// - "NumVariables" :: scalar mxDouble array, total number of variables + /// \param[in] variables mxArray* struct array containing table variable data + /// \param[in] metadata mxArray* scalar struct containing table-level metadata + /// \return status + Status WriteVariables(const mxArray* variables, const mxArray* metadata); + + /// \brief Initialize a FeatherWriter object that writes to a Feather file + /// \param[in] filename path to the new Feather file + /// \param[out] feather_writer uninitialized FeatherWriter object + /// \return status + static Status Open(const std::string& filename, + std::shared_ptr<FeatherWriter>* feather_writer); + + private: + FeatherWriter() = default; + + int64_t num_rows_; + int64_t num_variables_; + std::string description_; + std::shared_ptr<arrow::io::OutputStream> file_output_stream_; +}; + +} // namespace matlab +} // namespace arrow diff --git a/src/arrow/matlab/src/featherread.m b/src/arrow/matlab/src/featherread.m new file mode 100644 index 000000000..31bc426b8 --- /dev/null +++ b/src/arrow/matlab/src/featherread.m @@ -0,0 +1,86 @@ +function t = featherread(filename) +%FEATHERREAD Create a table by reading from a Feather file. +% Use the FEATHERREAD function to create a table by reading +% column-oriented data from a Feather file. +% +% T = FEATHERREAD(FILENAME) creates a table by reading from the Feather +% file FILENAME. 
+ +% Licensed to the Apache Software Foundation (ASF) under one +% or more contributor license agreements. See the NOTICE file +% distributed with this work for additional information +% regarding copyright ownership. The ASF licenses this file +% to you under the Apache License, Version 2.0 (the +% "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +% KIND, either express or implied. See the License for the +% specific language governing permissions and limitations +% under the License. + +import mlarrow.util.*; + +% Validate input arguments. +narginchk(1, 1); +filename = convertStringsToChars(filename); +if ~ischar(filename) + error('MATLAB:arrow:InvalidFilenameDatatype', ... + 'Filename must be a character vector or string scalar.'); +end + +% FOPEN can be used to search for files without an extension on the MATLAB +% path. +fid = fopen(filename); +if fid ~= -1 + filename = fopen(fid); + fclose(fid); +else + error('MATLAB:arrow:UnableToOpenFile', ... + 'Unable to open file %s.', filename); +end + +% Read table variables and metadata from the given Feather file using +% libarrow. +[variables, metadata] = featherreadmex(filename); + +% Make valid MATLAB table variable names out of any of the +% Feather table column names that are not valid MATLAB table +% variable names. +[variableNames, variableDescriptions] = makeValidMATLABTableVariableNames({variables.Name}); + +% Iterate over each table variable, handling invalid (null) entries +% and invalid MATLAB table variable names appropriately. +% Note: All Arrow arrays can have an associated validity (null) bitmap. 
+% The Apache Arrow specification defines 0 (false) to represent an +% invalid (null) array entry and 1 (true) to represent a valid +% (non-null) array entry. +for ii = 1:length(variables) + if ~all(variables(ii).Valid) + switch variables(ii).Type + case {'uint8', 'uint16', 'uint32', 'uint64', 'int8', 'int16', 'int32', 'int64'} + % MATLAB does not support missing values for integer types, so + % cast to double and set missing values to NaN in this case. + variables(ii).Data = double(variables(ii).Data); + end + + % Set invalid (null) entries to the appropriate MATLAB missing value using + % logical indexing. + variables(ii).Data(~variables(ii).Valid) = missing; + end +end + +% Construct a MATLAB table from the Feather file data. +t = table(variables.Data, 'VariableNames', cellstr(variableNames)); + +% Store original Feather table column names in the table.Properties.VariableDescriptions +% property if they were modified to be valid MATLAB table variable names. +if ~isempty(variableDescriptions) + t.Properties.VariableDescriptions = cellstr(variableDescriptions); +end + +end diff --git a/src/arrow/matlab/src/featherreadmex.cc b/src/arrow/matlab/src/featherreadmex.cc new file mode 100644 index 000000000..b52b8a98f --- /dev/null +++ b/src/arrow/matlab/src/featherreadmex.cc @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <string> + +#include <mex.h> + +#include "feather_reader.h" +#include "util/handle_status.h" + +// MEX gateway function. This is the entry point for featherreadmex.cpp. +void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { + const std::string filename{mxArrayToUTF8String(prhs[0])}; + + // Read the given Feather file into memory. + std::shared_ptr<arrow::matlab::FeatherReader> feather_reader{nullptr}; + arrow::matlab::util::HandleStatus( + arrow::matlab::FeatherReader::Open(filename, &feather_reader)); + + // Return the Feather file table variables and table metadata to MATLAB. + plhs[0] = feather_reader->ReadVariables(); + plhs[1] = feather_reader->ReadMetadata(); +} diff --git a/src/arrow/matlab/src/featherwrite.m b/src/arrow/matlab/src/featherwrite.m new file mode 100644 index 000000000..eeedf26d0 --- /dev/null +++ b/src/arrow/matlab/src/featherwrite.m @@ -0,0 +1,44 @@ +function featherwrite(filename, t) +%FEATHERWRITE Write a table to a Feather file. +% Use the FEATHERWRITE function to write a table to +% a Feather file as column-oriented data. +% +% FEATHERWRITE(FILENAME,T) writes the table T to a Feather +% file FILENAME as column-oriented data. + +% Licensed to the Apache Software Foundation (ASF) under one +% or more contributor license agreements. See the NOTICE file +% distributed with this work for additional information +% regarding copyright ownership. 
The ASF licenses this file +% to you under the Apache License, Version 2.0 (the +% "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +% KIND, either express or implied. See the License for the +% specific language governing permissions and limitations +% under the License. + +import mlarrow.util.table2mlarrow; + +% Validate input arguments. +narginchk(2, 2); +filename = convertStringsToChars(filename); +if ~ischar(filename) + error('MATLAB:arrow:InvalidFilenameDatatype', ... + 'Filename must be a character vector or string scalar.'); +end +if ~istable(t) + error('MATLAB:arrow:InvalidInputTable', 't must be a table.'); +end + +[variables, metadata] = table2mlarrow(t); + +% Write the table to a Feather file. +featherwritemex(filename, variables, metadata); + +end diff --git a/src/arrow/matlab/src/featherwritemex.cc b/src/arrow/matlab/src/featherwritemex.cc new file mode 100644 index 000000000..d8f90baaf --- /dev/null +++ b/src/arrow/matlab/src/featherwritemex.cc @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <string> + +#include <mex.h> + +#include "feather_writer.h" +#include "util/handle_status.h" + +// MEX gateway function. This is the entry point for featherwritemex.cc. +void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { + const std::string filename{mxArrayToUTF8String(prhs[0])}; + + // Open a Feather file at the provided file path for writing. + std::shared_ptr<arrow::matlab::FeatherWriter> feather_writer{nullptr}; + arrow::matlab::util::HandleStatus( + arrow::matlab::FeatherWriter::Open(filename, &feather_writer)); + + // Write the Feather file table variables and table metadata from MATLAB. + arrow::matlab::util::HandleStatus(feather_writer->WriteVariables(prhs[1], prhs[2])); +} diff --git a/src/arrow/matlab/src/matlab_traits.h b/src/arrow/matlab/src/matlab_traits.h new file mode 100644 index 000000000..a76539fa7 --- /dev/null +++ b/src/arrow/matlab/src/matlab_traits.h @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <arrow/type.h> + +#include <matrix.h> + +namespace arrow { +namespace matlab { + +/// \brief A type traits class mapping Arrow types to MATLAB types. +template <typename ArrowDataType> +struct MatlabTraits; + +template <> +struct MatlabTraits<FloatType> { + static constexpr mxClassID matlab_class_id = mxSINGLE_CLASS; + using MatlabType = mxSingle; + static MatlabType* GetData(mxArray* pa) { return mxGetSingles(pa); } +}; + +template <> +struct MatlabTraits<DoubleType> { + static constexpr mxClassID matlab_class_id = mxDOUBLE_CLASS; + using MatlabType = mxDouble; + static MatlabType* GetData(mxArray* pa) { return mxGetDoubles(pa); } +}; + +template <> +struct MatlabTraits<UInt8Type> { + static constexpr mxClassID matlab_class_id = mxUINT8_CLASS; + using MatlabType = mxUint8; + static MatlabType* GetData(mxArray* pa) { return mxGetUint8s(pa); } +}; + +template <> +struct MatlabTraits<UInt16Type> { + static constexpr mxClassID matlab_class_id = mxUINT16_CLASS; + using MatlabType = mxUint16; + static MatlabType* GetData(mxArray* pa) { return mxGetUint16s(pa); } +}; + +template <> +struct MatlabTraits<UInt32Type> { + static constexpr mxClassID matlab_class_id = mxUINT32_CLASS; + using MatlabType = mxUint32; + static MatlabType* GetData(mxArray* pa) { return mxGetUint32s(pa); } +}; + +template <> +struct MatlabTraits<UInt64Type> { + static constexpr mxClassID matlab_class_id = mxUINT64_CLASS; + using MatlabType = mxUint64; + static MatlabType* GetData(mxArray* pa) { return mxGetUint64s(pa); } +}; + +template <> +struct 
MatlabTraits<Int8Type> { + static constexpr mxClassID matlab_class_id = mxINT8_CLASS; + using MatlabType = mxInt8; + static MatlabType* GetData(mxArray* pa) { return mxGetInt8s(pa); } +}; + +template <> +struct MatlabTraits<Int16Type> { + static constexpr mxClassID matlab_class_id = mxINT16_CLASS; + using MatlabType = mxInt16; + static MatlabType* GetData(mxArray* pa) { return mxGetInt16s(pa); } +}; + +template <> +struct MatlabTraits<Int32Type> { + static constexpr mxClassID matlab_class_id = mxINT32_CLASS; + using MatlabType = mxInt32; + static MatlabType* GetData(mxArray* pa) { return mxGetInt32s(pa); } +}; + +template <> +struct MatlabTraits<Int64Type> { + static constexpr mxClassID matlab_class_id = mxINT64_CLASS; + using MatlabType = mxInt64; + static MatlabType* GetData(mxArray* pa) { return mxGetInt64s(pa); } +}; + +} // namespace matlab +} // namespace arrow + diff --git a/src/arrow/matlab/src/placeholder_test.cc b/src/arrow/matlab/src/placeholder_test.cc new file mode 100644 index 000000000..eef37e178 --- /dev/null +++ b/src/arrow/matlab/src/placeholder_test.cc @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <gtest/gtest.h> + +namespace arrow { +namespace matlab { +namespace test { +// TODO: Remove this placeholder test. +TEST(PlaceholderTestSuite, PlaceholderTestCase) { ASSERT_TRUE(true); } +} // namespace test +} // namespace matlab +} // namespace arrow diff --git a/src/arrow/matlab/src/util/handle_status.cc b/src/arrow/matlab/src/util/handle_status.cc new file mode 100644 index 000000000..f1c3b7f25 --- /dev/null +++ b/src/arrow/matlab/src/util/handle_status.cc @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include <arrow/status.h> + +#include <mex.h> + +namespace arrow { +namespace matlab { +namespace util { + +void HandleStatus(const Status& status) { + const char* arrow_error_message = "Arrow error: %s"; + switch (status.code()) { + case StatusCode::OK: { + break; + } + case StatusCode::OutOfMemory: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:OutOfMemory", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::KeyError: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:KeyError", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::TypeError: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:TypeError", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::Invalid: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:Invalid", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::IOError: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:IOError", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::CapacityError: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:CapacityError", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::IndexError: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:IndexError", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::UnknownError: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:UnknownError", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::NotImplemented: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:NotImplemented", arrow_error_message, + status.ToString().c_str()); + break; + } + case StatusCode::SerializationError: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:SerializationError", arrow_error_message, + status.ToString().c_str()); + break; + } + default: { + mexErrMsgIdAndTxt("MATLAB:arrow:status:UnknownStatus", arrow_error_message, + "Unknown status"); + break; + } + } +} +} // namespace util +} // namespace matlab +} // namespace arrow 
diff --git a/src/arrow/matlab/src/util/handle_status.h b/src/arrow/matlab/src/util/handle_status.h new file mode 100644 index 000000000..7212114a1 --- /dev/null +++ b/src/arrow/matlab/src/util/handle_status.h @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <arrow/status.h> + +namespace arrow { +namespace matlab { +namespace util { +// Terminates execution and returns to the MATLAB prompt, +// displaying an error message if the given status +// indicates that an error has occurred. +void HandleStatus(const Status& status); +} // namespace util +} // namespace matlab +} // namespace arrow + diff --git a/src/arrow/matlab/src/util/unicode_conversion.cc b/src/arrow/matlab/src/util/unicode_conversion.cc new file mode 100644 index 000000000..01c2e4b94 --- /dev/null +++ b/src/arrow/matlab/src/util/unicode_conversion.cc @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <locale> /* for std::wstring_convert */ +#include <codecvt> /* for std::codecvt_utf8_utf16 */ + +#include "unicode_conversion.h" + +namespace arrow { +namespace matlab { +namespace util { + +mxArray* ConvertUTF8StringToUTF16CharMatrix(const std::string& utf8_string) { + // Get pointers to the start and end of the std::string data. + const char* string_start = utf8_string.c_str(); + const char* string_end = string_start + utf8_string.length(); + + // Due to this issue on MSVC: https://stackoverflow.com/q/32055357 we cannot + // directly use a destination type of char16_t. +#if _MSC_VER >= 1900 + using CharType = int16_t; +#else + using CharType = char16_t; +#endif + using ConverterType = std::codecvt_utf8_utf16<CharType>; + std::wstring_convert<ConverterType, CharType> code_converter{}; + + std::basic_string<CharType> utf16_string; + try { + utf16_string = code_converter.from_bytes(string_start, string_end); + } catch (...) { + // In the case that any error occurs, just try returning a string in the + // user's current locale instead. + return mxCreateString(string_start); + } + + // Store the converter UTF-16 string in a mxCharMatrix and return it. 
+ const mwSize dimensions[2] = {1, utf16_string.size()}; + mxArray* character_matrix = mxCreateCharArray(2, dimensions); + mxChar* character_matrix_pointer = mxGetChars(character_matrix); + std::copy(utf16_string.data(), utf16_string.data() + utf16_string.size(), + character_matrix_pointer); + + return character_matrix; +} + +} // namespace util +} // namespace matlab +} // namespace arrow diff --git a/src/arrow/matlab/src/util/unicode_conversion.h b/src/arrow/matlab/src/util/unicode_conversion.h new file mode 100644 index 000000000..fa905cbf0 --- /dev/null +++ b/src/arrow/matlab/src/util/unicode_conversion.h @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <string> +#include <mex.h> + +namespace arrow { +namespace matlab { +namespace util { +// Converts a UTF-8 encoded std::string to a heap-allocated UTF-16 encoded +// mxCharArray. 
+mxArray* ConvertUTF8StringToUTF16CharMatrix(const std::string& utf8_string); +} // namespace util +} // namespace matlab +} // namespace arrow + diff --git a/src/arrow/matlab/test/tfeather.m b/src/arrow/matlab/test/tfeather.m new file mode 100755 index 000000000..625a3a525 --- /dev/null +++ b/src/arrow/matlab/test/tfeather.m @@ -0,0 +1,232 @@ +classdef tfeather < matlab.unittest.TestCase + % Tests for MATLAB featherread and featherwrite. + + % Licensed to the Apache Software Foundation (ASF) under one or more + % contributor license agreements. See the NOTICE file distributed with + % this work for additional information regarding copyright ownership. + % The ASF licenses this file to you under the Apache License, Version + % 2.0 (the "License"); you may not use this file except in compliance + % with the License. You may obtain a copy of the License at + % + % http://www.apache.org/licenses/LICENSE-2.0 + % + % Unless required by applicable law or agreed to in writing, software + % distributed under the License is distributed on an "AS IS" BASIS, + % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + % implied. See the License for the specific language governing + % permissions and limitations under the License. + + methods(TestClassSetup) + + function addFeatherFunctionsToMATLABPath(testCase) + import matlab.unittest.fixtures.PathFixture + % Add Feather test utilities to the MATLAB path. + testCase.applyFixture(PathFixture('util')); + % Add featherread and featherwrite to the MATLAB path. + testCase.applyFixture(PathFixture(fullfile('..', 'src'))); + % featherreadmex must be on the MATLAB path. + testCase.assertTrue(~isempty(which('featherreadmex')), ... + '''featherreadmex'' must be on the MATLAB path. Use ''addpath'' to add folders to the MATLAB path.'); + % featherwritemex must be on the MATLAB path. + testCase.assertTrue(~isempty(which('featherwritemex')), ... + '''featherwritemex'' must be on to the MATLAB path. 
Use ''addpath'' to add folders to the MATLAB path.'); + end + + end + + methods(TestMethodSetup) + + function setupTempWorkingDirectory(testCase) + import matlab.unittest.fixtures.WorkingFolderFixture; + testCase.applyFixture(WorkingFolderFixture); + end + + end + + methods(Test) + + function NumericDatatypesNoNulls(testCase) + filename = fullfile(pwd, 'temp.feather'); + + actualTable = createTable; + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyEqual(actualTable, expectedTable); + end + + function NumericDatatypesWithNaNRow(testCase) + filename = fullfile(pwd, 'temp.feather'); + + t = createTable; + + variableNames = {'single', ... + 'double', ... + 'int8', ... + 'int16', ... + 'int32', ... + 'int64', ... + 'uint8', ... + 'uint16', ... + 'uint32', ... + 'uint64'}; + variableTypes = repmat({'double'}, 10, 1)'; + numRows = 1; + numVariables = 10; + + addRow = table('Size', [numRows, numVariables], ... + 'VariableTypes', variableTypes, ... + 'VariableNames', variableNames); + addRow(1,:) = {NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN}; + actualTable = [t; addRow]; + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyEqual(actualTable, expectedTable); + end + + function NumericDatatypesWithNaNColumns(testCase) + filename = fullfile(pwd, 'temp.feather'); + + actualTable = createTable; + actualTable.double = [NaN; NaN; NaN]; + actualTable.int64 = [NaN; NaN; NaN]; + + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyEqual(actualTable, expectedTable); + end + + function NumericDatatypesWithExpInfSciNotation(testCase) + filename = fullfile(pwd, 'temp.feather'); + + actualTable = createTable; + actualTable.single(2) = 1.0418e+06; + + actualTable.double(1) = Inf; + actualTable.double(2) = exp(9); + + actualTable.int64(2) = 1.0418e+03; + + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyEqual(actualTable, expectedTable); + end + + function IgnoreRowVarNames(testCase) + 
filename = fullfile(pwd, 'temp.feather'); + + actualTable = createTable; + time = {'day1', 'day2', 'day3'}; + actualTable.Properties.RowNames = time; + expectedTable = featherRoundTrip(filename, actualTable); + actualTable = createTable; + testCase.verifyEqual(actualTable, expectedTable); + end + + function NotFeatherExtension(testCase) + filename = fullfile(pwd, 'temp.txt'); + + actualTable = createTable; + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyEqual(actualTable, expectedTable); + end + + function EmptyTable(testCase) + filename = fullfile(pwd, 'temp.feather'); + + actualTable = table; + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyEqual(actualTable, expectedTable); + end + + function zeroByNTable(testCase) + filename = fullfile(pwd, 'temp.feather'); + + actualTable = createTable; + actualTable([1, 2], :) = []; + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyEqual(actualTable, expectedTable); + end + + % %%%%%%%%%%%%%%%%%%% + % Negative test cases + % %%%%%%%%%%%%%%%%%%% + + function ErrorIfUnableToOpenFile(testCase) + filename = fullfile(pwd, 'temp.feather'); + + testCase.verifyError(@() featherread(filename), 'MATLAB:arrow:UnableToOpenFile'); + end + + function ErrorIfCorruptedFeatherFile(testCase) + filename = fullfile(pwd, 'temp.feather'); + + t = createTable; + featherwrite(filename, t); + + fileID = fopen(filename, 'w'); + fwrite(fileID, [1; 5]); + fclose(fileID); + + testCase.verifyError(@() featherread(filename), 'MATLAB:arrow:status:Invalid'); + end + + function ErrorIfInvalidFilenameDatatype(testCase) + filename = fullfile(pwd, 'temp.feather'); + + t = createTable; + + testCase.verifyError(@() featherwrite({filename}, t), 'MATLAB:arrow:InvalidFilenameDatatype'); + testCase.verifyError(@() featherread({filename}), 'MATLAB:arrow:InvalidFilenameDatatype'); + end + + function ErrorIfTooManyInputs(testCase) + filename = fullfile(pwd, 'temp.feather'); + + t = 
createTable; + + testCase.verifyError(@() featherwrite(filename, t, 'SomeValue', 'SomeOtherValue'), 'MATLAB:TooManyInputs'); + testCase.verifyError(@() featherread(filename, 'SomeValue', 'SomeOtherValue'), 'MATLAB:TooManyInputs'); + end + + function ErrorIfTooFewInputs(testCase) + testCase.verifyError(@() featherwrite(), 'MATLAB:narginchk:notEnoughInputs'); + testCase.verifyError(@() featherread(), 'MATLAB:narginchk:notEnoughInputs'); + end + + function ErrorIfMultiColVarExist(testCase) + filename = fullfile(pwd, 'temp.feather'); + + age = [38; 43; 38; 40; 49]; + smoker = logical([1; 0; 1; 0; 1]); + height = [71; 69; 64; 67; 64]; + weight = [176; 163; 131; 133; 119]; + bloodPressure = [124, 93; 109, 77; 125, 83; 117, 75; 122, 80]; + + t = table(age, smoker, height, weight, bloodPressure); + + testCase.verifyError(@() featherwrite(filename, t), 'MATLAB:arrow:UnsupportedVariableType'); + end + + function UnsupportedMATLABDatatypes(testCase) + filename = fullfile(pwd, 'temp.feather'); + + actualTable = createTable; + calendarDurationVariable = [calendarDuration(1, 7, 9); ... + calendarDuration(2, 1, 1); ... 
+ calendarDuration(5, 3, 2)]; + actualTable = addvars(actualTable, calendarDurationVariable); + + testCase.verifyError(@() featherwrite(filename, actualTable) ,'MATLAB:arrow:UnsupportedVariableType'); + end + + function NumericComplexUnsupported(testCase) + filename = fullfile(pwd, 'temp.feather'); + + actualTable = createTable; + actualTable.single(1) = 1.0418 + 2i; + actualTable.double(2) = exp(9) + 5i; + actualTable.int64(2) = 1.0418e+03; + + expectedTable = featherRoundTrip(filename, actualTable); + testCase.verifyNotEqual(actualTable, expectedTable); + end + + end + +end diff --git a/src/arrow/matlab/test/tfeathermex.m b/src/arrow/matlab/test/tfeathermex.m new file mode 100644 index 000000000..77070ad14 --- /dev/null +++ b/src/arrow/matlab/test/tfeathermex.m @@ -0,0 +1,76 @@ +classdef tfeathermex < matlab.unittest.TestCase + % Tests for MATLAB featherreadmex and featherwritemex. + + % Licensed to the Apache Software Foundation (ASF) under one or more + % contributor license agreements. See the NOTICE file distributed with + % this work for additional information regarding copyright ownership. + % The ASF licenses this file to you under the Apache License, Version + % 2.0 (the "License"); you may not use this file except in compliance + % with the License. You may obtain a copy of the License at + % + % http://www.apache.org/licenses/LICENSE-2.0 + % + % Unless required by applicable law or agreed to in writing, software + % distributed under the License is distributed on an "AS IS" BASIS, + % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + % implied. See the License for the specific language governing + % permissions and limitations under the License. + + methods(TestClassSetup) + + function addFeatherFunctionsToMATLABPath(testCase) + import matlab.unittest.fixtures.PathFixture + % Add Feather test utilities to the MATLAB path. + testCase.applyFixture(PathFixture('util')); + % Add featherread and featherwrite to the MATLAB path. 
+ testCase.applyFixture(PathFixture(fullfile('..', 'src'))); + % featherreadmex must be on the MATLAB path. + testCase.assertTrue(~isempty(which('featherreadmex')), ... + '''featherreadmex'' must be on the MATLAB path. Use ''addpath'' to add folders to the MATLAB path.'); + % featherwritemex must be on the MATLAB path. + testCase.assertTrue(~isempty(which('featherwritemex')), ... + '''featherwritemex'' must be on to the MATLAB path. Use ''addpath'' to add folders to the MATLAB path.'); + end + + end + + methods(TestMethodSetup) + + function setupTempWorkingDirectory(testCase) + import matlab.unittest.fixtures.WorkingFolderFixture; + testCase.applyFixture(WorkingFolderFixture); + end + + end + + methods(Test) + + function NumericDatatypesNulls(testCase) + filename = fullfile(pwd, 'temp.feather'); + + [expectedVariables, expectedMetadata] = createVariablesAndMetadataStructs(); + [actualVariables, ~] = featherMEXRoundTrip(filename, expectedVariables, expectedMetadata); + testCase.verifyEqual([actualVariables.Valid], [expectedVariables.Valid]); + end + + function InvalidMATLABTableVariableNames(testCase) + filename = fullfile(pwd, 'temp.feather'); + + % Create a table with an invalid MATLAB table variable name. 
+ invalidVariable = mlarrow.util.createVariableStruct('double', 1, true, '@'); + validVariable = mlarrow.util.createVariableStruct('double', 1, true, 'Valid'); + variables = [invalidVariable, validVariable]; + metadata = mlarrow.util.createMetadataStruct(1, 2); + featherwritemex(filename, variables, metadata); + t = featherread(filename); + + testCase.verifyEqual(t.Properties.VariableNames{1}, 'x_'); + testCase.verifyEqual(t.Properties.VariableNames{2}, 'Valid'); + + testCase.verifyEqual(t.Properties.VariableDescriptions{1}, 'Original variable name: ''@'''); + testCase.verifyEqual(t.Properties.VariableDescriptions{2}, ''); + end + + end + +end diff --git a/src/arrow/matlab/test/util/createTable.m b/src/arrow/matlab/test/util/createTable.m new file mode 100644 index 000000000..2bf67c68c --- /dev/null +++ b/src/arrow/matlab/test/util/createTable.m @@ -0,0 +1,68 @@ +function t = createTable() +% CREATETABLE Helper function for creating test table. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +variableNames = {'uint8', ... + 'uint16', ... + 'uint32', ... + 'uint64', ... + 'int8', ... + 'int16', ... + 'int32', ... + 'int64', ... + 'single', ... + 'double'}; + +variableTypes = {'uint8', ... + 'uint16', ... + 'uint32', ... + 'uint64', ... 
+ 'int8', ... + 'int16', ... + 'int32', ... + 'int64', ... + 'single', ... + 'double'}; + +uint8Data = uint8([1; 2; 3]); +uint16Data = uint16([1; 2; 3]); +uint32Data = uint32([1; 2; 3]); +uint64Data = uint64([1; 2; 3]); +int8Data = int8([1; 2; 3]); +int16Data = int16([1; 2; 3]); +int32Data = int32([1; 2; 3]); +int64Data = int64([1; 2; 3]); +singleData = single([1/2; 1/4; 1/8]); +doubleData = double([1/10; 1/100; 1/1000]); + +numRows = 3; +numVariables = 10; + +t = table('Size', [numRows, numVariables], 'VariableTypes', variableTypes, 'VariableNames', variableNames); + +t.uint8 = uint8Data; +t.uint16 = uint16Data; +t.uint32 = uint32Data; +t.uint64 = uint64Data; +t.int8 = int8Data; +t.int16 = int16Data; +t.int32 = int32Data; +t.int64 = int64Data; +t.single = singleData; +t.double = doubleData; + +end
\ No newline at end of file diff --git a/src/arrow/matlab/test/util/createVariablesAndMetadataStructs.m b/src/arrow/matlab/test/util/createVariablesAndMetadataStructs.m new file mode 100644 index 000000000..0c60cbfbb --- /dev/null +++ b/src/arrow/matlab/test/util/createVariablesAndMetadataStructs.m @@ -0,0 +1,97 @@ +function [variables, metadata] = createVariablesAndMetadataStructs() +% CREATEVARIABLESANDMETADATASTRUCTS Helper function for creating +% Feather MEX variables and metadata structs. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +import mlarrow.util.*; + +type = 'uint8'; +data = uint8([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'uint8'; +uint8Variable = createVariableStruct(type, data, valid, name); + +type = 'uint16'; +data = uint16([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'uint16'; +uint16Variable = createVariableStruct(type, data, valid, name); + +type = 'uint32'; +data = uint32([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'uint32'; +uint32Variable = createVariableStruct(type, data, valid, name); + +type = 'uint64'; +data = uint64([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'uint64'; +uint64Variable = createVariableStruct(type, data, valid, name); + +type = 'int8'; +data = int8([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'int8'; +int8Variable = createVariableStruct(type, data, valid, name); + +type = 'int16'; +data = int16([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'int16'; +int16Variable = createVariableStruct(type, data, valid, name); + +type = 'int32'; +data = int32([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'int32'; +int32Variable = createVariableStruct(type, data, valid, name); + +type = 'int64'; +data = int64([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'int64'; +int64Variable = createVariableStruct(type, data, valid, name); + +type = 'single'; +data = single([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'single'; +singleVariable = createVariableStruct(type, data, valid, name); + +type = 'double'; +data = double([1; 2; 3]); +valid = logical([0; 1; 0]); +name = 'double'; +doubleVariable = createVariableStruct(type, data, valid, name); + +variables = [uint8Variable, ... + uint16Variable, ... + uint32Variable, ... + uint64Variable, ... + int8Variable, ... + int16Variable, ... + int32Variable, ... + int64Variable, ... + singleVariable, ... 
+ doubleVariable]; + +numRows = 3; +numVariables = length(variables); + +metadata = createMetadataStruct(numRows, numVariables); +end diff --git a/src/arrow/matlab/test/util/featherMEXRoundTrip.m b/src/arrow/matlab/test/util/featherMEXRoundTrip.m new file mode 100644 index 000000000..49ab183ed --- /dev/null +++ b/src/arrow/matlab/test/util/featherMEXRoundTrip.m @@ -0,0 +1,22 @@ +function [variablesOut, metadataOut] = featherMEXRoundTrip(filename, variablesIn, metadataIn) +% FEATHERMEXROUNDTRIP Helper function for round tripping variables +% and metadata structs to a Feather file. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +featherwritemex(filename, variablesIn, metadataIn); +[variablesOut, metadataOut] = featherreadmex(filename); +end
\ No newline at end of file diff --git a/src/arrow/matlab/test/util/featherRoundTrip.m b/src/arrow/matlab/test/util/featherRoundTrip.m new file mode 100644 index 000000000..18f80562d --- /dev/null +++ b/src/arrow/matlab/test/util/featherRoundTrip.m @@ -0,0 +1,22 @@ +function tableOut = featherRoundTrip(filename, tableIn) +% FEATHERROUNDTRIP Helper function for round tripping a table +% to a Feather file. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +featherwrite(filename, tableIn); +tableOut = featherread(filename); +end
\ No newline at end of file |