diff options
Diffstat (limited to 'src/arrow/r/inst')
28 files changed, 4050 insertions, 0 deletions
diff --git a/src/arrow/r/inst/NOTICE.txt b/src/arrow/r/inst/NOTICE.txt new file mode 100644 index 000000000..a60979137 --- /dev/null +++ b/src/arrow/r/inst/NOTICE.txt @@ -0,0 +1,84 @@ +Apache Arrow +Copyright 2016-2019 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software from the SFrame project (BSD, 3-clause). +* Copyright (C) 2015 Dato, Inc. +* Copyright (c) 2009 Carnegie Mellon University. + +This product includes software from the Feather project (Apache 2.0) +https://github.com/wesm/feather + +This product includes software from the DyND project (BSD 2-clause) +https://github.com/libdynd + +This product includes software from the LLVM project + * distributed under the University of Illinois Open Source + +This product includes software from the google-lint project + * Copyright (c) 2009 Google Inc. All rights reserved. + +This product includes software from the mman-win32 project + * Copyright https://code.google.com/p/mman-win32/ + * Licensed under the MIT License; + +This product includes software from the LevelDB project + * Copyright (c) 2011 The LevelDB Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * Moved from Kudu http://github.com/cloudera/kudu + +This product includes software from the CMake project + * Copyright 2001-2009 Kitware, Inc. + * Copyright 2012-2014 Continuum Analytics, Inc. + * All rights reserved. + +This product includes software from https://github.com/matthew-brett/multibuild (BSD 2-clause) + * Copyright (c) 2013-2016, Matt Terry and Matthew Brett; all rights reserved. + +This product includes software from the Ibis project (Apache 2.0) + * Copyright (c) 2015 Cloudera, Inc. 
+ * https://github.com/cloudera/ibis + +This product includes software from Dremio (Apache 2.0) + * Copyright (C) 2017-2018 Dremio Corporation + * https://github.com/dremio/dremio-oss + +This product includes software from Google Guava (Apache 2.0) + * Copyright (C) 2007 The Guava Authors + * https://github.com/google/guava + +This product includes software from CMake (BSD 3-Clause) + * CMake - Cross Platform Makefile Generator + * Copyright 2000-2019 Kitware, Inc. and Contributors + +The web site includes files generated by Jekyll. + +-------------------------------------------------------------------------------- + +This product includes code from Apache Kudu, which includes the following in +its NOTICE file: + + Apache Kudu + Copyright 2016 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Portions of this software were developed at + Cloudera, Inc (http://www.cloudera.com/). + +-------------------------------------------------------------------------------- + +This product includes code from Apache ORC, which includes the following in +its NOTICE file: + + Apache ORC + Copyright 2013-2019 The Apache Software Foundation + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). + + This product includes software developed by Hewlett-Packard: + (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P diff --git a/src/arrow/r/inst/build_arrow_static.sh b/src/arrow/r/inst/build_arrow_static.sh new file mode 100755 index 000000000..c424646e3 --- /dev/null +++ b/src/arrow/r/inst/build_arrow_static.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
# The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Configures, builds and installs a static libarrow with cmake.
#
# Environment variables read (all optional):
#   ARROW_HOME, SOURCE_DIR, BUILD_DIR, DEST_DIR  - directory layout (see below)
#   CMAKE                                        - cmake executable to run
#   LIBARROW_MINIMAL                             - "false" (any case) turns the
#                                                  optional components ON by default
#   ARROW_*, AWSSDK_SOURCE, CMAKE_BUILD_TYPE,
#   CMAKE_UNITY_BUILD, CMAKE_GENERATOR           - per-flag overrides
#   EXTRA_CMAKE_FLAGS                            - extra arguments, word-split on purpose

# Quit on failure
set -e

# Print commands for debugging
set -x

# By default, this script assumes it's in the top-level dir of the apache/arrow
# git repository. Set any of the following env vars to customize where to read
# and write from
: "${ARROW_HOME:=$(pwd)}"                        # Only used in default SOURCE/BUILD dirs
: "${SOURCE_DIR:=${ARROW_HOME}/cpp}"             # Where the C++ source is
: "${BUILD_DIR:=${ARROW_HOME}/r/libarrow/dist}"  # Where cmake should build
: "${DEST_DIR:=${BUILD_DIR}}"                    # Where the resulting /lib and /include should be
: "${CMAKE:=$(command -v cmake || true)}"

# Without this check an empty CMAKE would make the first -D flag below be
# executed as the command, producing a confusing "command not found" error.
if [ -z "${CMAKE}" ]; then
  echo "cmake not found: install it or set the CMAKE environment variable" >&2
  exit 1
fi

# Make sure SOURCE and DEST dirs are absolute and exist
SOURCE_DIR="$(cd "${SOURCE_DIR}" && pwd)"
DEST_DIR="$(mkdir -p "${DEST_DIR}" && cd "${DEST_DIR}" && pwd)"

# Make some env vars case-insensitive
if [ -n "${LIBARROW_MINIMAL}" ]; then
  LIBARROW_MINIMAL="$(echo "${LIBARROW_MINIMAL}" | tr '[:upper:]' '[:lower:]')"
fi

# Only an explicit LIBARROW_MINIMAL=false enables the optional components by
# default; unset or any other value leaves them OFF unless overridden.
if [ "${LIBARROW_MINIMAL}" = "false" ]; then
  ARROW_DEFAULT_PARAM="ON"
else
  ARROW_DEFAULT_PARAM="OFF"
fi

mkdir -p "${BUILD_DIR}"
pushd "${BUILD_DIR}"
# ${CMAKE} and ${EXTRA_CMAKE_FLAGS} are deliberately left unquoted so that they
# may carry several words; every path-like argument is quoted so directories
# (and generator names such as "Unix Makefiles") containing spaces stay intact.
${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
    -DARROW_BUILD_TESTS=OFF \
    -DARROW_BUILD_SHARED=OFF \
    -DARROW_BUILD_STATIC=ON \
    -DARROW_COMPUTE=ON \
    -DARROW_CSV=ON \
    -DARROW_DATASET=${ARROW_DATASET:-ON} \
    -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-BUNDLED} \
    -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
    -DARROW_FILESYSTEM=ON \
    -DARROW_JEMALLOC=${ARROW_JEMALLOC:-$ARROW_DEFAULT_PARAM} \
    -DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \
    -DARROW_JSON=${ARROW_JSON:-ON} \
    -DARROW_PARQUET=${ARROW_PARQUET:-ON} \
    -DARROW_S3=${ARROW_S3:-$ARROW_DEFAULT_PARAM} \
    -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-$ARROW_DEFAULT_PARAM} \
    -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-$ARROW_DEFAULT_PARAM} \
    -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-$ARROW_DEFAULT_PARAM} \
    -DARROW_WITH_RE2=${ARROW_WITH_RE2:-ON} \
    -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-$ARROW_DEFAULT_PARAM} \
    -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
    -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-$ARROW_DEFAULT_PARAM} \
    -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-$ARROW_DEFAULT_PARAM} \
    -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} \
    -DCMAKE_INSTALL_LIBDIR=lib \
    -DCMAKE_INSTALL_PREFIX="${DEST_DIR}" \
    -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON \
    -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON \
    -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-ON} \
    ${EXTRA_CMAKE_FLAGS} \
    -G "${CMAKE_GENERATOR:-Unix Makefiles}" \
    "${SOURCE_DIR}"
${CMAKE} --build . --target install
popd
diff --git a/src/arrow/r/inst/demo_flight_server.py b/src/arrow/r/inst/demo_flight_server.py new file mode 100644 index 000000000..0c81aa912 --- /dev/null +++ b/src/arrow/r/inst/demo_flight_server.py @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" + An example Flight Python server. + See https://github.com/apache/arrow/blob/master/python/examples/flight/server.py +""" + +import ast +import threading +import time + +import pyarrow +import pyarrow.flight + + +class DemoFlightServer(pyarrow.flight.FlightServerBase): + def __init__(self, host="localhost", port=5005): + if isinstance(port, float): + # Because R is looser with integer vs. float + port = int(port) + location = "grpc+tcp://{}:{}".format(host, port) + super(DemoFlightServer, self).__init__(location) + self.flights = {} + self.host = host + + @classmethod + def descriptor_to_key(self, descriptor): + return (descriptor.descriptor_type.value, descriptor.command, + tuple(descriptor.path or tuple())) + + def _make_flight_info(self, key, descriptor, table): + location = pyarrow.flight.Location.for_grpc_tcp(self.host, self.port) + endpoints = [pyarrow.flight.FlightEndpoint(repr(key), [location]), ] + + mock_sink = pyarrow.MockOutputStream() + stream_writer = pyarrow.RecordBatchStreamWriter( + mock_sink, table.schema) + stream_writer.write_table(table) + stream_writer.close() + data_size = mock_sink.size() + + return pyarrow.flight.FlightInfo(table.schema, + descriptor, endpoints, + table.num_rows, data_size) + + def list_flights(self, context, criteria): + print("list_flights") + for key, table in self.flights.items(): + if key[1] is not None: + descriptor = \ + pyarrow.flight.FlightDescriptor.for_command(key[1]) + else: + descriptor = pyarrow.flight.FlightDescriptor.for_path(*key[2]) + + yield self._make_flight_info(key, descriptor, table) + 
+ def get_flight_info(self, context, descriptor): + print("get_flight_info") + key = DemoFlightServer.descriptor_to_key(descriptor) + if key in self.flights: + table = self.flights[key] + return self._make_flight_info(key, descriptor, table) + raise KeyError('Flight not found.') + + def do_put(self, context, descriptor, reader, writer): + print("do_put") + key = DemoFlightServer.descriptor_to_key(descriptor) + print(key) + self.flights[key] = reader.read_all() + print(self.flights[key]) + + def do_get(self, context, ticket): + print("do_get") + key = ast.literal_eval(ticket.ticket.decode()) + if key not in self.flights: + return None + return pyarrow.flight.RecordBatchStream(self.flights[key]) + + def list_actions(self, context): + print("list_actions") + return [ + ("clear", "Clear the stored flights."), + ("shutdown", "Shut down this server."), + ] + + def do_action(self, context, action): + print("do_action") + if action.type == "clear": + raise NotImplementedError( + "{} is not implemented.".format(action.type)) + elif action.type == "healthcheck": + pass + elif action.type == "shutdown": + yield pyarrow.flight.Result(pyarrow.py_buffer(b'Shutdown!')) + # Shut down on background thread to avoid blocking current + # request + threading.Thread(target=self._shutdown).start() + else: + raise KeyError("Unknown action {!r}".format(action.type)) + + def _shutdown(self): + """Shut down after a delay.""" + print("Server is shutting down...") + time.sleep(2) + self.shutdown() diff --git a/src/arrow/r/inst/include/cpp11.hpp b/src/arrow/r/inst/include/cpp11.hpp new file mode 100644 index 000000000..737fbb80b --- /dev/null +++ b/src/arrow/r/inst/include/cpp11.hpp @@ -0,0 +1,26 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include "cpp11/R.hpp" +#include "cpp11/altrep.hpp" +#include "cpp11/as.hpp" +#include "cpp11/attribute_proxy.hpp" +#include "cpp11/data_frame.hpp" +#include "cpp11/doubles.hpp" +#include "cpp11/environment.hpp" +#include 
"cpp11/external_pointer.hpp" +#include "cpp11/function.hpp" +#include "cpp11/integers.hpp" +#include "cpp11/list.hpp" +#include "cpp11/list_of.hpp" +#include "cpp11/logicals.hpp" +#include "cpp11/matrix.hpp" +#include "cpp11/named_arg.hpp" +#include "cpp11/protect.hpp" +#include "cpp11/r_bool.hpp" +#include "cpp11/r_string.hpp" +#include "cpp11/r_vector.hpp" +#include "cpp11/raws.hpp" +#include "cpp11/sexp.hpp" +#include "cpp11/strings.hpp" diff --git a/src/arrow/r/inst/include/cpp11/R.hpp b/src/arrow/r/inst/include/cpp11/R.hpp new file mode 100644 index 000000000..f32dcd0b8 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/R.hpp @@ -0,0 +1,46 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#ifdef R_INTERNALS_H_ +#if !(defined(R_NO_REMAP) && defined(STRICT_R_HEADERS)) +#error R headers were included before cpp11 headers \ + and at least one of R_NO_REMAP or STRICT_R_HEADERS \ + was not defined. +#endif +#endif + +#define R_NO_REMAP +#define STRICT_R_HEADERS +#include "Rinternals.h" + +// clang-format off +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wattributes" +#endif + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wattributes" +#endif +// clang-format on + +#include "cpp11/altrep.hpp" + +namespace cpp11 { +namespace literals { + +constexpr R_xlen_t operator"" _xl(unsigned long long int value) { return value; } + +} // namespace literals + +template <typename T> +inline T na(); + +template <typename T> +inline bool is_na(const T& value) { + return value == na<T>(); +} + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/altrep.hpp b/src/arrow/r/inst/include/cpp11/altrep.hpp new file mode 100644 index 000000000..3d6e1172e --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/altrep.hpp @@ -0,0 +1,44 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include "Rversion.h" + +#if defined(R_VERSION) && R_VERSION >= R_Version(3, 
5, 0) +#define HAS_ALTREP +#endif + +#ifndef HAS_ALTREP + +#define ALTREP(x) false + +#define REAL_ELT(x, i) REAL(x)[i] +#define INTEGER_ELT(x, i) INTEGER(x)[i] +#define LOGICAL_ELT(x, i) LOGICAL(x)[i] +#define RAW_ELT(x, i) RAW(x)[i] + +#define SET_REAL_ELT(x, i, val) REAL(x)[i] = val +#define SET_INTEGER_ELT(x, i, val) INTEGER(x)[i] = val +#define SET_LOGICAL_ELT(x, i, val) LOGICAL(x)[i] = val +#define SET_RAW_ELT(x, i, val) RAW(x)[i] = val + +#define REAL_GET_REGION(...) \ + do { \ + } while (false) + +#define INTEGER_GET_REGION(...) \ + do { \ + } while (false) +#endif + +#if !defined HAS_ALTREP || (defined(R_VERSION) && R_VERSION < R_Version(3, 6, 0)) + +#define LOGICAL_GET_REGION(...) \ + do { \ + } while (false) + +#define RAW_GET_REGION(...) \ + do { \ + } while (false) + +#endif diff --git a/src/arrow/r/inst/include/cpp11/as.hpp b/src/arrow/r/inst/include/cpp11/as.hpp new file mode 100644 index 000000000..dd9641a16 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/as.hpp @@ -0,0 +1,337 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <cmath> // for modf +#include <initializer_list> // for initializer_list +#include <memory> // for std::shared_ptr, std::weak_ptr, std::unique_ptr +#include <string> // for string, basic_string +#include <type_traits> // for decay, enable_if, is_same, is_convertible + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_xlength, R_xlen_t +#include "cpp11/protect.hpp" // for stop, protect, safe, protect::function + +namespace cpp11 { + +template <bool C, typename R = void> +using enable_if_t = typename std::enable_if<C, R>::type; + +template <typename T> +using decay_t = typename std::decay<T>::type; + +template <typename T> +struct is_smart_ptr : std::false_type {}; + +template <typename T> +struct is_smart_ptr<std::shared_ptr<T>> : std::true_type {}; + +template <typename T> +struct is_smart_ptr<std::unique_ptr<T>> : std::true_type {}; + +template <typename T> +struct 
is_smart_ptr<std::weak_ptr<T>> : std::true_type {}; + +template <typename T, typename R = void> +using enable_if_constructible_from_sexp = + enable_if_t<!is_smart_ptr<T>::value && // workaround for gcc 4.8 + std::is_class<T>::value && std::is_constructible<T, SEXP>::value, + R>; + +template <typename T, typename R = void> +using enable_if_is_sexp = enable_if_t<std::is_same<T, SEXP>::value, R>; + +template <typename T, typename R = void> +using enable_if_convertible_to_sexp = enable_if_t<std::is_convertible<T, SEXP>::value, R>; + +template <typename T, typename R = void> +using disable_if_convertible_to_sexp = + enable_if_t<!std::is_convertible<T, SEXP>::value, R>; + +template <typename T, typename R = void> +using enable_if_integral = + enable_if_t<std::is_integral<T>::value && !std::is_same<T, bool>::value && + !std::is_same<T, char>::value, + R>; + +template <typename T, typename R = void> +using enable_if_floating_point = + typename std::enable_if<std::is_floating_point<T>::value, R>::type; + +template <typename E, typename R = void> +using enable_if_enum = enable_if_t<std::is_enum<E>::value, R>; + +template <typename T, typename R = void> +using enable_if_bool = enable_if_t<std::is_same<T, bool>::value, R>; + +template <typename T, typename R = void> +using enable_if_char = enable_if_t<std::is_same<T, char>::value, R>; + +template <typename T, typename R = void> +using enable_if_std_string = enable_if_t<std::is_same<T, std::string>::value, R>; + +template <typename T, typename R = void> +using enable_if_c_string = enable_if_t<std::is_same<T, const char*>::value, R>; + +// https://stackoverflow.com/a/1521682/2055486 +// +inline bool is_convertable_without_loss_to_integer(double value) { + double int_part; + return std::modf(value, &int_part) == 0.0; +} + +template <typename T> +enable_if_constructible_from_sexp<T, T> as_cpp(SEXP from) { + return T(from); +} + +template <typename T> +enable_if_is_sexp<T, T> as_cpp(SEXP from) { + return from; +} + +template 
<typename T> +enable_if_integral<T, T> as_cpp(SEXP from) { + if (Rf_isInteger(from)) { + if (Rf_xlength(from) == 1) { + return INTEGER_ELT(from, 0); + } + } else if (Rf_isReal(from)) { + if (Rf_xlength(from) == 1) { + if (ISNA(REAL_ELT(from, 0))) { + return NA_INTEGER; + } + double value = REAL_ELT(from, 0); + if (is_convertable_without_loss_to_integer(value)) { + return value; + } + } + } else if (Rf_isLogical(from)) { + if (Rf_xlength(from) == 1) { + if (LOGICAL_ELT(from, 0) == NA_LOGICAL) { + return NA_INTEGER; + } + } + } + + stop("Expected single integer value"); +} + +template <typename E> +enable_if_enum<E, E> as_cpp(SEXP from) { + if (Rf_isInteger(from)) { + using underlying_type = typename std::underlying_type<E>::type; + using int_type = typename std::conditional<std::is_same<char, underlying_type>::value, + int, // as_cpp<char> would trigger + // undesired string conversions + underlying_type>::type; + return static_cast<E>(as_cpp<int_type>(from)); + } + + stop("Expected single integer value"); +} + +template <typename T> +enable_if_bool<T, T> as_cpp(SEXP from) { + if (Rf_isLogical(from)) { + if (Rf_xlength(from) == 1) { + return LOGICAL_ELT(from, 0) == 1; + } + } + + stop("Expected single logical value"); +} + +template <typename T> +enable_if_floating_point<T, T> as_cpp(SEXP from) { + if (Rf_isReal(from)) { + if (Rf_xlength(from) == 1) { + return REAL_ELT(from, 0); + } + } + // All 32 bit integers can be coerced to doubles, so we just convert them. 
+ if (Rf_isInteger(from)) { + if (Rf_xlength(from) == 1) { + if (INTEGER_ELT(from, 0) == NA_INTEGER) { + return NA_REAL; + } + return INTEGER_ELT(from, 0); + } + } + + // Also allow NA values + if (Rf_isLogical(from)) { + if (Rf_xlength(from) == 1) { + if (LOGICAL_ELT(from, 0) == NA_LOGICAL) { + return NA_REAL; + } + } + } + + stop("Expected single double value"); +} + +template <typename T> +enable_if_char<T, T> as_cpp(SEXP from) { + if (Rf_isString(from)) { + if (Rf_xlength(from) == 1) { + return unwind_protect([&] { return Rf_translateCharUTF8(STRING_ELT(from, 0))[0]; }); + } + } + + stop("Expected string vector of length 1"); +} + +template <typename T> +enable_if_c_string<T, T> as_cpp(SEXP from) { + if (Rf_isString(from)) { + if (Rf_xlength(from) == 1) { + // TODO: use vmaxget / vmaxset here? + return {unwind_protect([&] { return Rf_translateCharUTF8(STRING_ELT(from, 0)); })}; + } + } + + stop("Expected string vector of length 1"); +} + +template <typename T> +enable_if_std_string<T, T> as_cpp(SEXP from) { + return {as_cpp<const char*>(from)}; +} + +/// Temporary workaround for compatibility with cpp11 0.1.0 +template <typename T> +enable_if_t<!std::is_same<decay_t<T>, T>::value, decay_t<T>> as_cpp(SEXP from) { + return as_cpp<decay_t<T>>(from); +} + +template <typename T> +enable_if_integral<T, SEXP> as_sexp(T from) { + return safe[Rf_ScalarInteger](from); +} + +template <typename T> +enable_if_floating_point<T, SEXP> as_sexp(T from) { + return safe[Rf_ScalarReal](from); +} + +template <typename T> +enable_if_bool<T, SEXP> as_sexp(T from) { + return safe[Rf_ScalarLogical](from); +} + +template <typename T> +enable_if_c_string<T, SEXP> as_sexp(T from) { + return unwind_protect([&] { return Rf_ScalarString(Rf_mkCharCE(from, CE_UTF8)); }); +} + +template <typename T> +enable_if_std_string<T, SEXP> as_sexp(const T& from) { + return as_sexp(from.c_str()); +} + +template <typename Container, typename T = typename Container::value_type, + typename = 
disable_if_convertible_to_sexp<Container>> +enable_if_integral<T, SEXP> as_sexp(const Container& from) { + R_xlen_t size = from.size(); + SEXP data = safe[Rf_allocVector](INTSXP, size); + + auto it = from.begin(); + int* data_p = INTEGER(data); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + data_p[i] = *it; + } + return data; +} + +inline SEXP as_sexp(std::initializer_list<int> from) { + return as_sexp<std::initializer_list<int>>(from); +} + +template <typename Container, typename T = typename Container::value_type, + typename = disable_if_convertible_to_sexp<Container>> +enable_if_floating_point<T, SEXP> as_sexp(const Container& from) { + R_xlen_t size = from.size(); + SEXP data = safe[Rf_allocVector](REALSXP, size); + + auto it = from.begin(); + double* data_p = REAL(data); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + data_p[i] = *it; + } + return data; +} + +inline SEXP as_sexp(std::initializer_list<double> from) { + return as_sexp<std::initializer_list<double>>(from); +} + +template <typename Container, typename T = typename Container::value_type, + typename = disable_if_convertible_to_sexp<Container>> +enable_if_bool<T, SEXP> as_sexp(const Container& from) { + R_xlen_t size = from.size(); + SEXP data = safe[Rf_allocVector](LGLSXP, size); + + auto it = from.begin(); + int* data_p = LOGICAL(data); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + data_p[i] = *it; + } + return data; +} + +inline SEXP as_sexp(std::initializer_list<bool> from) { + return as_sexp<std::initializer_list<bool>>(from); +} + +namespace detail { +template <typename Container, typename AsCstring> +SEXP as_sexp_strings(const Container& from, AsCstring&& c_str) { + R_xlen_t size = from.size(); + + SEXP data; + try { + data = PROTECT(safe[Rf_allocVector](STRSXP, size)); + + auto it = from.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + SET_STRING_ELT(data, i, safe[Rf_mkCharCE](c_str(*it), CE_UTF8)); + } + } catch (const unwind_exception& e) { + UNPROTECT(1); + throw e; + } + + 
UNPROTECT(1); + return data; +} +} // namespace detail + +class r_string; + +template <typename T, typename R = void> +using disable_if_r_string = enable_if_t<!std::is_same<T, cpp11::r_string>::value, R>; + +template <typename Container, typename T = typename Container::value_type, + typename = disable_if_r_string<T>> +enable_if_t<std::is_convertible<T, std::string>::value && + !std::is_convertible<T, const char*>::value, + SEXP> +as_sexp(const Container& from) { + return detail::as_sexp_strings(from, [](const std::string& s) { return s.c_str(); }); +} + +template <typename Container, typename T = typename Container::value_type> +enable_if_c_string<T, SEXP> as_sexp(const Container& from) { + return detail::as_sexp_strings(from, [](const char* s) { return s; }); +} + +inline SEXP as_sexp(std::initializer_list<const char*> from) { + return as_sexp<std::initializer_list<const char*>>(from); +} + +template <typename T, typename = disable_if_r_string<T>> +enable_if_convertible_to_sexp<T, SEXP> as_sexp(const T& from) { + return from; +} + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/attribute_proxy.hpp b/src/arrow/r/inst/include/cpp11/attribute_proxy.hpp new file mode 100644 index 000000000..7301919c7 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/attribute_proxy.hpp @@ -0,0 +1,50 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <initializer_list> // for initializer_list +#include <string> // for string, basic_string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_install, PROTECT, Rf_... 
+#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for protect, safe, protect::function + +namespace cpp11 { + +class sexp; + +template <typename T> +class attribute_proxy { + private: + const T& parent_; + SEXP symbol_; + + public: + attribute_proxy(const T& parent, const char* index) + : parent_(parent), symbol_(safe[Rf_install](index)) {} + + attribute_proxy(const T& parent, const std::string& index) + : parent_(parent), symbol_(safe[Rf_install](index.c_str())) {} + + attribute_proxy(const T& parent, SEXP index) : parent_(parent), symbol_(index) {} + + template <typename C> + attribute_proxy& operator=(C rhs) { + SEXP value = PROTECT(as_sexp(rhs)); + Rf_setAttrib(parent_.data(), symbol_, value); + UNPROTECT(1); + return *this; + } + + template <typename C> + attribute_proxy& operator=(std::initializer_list<C> rhs) { + SEXP value = PROTECT(as_sexp(rhs)); + Rf_setAttrib(parent_.data(), symbol_, value); + UNPROTECT(1); + return *this; + } + + operator SEXP() const { return safe[Rf_getAttrib](parent_.data(), symbol_); } +}; + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/data_frame.hpp b/src/arrow/r/inst/include/cpp11/data_frame.hpp new file mode 100644 index 000000000..9abbc0f33 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/data_frame.hpp @@ -0,0 +1,102 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <cstdlib> // for abs +#include <cstdlib> +#include <initializer_list> // for initializer_list +#include <string> // for string, basic_string +#include <utility> // for move + +#include "R_ext/Arith.h" // for NA_INTEGER +#include "cpp11/R.hpp" // for Rf_xlength, SEXP, SEXPREC, INTEGER +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/list.hpp" // for list, r_vector<>::r_vector, r_v... 
+#include "cpp11/r_vector.hpp" // for r_vector + +namespace cpp11 { + +class named_arg; +namespace writable { +class data_frame; +} // namespace writable + +class data_frame : public list { + using list::list; + + friend class writable::data_frame; + + /* we cannot use Rf_getAttrib because it has a special case for c(NA, -n) and creates + * the full vector */ + static SEXP get_attrib0(SEXP x, SEXP sym) { + for (SEXP attr = ATTRIB(x); attr != R_NilValue; attr = CDR(attr)) { + if (TAG(attr) == sym) { + return CAR(attr); + } + } + + return R_NilValue; + } + + static int calc_nrow(SEXP x) { + auto nms = get_attrib0(x, R_RowNamesSymbol); + bool has_short_rownames = + (Rf_isInteger(nms) && Rf_xlength(nms) == 2 && INTEGER(nms)[0] == NA_INTEGER); + if (has_short_rownames) { + return abs(INTEGER(nms)[1]); + } + + if (!Rf_isNull(nms)) { + return Rf_xlength(nms); + } + + if (Rf_xlength(x) == 0) { + return 0; + } + + return Rf_xlength(VECTOR_ELT(x, 0)); + } + + public: + /* Adapted from + * https://github.com/wch/r-source/blob/f2a0dfab3e26fb42b8b296fcba40cbdbdbec767d/src/main/attrib.c#L198-L207 + */ + R_xlen_t nrow() const { return calc_nrow(*this); } + R_xlen_t ncol() const { return size(); } +}; + +namespace writable { +class data_frame : public cpp11::data_frame { + private: + writable::list set_data_frame_attributes(writable::list&& x) { + x.attr(R_RowNamesSymbol) = {NA_INTEGER, -static_cast<int>(calc_nrow(x))}; + x.attr(R_ClassSymbol) = "data.frame"; + return std::move(x); + } + + public: + data_frame(const SEXP data) : cpp11::data_frame(set_data_frame_attributes(data)) {} + data_frame(const SEXP data, bool is_altrep) + : cpp11::data_frame(set_data_frame_attributes(data), is_altrep) {} + data_frame(std::initializer_list<list> il) + : cpp11::data_frame(set_data_frame_attributes(writable::list(il))) {} + data_frame(std::initializer_list<named_arg> il) + : cpp11::data_frame(set_data_frame_attributes(writable::list(il))) {} + + using cpp11::data_frame::ncol; + using 
cpp11::data_frame::nrow; + + attribute_proxy<data_frame> attr(const char* name) const { return {*this, name}; } + + attribute_proxy<data_frame> attr(const std::string& name) const { + return {*this, name.c_str()}; + } + + attribute_proxy<data_frame> attr(SEXP name) const { return {*this, name}; } + + attribute_proxy<data_frame> names() const { return {*this, R_NamesSymbol}; } +}; + +} // namespace writable + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/declarations.hpp b/src/arrow/r/inst/include/cpp11/declarations.hpp new file mode 100644 index 000000000..c67c9db1b --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/declarations.hpp @@ -0,0 +1,54 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <cstring> +#include <string> +#include <vector> + +#ifndef CPP11_PARTIAL +#include "cpp11.hpp" +using namespace cpp11; +namespace writable = cpp11::writable; +#endif + +#include <R_ext/Rdynload.h> + +namespace cpp11 { +template <class T> +T& unmove(T&& t) { + return t; +} +} // namespace cpp11 + +#ifdef HAS_UNWIND_PROTECT +#define CPP11_UNWIND R_ContinueUnwind(err); +#else +#define CPP11_UNWIND \ + do { \ + } while (false); +#endif + +#define CPP11_ERROR_BUFSIZE 8192 + +#define BEGIN_CPP11 \ + SEXP err = R_NilValue; \ + char buf[CPP11_ERROR_BUFSIZE] = ""; \ + try { +#define END_CPP11 \ + } \ + catch (cpp11::unwind_exception & e) { \ + err = e.token; \ + } \ + catch (std::exception & e) { \ + strncpy(buf, e.what(), sizeof(buf) - 1); \ + } \ + catch (...) 
{ \ + strncpy(buf, "C++ error (unknown cause)", sizeof(buf) - 1); \ + } \ + if (buf[0] != '\0') { \ + Rf_errorcall(R_NilValue, "%s", buf); \ + } else if (err != R_NilValue) { \ + CPP11_UNWIND \ + } \ + return R_NilValue; diff --git a/src/arrow/r/inst/include/cpp11/doubles.hpp b/src/arrow/r/inst/include/cpp11/doubles.hpp new file mode 100644 index 000000000..a12f7c7c0 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/doubles.hpp @@ -0,0 +1,145 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <algorithm> // for min +#include <array> // for array +#include <initializer_list> // for initializer_list + +#include "R_ext/Arith.h" // for ISNA +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_allocVector, REAL +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for SEXP, SEXPREC, REAL_ELT, R_Preserve... +#include "cpp11/r_vector.hpp" // for vector, vector<>::proxy, vector<>::... +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for doubles + +namespace cpp11 { + +template <> +inline SEXP r_vector<double>::valid_type(SEXP data) { + if (TYPEOF(data) != REALSXP) { + throw type_error(REALSXP, TYPEOF(data)); + } + return data; +} + +template <> +inline double r_vector<double>::operator[](const R_xlen_t pos) const { + // NOPROTECT: likely too costly to unwind protect every elt + return is_altrep_ ? 
REAL_ELT(data_, pos) : data_p_[pos]; +} + +template <> +inline double* r_vector<double>::get_p(bool is_altrep, SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return REAL(data); + } +} + +template <> +inline void r_vector<double>::const_iterator::fill_buf(R_xlen_t pos) { + length_ = std::min(64_xl, data_->size() - pos); + REAL_GET_REGION(data_->data_, pos, length_, buf_.data()); + block_start_ = pos; +} + +typedef r_vector<double> doubles; + +namespace writable { + +template <> +inline typename r_vector<double>::proxy& r_vector<double>::proxy::operator=( + const double& rhs) { + if (is_altrep_) { + // NOPROTECT: likely too costly to unwind protect every set elt + SET_REAL_ELT(data_, index_, rhs); + } else { + *p_ = rhs; + } + return *this; +} + +template <> +inline r_vector<double>::proxy::operator double() const { + if (p_ == nullptr) { + // NOPROTECT: likely too costly to unwind protect every elt + return REAL_ELT(data_, index_); + } else { + return *p_; + } +} + +template <> +inline r_vector<double>::r_vector(std::initializer_list<double> il) + : cpp11::r_vector<double>(as_sexp(il)), capacity_(il.size()) {} + +template <> +inline r_vector<double>::r_vector(std::initializer_list<named_arg> il) + : cpp11::r_vector<double>(safe[Rf_allocVector](REALSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + int n_protected = 0; + + try { + unwind_protect([&] { + Rf_setAttrib(data_, R_NamesSymbol, Rf_allocVector(STRSXP, capacity_)); + SEXP names = PROTECT(Rf_getAttrib(data_, R_NamesSymbol)); + ++n_protected; + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + data_p_[i] = REAL_ELT(it->value(), 0); + SET_STRING_ELT(names, i, Rf_mkCharCE(it->name(), CE_UTF8)); + } + UNPROTECT(n_protected); + }); + } catch (const unwind_exception& e) { + preserved.release(protect_); + UNPROTECT(n_protected); + throw e; + } +} + +template <> +inline void r_vector<double>::reserve(R_xlen_t new_capacity) { + data_ = data_ == 
R_NilValue ? safe[Rf_allocVector](REALSXP, new_capacity) + : safe[Rf_xlengthgets](data_, new_capacity); + SEXP old_protect = protect_; + protect_ = preserved.insert(data_); + preserved.release(old_protect); + + data_p_ = REAL(data_); + capacity_ = new_capacity; +} + +template <> +inline void r_vector<double>::push_back(double value) { + while (length_ >= capacity_) { + reserve(capacity_ == 0 ? 1 : capacity_ *= 2); + } + if (is_altrep_) { + SET_REAL_ELT(data_, length_, value); + } else { + data_p_[length_] = value; + } + ++length_; +} + +typedef r_vector<double> doubles; + +} // namespace writable + +template <> +inline double na() { + return NA_REAL; +} + +template <> +inline bool is_na(const double& x) { + return ISNA(x); +} +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/environment.hpp b/src/arrow/r/inst/include/cpp11/environment.hpp new file mode 100644 index 000000000..038fb60a8 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/environment.hpp @@ -0,0 +1,75 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <string> // for string, basic_string + +#include "Rversion.h" // for R_VERSION, R_Version +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_install, Rf_findVarIn... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for protect, protect::function, safe, unwin... 
+#include "cpp11/sexp.hpp" // for sexp + +#if R_VERSION >= R_Version(4, 0, 0) +#define HAS_REMOVE_VAR_FROM_FRAME +#endif + +#ifndef HAS_REMOVE_VAR_FROM_FRAME +#include "cpp11/function.hpp" +#endif + +namespace cpp11 { + +class environment { + private: + sexp env_; + + class proxy { + SEXP parent_; + SEXP name_; + + public: + proxy(SEXP parent, SEXP name) : parent_(parent), name_(name) {} + + template <typename T> + proxy& operator=(T value) { + safe[Rf_defineVar](name_, as_sexp(value), parent_); + return *this; + } + operator SEXP() const { return safe[Rf_findVarInFrame3](parent_, name_, TRUE); }; + operator sexp() const { return SEXP(); }; + }; + + public: + environment(SEXP env) : env_(env) {} + proxy operator[](SEXP name) const { return {env_, name}; } + proxy operator[](const char* name) const { return operator[](safe[Rf_install](name)); } + proxy operator[](const std::string& name) const { return operator[](name.c_str()); } + + bool exists(SEXP name) const { + SEXP res = safe[Rf_findVarInFrame3](env_, name, FALSE); + return res != R_UnboundValue; + } + bool exists(const char* name) const { return exists(safe[Rf_install](name)); } + + bool exists(const std::string& name) const { return exists(name.c_str()); } + + void remove(SEXP name) { + PROTECT(name); +#ifdef HAS_REMOVE_VAR_FROM_FRAME + R_removeVarFromFrame(name, env_); +#else + auto remove = package("base")["remove"]; + remove(name, "envir"_nm = env_); +#endif + UNPROTECT(1); + } + + void remove(const char* name) { remove(safe[Rf_install](name)); } + + R_xlen_t size() const { return Rf_xlength(env_); } + + operator SEXP() const { return env_; } +}; + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/external_pointer.hpp b/src/arrow/r/inst/include/cpp11/external_pointer.hpp new file mode 100644 index 000000000..059a1aa55 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/external_pointer.hpp @@ -0,0 +1,166 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include 
<cstddef> // for nullptr_t, NULL +#include <memory> // for bad_weak_ptr +#include <type_traits> // for add_lvalue_reference + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, TYPEOF, R_NilValue, R_C... +#include "cpp11/protect.hpp" // for protect, safe, protect::function +#include "cpp11/r_bool.hpp" // for r_bool +#include "cpp11/r_vector.hpp" // for type_error +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +template <typename T> +void default_deleter(T* obj) { + delete obj; +} + +template <typename T, void Deleter(T*) = default_deleter<T>> +class external_pointer { + private: + sexp data_ = R_NilValue; + + static SEXP valid_type(SEXP data) { + if (TYPEOF(data) != EXTPTRSXP) { + throw type_error(EXTPTRSXP, TYPEOF(data)); + } + + return data; + } + + static void r_deleter(SEXP p) { + if (TYPEOF(p) != EXTPTRSXP) return; + + T* ptr = static_cast<T*>(R_ExternalPtrAddr(p)); + + if (ptr == NULL) { + return; + } + + R_ClearExternalPtr(p); + + Deleter(ptr); + } + + public: + using pointer = T*; + + external_pointer() noexcept {} + external_pointer(std::nullptr_t) noexcept {} + + external_pointer(SEXP data) : data_(valid_type(data)) {} + + external_pointer(pointer p, bool use_deleter = true, bool finalize_on_exit = true) + : data_(safe[R_MakeExternalPtr]((void*)p, R_NilValue, R_NilValue)) { + if (use_deleter) { + R_RegisterCFinalizerEx(data_, r_deleter, static_cast<r_bool>(finalize_on_exit)); + } + } + + external_pointer(const external_pointer& rhs) { + data_ = safe[Rf_shallow_duplicate](rhs.data_); + } + + external_pointer(external_pointer&& rhs) { reset(rhs.release()); } + + external_pointer& operator=(external_pointer&& rhs) noexcept { reset(rhs.release()); } + + external_pointer& operator=(std::nullptr_t) noexcept { reset(); }; + + operator SEXP() const noexcept { return data_; } + + pointer get() const noexcept { + pointer addr = static_cast<T*>(R_ExternalPtrAddr(data_)); + if (addr == nullptr) { + return nullptr; + } + return addr; + } + + typename 
std::add_lvalue_reference<T>::type operator*() { + pointer addr = get(); + if (addr == nullptr) { + throw std::bad_weak_ptr(); + } + return *get(); + } + + pointer operator->() const { + pointer addr = get(); + if (addr == nullptr) { + throw std::bad_weak_ptr(); + } + return get(); + } + + pointer release() noexcept { + if (get() == nullptr) { + return nullptr; + } + pointer ptr = get(); + R_ClearExternalPtr(data_); + + return ptr; + } + + void reset(pointer ptr = pointer()) { + SEXP old_data = data_; + data_ = safe[R_MakeExternalPtr]((void*)ptr, R_NilValue, R_NilValue); + r_deleter(old_data); + } + + void swap(external_pointer& other) noexcept { + SEXP tmp = other.data_; + other.data_ = data_; + data_ = tmp; + } + + operator bool() noexcept { return data_ != nullptr; } +}; + +template <class T, void Deleter(T*)> +void swap(external_pointer<T, Deleter>& lhs, external_pointer<T, Deleter>& rhs) noexcept { + lhs.swap(rhs); +} + +template <class T, void Deleter(T*)> +bool operator==(const external_pointer<T, Deleter>& x, + const external_pointer<T, Deleter>& y) { + return x.data_ == y.data_; +} + +template <class T, void Deleter(T*)> +bool operator!=(const external_pointer<T, Deleter>& x, + const external_pointer<T, Deleter>& y) { + return x.data_ != y.data_; +} + +template <class T, void Deleter(T*)> +bool operator<(const external_pointer<T, Deleter>& x, + const external_pointer<T, Deleter>& y) { + return x.data_ < y.data_; +} + +template <class T, void Deleter(T*)> +bool operator<=(const external_pointer<T, Deleter>& x, + const external_pointer<T, Deleter>& y) { + return x.data_ <= y.data_; +} + +template <class T, void Deleter(T*)> +bool operator>(const external_pointer<T, Deleter>& x, + const external_pointer<T, Deleter>& y) { + return x.data_ > y.data_; +} + +template <class T, void Deleter(T*)> +bool operator>=(const external_pointer<T, Deleter>& x, + const external_pointer<T, Deleter>& y) { + return x.data_ >= y.data_; +} + +} // namespace cpp11 diff --git 
a/src/arrow/r/inst/include/cpp11/function.hpp b/src/arrow/r/inst/include/cpp11/function.hpp new file mode 100644 index 000000000..06e602ac6 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/function.hpp @@ -0,0 +1,78 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <string.h> // for strcmp + +#include <string> // for string, basic_string +#include <utility> // for forward + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, CDR, Rf_install, SETCAR +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for protect, protect::function, safe +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +class function { + public: + function(SEXP data) : data_(data) {} + + template <typename... Args> + sexp operator()(Args&&... args) const { + // Size of the arguments plus one for the function name itself + R_xlen_t num_args = sizeof...(args) + 1; + + sexp call(safe[Rf_allocVector](LANGSXP, num_args)); + + construct_call(call, data_, std::forward<Args>(args)...); + + return safe[Rf_eval](call, R_GlobalEnv); + } + + private: + SEXP data_; + + template <typename... Args> + SEXP construct_call(SEXP val, const named_arg& arg, Args&&... args) const { + SETCAR(val, arg.value()); + SET_TAG(val, safe[Rf_install](arg.name())); + val = CDR(val); + return construct_call(val, std::forward<Args>(args)...); + } + + // Construct the call recursively, each iteration adds an Arg to the pairlist. + // We need + template <typename T, typename... Args> + SEXP construct_call(SEXP val, const T& arg, Args&&... 
args) const { + SETCAR(val, as_sexp(arg)); + val = CDR(val); + return construct_call(val, std::forward<Args>(args)...); + } + + // Base case, just return + SEXP construct_call(SEXP val) const { return val; } +}; + +class package { + public: + package(const char* name) : data_(get_namespace(name)) {} + package(const std::string& name) : data_(get_namespace(name.c_str())) {} + function operator[](const char* name) { + return safe[Rf_findFun](safe[Rf_install](name), data_); + } + function operator[](const std::string& name) { return operator[](name.c_str()); } + + private: + static SEXP get_namespace(const char* name) { + if (strcmp(name, "base") == 0) { + return R_BaseEnv; + } + sexp name_sexp = safe[Rf_install](name); + return safe[Rf_findVarInFrame](R_NamespaceRegistry, name_sexp); + } + + SEXP data_; +}; +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/integers.hpp b/src/arrow/r/inst/include/cpp11/integers.hpp new file mode 100644 index 000000000..19f85c060 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/integers.hpp @@ -0,0 +1,146 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <algorithm> // for min +#include <array> // for array +#include <initializer_list> // for initializer_list + +#include "R_ext/Arith.h" // for NA_INTEGER +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_allocVector +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for preserved +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for integers + +namespace cpp11 { + +template <> +inline SEXP r_vector<int>::valid_type(SEXP data) { + if (TYPEOF(data) != INTSXP) { + throw type_error(INTSXP, TYPEOF(data)); + } + return data; +} + +template <> +inline int r_vector<int>::operator[](const R_xlen_t pos) const { + // NOPROTECT: likely too 
costly to unwind protect every elt + return is_altrep_ ? INTEGER_ELT(data_, pos) : data_p_[pos]; +} + +template <> +inline int* r_vector<int>::get_p(bool is_altrep, SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return INTEGER(data); + } +} + +template <> +inline void r_vector<int>::const_iterator::fill_buf(R_xlen_t pos) { + length_ = std::min(64_xl, data_->size() - pos); + INTEGER_GET_REGION(data_->data_, pos, length_, buf_.data()); + block_start_ = pos; +} + +typedef r_vector<int> integers; + +namespace writable { + +template <> +inline typename r_vector<int>::proxy& r_vector<int>::proxy::operator=(const int& rhs) { + if (is_altrep_) { + // NOPROTECT: likely too costly to unwind protect every set elt + SET_INTEGER_ELT(data_, index_, rhs); + } else { + *p_ = rhs; + } + return *this; +} + +template <> +inline r_vector<int>::proxy::operator int() const { + if (p_ == nullptr) { + // NOPROTECT: likely too costly to unwind protect every elt + return INTEGER_ELT(data_, index_); + } else { + return *p_; + } +} + +template <> +inline r_vector<int>::r_vector(std::initializer_list<int> il) + : cpp11::r_vector<int>(as_sexp(il)), capacity_(il.size()) {} + +template <> +inline void r_vector<int>::reserve(R_xlen_t new_capacity) { + data_ = data_ == R_NilValue ? 
safe[Rf_allocVector](INTSXP, new_capacity) + : safe[Rf_xlengthgets](data_, new_capacity); + SEXP old_protect = protect_; + + // Protect the new data + protect_ = preserved.insert(data_); + + // Release the old protection; + preserved.release(old_protect); + + data_p_ = INTEGER(data_); + capacity_ = new_capacity; +} + +template <> +inline r_vector<int>::r_vector(std::initializer_list<named_arg> il) + : cpp11::r_vector<int>(safe[Rf_allocVector](INTSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + int n_protected = 0; + + try { + unwind_protect([&] { + Rf_setAttrib(data_, R_NamesSymbol, Rf_allocVector(STRSXP, capacity_)); + SEXP names = PROTECT(Rf_getAttrib(data_, R_NamesSymbol)); + ++n_protected; + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + data_p_[i] = INTEGER_ELT(it->value(), 0); + SET_STRING_ELT(names, i, Rf_mkCharCE(it->name(), CE_UTF8)); + } + UNPROTECT(n_protected); + }); + } catch (const unwind_exception& e) { + preserved.release(protect_); + UNPROTECT(n_protected); + throw e; + } +} + +template <> +inline void r_vector<int>::push_back(int value) { + while (length_ >= capacity_) { + reserve(capacity_ == 0 ? 
1 : capacity_ *= 2); + } + if (is_altrep_) { + // NOPROTECT: likely too costly to unwind protect every elt + SET_INTEGER_ELT(data_, length_, value); + } else { + data_p_[length_] = value; + } + ++length_; +} + +typedef r_vector<int> integers; + +} // namespace writable + +template <> +inline int na() { + return NA_INTEGER; +} + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/list.hpp b/src/arrow/r/inst/include/cpp11/list.hpp new file mode 100644 index 000000000..28140fe2f --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/list.hpp @@ -0,0 +1,138 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <initializer_list> // for initializer_list + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, SET_VECTOR_ELT +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for preserved +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for list + +namespace cpp11 { + +template <> +inline SEXP r_vector<SEXP>::valid_type(SEXP data) { + if (TYPEOF(data) != VECSXP) { + throw type_error(VECSXP, TYPEOF(data)); + } + return data; +} + +template <> +inline SEXP r_vector<SEXP>::operator[](const R_xlen_t pos) const { + return VECTOR_ELT(data_, pos); +} + +template <> +inline SEXP r_vector<SEXP>::operator[](const r_string& name) const { + SEXP names = this->names(); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + return operator[](pos); + } + } + return R_NilValue; +} + +template <> +inline SEXP* r_vector<SEXP>::get_p(bool, SEXP) { + return nullptr; +} + +template <> +inline void r_vector<SEXP>::const_iterator::fill_buf(R_xlen_t) { + return; +} + +template <> +inline SEXP r_vector<SEXP>::const_iterator::operator*() const 
{ + return VECTOR_ELT(data_->data(), pos_); +} + +typedef r_vector<SEXP> list; + +namespace writable { + +template <> +inline typename r_vector<SEXP>::proxy& r_vector<SEXP>::proxy::operator=(const SEXP& rhs) { + SET_VECTOR_ELT(data_, index_, rhs); + return *this; +} + +template <> +inline r_vector<SEXP>::proxy::operator SEXP() const { + return VECTOR_ELT(data_, index_); +} + +template <> +inline r_vector<SEXP>::r_vector(std::initializer_list<SEXP> il) + : cpp11::r_vector<SEXP>(safe[Rf_allocVector](VECSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SET_VECTOR_ELT(data_, i, *it); + } +} + +template <> +inline r_vector<SEXP>::r_vector(std::initializer_list<named_arg> il) + : cpp11::r_vector<SEXP>(safe[Rf_allocVector](VECSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + int n_protected = 0; + + try { + unwind_protect([&] { + Rf_setAttrib(data_, R_NamesSymbol, Rf_allocVector(STRSXP, capacity_)); + SEXP names = PROTECT(Rf_getAttrib(data_, R_NamesSymbol)); + ++n_protected; + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SET_VECTOR_ELT(data_, i, it->value()); + SET_STRING_ELT(names, i, Rf_mkCharCE(it->name(), CE_UTF8)); + } + UNPROTECT(n_protected); + }); + } catch (const unwind_exception& e) { + preserved.release(protect_); + UNPROTECT(n_protected); + throw e; + } +} + +template <> +inline void r_vector<SEXP>::reserve(R_xlen_t new_capacity) { + data_ = data_ == R_NilValue ? safe[Rf_allocVector](VECSXP, new_capacity) + : safe[Rf_xlengthgets](data_, new_capacity); + + SEXP old_protect = protect_; + protect_ = preserved.insert(data_); + preserved.release(old_protect); + + capacity_ = new_capacity; +} + +template <> +inline void r_vector<SEXP>::push_back(SEXP value) { + while (length_ >= capacity_) { + reserve(capacity_ == 0 ? 
1 : capacity_ *= 2); + } + SET_VECTOR_ELT(data_, length_, value); + ++length_; +} + +typedef r_vector<SEXP> list; + +} // namespace writable + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/list_of.hpp b/src/arrow/r/inst/include/cpp11/list_of.hpp new file mode 100644 index 000000000..d9b8f8020 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/list_of.hpp @@ -0,0 +1,73 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <string> // for string, basic_string + +#include "cpp11/R.hpp" // for R_xlen_t, SEXP, SEXPREC, LONG_VECTOR_SUPPORT +#include "cpp11/list.hpp" // for list + +namespace cpp11 { + +template <typename T> +class list_of : public list { + public: + list_of(const list& data) : list(data) {} + +#ifdef LONG_VECTOR_SUPPORT + T operator[](int pos) { return operator[](static_cast<R_xlen_t>(pos)); } +#endif + + T operator[](R_xlen_t pos) { return list::operator[](pos); } + + T operator[](const char* pos) { return list::operator[](pos); } + + T operator[](const std::string& pos) { return list::operator[](pos.c_str()); } +}; + +namespace writable { +template <typename T> +class list_of : public writable::list { + public: + list_of(const list& data) : writable::list(data) {} + list_of(R_xlen_t n) : writable::list(n) {} + + class proxy { + private: + writable::list::proxy data_; + + public: + proxy(const writable::list::proxy& data) : data_(data) {} + + operator T() const { return static_cast<SEXP>(*this); } + operator SEXP() const { return static_cast<SEXP>(data_); } +#ifdef LONG_VECTOR_SUPPORT + typename T::proxy operator[](int pos) { return static_cast<T>(data_)[pos]; } +#endif + typename T::proxy operator[](R_xlen_t pos) { return static_cast<T>(data_)[pos]; } + proxy operator[](const char* pos) { static_cast<T>(data_)[pos]; } + proxy operator[](const std::string& pos) { return static_cast<T>(data_)[pos]; } + proxy& operator=(const T& rhs) { + data_ = rhs; + + return *this; + } + }; + +#ifdef LONG_VECTOR_SUPPORT 
+ proxy operator[](int pos) { + return {writable::list::operator[](static_cast<R_xlen_t>(pos))}; + } +#endif + + proxy operator[](R_xlen_t pos) { return writable::list::operator[](pos); } + + proxy operator[](const char* pos) { return {writable::list::operator[](pos)}; } + + proxy operator[](const std::string& pos) { + return writable::list::operator[](pos.c_str()); + } +}; +} // namespace writable + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/logicals.hpp b/src/arrow/r/inst/include/cpp11/logicals.hpp new file mode 100644 index 000000000..5f96b3eab --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/logicals.hpp @@ -0,0 +1,143 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <algorithm> // for min +#include <array> // for array +#include <initializer_list> // for initializer_list + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_all... +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for preserved +#include "cpp11/r_bool.hpp" // for r_bool +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for logicals + +namespace cpp11 { + +template <> +inline SEXP r_vector<r_bool>::valid_type(SEXP data) { + if (TYPEOF(data) != LGLSXP) { + throw type_error(LGLSXP, TYPEOF(data)); + } + return data; +} + +template <> +inline r_bool r_vector<r_bool>::operator[](const R_xlen_t pos) const { + return is_altrep_ ? 
static_cast<r_bool>(LOGICAL_ELT(data_, pos)) : data_p_[pos]; +} + +template <> +inline r_bool* r_vector<r_bool>::get_p(bool is_altrep, SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return reinterpret_cast<r_bool*>(LOGICAL(data)); + } +} + +template <> +inline void r_vector<r_bool>::const_iterator::fill_buf(R_xlen_t pos) { + length_ = std::min(64_xl, data_->size() - pos); + LOGICAL_GET_REGION(data_->data_, pos, length_, reinterpret_cast<int*>(buf_.data())); + block_start_ = pos; +} + +typedef r_vector<r_bool> logicals; + +namespace writable { + +template <> +inline typename r_vector<r_bool>::proxy& r_vector<r_bool>::proxy::operator=( + const r_bool& rhs) { + if (is_altrep_) { + SET_LOGICAL_ELT(data_, index_, rhs); + } else { + *p_ = rhs; + } + return *this; +} + +template <> +inline r_vector<r_bool>::proxy::operator r_bool() const { + if (p_ == nullptr) { + return static_cast<r_bool>(LOGICAL_ELT(data_, index_)); + } else { + return *p_; + } +} + +inline bool operator==(const r_vector<r_bool>::proxy& lhs, r_bool rhs) { + return static_cast<r_bool>(lhs).operator==(rhs); +} + +template <> +inline r_vector<r_bool>::r_vector(std::initializer_list<r_bool> il) + : cpp11::r_vector<r_bool>(Rf_allocVector(LGLSXP, il.size())), capacity_(il.size()) { + protect_ = preserved.insert(data_); + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SET_LOGICAL_ELT(data_, i, *it); + } +} + +template <> +inline r_vector<r_bool>::r_vector(std::initializer_list<named_arg> il) + : cpp11::r_vector<r_bool>(safe[Rf_allocVector](LGLSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + int n_protected = 0; + + try { + unwind_protect([&] { + Rf_setAttrib(data_, R_NamesSymbol, Rf_allocVector(STRSXP, capacity_)); + SEXP names = PROTECT(Rf_getAttrib(data_, R_NamesSymbol)); + ++n_protected; + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + data_p_[i] = static_cast<r_bool>(LOGICAL_ELT(it->value(), 0)); + 
SET_STRING_ELT(names, i, Rf_mkCharCE(it->name(), CE_UTF8)); + } + UNPROTECT(n_protected); + }); + } catch (const unwind_exception& e) { + preserved.release(protect_); + UNPROTECT(n_protected); + throw e; + } +} + +template <> +inline void r_vector<r_bool>::reserve(R_xlen_t new_capacity) { + data_ = data_ == R_NilValue ? safe[Rf_allocVector](LGLSXP, new_capacity) + : safe[Rf_xlengthgets](data_, new_capacity); + SEXP old_protect = protect_; + protect_ = preserved.insert(data_); + + preserved.release(old_protect); + + data_p_ = reinterpret_cast<r_bool*>(LOGICAL(data_)); + capacity_ = new_capacity; +} + +template <> +inline void r_vector<r_bool>::push_back(r_bool value) { + while (length_ >= capacity_) { + reserve(capacity_ == 0 ? 1 : capacity_ *= 2); + } + if (is_altrep_) { + SET_LOGICAL_ELT(data_, length_, value); + } else { + data_p_[length_] = value; + } + ++length_; +} + +typedef r_vector<r_bool> logicals; + +} // namespace writable +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/matrix.hpp b/src/arrow/r/inst/include/cpp11/matrix.hpp new file mode 100644 index 000000000..30698c65a --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/matrix.hpp @@ -0,0 +1,112 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <string> // for string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, R_xlen_t, INT... 
+#include "cpp11/r_bool.hpp" // for r_bool +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/r_vector.hpp" // for r_vector +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { +template <typename V, typename T> +class matrix { + private: + V vector_; + int nrow_; + + public: + class row { + private: + matrix& parent_; + int row_; + + public: + row(matrix& parent, R_xlen_t row) : parent_(parent), row_(row) {} + T operator[](const int pos) { return parent_.vector_[row_ + (pos * parent_.nrow_)]; } + + class iterator { + private: + row& row_; + int pos_; + + public: + iterator(row& row, R_xlen_t pos) : row_(row), pos_(pos) {} + iterator begin() const { return row_.parent_.vector_iterator(&this, 0); } + iterator end() const { return iterator(&this, row_.size()); } + inline iterator& operator++() { + ++pos_; + return *this; + } + bool operator!=(const iterator& rhs) { + return !(pos_ == rhs.pos_ && row_.row_ == rhs.row_.row_); + } + T operator*() const { return row_[pos_]; }; + }; + + iterator begin() { return iterator(*this, 0); } + iterator end() { return iterator(*this, size()); } + R_xlen_t size() const { return parent_.vector_.size() / parent_.nrow_; } + bool operator!=(const row& rhs) { return row_ != rhs.row_; } + row& operator++() { + ++row_; + return *this; + } + row& operator*() { return *this; } + }; + friend row; + + public: + matrix(SEXP data) : vector_(data), nrow_(INTEGER_ELT(vector_.attr("dim"), 0)) {} + + template <typename V2, typename T2> + matrix(const cpp11::matrix<V2, T2>& rhs) : vector_(rhs), nrow_(rhs.nrow()) {} + + matrix(int nrow, int ncol) : vector_(R_xlen_t(nrow * ncol)), nrow_(nrow) { + vector_.attr("dim") = {nrow, ncol}; + } + + int nrow() const { return nrow_; } + + int ncol() const { return size() / nrow_; } + + SEXP data() const { return vector_.data(); } + + R_xlen_t size() const { return vector_.size(); } + + operator SEXP() const { return SEXP(vector_); } + + // operator sexp() { return sexp(vector_); } + + sexp 
attr(const char* name) const { return SEXP(vector_.attr(name)); } + + sexp attr(const std::string& name) const { return SEXP(vector_.attr(name)); } + + sexp attr(SEXP name) const { return SEXP(vector_.attr(name)); } + + r_vector<r_string> names() const { return SEXP(vector_.names()); } + + row operator[](const int pos) { return {*this, pos}; } + + T operator()(int row, int col) { return vector_[row + (col * nrow_)]; } + + row begin() { return {*this, 0}; } + row end() { return {*this, nrow_}; } +}; + +using doubles_matrix = matrix<r_vector<double>, double>; +using integers_matrix = matrix<r_vector<int>, int>; +using logicals_matrix = matrix<r_vector<r_bool>, r_bool>; +using strings_matrix = matrix<r_vector<r_string>, r_string>; + +namespace writable { +using doubles_matrix = matrix<r_vector<double>, r_vector<double>::proxy>; +using integers_matrix = matrix<r_vector<int>, r_vector<int>::proxy>; +using logicals_matrix = matrix<r_vector<r_bool>, r_vector<r_bool>::proxy>; +using strings_matrix = matrix<r_vector<r_string>, r_vector<r_string>::proxy>; +} // namespace writable + +// TODO: Add tests for Matrix class +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/named_arg.hpp b/src/arrow/r/inst/include/cpp11/named_arg.hpp new file mode 100644 index 000000000..762c8a79d --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/named_arg.hpp @@ -0,0 +1,50 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <stddef.h> // for size_t + +#include <initializer_list> // for initializer_list + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, literals +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { +class named_arg { + public: + explicit named_arg(const char* name) : name_(name), value_(R_NilValue) {} + named_arg& operator=(std::initializer_list<int> il) { + value_ = as_sexp(il); + return *this; + } + + template <typename T> + named_arg& operator=(T rhs) { + value_ = as_sexp(rhs); + return 
*this; + } + + template <typename T> + named_arg& operator=(std::initializer_list<T> rhs) { + value_ = as_sexp(rhs); + return *this; + } + + const char* name() const { return name_; } + SEXP value() const { return value_; } + + private: + const char* name_; + sexp value_; +}; + +namespace literals { + +inline named_arg operator"" _nm(const char* name, std::size_t) { return named_arg(name); } + +} // namespace literals + +using namespace literals; + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/protect.hpp b/src/arrow/r/inst/include/cpp11/protect.hpp new file mode 100644 index 000000000..1d1b48bb5 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/protect.hpp @@ -0,0 +1,372 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <csetjmp> // for longjmp, setjmp, jmp_buf +#include <exception> // for exception +#include <stdexcept> // for std::runtime_error +#include <string> // for string, basic_string +#include <tuple> // for tuple, make_tuple + +// NB: cpp11/R.hpp must precede R_ext/Error.h to ensure R_NO_REMAP is defined +#include "cpp11/R.hpp" // for SEXP, SEXPREC, CDR, R_NilValue, CAR, R_Pres... 
#include "R_ext/Boolean.h"  // for Rboolean
#include "R_ext/Error.h"    // for Rf_error, Rf_warning
#include "R_ext/Print.h"    // for REprintf
#include "R_ext/Utils.h"    // for R_CheckUserInterrupt
#include "Rversion.h"       // for R_VERSION, R_Version

// R_UnwindProtect is only used when building against R >= 3.5.0.
#if defined(R_VERSION) && R_VERSION >= R_Version(3, 5, 0)
#define HAS_UNWIND_PROTECT
#endif

namespace cpp11 {
/// Thrown when R performs a non-local jump inside unwind_protect; `token` is the
/// continuation token that was passed to R_UnwindProtect.
class unwind_exception : public std::exception {
 public:
  SEXP token;
  unwind_exception(SEXP token_) : token(token_) {}
};

#ifdef HAS_UNWIND_PROTECT

/// Unwind protection from C longjmps, like those used in R error handling.
///
/// Overload for callables that return SEXP.
///
/// @param code The code that needs to be protected, as a nullary callable
/// @return The value returned by `code`
/// @throws unwind_exception if R jumps out of `code`
template <typename Fun, typename = typename std::enable_if<std::is_same<
                            decltype(std::declval<Fun&&>()()), SEXP>::value>::type>
SEXP unwind_protect(Fun&& code) {
  // The continuation token is created once and kept alive with R_PreserveObject.
  static SEXP token = [] {
    SEXP res = R_MakeUnwindCont();
    R_PreserveObject(res);
    return res;
  }();

  // Landing point for the longjmp issued by the cleanup callback below.
  std::jmp_buf jmpbuf;
  if (setjmp(jmpbuf)) {
    throw unwind_exception(token);
  }

  SEXP res = R_UnwindProtect(
      // Body callback: invoke the user's callable.
      [](void* data) -> SEXP {
        auto callback = static_cast<decltype(&code)>(data);
        return static_cast<Fun&&>(*callback)();
      },
      &code,
      // Cleanup callback: only acts when R is jumping.
      [](void* jmpbuf, Rboolean jump) {
        if (jump == TRUE) {
          // We need to first jump back into the C++ stacks because you can't safely throw
          // exceptions from C stack frames.
          longjmp(*static_cast<std::jmp_buf*>(jmpbuf), 1);
        }
      },
      &jmpbuf, token);

  // R_UnwindProtect adds the result to the CAR of the continuation token,
  // which implicitly protects the result. However if there is no error and
  // R_UnwindProtect does a normal exit the memory shouldn't be protected, so we
  // unset it here before returning the value ourselves.
  SETCAR(token, R_NilValue);

  return res;
}

/// Overload for callables that return void: runs `code` under the SEXP overload
/// and discards the placeholder result.
template <typename Fun, typename = typename std::enable_if<std::is_same<
                            decltype(std::declval<Fun&&>()()), void>::value>::type>
void unwind_protect(Fun&& code) {
  (void)unwind_protect([&] {
    std::forward<Fun>(code)();
    return R_NilValue;
  });
}

/// Overload for callables that return any other type R: the result is stored in a
/// local of type R (which is default-constructed first) and returned afterwards.
template <typename Fun, typename R = decltype(std::declval<Fun&&>()())>
typename std::enable_if<!std::is_same<R, SEXP>::value && !std::is_same<R, void>::value,
                        R>::type
unwind_protect(Fun&& code) {
  R out;
  (void)unwind_protect([&] {
    out = std::forward<Fun>(code)();
    return R_NilValue;
  });
  return out;
}

#else
// Don't do anything if we don't have unwind protect. This will leak C++ resources,
// including those held by cpp11 objects, but the other alternatives are also not great.
template <typename Fun>
decltype(std::declval<Fun&&>()()) unwind_protect(Fun&& code) {
  return std::forward<Fun>(code)();
}
#endif

namespace detail {

// Minimal stand-ins for std::index_sequence / std::make_index_sequence.
template <size_t...>
struct index_sequence {
  using type = index_sequence;
};

template <typename, size_t>
struct appended_sequence;

// Appends J to an existing index_sequence<I...>.
template <std::size_t... I, std::size_t J>
struct appended_sequence<index_sequence<I...>, J> : index_sequence<I..., J> {};

// Builds index_sequence<0, ..., N-1> by appending N-1 to the sequence for N-1.
template <size_t N>
struct make_index_sequence
    : appended_sequence<typename make_index_sequence<N - 1>::type, N - 1> {};

template <>
struct make_index_sequence<0> : index_sequence<> {};

// Calls `f` with the elements of tuple `a`, selected by the indices I...
template <typename F, typename... Aref, size_t... I>
decltype(std::declval<F&&>()(std::declval<Aref>()...)) apply(
    F&& f, std::tuple<Aref...>&& a, const index_sequence<I...>&) {
  return std::forward<F>(f)(std::get<I>(std::move(a))...);
}

// Calls `f` with every element of tuple `a`, in order.
template <typename F, typename... Aref>
decltype(std::declval<F&&>()(std::declval<Aref>()...)) apply(F&& f,
                                                             std::tuple<Aref...>&& a) {
  return apply(std::forward<F>(f), std::move(a), make_index_sequence<sizeof...(Aref)>{});
}

// overload to silence a compiler warning that the (empty) tuple parameter is set but
// unused
template <typename F>
decltype(std::declval<F&&>()()) apply(F&& f, std::tuple<>&&) {
  return std::forward<F>(f)();
}

// Bundles a function pointer with a tuple of its arguments so that the call can be
// made later as a nullary callable (only on an rvalue closure).
template <typename F, typename... Aref>
struct closure {
  decltype(std::declval<F*>()(std::declval<Aref>()...)) operator()() && {
    return apply(ptr_, std::move(arefs_));
  }
  F* ptr_;
  std::tuple<Aref...> arefs_;
};

}  // namespace detail
+ throw std::runtime_error("[[noreturn]]"); + } + F* ptr_; + }; + + template <typename F> + constexpr noreturn_function<F> noreturn(F* raw) const { + return {raw}; + } +}; +constexpr struct protect safe = {}; + +inline void check_user_interrupt() { safe[R_CheckUserInterrupt](); } + +template <typename... Args> +void stop [[noreturn]] (const char* fmt, Args... args) { + safe.noreturn(Rf_errorcall)(R_NilValue, fmt, args...); +} + +template <typename... Args> +void stop [[noreturn]] (const std::string& fmt, Args... args) { + safe.noreturn(Rf_errorcall)(R_NilValue, fmt.c_str(), args...); +} + +template <typename... Args> +void warning(const char* fmt, Args... args) { + safe[Rf_warningcall](R_NilValue, fmt, args...); +} + +template <typename... Args> +void warning(const std::string& fmt, Args... args) { + safe[Rf_warningcall](R_NilValue, fmt.c_str(), args...); +} + +/// A doubly-linked list of preserved objects, allowing O(1) insertion/release of +/// objects compared to O(N preserved) with R_PreserveObject. +static struct { + SEXP insert(SEXP obj) { + if (obj == R_NilValue) { + return R_NilValue; + } + +#ifdef CPP11_USE_PRESERVE_OBJECT + PROTECT(obj); + R_PreserveObject(obj); + UNPROTECT(1); + return obj; +#endif + + PROTECT(obj); + + SEXP list_ = get_preserve_list(); + + // Add a new cell that points to the previous end. 
+ SEXP cell = PROTECT(Rf_cons(list_, CDR(list_))); + + SET_TAG(cell, obj); + + SETCDR(list_, cell); + + if (CDR(cell) != R_NilValue) { + SETCAR(CDR(cell), cell); + } + + UNPROTECT(2); + + return cell; + } + + void print() { + for (SEXP head = get_preserve_list(); head != R_NilValue; head = CDR(head)) { + REprintf("%x CAR: %x CDR: %x TAG: %x\n", head, CAR(head), CDR(head), TAG(head)); + } + REprintf("---\n"); + } + + // This is currently unused, but client packages could use it to free leaked resources + // in older R versions if needed + void release_all() { +#if !defined(CPP11_USE_PRESERVE_OBJECT) + SEXP list_ = get_preserve_list(); + SEXP first = CDR(list_); + if (first != R_NilValue) { + SETCAR(first, R_NilValue); + SETCDR(list_, R_NilValue); + } +#endif + } + + void release(SEXP token) { + if (token == R_NilValue) { + return; + } + +#ifdef CPP11_USE_PRESERVE_OBJECT + R_ReleaseObject(token); + return; +#endif + + SEXP before = CAR(token); + + SEXP after = CDR(token); + + if (before == R_NilValue && after == R_NilValue) { + Rf_error("should never happen"); + } + + SETCDR(before, after); + + if (after != R_NilValue) { + SETCAR(after, before); + } + } + + private: + // We deliberately avoid using safe[] in the below code, as this code runs + // when the shared library is loaded and will not be wrapped by + // `CPP11_UNWIND`, so if an error occurs we will not catch the C++ exception + // that safe emits. + static void set_option(SEXP name, SEXP value) { + static SEXP opt = SYMVALUE(Rf_install(".Options")); + SEXP t = opt; + while (CDR(t) != R_NilValue) { + if (TAG(CDR(t)) == name) { + opt = CDR(t); + SET_TAG(opt, name); + SETCAR(opt, value); + return; + } + t = CDR(t); + } + SETCDR(t, Rf_allocList(1)); + opt = CDR(t); + SET_TAG(opt, name); + SETCAR(opt, value); + } + + // The preserved list singleton is stored in a XPtr within an R global option. 
+ // + // It is not constructed as a static variable directly since many + // translation units may be compiled, resulting in unrelated instances of each + // static variable. + // + // We cannot store it in the cpp11 namespace, as cpp11 likely will not be loaded by + // packages. + // We cannot store it in R's global environment, as that is against CRAN + // policies. + // We instead store it as an XPtr in the global options, which avoids issues + // both copying and serializing. + static SEXP get_preserve_xptr_addr() { + static SEXP preserve_xptr_sym = Rf_install("cpp11_preserve_xptr"); + SEXP preserve_xptr = Rf_GetOption1(preserve_xptr_sym); + + if (TYPEOF(preserve_xptr) != EXTPTRSXP) { + return R_NilValue; + } + auto addr = R_ExternalPtrAddr(preserve_xptr); + if (addr == nullptr) { + return R_NilValue; + } + return static_cast<SEXP>(addr); + } + + static void set_preserve_xptr(SEXP value) { + static SEXP preserve_xptr_sym = Rf_install("cpp11_preserve_xptr"); + + SEXP xptr = PROTECT(R_MakeExternalPtr(value, R_NilValue, R_NilValue)); + set_option(preserve_xptr_sym, xptr); + UNPROTECT(1); + } + + static SEXP get_preserve_list() { + static SEXP preserve_list = R_NilValue; + + if (TYPEOF(preserve_list) != LISTSXP) { + preserve_list = get_preserve_xptr_addr(); + if (TYPEOF(preserve_list) != LISTSXP) { + preserve_list = Rf_cons(R_NilValue, R_NilValue); + R_PreserveObject(preserve_list); + set_preserve_xptr(preserve_list); + } + } + + return preserve_list; + } +} // namespace cpp11 +preserved; +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/r_bool.hpp b/src/arrow/r/inst/include/cpp11/r_bool.hpp new file mode 100644 index 000000000..e5c8592d7 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/r_bool.hpp @@ -0,0 +1,76 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <limits> // for numeric_limits +#include <ostream> +#include <type_traits> // for is_convertible, enable_if + +#include "R_ext/Boolean.h" // for Rboolean 
+#include "cpp11/R.hpp" // for SEXP, SEXPREC, ... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for unwind_protect, preserved +#include "cpp11/r_vector.hpp" +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +class r_bool { + public: + r_bool() = default; + + r_bool(SEXP data) { + if (Rf_isLogical(data)) { + if (Rf_xlength(data) == 1) { + value_ = static_cast<Rboolean>(LOGICAL_ELT(data, 0)); + } + } + stop("Invalid r_bool value: %x", data); + } + + r_bool(bool value) : value_(value ? TRUE : FALSE) {} + r_bool(Rboolean value) : value_(value) {} + r_bool(int value) : value_(from_int(value)) {} + + operator bool() const { return value_ == TRUE; } + operator int() const { return value_; } + operator Rboolean() const { return value_ ? TRUE : FALSE; } + + bool operator==(r_bool rhs) const { return value_ == rhs.value_; } + bool operator==(bool rhs) const { return operator==(r_bool(rhs)); } + bool operator==(Rboolean rhs) const { return operator==(r_bool(rhs)); } + bool operator==(int rhs) const { return operator==(r_bool(rhs)); } + + private: + static constexpr int na = std::numeric_limits<int>::min(); + + static int from_int(int value) { + if (value == static_cast<int>(FALSE)) return FALSE; + if (value == static_cast<int>(na)) return na; + return TRUE; + } + + int value_ = na; +}; + +inline std::ostream& operator<<(std::ostream& os, r_bool const& value) { + os << ((value == TRUE) ? 
"TRUE" : "FALSE"); + return os; +} + +template <typename T, typename R = void> +using enable_if_r_bool = enable_if_t<std::is_same<T, r_bool>::value, R>; + +template <typename T> +enable_if_r_bool<T, SEXP> as_sexp(T from) { + sexp res = Rf_allocVector(LGLSXP, 1); + unwind_protect([&] { SET_LOGICAL_ELT(res.data(), 0, from); }); + return res; +} + +template <> +inline r_bool na() { + return NA_LOGICAL; +} + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/r_string.hpp b/src/arrow/r/inst/include/cpp11/r_string.hpp new file mode 100644 index 000000000..d62f7270f --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/r_string.hpp @@ -0,0 +1,98 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <string> // for string, basic_string, operator== +#include <type_traits> // for is_convertible, enable_if + +#include "R_ext/Memory.h" // for vmaxget, vmaxset +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_mkCharCE, Rf_translat... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for unwind_protect, protect, protect::function +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +class r_string { + public: + r_string() = default; + r_string(SEXP data) : data_(data) {} + r_string(const char* data) : data_(safe[Rf_mkCharCE](data, CE_UTF8)) {} + r_string(const std::string& data) + : data_(safe[Rf_mkCharLenCE](data.c_str(), data.size(), CE_UTF8)) {} + + operator SEXP() const { return data_; } + operator sexp() const { return data_; } + operator std::string() const { + std::string res; + res.reserve(size()); + + void* vmax = vmaxget(); + unwind_protect([&] { res.assign(Rf_translateCharUTF8(data_)); }); + vmaxset(vmax); + + return res; + } + + bool operator==(const r_string& rhs) const { return data_.data() == rhs.data_.data(); } + + bool operator==(const SEXP rhs) const { return data_.data() == rhs; } + + bool operator==(const char* rhs) const { + return static_cast<std::string>(*this) == rhs; + } + + bool 
operator==(const std::string& rhs) const { + return static_cast<std::string>(*this) == rhs; + } + + R_xlen_t size() const { return Rf_xlength(data_); } + + private: + sexp data_ = R_NilValue; +}; + +inline SEXP as_sexp(std::initializer_list<r_string> il) { + R_xlen_t size = il.size(); + + sexp data; + unwind_protect([&] { + data = Rf_allocVector(STRSXP, size); + auto it = il.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + if (*it == NA_STRING) { + SET_STRING_ELT(data, i, *it); + } else { + SET_STRING_ELT(data, i, Rf_mkCharCE(Rf_translateCharUTF8(*it), CE_UTF8)); + } + } + }); + return data; +} + +template <typename T, typename R = void> +using enable_if_r_string = enable_if_t<std::is_same<T, cpp11::r_string>::value, R>; + +template <typename T> +enable_if_r_string<T, SEXP> as_sexp(T from) { + r_string str(from); + sexp res; + unwind_protect([&] { + res = Rf_allocVector(STRSXP, 1); + + if (str == NA_STRING) { + SET_STRING_ELT(res, 0, str); + } else { + SET_STRING_ELT(res, 0, Rf_mkCharCE(Rf_translateCharUTF8(str), CE_UTF8)); + } + }); + + return res; +} + +template <> +inline r_string na() { + return NA_STRING; +} + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/r_vector.hpp b/src/arrow/r/inst/include/cpp11/r_vector.hpp new file mode 100644 index 000000000..3a3d53b36 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/r_vector.hpp @@ -0,0 +1,1009 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <stddef.h> // for ptrdiff_t, size_t + +#include <algorithm> // for max +#include <array> // for array +#include <cstdio> // for snprintf +#include <exception> // for exception +#include <initializer_list> // for initializer_list +#include <iterator> // for forward_iterator_tag, random_ac... +#include <stdexcept> // for out_of_range +#include <string> // for string, basic_string +#include <type_traits> // for decay, is_same, enable_if, is_c... 
+#include <utility> // for declval + +#include "cpp11/R.hpp" // for R_xlen_t, SEXP, SEXPREC, Rf_xle... +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/protect.hpp" // for preserved +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +using namespace cpp11::literals; + +class type_error : public std::exception { + public: + type_error(int expected, int actual) : expected_(expected), actual_(actual) {} + virtual const char* what() const noexcept { + snprintf(str_, 64, "Invalid input type, expected '%s' actual '%s'", + Rf_type2char(expected_), Rf_type2char(actual_)); + return str_; + } + + private: + int expected_; + int actual_; + mutable char str_[64]; +}; + +// Forward Declarations +class named_arg; + +namespace writable { +template <typename T> +class r_vector; +} // namespace writable + +// Declarations +template <typename T> +class r_vector { + public: + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef T value_type; + typedef T* pointer; + typedef T& reference; + + r_vector() = default; + + r_vector(SEXP data); + + r_vector(SEXP data, bool is_altrep); + +#ifdef LONG_VECTOR_SUPPORT + T operator[](const int pos) const; + T at(const int pos) const; +#endif + T operator[](const R_xlen_t pos) const; + T operator[](const size_type pos) const; + T operator[](const r_string& name) const; + + T at(const R_xlen_t pos) const; + T at(const size_type pos) const; + T at(const r_string& name) const; + + bool contains(const r_string& name) const; + + r_vector& operator=(const r_vector& rhs) { + SEXP old_protect = protect_; + + data_ = rhs.data_; + protect_ = preserved.insert(data_); + is_altrep_ = rhs.is_altrep_; + data_p_ = rhs.data_p_; + length_ = rhs.length_; + + preserved.release(old_protect); + + return *this; + }; + + r_vector(const r_vector& rhs) { + SEXP old_protect = protect_; + + data_ = rhs.data_; + protect_ = preserved.insert(data_); + is_altrep_ = 
rhs.is_altrep_; + data_p_ = rhs.data_p_; + length_ = rhs.length_; + + preserved.release(old_protect); + }; + + r_vector(const writable::r_vector<T>& rhs) : r_vector(static_cast<SEXP>(rhs)) {} + + bool is_altrep() const; + + bool named() const; + + R_xlen_t size() const; + + operator SEXP() const; + + operator sexp() const; + + bool empty() const; + + /// Provide access to the underlying data, mainly for interface + /// compatibility with std::vector + SEXP data() const; + + sexp attr(const char* name) const { + return SEXP(attribute_proxy<r_vector<T>>(*this, name)); + } + + sexp attr(const std::string& name) const { + return SEXP(attribute_proxy<r_vector<T>>(*this, name.c_str())); + } + + sexp attr(SEXP name) const { return SEXP(attribute_proxy<r_vector<T>>(*this, name)); } + + r_vector<r_string> names() const { + SEXP nms = SEXP(attribute_proxy<r_vector<T>>(*this, R_NamesSymbol)); + if (nms == R_NilValue) { + return r_vector<r_string>(); + } + + return nms; + } + + class const_iterator { + public: + using difference_type = ptrdiff_t; + using value_type = T; + using pointer = T*; + using reference = T&; + using iterator_category = std::random_access_iterator_tag; + + const_iterator(const r_vector* data, R_xlen_t pos); + + inline const_iterator& operator+(R_xlen_t pos); + inline ptrdiff_t operator-(const const_iterator& other) const; + + inline const_iterator& operator++(); + inline const_iterator& operator--(); + + inline const_iterator& operator+=(R_xlen_t pos); + inline const_iterator& operator-=(R_xlen_t pos); + + inline bool operator!=(const const_iterator& other) const; + inline bool operator==(const const_iterator& other) const; + + inline T operator*() const; + + friend class writable::r_vector<T>::iterator; + + private: + const r_vector* data_; + void fill_buf(R_xlen_t pos); + + R_xlen_t pos_; + std::array<T, 64 * 64> buf_; + R_xlen_t block_start_ = 0; + R_xlen_t length_ = 0; + }; + + public: + const_iterator begin() const; + const_iterator end() const; + + 
const_iterator cbegin() const; + const_iterator cend() const; + + const_iterator find(const r_string& name) const; + + ~r_vector() { preserved.release(protect_); } + + private: + SEXP data_ = R_NilValue; + SEXP protect_ = R_NilValue; + bool is_altrep_ = false; + T* data_p_ = nullptr; + R_xlen_t length_ = 0; + + static T* get_p(bool is_altrep, SEXP data); + + static SEXP valid_type(SEXP data); + + friend class writable::r_vector<T>; +}; + +namespace writable { + +template <typename T> +using has_begin_fun = std::decay<decltype(*begin(std::declval<T>()))>; + +/// Read/write access to new or copied r_vectors +template <typename T> +class r_vector : public cpp11::r_vector<T> { + private: + SEXP protect_ = R_NilValue; + + // These are necessary because type names are not directly accessible in + // template inheritance + using cpp11::r_vector<T>::data_; + using cpp11::r_vector<T>::data_p_; + using cpp11::r_vector<T>::is_altrep_; + using cpp11::r_vector<T>::length_; + + R_xlen_t capacity_ = 0; + + public: + class proxy { + private: + const SEXP data_; + const R_xlen_t index_; + T* const p_; + bool is_altrep_; + + public: + proxy(SEXP data, const R_xlen_t index, T* const p, bool is_altrep); + + proxy& operator=(const T& rhs); + proxy& operator+=(const T& rhs); + proxy& operator-=(const T& rhs); + proxy& operator*=(const T& rhs); + proxy& operator/=(const T& rhs); + proxy& operator++(int); + proxy& operator--(int); + + void operator++(); + void operator--(); + + operator T() const; + }; + + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef proxy value_type; + typedef proxy* pointer; + typedef proxy& reference; + + class iterator : public cpp11::r_vector<T>::const_iterator { + private: + const r_vector& data_; + using cpp11::r_vector<T>::const_iterator::block_start_; + using cpp11::r_vector<T>::const_iterator::pos_; + using cpp11::r_vector<T>::const_iterator::buf_; + using cpp11::r_vector<T>::const_iterator::length_; + using 
cpp11::r_vector<T>::const_iterator::fill_buf; + + public: + using difference_type = ptrdiff_t; + using value_type = proxy; + using pointer = proxy*; + using reference = proxy&; + using iterator_category = std::forward_iterator_tag; + + iterator(const r_vector& data, R_xlen_t pos); + + inline iterator& operator++(); + + inline proxy operator*() const; + + using cpp11::r_vector<T>::const_iterator::operator!=; + + inline iterator& operator+(R_xlen_t rhs); + }; + + r_vector() = default; + r_vector(const SEXP& data); + r_vector(SEXP&& data); + r_vector(const SEXP& data, bool is_altrep); + r_vector(SEXP&& data, bool is_altrep); + r_vector(std::initializer_list<T> il); + r_vector(std::initializer_list<named_arg> il); + r_vector(std::initializer_list<const char*> il); + r_vector(std::initializer_list<std::string> il); + + template <typename Iter> + r_vector(Iter first, Iter last); + + template <typename V, typename W = has_begin_fun<V>> + r_vector(const V& obj); + + r_vector(const R_xlen_t size); + + ~r_vector(); + + r_vector(const r_vector& rhs); + r_vector(r_vector&& rhs); + + r_vector(const cpp11::r_vector<T>& rhs); + + r_vector& operator=(const r_vector& rhs); + r_vector& operator=(r_vector&& rhs); + +#ifdef LONG_VECTOR_SUPPORT + proxy operator[](const int pos) const; + proxy at(const int pos) const; +#endif + proxy operator[](const R_xlen_t pos) const; + proxy operator[](const size_type pos) const; + proxy operator[](const r_string& name) const; + + proxy at(const R_xlen_t pos) const; + proxy at(const size_type pos) const; + proxy at(const r_string& name) const; + + void push_back(T value); + void push_back(const named_arg& value); + void pop_back(); + + void resize(R_xlen_t count); + + void reserve(R_xlen_t new_capacity); + + iterator insert(R_xlen_t pos, T value); + iterator erase(R_xlen_t pos); + + void clear(); + + iterator begin() const; + iterator end() const; + + using cpp11::r_vector<T>::cbegin; + using cpp11::r_vector<T>::cend; + using 
cpp11::r_vector<T>::size; + + iterator find(const r_string& name) const; + + attribute_proxy<r_vector<T>> attr(const char* name) const { + return attribute_proxy<r_vector<T>>(*this, name); + } + + attribute_proxy<r_vector<T>> attr(const std::string& name) const { + return attribute_proxy<r_vector<T>>(*this, name.c_str()); + } + + attribute_proxy<r_vector<T>> attr(SEXP name) const { + return attribute_proxy<r_vector<T>>(*this, name); + } + + attribute_proxy<r_vector<T>> names() const { + return attribute_proxy<r_vector<T>>(*this, R_NamesSymbol); + } + + operator SEXP() const; +}; +} // namespace writable + +// Implementations below + +template <typename T> +inline r_vector<T>::r_vector(const SEXP data) + : data_(valid_type(data)), + protect_(preserved.insert(data)), + is_altrep_(ALTREP(data)), + data_p_(get_p(ALTREP(data), data)), + length_(Rf_xlength(data)) {} + +template <typename T> +inline r_vector<T>::r_vector(const SEXP data, bool is_altrep) + : data_(valid_type(data)), + protect_(preserved.insert(data)), + is_altrep_(is_altrep), + data_p_(get_p(is_altrep, data)), + length_(Rf_xlength(data)) {} + +template <typename T> +inline bool r_vector<T>::is_altrep() const { + return is_altrep_; +} + +template <typename T> +inline bool r_vector<T>::named() const { + return ((this->names()) != R_NilValue); +} + +template <typename T> +inline R_xlen_t r_vector<T>::size() const { + return length_; +} + +template <typename T> +inline r_vector<T>::operator SEXP() const { + return data_; +} + +template <typename T> +inline bool r_vector<T>::empty() const { + return (!(this->size() > 0)); +} + +template <typename T> +inline r_vector<T>::operator sexp() const { + return data_; +} + +/// Provide access to the underlying data, mainly for interface +/// compatibility with std::vector +template <typename T> +inline SEXP r_vector<T>::data() const { + return data_; +} + +template <typename T> +inline typename r_vector<T>::const_iterator r_vector<T>::begin() const { + return 
const_iterator(this, 0); +} + +template <typename T> +inline typename r_vector<T>::const_iterator r_vector<T>::end() const { + return const_iterator(this, length_); +} + +template <typename T> +inline typename r_vector<T>::const_iterator r_vector<T>::cbegin() const { + return const_iterator(this, 0); +} + +template <typename T> +inline typename r_vector<T>::const_iterator r_vector<T>::cend() const { + return const_iterator(this, length_); +} + +template <typename T> +r_vector<T>::const_iterator::const_iterator(const r_vector* data, R_xlen_t pos) + : data_(data), pos_(pos), buf_() { + if (data_->is_altrep()) { + fill_buf(pos); + } +} + +template <typename T> +inline typename r_vector<T>::const_iterator& r_vector<T>::const_iterator::operator++() { + ++pos_; + if (data_->is_altrep() && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template <typename T> +inline typename r_vector<T>::const_iterator& r_vector<T>::const_iterator::operator--() { + --pos_; + if (data_->is_altrep() && pos_ > 0 && pos_ < block_start_) { + fill_buf(std::max(0_xl, pos_ - 64)); + } + return *this; +} + +template <typename T> +inline typename r_vector<T>::const_iterator& r_vector<T>::const_iterator::operator+=( + R_xlen_t i) { + pos_ += i; + if (data_->is_altrep() && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template <typename T> +inline typename r_vector<T>::const_iterator& r_vector<T>::const_iterator::operator-=( + R_xlen_t i) { + pos_ -= i; + if (data_->is_altrep() && pos_ >= block_start_ + length_) { + fill_buf(std::max(0_xl, pos_ - 64)); + } + return *this; +} + +template <typename T> +inline bool r_vector<T>::const_iterator::operator!=( + const r_vector<T>::const_iterator& other) const { + return pos_ != other.pos_; +} + +template <typename T> +inline bool r_vector<T>::const_iterator::operator==( + const r_vector<T>::const_iterator& other) const { + return pos_ == other.pos_; +} + +template <typename T> +inline ptrdiff_t 
r_vector<T>::const_iterator::operator-( + const r_vector<T>::const_iterator& other) const { + return pos_ - other.pos_; +} + +template <typename T> +inline typename r_vector<T>::const_iterator& r_vector<T>::const_iterator::operator+( + R_xlen_t rhs) { + pos_ += rhs; + if (data_->is_altrep() && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template <typename T> +inline T cpp11::r_vector<T>::at(R_xlen_t pos) const { + if (pos < 0 || pos >= length_) { + throw std::out_of_range("r_vector"); + } + + return operator[](pos); +} + +template <typename T> +inline T cpp11::r_vector<T>::at(size_type pos) const { + return at(static_cast<R_xlen_t>(pos)); +} + +template <typename T> +inline T cpp11::r_vector<T>::operator[](const r_string& name) const { + SEXP names = this->names(); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + return operator[](pos); + } + } + + throw std::out_of_range("r_vector"); +} + +template <typename T> +inline bool cpp11::r_vector<T>::contains(const r_string& name) const { + SEXP names = this->names(); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + return true; + } + } + + return false; +} + +template <typename T> +inline typename cpp11::r_vector<T>::const_iterator cpp11::r_vector<T>::find( + const r_string& name) const { + SEXP names = this->names(); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + return begin() + pos; + } + } + + return end(); +} + +template <typename T> +inline T r_vector<T>::const_iterator::operator*() const { + if (data_->is_altrep()) { + return buf_[pos_ - block_start_]; + } else { + return data_->data_p_[pos_]; + } +} + +#ifdef LONG_VECTOR_SUPPORT 
+template <typename T> +inline T r_vector<T>::operator[](const int pos) const { + return operator[](static_cast<R_xlen_t>(pos)); +} + +template <typename T> +inline T r_vector<T>::at(const int pos) const { + return at(static_cast<R_xlen_t>(pos)); +} +#endif + +template <typename T> +inline T r_vector<T>::operator[](size_type pos) const { + return operator[](static_cast<R_xlen_t>(pos)); +} + +namespace writable { + +template <typename T> +r_vector<T>::proxy::proxy(SEXP data, const R_xlen_t index, T* const p, bool is_altrep) + : data_(data), index_(index), p_(p), is_altrep_(is_altrep) {} + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::iterator::operator*() const { + if (data_.is_altrep()) { + return proxy(data_.data(), pos_, const_cast<T*>(&buf_[pos_ - block_start_]), true); + } else { + return proxy(data_.data(), pos_, + data_.data_p_ != nullptr ? &data_.data_p_[pos_] : nullptr, false); + } +} + +template <typename T> +r_vector<T>::iterator::iterator(const r_vector& data, R_xlen_t pos) + : r_vector<T>::const_iterator(&data, pos), data_(data) {} + +template <typename T> +inline typename r_vector<T>::iterator& r_vector<T>::iterator::operator++() { + ++pos_; + if (data_.is_altrep() && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template <typename T> +inline typename r_vector<T>::iterator& r_vector<T>::iterator::operator+(R_xlen_t rhs) { + pos_ += rhs; + if (data_.is_altrep() && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template <typename T> +inline typename r_vector<T>::iterator r_vector<T>::begin() const { + return iterator(*this, 0); +} + +template <typename T> +inline typename r_vector<T>::iterator r_vector<T>::end() const { + return iterator(*this, length_); +} + +template <typename T> +inline r_vector<T>::r_vector(const SEXP& data) + : cpp11::r_vector<T>(safe[Rf_shallow_duplicate](data)), + protect_(preserved.insert(data_)), + capacity_(length_) {} + +template <typename 
T> +inline r_vector<T>::r_vector(const SEXP& data, bool is_altrep) + : cpp11::r_vector<T>(safe[Rf_shallow_duplicate](data), is_altrep), + protect_(preserved.insert(data_)), + capacity_(length_) {} + +template <typename T> +inline r_vector<T>::r_vector(SEXP&& data) + : cpp11::r_vector<T>(data), protect_(preserved.insert(data_)), capacity_(length_) {} + +template <typename T> +inline r_vector<T>::r_vector(SEXP&& data, bool is_altrep) + : cpp11::r_vector<T>(data, is_altrep), + protect_(preserved.insert(data_)), + capacity_(length_) {} + +template <typename T> +template <typename Iter> +inline r_vector<T>::r_vector(Iter first, Iter last) : r_vector() { + reserve(last - first); + while (first != last) { + push_back(*first); + ++first; + } +} + +template <typename T> +template <typename V, typename W> +inline r_vector<T>::r_vector(const V& obj) : r_vector() { + auto first = obj.begin(); + auto last = obj.end(); + reserve(last - first); + while (first != last) { + push_back(*first); + ++first; + } +} + +template <typename T> +inline r_vector<T>::r_vector(R_xlen_t size) : r_vector() { + resize(size); +} + +template <typename T> +inline r_vector<T>::~r_vector() { + preserved.release(protect_); +} + +#ifdef LONG_VECTOR_SUPPORT +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::operator[](const int pos) const { + return operator[](static_cast<R_xlen_t>(pos)); +} + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::at(const int pos) const { + return at(static_cast<R_xlen_t>(pos)); +} +#endif + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::operator[](const R_xlen_t pos) const { + if (is_altrep_) { + return {data_, pos, nullptr, true}; + } + return {data_, pos, data_p_ != nullptr ? 
&data_p_[pos] : nullptr, false}; +} + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::operator[](size_type pos) const { + return operator[](static_cast<R_xlen_t>(pos)); +} + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::at(const R_xlen_t pos) const { + if (pos < 0 || pos >= length_) { + throw std::out_of_range("r_vector"); + } + return operator[](static_cast<R_xlen_t>(pos)); +} + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::at(size_type pos) const { + return at(static_cast<R_xlen_t>(pos)); +} + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::operator[](const r_string& name) const { + SEXP names = PROTECT(this->names()); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + UNPROTECT(1); + return operator[](pos); + } + } + + UNPROTECT(1); + throw std::out_of_range("r_vector"); +} + +template <typename T> +inline typename r_vector<T>::proxy r_vector<T>::at(const r_string& name) const { + return operator[](name); +} + +template <typename T> +inline typename r_vector<T>::iterator r_vector<T>::find(const r_string& name) const { + SEXP names = PROTECT(this->names()); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + UNPROTECT(1); + return begin() + pos; + } + } + + UNPROTECT(1); + return end(); +} + +template <typename T> +inline r_vector<T>::r_vector(const r_vector<T>& rhs) + : cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), + protect_(preserved.insert(data_)), + capacity_(rhs.capacity_) {} + +template <typename T> +inline r_vector<T>::r_vector(r_vector<T>&& rhs) + : cpp11::r_vector<T>(rhs), protect_(rhs.protect_), capacity_(rhs.capacity_) { + rhs.data_ = R_NilValue; + rhs.protect_ = R_NilValue; +} + +template <typename T> +inline 
r_vector<T>::r_vector(const cpp11::r_vector<T>& rhs) + : cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), + protect_(preserved.insert(data_)), + capacity_(rhs.length_) {} + +// We don't release the old object until the end in case we throw an exception +// during the duplicate. +template <typename T> +inline r_vector<T>& r_vector<T>::operator=(const r_vector<T>& rhs) { + if (data_ == rhs.data_) { + return *this; + } + + cpp11::r_vector<T>::operator=(rhs); + + auto old_protect = protect_; + + data_ = safe[Rf_shallow_duplicate](rhs.data_); + protect_ = preserved.insert(data_); + + preserved.release(old_protect); + + capacity_ = rhs.capacity_; + + return *this; +} + +template <typename T> +inline r_vector<T>& r_vector<T>::operator=(r_vector<T>&& rhs) { + if (data_ == rhs.data_) { + return *this; + } + + cpp11::r_vector<T>::operator=(rhs); + + SEXP old_protect = protect_; + + data_ = rhs.data_; + protect_ = preserved.insert(data_); + + preserved.release(old_protect); + + capacity_ = rhs.capacity_; + + rhs.data_ = R_NilValue; + rhs.protect_ = R_NilValue; + + return *this; +} + +template <typename T> +inline void r_vector<T>::pop_back() { + --length_; +} + +template <typename T> +inline void r_vector<T>::resize(R_xlen_t count) { + reserve(count); + length_ = count; +} + +template <typename T> +inline typename r_vector<T>::iterator r_vector<T>::insert(R_xlen_t pos, T value) { + push_back(value); + + R_xlen_t i = length_ - 1; + while (i > pos) { + operator[](i) = (T) operator[](i - 1); + --i; + }; + operator[](pos) = value; + + return begin() + pos; +} + +template <typename T> +inline typename r_vector<T>::iterator r_vector<T>::erase(R_xlen_t pos) { + R_xlen_t i = pos; + while (i < length_ - 1) { + operator[](i) = (T) operator[](i + 1); + ++i; + } + pop_back(); + + return begin() + pos; +} + +template <typename T> +inline void r_vector<T>::clear() { + length_ = 0; +} + +template <typename T> +inline r_vector<T>::operator SEXP() const { + if (length_ < capacity_) { +#if 
R_VERSION >= R_Version(3, 4, 0) + SETLENGTH(data_, length_); + SET_TRUELENGTH(data_, capacity_); + SET_GROWABLE_BIT(data_); +#else + auto* p = const_cast<r_vector<T>*>(this); + p->data_ = safe[Rf_lengthgets](data_, length_); +#endif + } + return data_; +} + +template <typename T> +inline typename r_vector<T>::proxy& r_vector<T>::proxy::operator+=(const T& rhs) { + operator=(static_cast<T>(*this) + rhs); + return *this; +} + +template <typename T> +inline typename r_vector<T>::proxy& r_vector<T>::proxy::operator-=(const T& rhs) { + operator=(static_cast<T>(*this) - rhs); + return *this; +} + +template <typename T> +inline typename r_vector<T>::proxy& r_vector<T>::proxy::operator*=(const T& rhs) { + operator=(static_cast<T>(*this) * rhs); + return *this; +} + +template <typename T> +inline typename r_vector<T>::proxy& r_vector<T>::proxy::operator/=(const T& rhs) { + operator=(static_cast<T>(*this) / rhs); + return *this; +} + +template <typename T> +inline typename r_vector<T>::proxy& r_vector<T>::proxy::operator++(int) { + operator=(static_cast<T>(*this) + 1); + return *this; +} + +template <typename T> +inline typename r_vector<T>::proxy& r_vector<T>::proxy::operator--(int) { + operator=(static_cast<T>(*this) - 1); + return *this; +} + +template <typename T> +inline void r_vector<T>::proxy::operator--() { + operator=(static_cast<T>(*this) - 1); +} + +template <typename T> +inline void r_vector<T>::proxy::operator++() { + operator=(static_cast<T>(*this) + 1); +} + +} // namespace writable + +// TODO: is there a better condition we could use, e.g. assert something true +// rather than three things false? 
+template <typename C, typename T> +using is_container_but_not_sexp_or_string = typename std::enable_if< + !std::is_constructible<C, SEXP>::value && + !std::is_same<typename std::decay<C>::type, std::string>::value && + !std::is_same<typename std::decay<T>::type, std::string>::value, + typename std::decay<C>::type>::type; + +template <typename C, typename T = typename std::decay<C>::type::value_type> +// typename T = typename C::value_type> +is_container_but_not_sexp_or_string<C, T> as_cpp(SEXP from) { + auto obj = cpp11::r_vector<T>(from); + return {obj.begin(), obj.end()}; +} + +// TODO: could we make this generalize outside of std::string? +template <typename C, typename T = C> +using is_vector_of_strings = typename std::enable_if< + std::is_same<typename std::decay<T>::type, std::string>::value, + typename std::decay<C>::type>::type; + +template <typename C, typename T = typename std::decay<C>::type::value_type> +// typename T = typename C::value_type> +is_vector_of_strings<C, T> as_cpp(SEXP from) { + auto obj = cpp11::r_vector<cpp11::r_string>(from); + typename std::decay<C>::type res; + auto it = obj.begin(); + while (it != obj.end()) { + r_string s = *it; + res.emplace_back(static_cast<std::string>(s)); + ++it; + } + return res; +} + +template <typename T> +bool operator==(const r_vector<T>& lhs, const r_vector<T>& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + auto lhs_it = lhs.begin(); + auto rhs_it = rhs.begin(); + + auto end = lhs.end(); + while (lhs_it != end) { + if (!(*lhs_it == *rhs_it)) { + return false; + } + ++lhs_it; + ++rhs_it; + } + return true; +} + +template <typename T> +bool operator!=(const r_vector<T>& lhs, const r_vector<T>& rhs) { + return !(lhs == rhs); +} + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/raws.hpp b/src/arrow/r/inst/include/cpp11/raws.hpp new file mode 100644 index 000000000..ef1ab304d --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/raws.hpp @@ -0,0 +1,148 @@ +// cpp11 version: 
0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <algorithm> // for min +#include <array> // for array +#include <cstdint> // for uint8_t +#include <initializer_list> // for initializer_list + +#include "cpp11/R.hpp" // for RAW, SEXP, SEXPREC, Rf_allocVector +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for preserved +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for raws + +namespace cpp11 { + +template <> +inline SEXP r_vector<uint8_t>::valid_type(SEXP data) { + if (TYPEOF(data) != RAWSXP) { + throw type_error(RAWSXP, TYPEOF(data)); + } + return data; +} + +template <> +inline uint8_t r_vector<uint8_t>::operator[](const R_xlen_t pos) const { + // NOPROTECT: likely too costly to unwind protect every elt + return is_altrep_ ? RAW_ELT(data_, pos) : data_p_[pos]; +} + +template <> +inline uint8_t* r_vector<uint8_t>::get_p(bool is_altrep, SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return reinterpret_cast<uint8_t*>(RAW(data)); + } +} + +template <> +inline void r_vector<uint8_t>::const_iterator::fill_buf(R_xlen_t pos) { + using namespace cpp11::literals; + length_ = std::min(64_xl, data_->size() - pos); + unwind_protect( + [&] { RAW_GET_REGION(data_->data_, pos, length_, (uint8_t*)buf_.data()); }); + block_start_ = pos; +} + +typedef r_vector<uint8_t> raws; + +namespace writable { + +template <> +inline typename r_vector<uint8_t>::proxy& r_vector<uint8_t>::proxy::operator=( + const uint8_t& rhs) { + if (is_altrep_) { + // NOPROTECT: likely too costly to unwind protect every set elt + RAW(data_)[index_] = rhs; + } else { + *p_ = rhs; + } + return *this; +} + +template <> +inline r_vector<uint8_t>::proxy::operator uint8_t() const { + if (p_ == nullptr) { + // NOPROTECT: likely too costly to unwind protect every elt + return RAW(data_)[index_]; + } else { + return 
*p_; + } +} + +template <> +inline r_vector<uint8_t>::r_vector(std::initializer_list<uint8_t> il) + : cpp11::r_vector<uint8_t>(safe[Rf_allocVector](RAWSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + data_p_[i] = *it; + } +} + +template <> +inline r_vector<uint8_t>::r_vector(std::initializer_list<named_arg> il) + : cpp11::r_vector<uint8_t>(safe[Rf_allocVector](RAWSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + int n_protected = 0; + + try { + unwind_protect([&] { + Rf_setAttrib(data_, R_NamesSymbol, Rf_allocVector(STRSXP, capacity_)); + SEXP names = PROTECT(Rf_getAttrib(data_, R_NamesSymbol)); + ++n_protected; + + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + data_p_[i] = RAW_ELT(it->value(), 0); + SET_STRING_ELT(names, i, Rf_mkCharCE(it->name(), CE_UTF8)); + } + UNPROTECT(n_protected); + }); + } catch (const unwind_exception& e) { + preserved.release(protect_); + UNPROTECT(n_protected); + throw e; + } +} + +template <> +inline void r_vector<uint8_t>::reserve(R_xlen_t new_capacity) { + data_ = data_ == R_NilValue ? safe[Rf_allocVector](RAWSXP, new_capacity) + : safe[Rf_xlengthgets](data_, new_capacity); + + SEXP old_protect = protect_; + protect_ = preserved.insert(data_); + preserved.release(old_protect); + + data_p_ = reinterpret_cast<uint8_t*>(RAW(data_)); + capacity_ = new_capacity; +} + +template <> +inline void r_vector<uint8_t>::push_back(uint8_t value) { + while (length_ >= capacity_) { + reserve(capacity_ == 0 ? 
1 : capacity_ *= 2); + } + if (is_altrep_) { + // NOPROTECT: likely too costly to unwind protect every elt + RAW(data_)[length_] = value; + } else { + data_p_[length_] = value; + } + ++length_; +} + +typedef r_vector<uint8_t> raws; + +} // namespace writable + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/sexp.hpp b/src/arrow/r/inst/include/cpp11/sexp.hpp new file mode 100644 index 000000000..0a5edccb4 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/sexp.hpp @@ -0,0 +1,85 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <stddef.h> // for size_t + +#include <string> // for string, basic_string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, REAL_ELT, R_NilV... +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/protect.hpp" // for preserved + +namespace cpp11 { + +/// Converting to SEXP +class sexp { + private: + SEXP data_ = R_NilValue; + SEXP preserve_token_ = R_NilValue; + + public: + sexp() = default; + + sexp(SEXP data) : data_(data), preserve_token_(preserved.insert(data_)) { + // REprintf("created %x %x : %i\n", data_, preserve_token_, protect_head_size()); + } + + sexp(const sexp& rhs) { + data_ = rhs.data_; + preserve_token_ = preserved.insert(data_); + // REprintf("copied %x new protect %x : %i\n", rhs.data_, preserve_token_, + // protect_head_size()); + } + + sexp(sexp&& rhs) { + data_ = rhs.data_; + preserve_token_ = rhs.preserve_token_; + + rhs.data_ = R_NilValue; + rhs.preserve_token_ = R_NilValue; + + // REprintf("moved %x : %i\n", rhs.data_, protect_head_size()); + } + + sexp& operator=(const sexp& rhs) { + preserved.release(preserve_token_); + + data_ = rhs.data_; + preserve_token_ = preserved.insert(data_); + // REprintf("assigned %x : %i\n", rhs.data_, protect_head_size()); + return *this; + } + + // void swap(sexp& rhs) { + // sexp tmp(rhs); + // rhs = *this; + //*this = tmp; + //} + + ~sexp() { preserved.release(preserve_token_); } + + attribute_proxy<sexp> 
attr(const char* name) const { + return attribute_proxy<sexp>(*this, name); + } + + attribute_proxy<sexp> attr(const std::string& name) const { + return attribute_proxy<sexp>(*this, name.c_str()); + } + + attribute_proxy<sexp> attr(SEXP name) const { + return attribute_proxy<sexp>(*this, name); + } + + attribute_proxy<sexp> names() const { + return attribute_proxy<sexp>(*this, R_NamesSymbol); + } + + operator SEXP() const { return data_; } + operator double() const { return REAL_ELT(data_, 0); } + operator size_t() const { return REAL_ELT(data_, 0); } + operator bool() const { return LOGICAL_ELT(data_, 0); } + SEXP data() const { return data_; } +}; + +} // namespace cpp11 diff --git a/src/arrow/r/inst/include/cpp11/strings.hpp b/src/arrow/r/inst/include/cpp11/strings.hpp new file mode 100644 index 000000000..adca2a174 --- /dev/null +++ b/src/arrow/r/inst/include/cpp11/strings.hpp @@ -0,0 +1,187 @@ +// cpp11 version: 0.3.1.1 +// vendored on: 2021-08-11 +#pragma once + +#include <initializer_list> // for initializer_list +#include <string> // for string, basic_string + +#include "cpp11/R.hpp" // for SEXP, TYPEOF, SEXPREC, SET_STRI... 
+#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for preserved +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for strings + +namespace cpp11 { + +template <> +inline SEXP r_vector<r_string>::valid_type(SEXP data) { + if (TYPEOF(data) != STRSXP) { + throw type_error(STRSXP, TYPEOF(data)); + } + return data; +} + +template <> +inline r_string r_vector<r_string>::operator[](const R_xlen_t pos) const { + // NOPROTECT: likely too costly to unwind protect every elt + return STRING_ELT(data_, pos); +} + +template <> +inline r_string* r_vector<r_string>::get_p(bool, SEXP) { + return nullptr; +} + +template <> +inline void r_vector<r_string>::const_iterator::fill_buf(R_xlen_t) { + return; +} + +template <> +inline r_string r_vector<r_string>::const_iterator::operator*() const { + return STRING_ELT(data_->data(), pos_); +} + +typedef r_vector<r_string> strings; + +namespace writable { + +template <> +inline typename r_vector<r_string>::proxy& r_vector<r_string>::proxy::operator=( + const r_string& rhs) { + unwind_protect([&] { SET_STRING_ELT(data_, index_, rhs); }); + return *this; +} + +template <> +inline r_vector<r_string>::proxy::operator r_string() const { + // NOPROTECT: likely too costly to unwind protect every elt + return STRING_ELT(data_, index_); +} + +inline bool operator==(const r_vector<r_string>::proxy& lhs, r_string rhs) { + return static_cast<r_string>(lhs).operator==(static_cast<std::string>(rhs).c_str()); +} + +inline SEXP alloc_or_copy(const SEXP data) { + switch (TYPEOF(data)) { + case CHARSXP: + return cpp11::r_vector<r_string>(safe[Rf_allocVector](STRSXP, 1)); + case STRSXP: + return safe[Rf_shallow_duplicate](data); + default: + throw type_error(STRSXP, TYPEOF(data)); + } +} + +inline SEXP 
alloc_if_charsxp(const SEXP data) { + switch (TYPEOF(data)) { + case CHARSXP: + return cpp11::r_vector<r_string>(safe[Rf_allocVector](STRSXP, 1)); + case STRSXP: + return data; + default: + throw type_error(STRSXP, TYPEOF(data)); + } +} + +template <> +inline r_vector<r_string>::r_vector(const SEXP& data) + : cpp11::r_vector<r_string>(alloc_or_copy(data)), + protect_(preserved.insert(data_)), + capacity_(length_) { + if (TYPEOF(data) == CHARSXP) { + SET_STRING_ELT(data_, 0, data); + } +} + +template <> +inline r_vector<r_string>::r_vector(SEXP&& data) + : cpp11::r_vector<r_string>(alloc_if_charsxp(data)), + protect_(preserved.insert(data_)), + capacity_(length_) { + if (TYPEOF(data) == CHARSXP) { + SET_STRING_ELT(data_, 0, data); + } +} + +template <> +inline r_vector<r_string>::r_vector(std::initializer_list<r_string> il) + : cpp11::r_vector<r_string>(as_sexp(il)), capacity_(il.size()) {} + +template <> +inline r_vector<r_string>::r_vector(std::initializer_list<const char*> il) + : cpp11::r_vector<r_string>(as_sexp(il)), capacity_(il.size()) {} + +template <> +inline r_vector<r_string>::r_vector(std::initializer_list<std::string> il) + : cpp11::r_vector<r_string>(as_sexp(il)), capacity_(il.size()) {} + +template <> +inline r_vector<r_string>::r_vector(std::initializer_list<named_arg> il) + : cpp11::r_vector<r_string>(safe[Rf_allocVector](STRSXP, il.size())), + capacity_(il.size()) { + protect_ = preserved.insert(data_); + int n_protected = 0; + + try { + unwind_protect([&] { + Rf_setAttrib(data_, R_NamesSymbol, Rf_allocVector(STRSXP, capacity_)); + SEXP names = PROTECT(Rf_getAttrib(data_, R_NamesSymbol)); + ++n_protected; + auto it = il.begin(); + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SET_STRING_ELT(data_, i, STRING_ELT(it->value(), 0)); + SET_STRING_ELT(names, i, Rf_mkCharCE(it->name(), CE_UTF8)); + } + UNPROTECT(n_protected); + }); + } catch (const unwind_exception& e) { + preserved.release(protect_); + UNPROTECT(n_protected); + throw e; + } +} + 
+template <> +inline void r_vector<r_string>::reserve(R_xlen_t new_capacity) { + data_ = data_ == R_NilValue ? safe[Rf_allocVector](STRSXP, new_capacity) + : safe[Rf_xlengthgets](data_, new_capacity); + + SEXP old_protect = protect_; + protect_ = preserved.insert(data_); + preserved.release(old_protect); + + capacity_ = new_capacity; +} + +template <> +inline void r_vector<r_string>::push_back(r_string value) { + while (length_ >= capacity_) { + reserve(capacity_ == 0 ? 1 : capacity_ *= 2); + } + unwind_protect([&] { SET_STRING_ELT(data_, length_, value); }); + ++length_; +} + +typedef r_vector<r_string> strings; + +template <typename T> +inline void r_vector<T>::push_back(const named_arg& value) { + push_back(value.value()); + if (Rf_xlength(names()) == 0) { + cpp11::writable::strings new_nms(size()); + names() = new_nms; + } + cpp11::writable::strings nms(names()); + nms[size() - 1] = value.name(); +} + +} // namespace writable + +} // namespace cpp11 diff --git a/src/arrow/r/inst/v0.7.1.parquet b/src/arrow/r/inst/v0.7.1.parquet Binary files differnew file mode 100644 index 000000000..44670bcd1 --- /dev/null +++ b/src/arrow/r/inst/v0.7.1.parquet |