summaryrefslogtreecommitdiffstats
path: root/src/arrow/cpp/src/parquet/encryption/crypto_factory.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/cpp/src/parquet/encryption/crypto_factory.cc')
-rw-r--r--src/arrow/cpp/src/parquet/encryption/crypto_factory.cc175
1 files changed, 175 insertions, 0 deletions
diff --git a/src/arrow/cpp/src/parquet/encryption/crypto_factory.cc b/src/arrow/cpp/src/parquet/encryption/crypto_factory.cc
new file mode 100644
index 000000000..384516bff
--- /dev/null
+++ b/src/arrow/cpp/src/parquet/encryption/crypto_factory.cc
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/result.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string.h"
+#include "arrow/util/string_view.h"
+
+#include "parquet/encryption/crypto_factory.h"
+#include "parquet/encryption/encryption_internal.h"
+#include "parquet/encryption/file_key_material_store.h"
+#include "parquet/encryption/file_key_unwrapper.h"
+#include "parquet/encryption/key_toolkit_internal.h"
+
+namespace parquet {
+namespace encryption {
+
+void CryptoFactory::RegisterKmsClientFactory(
+ std::shared_ptr<KmsClientFactory> kms_client_factory) {
+ key_toolkit_.RegisterKmsClientFactory(kms_client_factory);
+}
+
+std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProperties(
+ const KmsConnectionConfig& kms_connection_config,
+ const EncryptionConfiguration& encryption_config) {
+ if (!encryption_config.uniform_encryption && encryption_config.column_keys.empty()) {
+ throw ParquetException("Either column_keys or uniform_encryption must be set");
+ } else if (encryption_config.uniform_encryption &&
+ !encryption_config.column_keys.empty()) {
+ throw ParquetException("Cannot set both column_keys and uniform_encryption");
+ }
+ const std::string& footer_key_id = encryption_config.footer_key;
+ const std::string& column_key_str = encryption_config.column_keys;
+
+ std::shared_ptr<FileKeyMaterialStore> key_material_store = NULL;
+ if (!encryption_config.internal_key_material) {
+ // TODO: using external key material store with Hadoop file system
+ throw ParquetException("External key material store is not supported yet.");
+ }
+
+ FileKeyWrapper key_wrapper(&key_toolkit_, kms_connection_config, key_material_store,
+ encryption_config.cache_lifetime_seconds,
+ encryption_config.double_wrapping);
+
+ int32_t dek_length_bits = encryption_config.data_key_length_bits;
+ if (!internal::ValidateKeyLength(dek_length_bits)) {
+ std::ostringstream ss;
+ ss << "Wrong data key length : " << dek_length_bits;
+ throw ParquetException(ss.str());
+ }
+
+ int dek_length = dek_length_bits / 8;
+
+ std::string footer_key(dek_length, '\0');
+ RandBytes(reinterpret_cast<uint8_t*>(&footer_key[0]),
+ static_cast<int>(footer_key.size()));
+
+ std::string footer_key_metadata =
+ key_wrapper.GetEncryptionKeyMetadata(footer_key, footer_key_id, true);
+
+ FileEncryptionProperties::Builder properties_builder =
+ FileEncryptionProperties::Builder(footer_key);
+ properties_builder.footer_key_metadata(footer_key_metadata);
+ properties_builder.algorithm(encryption_config.encryption_algorithm);
+
+ if (!encryption_config.uniform_encryption) {
+ ColumnPathToEncryptionPropertiesMap encrypted_columns =
+ GetColumnEncryptionProperties(dek_length, column_key_str, &key_wrapper);
+ properties_builder.encrypted_columns(encrypted_columns);
+
+ if (encryption_config.plaintext_footer) {
+ properties_builder.set_plaintext_footer();
+ }
+ }
+
+ return properties_builder.build();
+}
+
+ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties(
+ int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper) {
+ ColumnPathToEncryptionPropertiesMap encrypted_columns;
+
+ std::vector<::arrow::util::string_view> key_to_columns =
+ ::arrow::internal::SplitString(column_keys, ';');
+ for (size_t i = 0; i < key_to_columns.size(); ++i) {
+ std::string cur_key_to_columns =
+ ::arrow::internal::TrimString(std::string(key_to_columns[i]));
+ if (cur_key_to_columns.empty()) {
+ continue;
+ }
+
+ std::vector<::arrow::util::string_view> parts =
+ ::arrow::internal::SplitString(cur_key_to_columns, ':');
+ if (parts.size() != 2) {
+ std::ostringstream message;
+ message << "Incorrect key to columns mapping in column keys property"
+ << ": [" << cur_key_to_columns << "]";
+ throw ParquetException(message.str());
+ }
+
+ std::string column_key_id = ::arrow::internal::TrimString(std::string(parts[0]));
+ if (column_key_id.empty()) {
+ throw ParquetException("Empty key name in column keys property.");
+ }
+
+ std::string column_names_str = ::arrow::internal::TrimString(std::string(parts[1]));
+ std::vector<::arrow::util::string_view> column_names =
+ ::arrow::internal::SplitString(column_names_str, ',');
+ if (0 == column_names.size()) {
+ throw ParquetException("No columns to encrypt defined for key: " + column_key_id);
+ }
+
+ for (size_t j = 0; j < column_names.size(); ++j) {
+ std::string column_name =
+ ::arrow::internal::TrimString(std::string(column_names[j]));
+ if (column_name.empty()) {
+ std::ostringstream message;
+ message << "Empty column name in column keys property for key: " << column_key_id;
+ throw ParquetException(message.str());
+ }
+
+ if (encrypted_columns.find(column_name) != encrypted_columns.end()) {
+ throw ParquetException("Multiple keys defined for the same column: " +
+ column_name);
+ }
+
+ std::string column_key(dek_length, '\0');
+ RandBytes(reinterpret_cast<uint8_t*>(&column_key[0]),
+ static_cast<int>(column_key.size()));
+ std::string column_key_key_metadata =
+ key_wrapper->GetEncryptionKeyMetadata(column_key, column_key_id, false);
+
+ std::shared_ptr<ColumnEncryptionProperties> cmd =
+ ColumnEncryptionProperties::Builder(column_name)
+ .key(column_key)
+ ->key_metadata(column_key_key_metadata)
+ ->build();
+ encrypted_columns.insert({column_name, cmd});
+ }
+ }
+ if (encrypted_columns.empty()) {
+ throw ParquetException("No column keys configured in column keys property.");
+ }
+
+ return encrypted_columns;
+}
+
+std::shared_ptr<FileDecryptionProperties> CryptoFactory::GetFileDecryptionProperties(
+ const KmsConnectionConfig& kms_connection_config,
+ const DecryptionConfiguration& decryption_config) {
+ std::shared_ptr<DecryptionKeyRetriever> key_retriever(new FileKeyUnwrapper(
+ &key_toolkit_, kms_connection_config, decryption_config.cache_lifetime_seconds));
+
+ return FileDecryptionProperties::Builder()
+ .key_retriever(key_retriever)
+ ->plaintext_files_allowed()
+ ->build();
+}
+
+} // namespace encryption
+} // namespace parquet