summary | refs | log | tree | commit | diff | stats
path: root/src/arrow/go/parquet/internal/encryption
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
commit: e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree: 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/go/parquet/internal/encryption
parent: Initial commit. (diff)
download: ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/go/parquet/internal/encryption')
-rw-r--r-- src/arrow/go/parquet/internal/encryption/aes.go 264
-rw-r--r-- src/arrow/go/parquet/internal/encryption/decryptor.go 261
-rw-r--r-- src/arrow/go/parquet/internal/encryption/encryptor.go 237
-rw-r--r-- src/arrow/go/parquet/internal/encryption/key_handling.go 62
4 files changed, 824 insertions, 0 deletions
diff --git a/src/arrow/go/parquet/internal/encryption/aes.go b/src/arrow/go/parquet/internal/encryption/aes.go
new file mode 100644
index 000000000..227325723
--- /dev/null
+++ b/src/arrow/go/parquet/internal/encryption/aes.go
@@ -0,0 +1,264 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package encryption contains the internal helpers for the parquet AES encryption/decryption handling.
+//
+// Testing for this is done via integration testing at the top level parquet package via attempting to
+// read and write encrypted files with different configurations to match test files in parquet-testing
+package encryption
+
+import (
+ "bytes"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/rand"
+ "encoding/binary"
+ "io"
+
+ "github.com/apache/arrow/go/v6/parquet"
+ "golang.org/x/xerrors"
+)
+
+// Constants describing the layout of the AES encrypted buffers written and read by this package.
+const (
+ GcmTagLength = 16 // size in bytes of the GCM tag; checked against aead.Overhead() before sealing
+ NonceLength = 12 // size in bytes of the nonce stored right after the length prefix
+
+ gcmMode = 0 // aesEncryptor/aesDecryptor mode value selecting AES-GCM
+ ctrMode = 1 // aesEncryptor/aesDecryptor mode value selecting AES-CTR
+ ctrIVLen = 16 // size in bytes of the CTR IV: the nonce followed by a 4-byte initial counter
+ bufferSizeLength = 4 // size in bytes of the little endian length prefix that starts every buffer
+)
+
+// Module type identifiers, written as a single byte into the AAD by CreateModuleAad.
+// The order here is important as the values are assigned via iota.
+const (
+ FooterModule int8 = iota // 0: the AAD carries no ordinals
+ ColumnMetaModule // 1
+ DataPageModule // 2: the AAD additionally carries the page ordinal
+ DictPageModule // 3
+ DataPageHeaderModule // 4: the AAD additionally carries the page ordinal
+ DictPageHeaderModule // 5
+ ColumnIndexModule // 6
+ OffsetIndexModule // 7
+)
+
+type aesEncryptor struct { // AES encryptor fixed to one mode at construction time
+ mode int // gcmMode or ctrMode
+ ciphertextSizeDelta int // length prefix + nonce, plus the GCM tag when mode is gcmMode
+}
+
+// NewAesEncryptor builds an encryptor for the given cipher. GCM is selected
+// whenever metadata is true or alg is parquet.AesGcm; every other combination
+// selects CTR, whose output carries no authentication tag.
+func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor {
+	useGcm := metadata || alg == parquet.AesGcm
+	if !useGcm {
+		return &aesEncryptor{
+			mode:                ctrMode,
+			ciphertextSizeDelta: bufferSizeLength + NonceLength,
+		}
+	}
+
+	return &aesEncryptor{
+		mode:                gcmMode,
+		ciphertextSizeDelta: bufferSizeLength + NonceLength + GcmTagLength,
+	}
+}
+
+// CiphertextSizeDelta reports how many bytes the encrypted output is larger
+// than the plaintext it was produced from.
+func (a *aesEncryptor) CiphertextSizeDelta() int {
+	return a.ciphertextSizeDelta
+}
+
+// SignedFooterEncrypt seals the provided footer bytes with AES-GCM using the given key, AAD and nonce
+// and writes the result to w as: a 4-byte little endian length, the nonce, then the sealed bytes.
+//
+// It returns the number of bytes that were written to w. It panics if the encryptor is not in GCM
+// mode, if the key is not a valid AES key, or if any write to w fails.
+func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int {
+	if a.mode != gcmMode {
+		panic("must use AES GCM (metadata) encryptor")
+	}
+
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		panic(err)
+	}
+
+	aead, err := cipher.NewGCM(block)
+	if err != nil {
+		panic(err)
+	}
+	if aead.NonceSize() != NonceLength {
+		panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
+	}
+	if aead.Overhead() != GcmTagLength {
+		panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
+	}
+
+	ciphertext := aead.Seal(nil, nonce, footer, aad)
+	bufferSize := uint32(len(ciphertext) + len(nonce))
+	// data is written with a prefix of the size written as a little endian 32bit int.
+	if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil {
+		panic(err)
+	}
+	// a failed write leaves a truncated buffer behind while the returned count claims it is
+	// complete, so report it the same way as a failed length prefix instead of dropping it.
+	if _, err := w.Write(nonce); err != nil {
+		panic(err)
+	}
+	if _, err := w.Write(ciphertext); err != nil {
+		panic(err)
+	}
+	return bufferSizeLength + int(bufferSize)
+}
+
+// Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w as:
+// a 4-byte little endian length, a freshly generated random nonce, then the encrypted bytes.
+// In GCM mode the encrypted bytes include the authentication tag; in CTR mode aad is not used.
+//
+// Returns the total number of bytes written. Panics if the key is not a valid AES key, if the
+// nonce cannot be generated, or if any write to w fails.
+func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int {
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		panic(err)
+	}
+
+	nonce := make([]byte, NonceLength)
+	// never continue with an unfilled nonce: repeating a nonce under the same key
+	// breaks the guarantees of both GCM and CTR.
+	if _, err := rand.Read(nonce); err != nil {
+		panic(err)
+	}
+
+	if a.mode == gcmMode {
+		aead, err := cipher.NewGCM(block)
+		if err != nil {
+			panic(err)
+		}
+		if aead.NonceSize() != NonceLength {
+			panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
+		}
+		if aead.Overhead() != GcmTagLength {
+			panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
+		}
+
+		ciphertext := aead.Seal(nil, nonce, src, aad)
+		bufferSize := len(ciphertext) + len(nonce)
+		// data is written with a prefix of the size written as a little endian 32bit int.
+		if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
+			panic(err)
+		}
+		if _, err := w.Write(nonce); err != nil {
+			panic(err)
+		}
+		if _, err := w.Write(ciphertext); err != nil {
+			panic(err)
+		}
+		return bufferSizeLength + bufferSize
+	}
+
+	// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
+	// counter field.
+	// The first 31 bits of the initial counter field are set to 0, the last bit
+	// is set to 1.
+	iv := make([]byte, ctrIVLen)
+	copy(iv, nonce)
+	iv[ctrIVLen-1] = 1
+
+	bufferSize := NonceLength + len(src)
+	// data is written with a prefix of the size written as a little endian 32bit int.
+	if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
+		panic(err)
+	}
+	if _, err := w.Write(nonce); err != nil {
+		panic(err)
+	}
+	// StreamWriter encrypts src with the CTR key stream while forwarding it to w.
+	stream := cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}
+	if _, err := stream.Write(src); err != nil {
+		panic(err)
+	}
+	return bufferSizeLength + bufferSize
+}
+
+type aesDecryptor struct { // AES decryptor fixed to one mode at construction time
+ mode int // gcmMode or ctrMode
+ ciphertextSizeDelta int // length prefix + nonce, plus the GCM tag when mode is gcmMode
+}
+
+// newAesDecryptor builds a decryptor for the given cipher. GCM is selected
+// whenever metadata is true or alg is parquet.AesGcm; every other combination
+// selects CTR, whose input carries no authentication tag.
+func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor {
+	useGcm := metadata || alg == parquet.AesGcm
+	if !useGcm {
+		return &aesDecryptor{
+			mode:                ctrMode,
+			ciphertextSizeDelta: bufferSizeLength + NonceLength,
+		}
+	}
+
+	return &aesDecryptor{
+		mode:                gcmMode,
+		ciphertextSizeDelta: bufferSizeLength + NonceLength + GcmTagLength,
+	}
+}
+
+// CiphertextSizeDelta reports how many bytes of an encrypted buffer are framing
+// used for decryption rather than payload. Subtracting it from the total size
+// of the buffer gives the length of the plaintext after decryption.
+func (a *aesDecryptor) CiphertextSizeDelta() int {
+	return a.ciphertextSizeDelta
+}
+
+// Decrypt returns the plaintext version of the given ciphertext when decrypted
+// with the provided key and AAD security bytes.
+func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte {
+ block, err := aes.NewCipher(key)
+ if err != nil {
+ panic(err)
+ }
+
+ writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText)
+ cipherLen := writtenCiphertextLen + bufferSizeLength
+ nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength]
+
+ if a.mode == gcmMode {
+ aead, err := cipher.NewGCM(block)
+ if err != nil {
+ panic(err)
+ }
+
+ plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad)
+ if err != nil {
+ panic(err)
+ }
+ return plain
+ }
+
+ // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
+ // counter field.
+ // The first 31 bits of the initial counter field are set to 0, the last bit
+ // is set to 1.
+ iv := make([]byte, ctrIVLen)
+ copy(iv, nonce)
+ iv[ctrIVLen-1] = 1
+
+ stream := cipher.NewCTR(block, iv)
+ dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength)
+ stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:])
+ return dst
+}
+
+// CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page.
+//
+// This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes.
+func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string {
+ buf := bytes.NewBuffer([]byte(fileAad))
+ buf.WriteByte(byte(moduleType))
+
+ if moduleType == FooterModule {
+ return buf.String()
+ }
+
+ binary.Write(buf, binary.LittleEndian, rowGroupOrdinal)
+ binary.Write(buf, binary.LittleEndian, columnOrdinal)
+ if DataPageModule != moduleType && DataPageHeaderModule != moduleType {
+ return buf.String()
+ }
+
+ binary.Write(buf, binary.LittleEndian, pageOrdinal)
+ return buf.String()
+}
+
+// CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting
+// and decrypting the parquet footer bytes.
+func CreateFooterAad(aadPrefix string) string {
+ return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1)
+}
+
+// QuickUpdatePageAad overwrites the last two bytes of aad in place with
+// newPageOrdinal encoded as little endian. aad must hold at least two bytes.
+func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) {
+	tail := aad[len(aad)-2:]
+	binary.LittleEndian.PutUint16(tail, uint16(newPageOrdinal))
+}
diff --git a/src/arrow/go/parquet/internal/encryption/decryptor.go b/src/arrow/go/parquet/internal/encryption/decryptor.go
new file mode 100644
index 000000000..fe06fdce3
--- /dev/null
+++ b/src/arrow/go/parquet/internal/encryption/decryptor.go
@@ -0,0 +1,261 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encryption
+
+import (
+ "github.com/apache/arrow/go/v6/arrow/memory"
+ "github.com/apache/arrow/go/v6/parquet"
+)
+
+// FileDecryptor is the interface used by the file reader for decrypting an entire parquet
+// file as it is read. It hands out Decryptors for the footer and for individual columns.
+type FileDecryptor interface {
+ // GetFooterKey returns the key used for decrypting the footer.
+ GetFooterKey() string
+ // FileAad returns the file level AAD security bytes.
+ FileAad() string
+ // Algorithm returns the cipher this decryptor was constructed for.
+ Algorithm() parquet.Cipher
+ // Properties returns the FileDecryptionProperties that were used for this decryptor.
+ Properties() *parquet.FileDecryptionProperties
+ // WipeOutDecryptionKeys clears out the decryption keys so that they are not
+ // kept around once decryption of the file is finished.
+ WipeOutDecryptionKeys()
+ // GetFooterDecryptor returns a Decryptor, keyed with the footer key, for decrypting
+ // the footer of a parquet file.
+ GetFooterDecryptor() Decryptor
+ // GetFooterDecryptorForColumnMeta returns a Decryptor, keyed with the footer key, for
+ // decrypting column metadata using the AAD bytes provided.
+ GetFooterDecryptorForColumnMeta(aad string) Decryptor
+ // GetFooterDecryptorForColumnData returns a Decryptor, keyed with the footer key, for
+ // decrypting column data rather than column metadata.
+ GetFooterDecryptorForColumnData(aad string) Decryptor
+ // GetColumnMetaDecryptor returns a Decryptor for the requested column path, key metadata
+ // and AAD bytes, to be used only for decrypting that column's metadata.
+ GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor
+ // GetColumnDataDecryptor returns a Decryptor for the requested column path, key metadata
+ // and AAD bytes, to be used only for decrypting that column's data.
+ GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor
+}
+
+type fileDecryptor struct {
+ // the properties provide explicitly configured keys as well as the key
+ // retriever used to look keys up from key metadata
+ props *parquet.FileDecryptionProperties
+ // concatenation of aad_prefix (if exists) and aad_file_unique
+ fileAad string
+ columnDataMap map[string]Decryptor // column data decryptors, keyed by column path
+ columnMetaDataMap map[string]Decryptor // column metadata decryptors, keyed by column path
+ footerMetadataDecryptor Decryptor // created on first use by getFooterDecryptor
+ footerDataDecryptor Decryptor // created on first use by getFooterDecryptor
+ alg parquet.Cipher // cipher handed to newAesDecryptor
+ footerKeyMetadata string // passed to the key retriever when no footer key is set in props
+ metaDecryptor *aesDecryptor // shared AES implementation for metadata, created on first use
+ dataDecryptor *aesDecryptor // shared AES implementation for data, created on first use
+ mem memory.Allocator // allocator exposed by every Decryptor handed out
+}
+
+// NewFileDecryptor constructs a decryptor from the provided decryption properties, file AAD,
+// cipher and footer key metadata. If mem is nil, memory.DefaultAllocator is used instead.
+func NewFileDecryptor(props *parquet.FileDecryptionProperties, fileAad string, alg parquet.Cipher, keymetadata string, mem memory.Allocator) FileDecryptor {
+	dec := &fileDecryptor{
+		props:             props,
+		fileAad:           fileAad,
+		alg:               alg,
+		footerKeyMetadata: keymetadata,
+		mem:               mem,
+		columnDataMap:     make(map[string]Decryptor),
+		columnMetaDataMap: make(map[string]Decryptor),
+	}
+	if dec.mem == nil {
+		dec.mem = memory.DefaultAllocator
+	}
+	return dec
+}
+
+// FileAad returns the file level AAD this decryptor was constructed with.
+func (d *fileDecryptor) FileAad() string {
+	return d.fileAad
+}
+
+// Properties returns the decryption properties this decryptor was constructed with.
+func (d *fileDecryptor) Properties() *parquet.FileDecryptionProperties {
+	return d.props
+}
+
+// Algorithm returns the cipher this decryptor was constructed with.
+func (d *fileDecryptor) Algorithm() parquet.Cipher {
+	return d.alg
+}
+// GetFooterKey returns the footer key configured in the properties or, when none is set,
+// the key the key retriever returns for the footer key metadata. It panics when there is
+// neither a key nor key metadata, when no key retriever is available, or when the
+// retriever returns an empty key.
+func (d *fileDecryptor) GetFooterKey() string {
+	if key := d.props.FooterKey(); key != "" {
+		return key
+	}
+
+	switch {
+	case d.footerKeyMetadata == "":
+		panic("no footer key or key metadata")
+	case d.props.KeyRetriever == nil:
+		panic("no footer key or key retriever")
+	}
+
+	key := d.props.KeyRetriever.GetKey([]byte(d.footerKeyMetadata))
+	if key == "" {
+		panic("invalid footer encryption key. Could not parse footer metadata")
+	}
+	return key
+}
+
+func (d *fileDecryptor) GetFooterDecryptor() Decryptor {
+ aad := CreateFooterAad(d.fileAad)
+ return d.getFooterDecryptor(aad, true)
+}
+
+func (d *fileDecryptor) GetFooterDecryptorForColumnMeta(aad string) Decryptor {
+ return d.getFooterDecryptor(aad, true)
+}
+
+func (d *fileDecryptor) GetFooterDecryptorForColumnData(aad string) Decryptor {
+ return d.getFooterDecryptor(aad, false)
+}
+
+func (d *fileDecryptor) GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor {
+ return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, true)
+}
+
+func (d *fileDecryptor) GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor {
+ return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, false)
+}
+
+func (d *fileDecryptor) WipeOutDecryptionKeys() {
+ d.props.WipeOutDecryptionKeys()
+}
+
+// getFooterDecryptor returns the footer key decryptor for metadata or for data, creating
+// both on first use so the footer key only has to be resolved once.
+//
+// Once the decryptors exist they are returned as is: the aad argument is only applied when
+// they are first created, so callers that need a different AAD have to call UpdateAad.
+func (d *fileDecryptor) getFooterDecryptor(aad string, metadata bool) Decryptor {
+	existing := d.footerDataDecryptor
+	if metadata {
+		existing = d.footerMetadataDecryptor
+	}
+	if existing != nil {
+		return existing
+	}
+
+	footerKey := d.GetFooterKey()
+
+	// every decryptor gets its own copies of the key and AAD bytes.
+	build := func(impl *aesDecryptor) Decryptor {
+		return &decryptor{
+			decryptor: impl,
+			key:       []byte(footerKey),
+			fileAad:   []byte(d.fileAad),
+			aad:       []byte(aad),
+			mem:       d.mem,
+		}
+	}
+	d.footerMetadataDecryptor = build(d.getMetaAesDecryptor())
+	d.footerDataDecryptor = build(d.getDataAesDecryptor())
+
+	if !metadata {
+		return d.footerDataDecryptor
+	}
+	return d.footerMetadataDecryptor
+}
+
+// getColumnDecryptor returns the metadata or data decryptor for columnPath. A decryptor that
+// already exists is updated to the given aad and returned. Otherwise the column key is taken
+// from the properties or, failing that, from the key retriever using columnMeta, and both the
+// data and the metadata decryptor for the column are created and remembered.
+//
+// Panics when no key can be determined for the column.
+func (d *fileDecryptor) getColumnDecryptor(columnPath, columnMeta, aad string, metadata bool) Decryptor {
+	wanted := d.columnDataMap
+	if metadata {
+		wanted = d.columnMetaDataMap
+	}
+	if cached, ok := wanted[columnPath]; ok {
+		cached.UpdateAad(aad)
+		return cached
+	}
+
+	columnKey := d.props.ColumnKey(columnPath)
+	// No explicit column key given via API. Retrieve via key metadata.
+	if columnKey == "" && columnMeta != "" && d.props.KeyRetriever != nil {
+		columnKey = d.props.KeyRetriever.GetKey([]byte(columnMeta))
+	}
+	if columnKey == "" {
+		panic("hidden column exception, path=" + columnPath)
+	}
+
+	// every decryptor gets its own copies of the key and AAD bytes.
+	build := func(impl *aesDecryptor) Decryptor {
+		return &decryptor{
+			decryptor: impl,
+			key:       []byte(columnKey),
+			fileAad:   []byte(d.fileAad),
+			aad:       []byte(aad),
+			mem:       d.mem,
+		}
+	}
+	d.columnDataMap[columnPath] = build(d.getDataAesDecryptor())
+	d.columnMetaDataMap[columnPath] = build(d.getMetaAesDecryptor())
+
+	return wanted[columnPath]
+}
+
+// getMetaAesDecryptor returns the shared AES decryptor for metadata, creating it on first use.
+func (d *fileDecryptor) getMetaAesDecryptor() *aesDecryptor {
+	if d.metaDecryptor != nil {
+		return d.metaDecryptor
+	}
+	d.metaDecryptor = newAesDecryptor(d.alg, true)
+	return d.metaDecryptor
+}
+
+// getDataAesDecryptor returns the shared AES decryptor for data, creating it on first use.
+func (d *fileDecryptor) getDataAesDecryptor() *aesDecryptor {
+	if d.dataDecryptor != nil {
+		return d.dataDecryptor
+	}
+	d.dataDecryptor = newAesDecryptor(d.alg, false)
+	return d.dataDecryptor
+}
+
+// Decryptor is the basic interface for any decryptor handed out by a FileDecryptor.
+type Decryptor interface {
+ // FileAad returns the file level AAD bytes
+ FileAad() string
+ // Allocator returns the allocator this decryptor was constructed with
+ Allocator() memory.Allocator
+ // CiphertextSizeDelta returns the CiphertextSizeDelta of the underlying AES decryptor
+ CiphertextSizeDelta() int
+ // Decrypt returns the decrypted plaintext for the src ciphertext
+ Decrypt(src []byte) []byte
+ // UpdateAad sets the AAD bytes used by subsequent Decrypt calls to the provided string
+ UpdateAad(string)
+}
+
+type decryptor struct { // Decryptor implementation binding a key and AAD to an aesDecryptor
+ decryptor *aesDecryptor // AES implementation that performs the decryption
+ key []byte // key passed to every Decrypt call
+ fileAad []byte // file level AAD, returned by FileAad
+ aad []byte // AAD passed to every Decrypt call, replaced by UpdateAad
+ mem memory.Allocator // returned by Allocator
+}
+
+// Allocator returns the allocator this decryptor was constructed with.
+func (d *decryptor) Allocator() memory.Allocator {
+	return d.mem
+}
+
+// FileAad returns the file level AAD as a string.
+func (d *decryptor) FileAad() string {
+	return string(d.fileAad)
+}
+
+// UpdateAad replaces the AAD used by subsequent Decrypt calls.
+func (d *decryptor) UpdateAad(aad string) {
+	d.aad = []byte(aad)
+}
+
+// CiphertextSizeDelta returns the size delta of the underlying AES decryptor.
+func (d *decryptor) CiphertextSizeDelta() int {
+	return d.decryptor.CiphertextSizeDelta()
+}
+
+// Decrypt decrypts src with this decryptor's key and current AAD.
+func (d *decryptor) Decrypt(src []byte) []byte {
+	plaintext := d.decryptor.Decrypt(src, d.key, d.aad)
+	return plaintext
+}
diff --git a/src/arrow/go/parquet/internal/encryption/encryptor.go b/src/arrow/go/parquet/internal/encryption/encryptor.go
new file mode 100644
index 000000000..c4ce817e7
--- /dev/null
+++ b/src/arrow/go/parquet/internal/encryption/encryptor.go
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encryption
+
+import (
+ "io"
+
+ "github.com/apache/arrow/go/v6/arrow/memory"
+ "github.com/apache/arrow/go/v6/parquet"
+)
+
+// FileEncryptor is the interface for obtaining the encryptors used for the different
+// sections of a parquet file.
+type FileEncryptor interface {
+ // GetFooterEncryptor returns an encryptor for the footer metadata
+ GetFooterEncryptor() Encryptor
+ // GetFooterSigningEncryptor returns an encryptor for creating the signature
+ // for the footer as opposed to encrypting the footer bytes directly.
+ GetFooterSigningEncryptor() Encryptor
+ // GetColumnMetaEncryptor returns an encryptor for the metadata only of the requested
+ // column path string.
+ GetColumnMetaEncryptor(columnPath string) Encryptor
+ // GetColumnDataEncryptor returns an encryptor for the column data ONLY of
+ // the requested column path string.
+ GetColumnDataEncryptor(columnPath string) Encryptor
+ // WipeOutEncryptionKeys deletes the keys that were used for encryption,
+ // intended to be called once a file has been written to guard against
+ // accidental key re-use.
+ WipeOutEncryptionKeys()
+}
+
+type fileEncryptor struct {
+ props *parquet.FileEncryptionProperties // source of the keys, algorithm and file AAD
+ columnDataMap map[string]Encryptor // column data encryptors, keyed by column path
+ columnMetaDataMap map[string]Encryptor // column metadata encryptors, keyed by column path
+ footerSigningEncryptor Encryptor // created on first use by GetFooterSigningEncryptor
+ footerEncryptor Encryptor // created on first use by GetFooterEncryptor
+
+ // AES implementations shared by every encryptor handed out, created on first use.
+ // The metadata one is always GCM, the data one follows the configured algorithm.
+ metaEncryptor *aesEncryptor
+ dataEncryptor *aesEncryptor
+
+ mem memory.Allocator // allocator exposed by every Encryptor handed out
+}
+
+// NewFileEncryptor returns a new encryptor using the given encryption properties and
+// marks those properties as utilized.
+//
+// Panics if the properties passed have already been used to construct an encryptor,
+// ie: props.IsUtilized returns true. If mem is nil, memory.DefaultAllocator is used.
+func NewFileEncryptor(props *parquet.FileEncryptionProperties, mem memory.Allocator) FileEncryptor {
+	if props.IsUtilized() {
+		panic("re-using encryption properties for another file")
+	}
+	props.SetUtilized()
+
+	enc := &fileEncryptor{
+		props:             props,
+		mem:               mem,
+		columnDataMap:     make(map[string]Encryptor),
+		columnMetaDataMap: make(map[string]Encryptor),
+	}
+	if enc.mem == nil {
+		enc.mem = memory.DefaultAllocator
+	}
+	return enc
+}
+
+// WipeOutEncryptionKeys asks the encryption properties to wipe their keys.
+func (e *fileEncryptor) WipeOutEncryptionKeys() {
+	props := e.props
+	props.WipeOutEncryptionKeys()
+}
+
+// GetFooterEncryptor returns the encryptor for the footer, creating it on first use from
+// the footer key, the file AAD and the footer module AAD.
+func (e *fileEncryptor) GetFooterEncryptor() Encryptor {
+	if e.footerEncryptor != nil {
+		return e.footerEncryptor
+	}
+
+	e.footerEncryptor = &encryptor{
+		aesEncryptor: e.getMetaAesEncryptor(e.props.Algorithm().Algo),
+		key:          []byte(e.props.FooterKey()),
+		fileAad:      e.props.FileAad(),
+		aad:          CreateFooterAad(e.props.FileAad()),
+		mem:          e.mem,
+	}
+	return e.footerEncryptor
+}
+
+// GetFooterSigningEncryptor returns the encryptor used for signing the footer, creating
+// it on first use from the footer key, the file AAD and the footer module AAD.
+func (e *fileEncryptor) GetFooterSigningEncryptor() Encryptor {
+	if e.footerSigningEncryptor != nil {
+		return e.footerSigningEncryptor
+	}
+
+	e.footerSigningEncryptor = &encryptor{
+		aesEncryptor: e.getMetaAesEncryptor(e.props.Algorithm().Algo),
+		key:          []byte(e.props.FooterKey()),
+		fileAad:      e.props.FileAad(),
+		aad:          CreateFooterAad(e.props.FileAad()),
+		mem:          e.mem,
+	}
+	return e.footerSigningEncryptor
+}
+
+// getMetaAesEncryptor returns the shared AES encryptor for metadata, creating it for
+// alg on first use. Later calls return the same instance regardless of alg.
+func (e *fileEncryptor) getMetaAesEncryptor(alg parquet.Cipher) *aesEncryptor {
+	if e.metaEncryptor != nil {
+		return e.metaEncryptor
+	}
+	e.metaEncryptor = NewAesEncryptor(alg, true)
+	return e.metaEncryptor
+}
+
+// getDataAesEncryptor returns the shared AES encryptor for data, creating it for
+// alg on first use. Later calls return the same instance regardless of alg.
+func (e *fileEncryptor) getDataAesEncryptor(alg parquet.Cipher) *aesEncryptor {
+	if e.dataEncryptor != nil {
+		return e.dataEncryptor
+	}
+	e.dataEncryptor = NewAesEncryptor(alg, false)
+	return e.dataEncryptor
+}
+
+func (e *fileEncryptor) GetColumnMetaEncryptor(columnPath string) Encryptor {
+ return e.getColumnEncryptor(columnPath, true)
+}
+
+func (e *fileEncryptor) GetColumnDataEncryptor(columnPath string) Encryptor {
+ return e.getColumnEncryptor(columnPath, false)
+}
+
+// getColumnEncryptor returns the metadata or data encryptor for columnPath, creating and
+// remembering it on first use. It returns nil when the properties hold no encryption
+// properties for the column. A new encryptor starts out with an empty AAD and is keyed
+// with the footer key or the column's own key, depending on the column properties.
+func (e *fileEncryptor) getColumnEncryptor(columnPath string, metadata bool) Encryptor {
+	cache := e.columnDataMap
+	if metadata {
+		cache = e.columnMetaDataMap
+	}
+	if cached, ok := cache[columnPath]; ok {
+		return cached
+	}
+
+	columnProp := e.props.ColumnEncryptionProperties(columnPath)
+	if columnProp == nil {
+		return nil
+	}
+
+	var key string
+	switch {
+	case columnProp.IsEncryptedWithFooterKey():
+		key = e.props.FooterKey()
+	default:
+		key = columnProp.Key()
+	}
+
+	alg := e.props.Algorithm().Algo
+	var aesImpl *aesEncryptor
+	switch {
+	case metadata:
+		aesImpl = e.getMetaAesEncryptor(alg)
+	default:
+		aesImpl = e.getDataAesEncryptor(alg)
+	}
+
+	created := &encryptor{
+		aesEncryptor: aesImpl,
+		key:          []byte(key),
+		fileAad:      e.props.FileAad(),
+		aad:          "",
+		mem:          e.mem,
+	}
+	cache[columnPath] = created
+	return created
+}
+
+// Encryptor is the basic interface for encryptors, for now there's only the single
+// aes encryptor implementation, but having it as an interface allows easy addition
+// and manipulation of encryptor implementations in the future.
+type Encryptor interface {
+ // FileAad returns the file level AAD bytes for this encryptor
+ FileAad() string
+ // UpdateAad sets the aad bytes used by subsequent Encrypt calls to the provided string
+ UpdateAad(string)
+ // Allocator returns the allocator that was used to construct the encryptor
+ Allocator() memory.Allocator
+ // CiphertextSizeDelta returns the extra bytes that will be added to the ciphertext
+ // for a total size of len(plaintext) + CiphertextSizeDelta bytes
+ CiphertextSizeDelta() int
+ // Encrypt writes the encrypted ciphertext for src to w and returns the total
+ // number of bytes written.
+ Encrypt(w io.Writer, src []byte) int
+ // EncryptColumnMetaData returns true if the column metadata should be encrypted based on the
+ // column encryption settings and footer encryption setting.
+ EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool
+}
+
+type encryptor struct { // Encryptor implementation binding a key and AAD to an aesEncryptor
+ aesEncryptor *aesEncryptor // AES implementation that performs the encryption
+ key []byte // key passed to every Encrypt call
+ fileAad string // file level AAD, returned by FileAad
+ aad string // AAD passed to every Encrypt call, replaced by UpdateAad
+ mem memory.Allocator // returned by Allocator
+}
+
+// FileAad returns the file level AAD this encryptor was constructed with.
+func (e *encryptor) FileAad() string {
+	return e.fileAad
+}
+
+// UpdateAad replaces the AAD used by subsequent Encrypt calls.
+func (e *encryptor) UpdateAad(aad string) {
+	e.aad = aad
+}
+
+// Allocator returns the allocator this encryptor was constructed with.
+func (e *encryptor) Allocator() memory.Allocator {
+	return e.mem
+}
+
+// CiphertextSizeDelta returns the size delta of the underlying AES encryptor.
+func (e *encryptor) CiphertextSizeDelta() int {
+	return e.aesEncryptor.CiphertextSizeDelta()
+}
+
+// EncryptColumnMetaData reports whether the column metadata has to be encrypted separately.
+// That is only the case when the column is encrypted, the footer is encrypted as well, and
+// the column uses its own key rather than the footer key.
+func (e *encryptor) EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool {
+	columnEncrypted := properties != nil && properties.IsEncrypted()
+	if !columnEncrypted || !encryptFooter {
+		return false
+	}
+	// a column encrypted with the footer key needs no separate metadata encryption
+	usesFooterKey := properties.IsEncryptedWithFooterKey()
+	return !usesFooterKey
+}
+
+// Encrypt encrypts src with this encryptor's key and current AAD, writes the result
+// to w and returns the number of bytes written.
+func (e *encryptor) Encrypt(w io.Writer, src []byte) int {
+	aad := []byte(e.aad)
+	return e.aesEncryptor.Encrypt(w, src, e.key, aad)
+}
diff --git a/src/arrow/go/parquet/internal/encryption/key_handling.go b/src/arrow/go/parquet/internal/encryption/key_handling.go
new file mode 100644
index 000000000..b3c36c497
--- /dev/null
+++ b/src/arrow/go/parquet/internal/encryption/key_handling.go
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encryption
+
+import (
+ "encoding/binary"
+ "unsafe"
+
+ "golang.org/x/xerrors"
+)
+
+// StringKeyIDRetriever is a key retriever backed by a map from string key ids
+// to keys. Its GetKey method looks keys up by the key metadata bytes.
+type StringKeyIDRetriever map[string]string
+
+// PutKey stores key under the given string id so that GetKey can find it.
+func (s StringKeyIDRetriever) PutKey(keyID, key string) {
+	s[keyID] = key
+}
+
+// GetKey returns the key that was stored with PutKey under the id spelled by
+// keyMetadata, and panics if there is no such key.
+func (s StringKeyIDRetriever) GetKey(keyMetadata []byte) string {
+	// view the metadata bytes as a string without copying; only used for the lookup.
+	id := *(*string)(unsafe.Pointer(&keyMetadata))
+	if key, found := s[id]; found {
+		return key
+	}
+	panic(xerrors.Errorf("parquet: key missing for id %s", keyMetadata))
+}
+
+// IntegerKeyIDRetriever is used for using unsigned 32bit integers as key ids.
+type IntegerKeyIDRetriever map[uint32]string
+
+// PutKey adds keys with uint32 IDs
+func (i IntegerKeyIDRetriever) PutKey(keyID uint32, key string) {
+	i[keyID] = key
+}
+
+// GetKey expects the key metadata bytes to start with a little endian uint32 which
+// is then used to retrieve the key bytes. Panics if the metadata holds fewer than
+// 4 bytes or if the key id cannot be found.
+func (i IntegerKeyIDRetriever) GetKey(keyMetadata []byte) string {
+	const keyIDLen = 4
+	// Uint32 indexes the first four bytes unconditionally, so report short metadata
+	// with a descriptive error instead of an index out of range panic.
+	if len(keyMetadata) < keyIDLen {
+		panic(xerrors.Errorf("parquet: integer key id requires %d bytes of key metadata, got %d", keyIDLen, len(keyMetadata)))
+	}
+
+	keyID := binary.LittleEndian.Uint32(keyMetadata)
+	k, ok := i[keyID]
+	if !ok {
+		panic(xerrors.Errorf("parquet: key missing for id %d", keyID))
+	}
+	return k
+}