diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/go/parquet/internal/encryption | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/go/parquet/internal/encryption')
-rw-r--r-- | src/arrow/go/parquet/internal/encryption/aes.go | 264 | ||||
-rw-r--r-- | src/arrow/go/parquet/internal/encryption/decryptor.go | 261 | ||||
-rw-r--r-- | src/arrow/go/parquet/internal/encryption/encryptor.go | 237 | ||||
-rw-r--r-- | src/arrow/go/parquet/internal/encryption/key_handling.go | 62 |
4 files changed, 824 insertions, 0 deletions
diff --git a/src/arrow/go/parquet/internal/encryption/aes.go b/src/arrow/go/parquet/internal/encryption/aes.go new file mode 100644 index 000000000..227325723 --- /dev/null +++ b/src/arrow/go/parquet/internal/encryption/aes.go @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package encryption contains the internal helpers for the parquet AES encryption/decryption handling. +// +// Testing for this is done via integration testing at the top level parquet package via attempting to +// read and write encrypted files with different configurations to match test files in parquet-testing +package encryption + +import ( + "bytes" + "crypto/aes" + "crypto/cipher" + "crypto/rand" + "encoding/binary" + "io" + + "github.com/apache/arrow/go/v6/parquet" + "golang.org/x/xerrors" +) + +// important constants for handling the aes encryption +const ( + GcmTagLength = 16 + NonceLength = 12 + + gcmMode = 0 + ctrMode = 1 + ctrIVLen = 16 + bufferSizeLength = 4 +) + +// Module constants for constructing the AAD bytes, the order here is +// important as the constants are set via iota. 
+const ( + FooterModule int8 = iota + ColumnMetaModule + DataPageModule + DictPageModule + DataPageHeaderModule + DictPageHeaderModule + ColumnIndexModule + OffsetIndexModule +) + +type aesEncryptor struct { + mode int + ciphertextSizeDelta int +} + +// NewAesEncryptor constructs an encryptor for the passed in cipher and whether +// or not it's being used to encrypt metadata. +func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor { + ret := &aesEncryptor{} + ret.ciphertextSizeDelta = bufferSizeLength + NonceLength + if metadata || alg == parquet.AesGcm { + ret.mode = gcmMode + ret.ciphertextSizeDelta += GcmTagLength + } else { + ret.mode = ctrMode + } + + return ret +} + +// CiphertextSizeDelta is the number of extra bytes that are part of the encrypted data +// above and beyond the plaintext value. +func (a *aesEncryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta } + +// SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce. +// It returns the number of bytes that were written to w. +func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int { + if a.mode != gcmMode { + panic("must use AES GCM (metadata) encryptor") + } + + block, err := aes.NewCipher(key) + if err != nil { + panic(err) + } + + aead, err := cipher.NewGCM(block) + if err != nil { + panic(err) + } + if aead.NonceSize() != NonceLength { + panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength)) + } + if aead.Overhead() != GcmTagLength { + panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength)) + } + + ciphertext := aead.Seal(nil, nonce, footer, aad) + bufferSize := uint32(len(ciphertext) + len(nonce)) + // data is written with a prefix of the size written as a little endian 32bit int. 
+ if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil { + panic(err) + } + w.Write(nonce) + w.Write(ciphertext) + return bufferSizeLength + int(bufferSize) +} + +// Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w. +// Returns the total number of bytes written. +func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int { + block, err := aes.NewCipher(key) + if err != nil { + panic(err) + } + + nonce := make([]byte, NonceLength) + rand.Read(nonce) + + if a.mode == gcmMode { + aead, err := cipher.NewGCM(block) + if err != nil { + panic(err) + } + if aead.NonceSize() != NonceLength { + panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength)) + } + if aead.Overhead() != GcmTagLength { + panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength)) + } + + ciphertext := aead.Seal(nil, nonce, src, aad) + bufferSize := len(ciphertext) + len(nonce) + // data is written with a prefix of the size written as a little endian 32bit int. + if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil { + panic(err) + } + w.Write(nonce) + w.Write(ciphertext) + return bufferSizeLength + bufferSize + } + + // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial + // counter field. + // The first 31 bits of the initial counter field are set to 0, the last bit + // is set to 1. + iv := make([]byte, ctrIVLen) + copy(iv, nonce) + iv[ctrIVLen-1] = 1 + + bufferSize := NonceLength + len(src) + // data is written with a prefix of the size written as a little endian 32bit int. 
+ if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil { + panic(err) + } + w.Write(nonce) + cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src) + return bufferSizeLength + bufferSize +} + +type aesDecryptor struct { + mode int + ciphertextSizeDelta int +} + +// newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or +// not it is intended to be used for decrypting metadata. +func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor { + ret := &aesDecryptor{} + ret.ciphertextSizeDelta = bufferSizeLength + NonceLength + if metadata || alg == parquet.AesGcm { + ret.mode = gcmMode + ret.ciphertextSizeDelta += GcmTagLength + } else { + ret.mode = ctrMode + } + + return ret +} + +// CiphertextSizeDelta is the number of bytes in the ciphertext that will not exist in the +// plaintext due to be used for the decryption. The total size - the CiphertextSizeDelta is +// the length of the plaintext after decryption. +func (a *aesDecryptor) CiphertextSizeDelta() int { return a.ciphertextSizeDelta } + +// Decrypt returns the plaintext version of the given ciphertext when decrypted +// with the provided key and AAD security bytes. +func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte { + block, err := aes.NewCipher(key) + if err != nil { + panic(err) + } + + writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText) + cipherLen := writtenCiphertextLen + bufferSizeLength + nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength] + + if a.mode == gcmMode { + aead, err := cipher.NewGCM(block) + if err != nil { + panic(err) + } + + plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad) + if err != nil { + panic(err) + } + return plain + } + + // Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial + // counter field. + // The first 31 bits of the initial counter field are set to 0, the last bit + // is set to 1. 
+ iv := make([]byte, ctrIVLen) + copy(iv, nonce) + iv[ctrIVLen-1] = 1 + + stream := cipher.NewCTR(block, iv) + dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength) + stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:]) + return dst +} + +// CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page. +// +// This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes. +func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string { + buf := bytes.NewBuffer([]byte(fileAad)) + buf.WriteByte(byte(moduleType)) + + if moduleType == FooterModule { + return buf.String() + } + + binary.Write(buf, binary.LittleEndian, rowGroupOrdinal) + binary.Write(buf, binary.LittleEndian, columnOrdinal) + if DataPageModule != moduleType && DataPageHeaderModule != moduleType { + return buf.String() + } + + binary.Write(buf, binary.LittleEndian, pageOrdinal) + return buf.String() +} + +// CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting +// and decrypting the parquet footer bytes. +func CreateFooterAad(aadPrefix string) string { + return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1) +} + +// QuickUpdatePageAad updates aad with the new page ordinal, modifying the +// last two bytes of aad. +func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) { + binary.LittleEndian.PutUint16(aad[len(aad)-2:], uint16(newPageOrdinal)) +} diff --git a/src/arrow/go/parquet/internal/encryption/decryptor.go b/src/arrow/go/parquet/internal/encryption/decryptor.go new file mode 100644 index 000000000..fe06fdce3 --- /dev/null +++ b/src/arrow/go/parquet/internal/encryption/decryptor.go @@ -0,0 +1,261 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encryption + +import ( + "github.com/apache/arrow/go/v6/arrow/memory" + "github.com/apache/arrow/go/v6/parquet" +) + +// FileDecryptor is an interface used by the filereader for decrypting an +// entire parquet file as we go, usually constructed from the DecryptionProperties +type FileDecryptor interface { + // Returns the key for decrypting the footer if provided + GetFooterKey() string + // Provides the file level AAD security bytes + FileAad() string + // return which algorithm this decryptor was constructed for + Algorithm() parquet.Cipher + // return the FileDecryptionProperties that were used for this decryptor + Properties() *parquet.FileDecryptionProperties + // Clear out the decryption keys, this is automatically called after every + // successfully decrypted file to ensure that keys aren't kept around. + WipeOutDecryptionKeys() + // GetFooterDecryptor returns a Decryptor interface for use to decrypt the footer + // of a parquet file. + GetFooterDecryptor() Decryptor + // GetFooterDecryptorForColumnMeta returns a Decryptor interface for Column Metadata + // in the file footer using the AAD bytes provided. 
+ GetFooterDecryptorForColumnMeta(aad string) Decryptor + // GetFooterDecryptorForColumnData returns the decryptor that can be used for decrypting + // actual column data footer bytes, not column metadata. + GetFooterDecryptorForColumnData(aad string) Decryptor + // GetColumnMetaDecryptor returns a decryptor for the requested column path, key and AAD bytes + // but only for decrypting the row group level metadata + GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor + // GetColumnDataDecryptor returns a decryptor for the requested column path, key, and AAD bytes + // but only for the rowgroup column data. + GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor +} + +type fileDecryptor struct { + // the properties contains the key retriever for us to get keys + // from the key metadata + props *parquet.FileDecryptionProperties + // concatenation of aad_prefix (if exists) and aad_file_unique + fileAad string + columnDataMap map[string]Decryptor + columnMetaDataMap map[string]Decryptor + footerMetadataDecryptor Decryptor + footerDataDecryptor Decryptor + alg parquet.Cipher + footerKeyMetadata string + metaDecryptor *aesDecryptor + dataDecryptor *aesDecryptor + mem memory.Allocator +} + +// NewFileDecryptor constructs a decryptor from the provided configuration of properties, cipher and key metadata. Using the provided memory allocator or +// the default allocator if one isn't provided. 
+func NewFileDecryptor(props *parquet.FileDecryptionProperties, fileAad string, alg parquet.Cipher, keymetadata string, mem memory.Allocator) FileDecryptor { + if mem == nil { + mem = memory.DefaultAllocator + } + return &fileDecryptor{ + fileAad: fileAad, + props: props, + alg: alg, + footerKeyMetadata: keymetadata, + mem: mem, + columnDataMap: make(map[string]Decryptor), + columnMetaDataMap: make(map[string]Decryptor), + } +} + +func (d *fileDecryptor) FileAad() string { return d.fileAad } +func (d *fileDecryptor) Properties() *parquet.FileDecryptionProperties { return d.props } +func (d *fileDecryptor) Algorithm() parquet.Cipher { return d.alg } +func (d *fileDecryptor) GetFooterKey() string { + footerKey := d.props.FooterKey() + if footerKey == "" { + if d.footerKeyMetadata == "" { + panic("no footer key or key metadata") + } + if d.props.KeyRetriever == nil { + panic("no footer key or key retriever") + } + footerKey = d.props.KeyRetriever.GetKey([]byte(d.footerKeyMetadata)) + } + if footerKey == "" { + panic("invalid footer encryption key. 
Could not parse footer metadata") + } + return footerKey +} + +func (d *fileDecryptor) GetFooterDecryptor() Decryptor { + aad := CreateFooterAad(d.fileAad) + return d.getFooterDecryptor(aad, true) +} + +func (d *fileDecryptor) GetFooterDecryptorForColumnMeta(aad string) Decryptor { + return d.getFooterDecryptor(aad, true) +} + +func (d *fileDecryptor) GetFooterDecryptorForColumnData(aad string) Decryptor { + return d.getFooterDecryptor(aad, false) +} + +func (d *fileDecryptor) GetColumnMetaDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor { + return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, true) +} + +func (d *fileDecryptor) GetColumnDataDecryptor(columnPath, columnKeyMetadata, aad string) Decryptor { + return d.getColumnDecryptor(columnPath, columnKeyMetadata, aad, false) +} + +func (d *fileDecryptor) WipeOutDecryptionKeys() { + d.props.WipeOutDecryptionKeys() +} + +func (d *fileDecryptor) getFooterDecryptor(aad string, metadata bool) Decryptor { + if metadata { + if d.footerMetadataDecryptor != nil { + return d.footerMetadataDecryptor + } + } else { + if d.footerDataDecryptor != nil { + return d.footerDataDecryptor + } + } + + footerKey := d.GetFooterKey() + + // Create both data and metadata decryptors to avoid redundant retrieval of key + // from the key_retriever. 
+ aesMetaDecrypt := d.getMetaAesDecryptor() + aesDataDecrypt := d.getDataAesDecryptor() + + d.footerMetadataDecryptor = &decryptor{ + decryptor: aesMetaDecrypt, + key: []byte(footerKey), + fileAad: []byte(d.fileAad), + aad: []byte(aad), + mem: d.mem, + } + d.footerDataDecryptor = &decryptor{ + decryptor: aesDataDecrypt, + key: []byte(footerKey), + fileAad: []byte(d.fileAad), + aad: []byte(aad), + mem: d.mem, + } + + if metadata { + return d.footerMetadataDecryptor + } + return d.footerDataDecryptor +} + +func (d *fileDecryptor) getColumnDecryptor(columnPath, columnMeta, aad string, metadata bool) Decryptor { + if metadata { + if res, ok := d.columnMetaDataMap[columnPath]; ok { + res.UpdateAad(aad) + return res + } + } else { + if res, ok := d.columnDataMap[columnPath]; ok { + res.UpdateAad(aad) + return res + } + } + + columnKey := d.props.ColumnKey(columnPath) + // No explicit column key given via API. Retrieve via key metadata. + if columnKey == "" && columnMeta != "" && d.props.KeyRetriever != nil { + columnKey = d.props.KeyRetriever.GetKey([]byte(columnMeta)) + } + if columnKey == "" { + panic("hidden column exception, path=" + columnPath) + } + + aesDataDecrypt := d.getDataAesDecryptor() + aesMetaDecrypt := d.getMetaAesDecryptor() + + d.columnDataMap[columnPath] = &decryptor{ + decryptor: aesDataDecrypt, + key: []byte(columnKey), + fileAad: []byte(d.fileAad), + aad: []byte(aad), + mem: d.mem, + } + d.columnMetaDataMap[columnPath] = &decryptor{ + decryptor: aesMetaDecrypt, + key: []byte(columnKey), + fileAad: []byte(d.fileAad), + aad: []byte(aad), + mem: d.mem, + } + + if metadata { + return d.columnMetaDataMap[columnPath] + } + return d.columnDataMap[columnPath] +} + +func (d *fileDecryptor) getMetaAesDecryptor() *aesDecryptor { + if d.metaDecryptor == nil { + d.metaDecryptor = newAesDecryptor(d.alg, true) + } + return d.metaDecryptor +} + +func (d *fileDecryptor) getDataAesDecryptor() *aesDecryptor { + if d.dataDecryptor == nil { + d.dataDecryptor = 
newAesDecryptor(d.alg, false) + } + return d.dataDecryptor +} + +// Decryptor is the basic interface for any decryptor generated from a FileDecryptor +type Decryptor interface { + // returns the File Level AAD bytes + FileAad() string + // returns the current allocator that was used for any extra allocations of buffers + Allocator() memory.Allocator + // returns the CiphertextSizeDelta from the decryptor + CiphertextSizeDelta() int + // Decrypt just returns the decrypted plaintext from the src ciphertext + Decrypt(src []byte) []byte + // set the AAD bytes of the decryptor to the provided string + UpdateAad(string) +} + +type decryptor struct { + decryptor *aesDecryptor + key []byte + fileAad []byte + aad []byte + mem memory.Allocator +} + +func (d *decryptor) Allocator() memory.Allocator { return d.mem } +func (d *decryptor) FileAad() string { return string(d.fileAad) } +func (d *decryptor) UpdateAad(aad string) { d.aad = []byte(aad) } +func (d *decryptor) CiphertextSizeDelta() int { return d.decryptor.CiphertextSizeDelta() } +func (d *decryptor) Decrypt(src []byte) []byte { + return d.decryptor.Decrypt(src, d.key, d.aad) +} diff --git a/src/arrow/go/parquet/internal/encryption/encryptor.go b/src/arrow/go/parquet/internal/encryption/encryptor.go new file mode 100644 index 000000000..c4ce817e7 --- /dev/null +++ b/src/arrow/go/parquet/internal/encryption/encryptor.go @@ -0,0 +1,237 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encryption + +import ( + "io" + + "github.com/apache/arrow/go/v6/arrow/memory" + "github.com/apache/arrow/go/v6/parquet" +) + +// FileEncryptor is the interface for constructing encryptors for the different +// sections of a parquet file. +type FileEncryptor interface { + // GetFooterEncryptor returns an encryptor for the footer metadata + GetFooterEncryptor() Encryptor + // GetFooterSigningEncryptor returns an encryptor for creating the signature + // for the footer as opposed to encrypting the footer bytes directly. + GetFooterSigningEncryptor() Encryptor + // GetColumnMetaEncryptor returns an encryptor for the metadata only of the requested + // column path string. + GetColumnMetaEncryptor(columnPath string) Encryptor + // GetColumnDataEncryptor returns an encryptor for the column data ONLY of + // the requested column path string. + GetColumnDataEncryptor(columnPath string) Encryptor + // WipeOutEncryptionKeys deletes the keys that were used for encryption, + // called after every successfully encrypted file to ensure against accidental + // key re-use. 
+ WipeOutEncryptionKeys() +} + +type fileEncryptor struct { + props *parquet.FileEncryptionProperties + columnDataMap map[string]Encryptor + columnMetaDataMap map[string]Encryptor + footerSigningEncryptor Encryptor + footerEncryptor Encryptor + + // Key must be 16, 24, or 32 bytes in length thus there could be up to + // three types of meta_encryptors and data_encryptors + metaEncryptor *aesEncryptor + dataEncryptor *aesEncryptor + + mem memory.Allocator +} + +// NewFileEncryptor returns a new encryptor using the given encryption properties. +// +// Panics if the properties passed have already been used to construct an encryptor +// ie: props.IsUtilized returns true. If mem is nil, will default to memory.DefaultAllocator +func NewFileEncryptor(props *parquet.FileEncryptionProperties, mem memory.Allocator) FileEncryptor { + if props.IsUtilized() { + panic("re-using encryption properties for another file") + } + + props.SetUtilized() + if mem == nil { + mem = memory.DefaultAllocator + } + + return &fileEncryptor{ + props: props, + mem: mem, + columnDataMap: make(map[string]Encryptor), + columnMetaDataMap: make(map[string]Encryptor), + } +} + +func (e *fileEncryptor) WipeOutEncryptionKeys() { + e.props.WipeOutEncryptionKeys() +} + +func (e *fileEncryptor) GetFooterEncryptor() Encryptor { + if e.footerEncryptor == nil { + alg := e.props.Algorithm().Algo + footerAad := CreateFooterAad(e.props.FileAad()) + footerKey := e.props.FooterKey() + enc := e.getMetaAesEncryptor(alg) + e.footerEncryptor = &encryptor{ + aesEncryptor: enc, + key: []byte(footerKey), + fileAad: e.props.FileAad(), + aad: footerAad, + mem: e.mem, + } + } + return e.footerEncryptor +} + +func (e *fileEncryptor) GetFooterSigningEncryptor() Encryptor { + if e.footerSigningEncryptor == nil { + alg := e.props.Algorithm().Algo + footerAad := CreateFooterAad(e.props.FileAad()) + footerKey := e.props.FooterKey() + enc := e.getMetaAesEncryptor(alg) + e.footerSigningEncryptor = &encryptor{ + aesEncryptor: enc, + 
key: []byte(footerKey), + fileAad: e.props.FileAad(), + aad: footerAad, + mem: e.mem, + } + } + return e.footerSigningEncryptor +} + +func (e *fileEncryptor) getMetaAesEncryptor(alg parquet.Cipher) *aesEncryptor { + if e.metaEncryptor == nil { + e.metaEncryptor = NewAesEncryptor(alg, true) + } + return e.metaEncryptor +} + +func (e *fileEncryptor) getDataAesEncryptor(alg parquet.Cipher) *aesEncryptor { + if e.dataEncryptor == nil { + e.dataEncryptor = NewAesEncryptor(alg, false) + } + return e.dataEncryptor +} + +func (e *fileEncryptor) GetColumnMetaEncryptor(columnPath string) Encryptor { + return e.getColumnEncryptor(columnPath, true) +} + +func (e *fileEncryptor) GetColumnDataEncryptor(columnPath string) Encryptor { + return e.getColumnEncryptor(columnPath, false) +} + +func (e *fileEncryptor) getColumnEncryptor(columnPath string, metadata bool) Encryptor { + if metadata { + if enc, ok := e.columnMetaDataMap[columnPath]; ok { + return enc + } + } else { + if enc, ok := e.columnDataMap[columnPath]; ok { + return enc + } + } + + columnProp := e.props.ColumnEncryptionProperties(columnPath) + if columnProp == nil { + return nil + } + + var key string + if columnProp.IsEncryptedWithFooterKey() { + key = e.props.FooterKey() + } else { + key = columnProp.Key() + } + + alg := e.props.Algorithm().Algo + var enc *aesEncryptor + if metadata { + enc = e.getMetaAesEncryptor(alg) + } else { + enc = e.getDataAesEncryptor(alg) + } + + fileAad := e.props.FileAad() + ret := &encryptor{ + aesEncryptor: enc, + key: []byte(key), + fileAad: fileAad, + aad: "", + mem: e.mem, + } + if metadata { + e.columnMetaDataMap[columnPath] = ret + } else { + e.columnDataMap[columnPath] = ret + } + return ret +} + +// Encryptor is the basic interface for encryptors, for now there's only the single +// aes encryptor implementation, but having it as an interface allows easy addition +// manipulation of encryptor implementations in the future. 
+type Encryptor interface { + // FileAad returns the file level AAD bytes for this encryptor + FileAad() string + // UpdateAad sets the aad bytes for encryption to the provided string + UpdateAad(string) + // Allocator returns the allocator that was used to construct the encryptor + Allocator() memory.Allocator + // CiphertextSizeDelta returns the extra bytes that will be added to the ciphertext + // for a total size of len(plaintext) + CiphertextSizeDelta bytes + CiphertextSizeDelta() int + // Encrypt writes the encrypted ciphertext for src to w and returns the total + // number of bytes written. + Encrypt(w io.Writer, src []byte) int + // EncryptColumnMetaData returns true if the column metadata should be encrypted based on the + // column encryption settings and footer encryption setting. + EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool +} + +type encryptor struct { + aesEncryptor *aesEncryptor + key []byte + fileAad string + aad string + mem memory.Allocator +} + +func (e *encryptor) FileAad() string { return e.fileAad } +func (e *encryptor) UpdateAad(aad string) { e.aad = aad } +func (e *encryptor) Allocator() memory.Allocator { return e.mem } +func (e *encryptor) CiphertextSizeDelta() int { return e.aesEncryptor.CiphertextSizeDelta() } + +func (e *encryptor) EncryptColumnMetaData(encryptFooter bool, properties *parquet.ColumnEncryptionProperties) bool { + if properties == nil || !properties.IsEncrypted() { + return false + } + if !encryptFooter { + return false + } + // if not encrypted with footer key then encrypt the metadata + return !properties.IsEncryptedWithFooterKey() +} + +func (e *encryptor) Encrypt(w io.Writer, src []byte) int { + return e.aesEncryptor.Encrypt(w, src, e.key, []byte(e.aad)) +} diff --git a/src/arrow/go/parquet/internal/encryption/key_handling.go b/src/arrow/go/parquet/internal/encryption/key_handling.go new file mode 100644 index 000000000..b3c36c497 --- /dev/null +++ 
b/src/arrow/go/parquet/internal/encryption/key_handling.go @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encryption + +import ( + "encoding/binary" + "unsafe" + + "golang.org/x/xerrors" +) + +// StringKeyIDRetriever implements the KeyRetriever interface GetKey +// to allow setting in keys with a string id. +type StringKeyIDRetriever map[string]string + +// PutKey adds a key with the given string ID that can be retrieved +func (s StringKeyIDRetriever) PutKey(keyID, key string) { + s[keyID] = key +} + +// GetKey expects the keymetadata to match one of the keys that were added +// with PutKey and panics if the key cannot be found. +func (s StringKeyIDRetriever) GetKey(keyMetadata []byte) string { + k, ok := s[*(*string)(unsafe.Pointer(&keyMetadata))] + if !ok { + panic(xerrors.Errorf("parquet: key missing for id %s", keyMetadata)) + } + return k +} + +// IntegerKeyIDRetriever is used for using unsigned 32bit integers as key ids. 
+type IntegerKeyIDRetriever map[uint32]string + +// PutKey adds keys with uint32 IDs +func (i IntegerKeyIDRetriever) PutKey(keyID uint32, key string) { + i[keyID] = key +} + +// GetKey expects the key metadata bytes to be a little endian uint32 which +// is then used to retrieve the key bytes. Panics if the key id cannot be found. +func (i IntegerKeyIDRetriever) GetKey(keyMetadata []byte) string { + keyID := binary.LittleEndian.Uint32(keyMetadata) + k, ok := i[keyID] + if !ok { + panic(xerrors.Errorf("parquet: key missing for id %d", keyID)) + } + return k +} |