summaryrefslogtreecommitdiffstats
path: root/src/arrow/go/arrow/ipc/compression.go
blob: 55a00469ce9525b0babc54ec10c1edcaf336a394 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ipc

import (
	"io"

	"github.com/apache/arrow/go/v6/arrow/internal/debug"
	"github.com/apache/arrow/go/v6/arrow/internal/flatbuf"
	"github.com/klauspost/compress/zstd"
	"github.com/pierrec/lz4/v4"
)

// compressor is the package-internal abstraction over the streaming
// encoders returned by getCompressor. Besides the usual write/close
// behaviour it exposes a size bound and the flatbuf identifier of the codec
// it implements.
type compressor interface {
	// Write and Close are supplied by the underlying streaming encoder.
	io.Writer
	io.Closer

	// Reset hands the compressor the io.Writer it should use from now on.
	Reset(io.Writer)
	// MaxCompressedLen reports the codec's size bound for n bytes of input.
	MaxCompressedLen(n int) int
	// Type reports which flatbuf.CompressionType this compressor produces.
	Type() flatbuf.CompressionType
}

// lz4Compressor embeds an *lz4.Writer, so the writer's methods are promoted
// onto this type. The methods declared directly on lz4Compressor add
// MaxCompressedLen and Type on top of that.
type lz4Compressor struct {
	*lz4.Writer
}

// MaxCompressedLen reports the size bound for n bytes of input, as computed
// by lz4.CompressBlockBound.
//
// NOTE(review): CompressBlockBound is the bound for a single LZ4 block, while
// this compressor writes through an lz4.Writer; confirm callers only use the
// value as a sizing hint.
func (lz4Compressor) MaxCompressedLen(n int) int {
	bound := lz4.CompressBlockBound(n)
	return bound
}

// Type identifies this compressor as the LZ4_FRAME codec.
func (lz4Compressor) Type() flatbuf.CompressionType {
	return flatbuf.CompressionTypeLZ4_FRAME
}

// zstdCompressor embeds a *zstd.Encoder, so the encoder's methods are
// promoted onto this type. The methods declared directly on zstdCompressor
// add MaxCompressedLen and Type on top of that.
type zstdCompressor struct {
	*zstd.Encoder
}

// MaxCompressedLen returns the size bound for n bytes of input, following the
// ZSTD_COMPRESSBOUND macro from zstd.h:
//
//	n + (n >> 8) + (n < 128 KiB ? (128 KiB - n) >> 11 : 0)
//
// n must be non-negative; this precondition is checked with debug.Assert.
func (zstdCompressor) MaxCompressedLen(n int) int {
	debug.Assert(n >= 0, "MaxCompressedLen called with len less than 0")

	// Inputs below this size receive an additional margin in the bound.
	const smallInputLimit = 128 << 10

	bound := n + (n >> 8)
	if n < smallInputLimit {
		// Only evaluate the margin when it applies, so the subtraction
		// never has to wrap around for large inputs.
		bound += (smallInputLimit - n) >> 11
	}
	return bound
}

// Type identifies this compressor as the ZSTD codec.
func (zstdCompressor) Type() flatbuf.CompressionType {
	return flatbuf.CompressionTypeZSTD
}

// getCompressor builds a new compressor for codec. It returns nil when codec
// is neither LZ4_FRAME nor ZSTD.
//
// The underlying encoder is created with a nil destination writer;
// presumably callers point it at a real writer through Reset before use
// (verify against call sites).
//
// It panics if the underlying encoder cannot be constructed or configured.
func getCompressor(codec flatbuf.CompressionType) compressor {
	switch codec {
	case flatbuf.CompressionTypeLZ4_FRAME:
		w := lz4.NewWriter(nil)
		// options here chosen in order to match the C++ implementation
		err := w.Apply(lz4.ChecksumOption(false), lz4.BlockSizeOption(lz4.Block64Kb))
		if err != nil {
			// A rejected option would silently leave the writer on its
			// default settings, so fail loudly like the ZSTD branch does.
			panic(err)
		}
		return &lz4Compressor{w}
	case flatbuf.CompressionTypeZSTD:
		enc, err := zstd.NewWriter(nil)
		if err != nil {
			panic(err)
		}
		return zstdCompressor{enc}
	}
	return nil
}

// decompressor is the package-internal abstraction over the streaming
// decoders returned by getDecompressor: a reader whose input source can be
// replaced.
type decompressor interface {
	// Reset hands the decompressor the io.Reader it should read from next.
	Reset(io.Reader)
	// Read comes from the underlying streaming decoder.
	io.Reader
}

// zstdDecompressor embeds a *zstd.Decoder, so the decoder's methods are
// promoted onto this type. It declares its own Reset because the decoder's
// Reset returns an error, whereas the decompressor interface's Reset returns
// nothing.
type zstdDecompressor struct {
	*zstd.Decoder
}

// Reset forwards r to the embedded decoder's Reset. The decompressor
// interface's Reset has no error result, so any error the decoder reports is
// raised as a panic.
func (z *zstdDecompressor) Reset(r io.Reader) {
	err := z.Decoder.Reset(r)
	if err == nil {
		return
	}
	panic(err)
}

// getDecompressor builds a new decompressor for codec. It returns nil when
// codec is neither LZ4_FRAME nor ZSTD, and panics if the ZSTD decoder cannot
// be constructed.
//
// The underlying decoder is created with a nil source reader; presumably
// callers supply the real input through Reset before reading (verify against
// call sites).
func getDecompressor(codec flatbuf.CompressionType) decompressor {
	if codec == flatbuf.CompressionTypeLZ4_FRAME {
		return lz4.NewReader(nil)
	}
	if codec != flatbuf.CompressionTypeZSTD {
		// Unknown codec: nothing to build.
		return nil
	}

	zr, err := zstd.NewReader(nil)
	if err != nil {
		panic(err)
	}
	return &zstdDecompressor{zr}
}