1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%include "options.asm"
%include "lz0a_const.asm"
%include "stdmac.asm"
; Macros for doing Huffman Encoding
;; Table geometry used by the lookup macros in this file.
;; Without LONGER_HUFFTABLE the distance table holds only 2 entries.
;; With it the table holds 8192 entries, which is rejected at assembly
;; time when the history size D exceeds 8192.
%ifndef LONGER_HUFFTABLE
        %define DIST_TABLE_SIZE 2
        %define DECODE_OFFSET   0
%else
        %if (D > 8192)
                %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE
                 % error
        %else
                %define DIST_TABLE_SIZE 8192
                %define DECODE_OFFSET   26
        %endif
%endif

%define LEN_TABLE_SIZE  256
%define LIT_TABLE_SIZE  257

;; Byte offsets added to the hufftables base address by the macros below.
;; The distance table follows the header bytes plus 8 further bytes.
%define DIST_TABLE_START        (ISAL_DEF_MAX_HDR_SIZE + 8)
;; Biased back by one 4-byte entry.
%define DIST_TABLE_OFFSET       (DIST_TABLE_START - 4 * 1)
;; End of the distance table, biased back by three 4-byte entries
;; (presumably so a raw match length can be used as the index - confirm).
%define LEN_TABLE_OFFSET        (DIST_TABLE_START + 4 * DIST_TABLE_SIZE - 4 * 3)
%define LIT_TABLE_OFFSET        (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
;; One byte per literal, located after the 2-byte-per-entry literal table.
%define LIT_TABLE_SIZES_OFFSET  (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
;; Both distance-code offsets are biased down by DECODE_OFFSET entries
;; (2 bytes each for the codes, 1 byte each for the sizes).
%define DCODE_TABLE_OFFSET      (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - 2 * DECODE_OFFSET)
%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
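;; /** @brief Holds the huffman tree used to huffman encode the input stream **/
;; struct isal_hufftables {
;; // deflate huffman tree header
;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE];
;;
;; //!< Number of whole bytes in deflate_huff_hdr
;; uint32_t deflate_huff_hdr_count;
;;
;; //!< Number of bits in the partial byte in header
;; uint32_t deflate_huff_hdr_extra_bits;
;;
;; //!< bits 4:0 are the code length, bits 31:5 are the code
;; uint32_t dist_table[DIST_TABLE_SIZE];
;;
;; //!< bits 4:0 are the code length, bits 31:5 are the code
;; uint32_t len_table[LEN_TABLE_SIZE];
;;
;; //!< literal codes, read as whole 16-bit words
;; uint16_t lit_table[LIT_TABLE_SIZE];
;;
;; //!< literal code lengths, one byte per literal (at LIT_TABLE_SIZES_OFFSET)
;; uint8_t lit_table_sizes[LIT_TABLE_SIZE];
;;
;; //!< distance codes, read as whole 16-bit words
;; uint16_t dcodes[30 - DECODE_OFFSET];
;;
;; //!< distance code lengths, one byte per code (at DCODE_TABLE_SIZE_OFFSET)
;; uint8_t dcodes_sizes[30 - DECODE_OFFSET];
;; };
;; NOTE(review): the names of the two byte-sized arrays are inferred from the
;; offset constants and the byte loads in the macros below - verify them
;; against the C declaration of the structure.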
%ifdef LONGER_HUFFTABLE
; get_dist_code dist, code, len, hufftables
; Table-only variant, assembled when LONGER_HUFFTABLE is defined.
; Loads one 32-bit distance-table entry and splits it into code and length.
;   %1 dist       - 64-bit register used as the table index (only read)
;   %2 code       - OUT, written through its 32-bit view: entry >> 5
;   %3 len        - OUT, written through its 32-bit view: entry & 0x1F
;   %4 hufftables - base address of the hufftables structure
; Writes code, len and the flags; no other register is touched.
%macro get_dist_code 4
%define %%idx    %1
%define %%code32 %2d
%define %%len32  %3d
%define %%base   %4
        ; entry index is dist + 1 relative to DIST_TABLE_OFFSET
        mov     %%len32, [%%base + DIST_TABLE_OFFSET + 4 + 4*%%idx]
        mov     %%code32, %%len32       ; keep a copy before masking
        and     %%len32, 0x1F           ; len  = entry[4:0]
        shr     %%code32, 5             ; code = entry[31:5]
%endm
; get_packed_dist_code dist, code_len, hufftables
; Assembled when LONGER_HUFFTABLE is defined. Fetches the raw 32-bit
; distance-table entry without splitting it; unpack_dist_code splits
; such a value into code and length.
;   %1 dist       - 64-bit register used as the table index (only read)
;   %2 code_len   - OUT, written through its 32-bit view: the packed entry
;   %3 hufftables - base address of the hufftables structure
; Unlike get_dist_code in this configuration, no +1 is added to the index.
%macro get_packed_dist_code 3
%define %%idx      %1
%define %%packed32 %2d
%define %%base     %3
        mov     %%packed32, [%%base + DIST_TABLE_OFFSET + 4*%%idx]
%endm
; unpack_dist_code code, len
; Splits a packed table entry into its two fields, in place.
;   %1 code - IN: packed entry, OUT: entry >> 5   (32-bit view of %1)
;   %2 len  - OUT: entry & 0x1F                   (32-bit view of %2)
; Writes the flags.
%macro unpack_dist_code 2
%define %%packed32 %1d
%define %%len32    %2d
        mov     %%len32, %%packed32     ; copy before the shift destroys the low bits
        and     %%len32, 0x1F           ; len  = entry[4:0]
        shr     %%packed32, 5           ; code = entry[31:5]
%endm
%else
; compute_dist_code dist, code, len, hufftables
; Derives the distance symbol arithmetically, then looks up its Huffman
; code and code length and appends the extra bits.
;   %1 dist       - IN, clobbered. Must be >= 2: bsr(dist) - 1 is used as a
;                   bit count. (Presumably this is distance - 1; confirm
;                   against the callers.)
;   %2 code       - OUT: huffman code | (extra_bits << code_length)
;   %3 len        - OUT: num_extra_bits + code_length
;   %4 hufftables - base address of the hufftables structure
; Clobbers RCX, dist and the flags.
;
; The table loads go through %4. Earlier they named the outer-scope symbol
; "hufftables" directly, which ignored the argument and only assembled when
; the including file happened to define that name.
%macro compute_dist_code 4
%define %%dist %1 ; IN, clobbered
%define %%distq %1
%define %%code %2 ; OUT
%define %%len %3 ; OUT
%define %%hufftables %4
        bsr     rcx, %%dist             ; rcx = msb = index of highest set bit
        dec     rcx                     ; rcx = num_extra_bits = msb - 1
        ; code = low num_extra_bits bits of dist; len is handed to BZHI as
        ; its fourth operand (presumably scratch) and is overwritten below
        BZHI    %%code, %%dist, rcx, %%len
        SHRX    %%dist, %%dist, rcx     ; dist >>= num_extra_bits
        lea     %%dist, [%%dist + 2*rcx] ; dist = sym = dist + 2*num_extra_bits
        mov     %%len, rcx              ; len = num_extra_bits
        ; rcx = code length of sym (byte table), dist = code of sym (word table)
        movzx   rcx, byte [%%hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT]
        movzx   %%dist, word [%%hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT]
        SHLX    %%code, %%code, rcx     ; code = extra_bits << code_length
        or      %%code, %%dist          ; code = huffman code | shifted extra bits
        add     %%len, rcx              ; len = num_extra_bits + code_length
%endm
; get_dist_code dist, code, len, hufftables
; Short-table variant, assembled when LONGER_HUFFTABLE is not defined.
; Indices up to DIST_TABLE_SIZE - 1 are served from the distance table;
; anything larger is handed to compute_dist_code.
;   %1 dist       - IN, clobbered on the compute path
;   %2 code       - OUT
;   %3 len        - OUT (the table path loads it through the view "%3" + "d")
;   %4 hufftables - base address of the hufftables structure
; Clobbers RCX (compute path), dist and the flags.
; When IACA is defined the table path is not assembled, so every call
; takes the compute path.
;
; The table load goes through %4. Earlier it named the outer-scope symbol
; "hufftables" directly, which ignored the argument.
;
; NOTE(review): the range check uses a signed branch (jg); dist is expected
; to be non-negative here - confirm.
%macro get_dist_code 4
%define %%dist %1 ; IN, clobbered
%define %%distq %1 ; same register, used for addressing
%define %%code %2 ; OUT
%define %%len %3 ; OUT
%define %%hufftables %4
        cmp     %%dist, DIST_TABLE_SIZE - 1
        jg      %%do_compute            ; index is beyond the table
%ifndef IACA
        ; entry index is dist + 1 relative to DIST_TABLE_OFFSET
        mov     %%len %+ d, dword [%%hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT]
        mov     %%code, %%len           ; keep a copy before masking
        and     %%len, 0x1F             ; len  = entry[4:0]
        shr     %%code, 5               ; code = entry[31:5]
        jmp     %%done
%endif
%%do_compute:
        compute_dist_code %%distq, %%code, %%len, %%hufftables
%%done:
%endm
; get_packed_dist_code dist, code_len, hufftables
; Stub assembled when LONGER_HUFFTABLE is not defined. The body contains
; only macro-local %defines, so an invocation emits no instructions and
; code_len is left unmodified.
; NOTE(review): presumably the packed lookup is only meaningful with the
; large distance table - confirm that no caller relies on code_len in this
; configuration.
%macro get_packed_dist_code 3
%define %%dist %1 ; 64-bit, not read by this stub
%define %%code_len %2d ; 32-bit view, not written by this stub
%define %%hufftables %3 ; address of the hufftable, not used by this stub
%endm
%endif
; Macros for doing Huffman Encoding
; compute_dist_icf_code dist, code, tmp1
; Builds the intermediate-format distance field arithmetically:
;   code = (extra_bits << (EXTRA_BITS_OFFSET - DIST_OFFSET)) + sym
;   %1 dist - IN, clobbered (ends up holding sym). Must be >= 2, since
;             bsr(dist) - 1 is used as a bit count; the caller in this
;             file only reaches this macro when dist > 1.
;   %2 code - OUT
;   %3 tmp1 - passed to BZHI as its fourth operand (presumably scratch)
; Clobbers RCX, dist and the flags.
%macro compute_dist_icf_code 3
%define %%d       %1
%define %%out     %2
%define %%scratch %3
        bsr     rcx, %%d                ; rcx = index of highest set bit
        dec     rcx                     ; rcx = num_extra_bits
        BZHI    %%out, %%d, rcx, %%scratch ; out = low num_extra_bits bits of d
        SHRX    %%d, %%d, rcx           ; d >>= num_extra_bits
        lea     %%d, [%%d + 2*rcx]      ; d = sym = d + 2*num_extra_bits
        shl     %%out, EXTRA_BITS_OFFSET - DIST_OFFSET ; place the extra bits
        add     %%out, %%d              ; out = shifted extra bits + sym
%endm
; get_dist_icf_code dist, code, tmp1
; Produces the intermediate-format distance field, shifted into position
; by DIST_OFFSET.
;   %1 dist - IN, clobbered when dist > 1
;   %2 code - OUT
;   %3 tmp1 - scratch handed on to compute_dist_icf_code
; Values of dist <= 1 (signed compare) are used directly as the symbol;
; larger values go through compute_dist_icf_code, which clobbers RCX.
; NOTE(review): the %ifnidn guard tolerates code == dist on the short
; path, but the compute path writes code before it has finished reading
; dist - confirm no caller passes the same register for both.
%macro get_dist_icf_code 3
%define %%d       %1
%define %%out     %2
%define %%scratch %3
        cmp     %%d, 1
        jg      %%needs_compute
%ifnidn %%out, %%d
        mov     %%out, %%d              ; symbol equals the input value
%endif
        jmp     %%pack
%%needs_compute:
        compute_dist_icf_code %%d, %%out, %%scratch
%%pack:
        shl     %%out, DIST_OFFSET      ; move the field to its final position
%endm
; "len" can be same register as "length"
; get_len_code length, code, len, hufftables
; Loads one 32-bit length-table entry and splits it into code and length.
;   %1 length     - 64-bit register used as the table index
;   %2 code       - OUT, written through its 32-bit view: entry >> 5
;   %3 len        - OUT, written through its 32-bit view: entry & 0x1F
;   %4 hufftables - base address of the hufftables structure
; length is consumed by the first instruction only. Writes the flags.
%macro get_len_code 4
%define %%idx    %1
%define %%code32 %2d
%define %%len32  %3d
%define %%base   %4
        mov     %%len32, [%%base + LEN_TABLE_OFFSET + 4*%%idx]
        mov     %%code32, %%len32       ; keep a copy before masking
        and     %%len32, 0x1F           ; len  = entry[4:0]
        shr     %%code32, 5             ; code = entry[31:5]
%endm
; get_lit_code lit, code, len, hufftables
; Fetches the code and code length of a literal from two separate tables.
;   %1 lit        - 64-bit register or constant used as the index
;   %2 code       - OUT, 32-bit view: zero-extended 16-bit table entry
;   %3 len        - OUT, 32-bit view: zero-extended 8-bit table entry
;   %4 hufftables - base address of the hufftables structure
; len is written before lit is read for the code lookup, so len must not
; be the same register as lit. The flags are not modified.
%macro get_lit_code 4
%define %%sym    %1
%define %%code32 %2d
%define %%len32  %3d
%define %%base   %4
        movzx   %%len32, byte [%%base + LIT_TABLE_SIZES_OFFSET + %%sym]
        movzx   %%code32, word [%%base + LIT_TABLE_OFFSET + 2*%%sym]
%endm
;; compute_hash result, data
;; Hashes the value in data with the x86 crc32 instruction, seeded with 0.
;;   %1 result - OUT, 32-bit view: the hash
;;   %2 data   - IN, 32-bit view: value to hash (only read)
;; The instruction consumes all 32 bits of data. The hash is meant to cover
;; the first 3 bytes of the input, so presumably the caller masks data
;; beforehand - confirm.
;; Writes the flags (via xor).
%macro compute_hash 2
%define %%hash32 %1d
%define %%in32   %2d
        xor     %%hash32, %%hash32      ; seed = 0
        crc32   %%hash32, %%in32        ; fold the 32-bit input into the seed
%endm
|