From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:45:59 +0200 Subject: Adding upstream version 16.2.11+ds. Signed-off-by: Daniel Baumann --- src/isa-l/igzip/huffman.asm | 249 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 src/isa-l/igzip/huffman.asm (limited to 'src/isa-l/igzip/huffman.asm') diff --git a/src/isa-l/igzip/huffman.asm b/src/isa-l/igzip/huffman.asm new file mode 100644 index 000000000..9056b5ee4 --- /dev/null +++ b/src/isa-l/igzip/huffman.asm @@ -0,0 +1,249 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%include "lz0a_const.asm" +%include "stdmac.asm" + +; Macros for doing Huffman Encoding + +%ifdef LONGER_HUFFTABLE + %if (D > 8192) + %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE + % error + %else + %define DIST_TABLE_SIZE 8192 + %define DECODE_OFFSET 26 + %endif +%else + %define DIST_TABLE_SIZE 2 + %define DECODE_OFFSET 0 +%endif + +%define LEN_TABLE_SIZE 256 +%define LIT_TABLE_SIZE 257 + +%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) +%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) +%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) +%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE) +%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE) +%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2) +%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET) +;; /** @brief Holds the huffman tree used to huffman encode the input stream **/ +;; struct isal_hufftables { +;; // deflate huffman tree header +;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE]; +;; +;; //!< Number of whole bytes in deflate_huff_hdr +;; uint32_t deflate_huff_hdr_count; +;; +;; //!< Number of bits in the partial byte in header +;; uint32_t deflate_huff_hdr_extra_bits; +;; +;; //!< bits 7:0 are the code length, bits 31:8 are the code +;; uint32_t dist_table[DIST_TABLE_SIZE]; +;; +;; //!< bits 7:0 are the code length, bits 31:8 are the code +;; uint32_t len_table[LEN_TABLE_SIZE]; +;; +;; //!< bits 3:0 are the code length, bits 15:4 are the code +;; uint16_t lit_table[LIT_TABLE_SIZE]; +;; +;; //!< bits 3:0 are the code length, bits 15:4 are the code +;; uint16_t dcodes[30 - DECODE_OFFSET]; + +;; }; + + +%ifdef LONGER_HUFFTABLE +; Uses RCX, clobbers dist +; get_dist_code dist, code, len +%macro get_dist_code 4 +%define %%dist %1 ; 64-bit IN +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 ; address of the hufftable + + mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ] + mov %%code, %%len + and %%len, 0x1F; + shr %%code, 5 +%endm + +%macro get_packed_dist_code 3 +%define %%dist %1 ; 64-bit IN +%define %%code_len %2d ; 32-bit OUT +%define %%hufftables %3 ; address of the hufftable + mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] +%endm + +%macro unpack_dist_code 2 +%define %%code %1d ; 32-bit OUT +%define %%len %2d ; 32-bit OUT + + mov %%len, %%code + and %%len, 0x1F; + shr %%code, 5 +%endm + +%else +; Assumes (dist != 0) +; Uses RCX, clobbers dist +; void compute_dist_code dist, code, len +%macro compute_dist_code 4 +%define %%dist %1 ; IN, clobbered +%define %%distq %1 +%define %%code %2 ; OUT +%define %%len %3 ; OUT +%define %%hufftables %4 + + bsr rcx, %%dist ; ecx = msb = bsr(dist) + dec rcx ; ecx = num_extra_bits = msb - N + BZHI %%code, %%dist, rcx, %%len + SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits + lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 + mov %%len, rcx ; len = num_extra_bits + movzx rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] + movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] + SHLX %%code, %%code, rcx ; code = extra_bits << (sym & 0xF) + or %%code, %%dist ; code = (sym >> 4) | (extra_bits << (sym & 0xF)) + add %%len, rcx ; len = num_extra_bits + (sym & 0xF) +%endm + +; Uses RCX, clobbers dist +; get_dist_code dist, code, len +%macro get_dist_code 4 +%define %%dist %1 ; 32-bit IN, clobbered +%define %%distq %1 ; 64-bit IN, clobbered +%define %%code %2 ; 32-bit OUT +%define %%len %3 ; 32-bit OUT +%define %%hufftables %4 + + cmp %%dist, DIST_TABLE_SIZE - 1 + jg %%do_compute +%ifndef IACA + mov %%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT] + mov %%code, %%len + and %%len, 0x1F; + shr %%code, 5 + jmp %%done +%endif +%%do_compute: + compute_dist_code %%distq, %%code, %%len, %%hufftables +%%done: +%endm + +%macro get_packed_dist_code 3 +%define %%dist %1 ; 64-bit IN +%define %%code_len %2d ; 32-bit OUT +%define %%hufftables %3 ; address of the hufftable +%endm + +%endif + + +; Macros for doing Huffman Encoding + +; Assumes (dist != 0) +; Uses RCX, clobbers dist +; void compute_dist_code dist, code, len +%macro compute_dist_icf_code 3 +%define %%dist %1 ; IN, clobbered +%define %%distq %1 +%define %%code %2 ; OUT +%define %%tmp1 %3 + + bsr rcx, %%dist ; ecx = msb = bsr(dist) + dec rcx ; ecx = num_extra_bits = msb - N + BZHI %%code, %%dist, rcx, %%tmp1 + SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits + lea %%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2 + shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET + add %%code, %%dist ; code = extra_bits | sym + +%endm + +; Uses RCX, clobbers dist +; get_dist_code dist, code, len +%macro get_dist_icf_code 3 +%define %%dist %1 ; 32-bit IN, clobbered +%define %%distq %1 ; 64-bit IN, clobbered +%define %%code %2 ; 32-bit OUT +%define %%tmp1 %3 + + cmp %%dist, 1 + jg %%do_compute + +%ifnidn %%code, %%dist + mov %%code, %%dist +%endif + jmp %%done +%%do_compute: + compute_dist_icf_code %%distq, %%code, %%tmp1 +%%done: + shl %%code, DIST_OFFSET +%endm + + +; "len" can be same register as "length" +; get_len_code length, code, len +%macro get_len_code 4 +%define %%length %1 ; 64-bit IN +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 + + mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] + mov %%code, %%len + and %%len, 0x1F + shr %%code, 5 +%endm + + +%macro get_lit_code 4 +%define %%lit %1 ; 64-bit IN or CONST +%define %%code %2d ; 32-bit OUT +%define %%len %3d ; 32-bit OUT +%define %%hufftables %4 + + movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit] + movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit] + +%endm + + +;; Compute hash of first 3 bytes of data +%macro compute_hash 2 +%define %%result %1d ; 32-bit reg +%define %%data %2d ; 32-bit reg (low byte not clobbered) + + xor %%result, %%result + crc32 %%result, %%data +%endm -- cgit v1.2.3