diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/isa-l/igzip/igzip_compare_types.asm | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds (tags: upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/isa-l/igzip/igzip_compare_types.asm')
-rw-r--r-- | src/isa-l/igzip/igzip_compare_types.asm | 452 |
1 file changed, 452 insertions, 0 deletions
diff --git a/src/isa-l/igzip/igzip_compare_types.asm b/src/isa-l/igzip/igzip_compare_types.asm new file mode 100644 index 000000000..c5ab3169f --- /dev/null +++ b/src/isa-l/igzip/igzip_compare_types.asm @@ -0,0 +1,452 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "options.asm" +%include "stdmac.asm" + +%ifndef UTILS_ASM +%define UTILS_ASM +; compare macro + +;; sttni2 is faster, but it can't be debugged +;; so following code is based on "mine5" + +;; compares 8 bytes at a time, using xor +;; assumes the input buffer has size at least 8 +;; compare_r src1, src2, result, result_max, tmp +%macro compare_r 5 +%define %%src1 %1 +%define %%src2 %2 +%define %%result %3 +%define %%result_max %4 +%define %%tmp %5 +%define %%tmp16 %5w ; tmp as a 16-bit register + + sub %%result_max, 16 + cmp %%result, %%result_max + jg %%_by_8 + +%%loop1: + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + cmp %%result, %%result_max + jle %%loop1 + +%%_by_8: + add %%result_max, 8 + cmp %%result, %%result_max + jg %%_cmp_last + + ; compare last two bytes + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + +%%_cmp_last: + add %%result_max, 8 + cmp %%result, %%result_max + je %%end + + lea %%result, [%%result_max - 8] + + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + jmp %%end + +%%miscompare_reg: + bsf %%tmp, %%tmp + shr %%tmp, 3 + add %%result, %%tmp +%%end: +%endm + +;; compares 16 bytes at a time, using pcmpeqb/pmovmskb +;; assumes the input buffer has size at least 8 +;; compare_x src1, src2, result, result_max, tmp, xtmp1, xtmp2 +%macro compare_x 7 +%define %%src1 %1 +%define %%src2 %2 +%define %%result %3 ; Accumulator for match_length +%define %%result_max %4 +%define %%tmp %5 +%define %%tmp16 %5w ; tmp as a 16-bit register +%define %%tmp32 %5d ; tmp as a 32-bit register +%define %%xtmp %6 +%define %%xtmp2 %7 + + sub %%result_max, 32 + cmp %%result, %%result_max + jg %%_by_16 + 
+%%loop1: + MOVDQU %%xtmp, [%%src1 + %%result] + MOVDQU %%xtmp2, [%%src2 + %%result] + PCMPEQB %%xtmp, %%xtmp, %%xtmp2 + PMOVMSKB %%tmp32, %%xtmp + xor %%tmp, 0xFFFF + jnz %%miscompare_vect + add %%result, 16 + + MOVDQU %%xtmp, [%%src1 + %%result] + MOVDQU %%xtmp2, [%%src2 + %%result] + PCMPEQB %%xtmp, %%xtmp, %%xtmp2 + PMOVMSKB %%tmp32, %%xtmp + xor %%tmp, 0xFFFF + jnz %%miscompare_vect + add %%result, 16 + + cmp %%result, %%result_max + jle %%loop1 + +%%_by_16: + add %%result_max, 16 + cmp %%result, %%result_max + jg %%_by_8 + + MOVDQU %%xtmp, [%%src1 + %%result] + MOVDQU %%xtmp2, [%%src2 + %%result] + PCMPEQB %%xtmp, %%xtmp, %%xtmp2 + PMOVMSKB %%tmp32, %%xtmp + xor %%tmp, 0xFFFF + jnz %%miscompare_vect + add %%result, 16 + +%%_by_8: + add %%result_max, 8 + cmp %%result, %%result_max + jg %%_cmp_last + + ; compare last two bytes + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + +%%_cmp_last: + add %%result_max, 8 + cmp %%result, %%result_max + je %%end + + lea %%result, [%%result_max - 8] + + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + jmp %%end + +%%miscompare_reg: + bsf %%tmp, %%tmp + shr %%tmp, 3 + add %%result, %%tmp + jmp %%end + +%%miscompare_vect: + bsf %%tmp, %%tmp + add %%result, %%tmp +%%end: +%endm + +;; compares 32 bytes at a time, using pcmpeqb/pmovmskb +;; assumes the input buffer has size at least 8 +;; compare_y src1, src2, result, result_max, tmp, xtmp1, xtmp2 +%macro compare_y 7 +%define %%src1 %1 +%define %%src2 %2 +%define %%result %3 ; Accumulator for match_length +%define %%result_max %4 +%define %%tmp %5 +%define %%tmp16 %5w ; tmp as a 16-bit register +%define %%tmp32 %5d ; tmp as a 32-bit register +%define %%ytmp %6 +%define %%ytmp2 %7 + + sub %%result_max, 64 + cmp %%result, %%result_max + jg %%_by_32 + +%%loop1: + vmovdqu %%ytmp, [%%src1 + %%result] + vmovdqu %%ytmp2, [%%src2 + %%result] + vpcmpeqb %%ytmp, %%ytmp, 
%%ytmp2 + vpmovmskb %%tmp, %%ytmp + xor %%tmp32, 0xFFFFFFFF + jnz %%miscompare_vect + add %%result, 32 + + vmovdqu %%ytmp, [%%src1 + %%result] + vmovdqu %%ytmp2, [%%src2 + %%result] + vpcmpeqb %%ytmp, %%ytmp, %%ytmp2 + vpmovmskb %%tmp, %%ytmp + xor %%tmp32, 0xFFFFFFFF + jnz %%miscompare_vect + add %%result, 32 + + cmp %%result, %%result_max + jle %%loop1 + +%%_by_32: + add %%result_max, 32 + cmp %%result, %%result_max + jg %%_by_16 + + vmovdqu %%ytmp, [%%src1 + %%result] + vmovdqu %%ytmp2, [%%src2 + %%result] + vpcmpeqb %%ytmp, %%ytmp, %%ytmp2 + vpmovmskb %%tmp, %%ytmp + xor %%tmp32, 0xFFFFFFFF + jnz %%miscompare_vect + add %%result, 32 + +%%_by_16: + add %%result_max, 16 + cmp %%result, %%result_max + jg %%_by_8 + + vmovdqu %%ytmp %+ x, [%%src1 + %%result] + vmovdqu %%ytmp2 %+ x, [%%src2 + %%result] + vpcmpeqb %%ytmp %+ x, %%ytmp %+ x, %%ytmp2 %+ x + vpmovmskb %%tmp, %%ytmp %+ x + xor %%tmp32, 0xFFFF + jnz %%miscompare_vect + add %%result, 16 + +%%_by_8: + add %%result_max, 8 + cmp %%result, %%result_max + jg %%_cmp_last + + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + +%%_cmp_last: + add %%result_max, 8 + cmp %%result, %%result_max + je %%end + + lea %%result, [%%result_max - 8] + + ; compare last two bytes + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare_reg + add %%result, 8 + jmp %%end + +%%miscompare_reg: + bsf %%tmp, %%tmp + shr %%tmp, 3 + add %%result, %%tmp + jmp %%end + +%%miscompare_vect: + tzcnt %%tmp, %%tmp + add %%result, %%tmp +%%end: +%endm + +;; compares 64 bytes at a time +;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2 +;; Clobbers result_max +%macro compare_z 8 +%define %%src1 %1 +%define %%src2 %2 +%define %%result %3 ; Accumulator for match_length +%define %%result_max %4 +%define %%tmp %5 ; tmp as a 16-bit register +%define %%ktmp %6 +%define %%ztmp %7 +%define %%ztmp2 %8 + + sub %%result_max, 128 + cmp %%result, %%result_max 
+ jg %%_by_64 + +%%loop1: + vmovdqu8 %%ztmp, [%%src1 + %%result] + vmovdqu8 %%ztmp2, [%%src2 + %%result] + vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ + ktestq %%ktmp, %%ktmp + jnz %%miscompare + add %%result, 64 + + vmovdqu8 %%ztmp, [%%src1 + %%result] + vmovdqu8 %%ztmp2, [%%src2 + %%result] + vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ + ktestq %%ktmp, %%ktmp + jnz %%miscompare + add %%result, 64 + + cmp %%result, %%result_max + jle %%loop1 + +%%_by_64: + add %%result_max, 64 + cmp %%result, %%result_max + jg %%_less_than_64 + + vmovdqu8 %%ztmp, [%%src1 + %%result] + vmovdqu8 %%ztmp2, [%%src2 + %%result] + vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ + ktestq %%ktmp, %%ktmp + jnz %%miscompare + add %%result, 64 + +%%_less_than_64: + add %%result_max, 64 + sub %%result_max, %%result + jle %%end + + mov %%tmp, -1 + bzhi %%tmp, %%tmp, %%result_max + kmovq %%ktmp, %%tmp + + vmovdqu8 %%ztmp {%%ktmp}{z}, [%%src1 + %%result] + vmovdqu8 %%ztmp2 {%%ktmp}{z}, [%%src2 + %%result] + vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ + ktestq %%ktmp, %%ktmp + jnz %%miscompare + add %%result, %%result_max + + jmp %%end +%%miscompare: + kmovq %%tmp, %%ktmp + tzcnt %%tmp, %%tmp + add %%result, %%tmp +%%end: +%endm + +%macro compare250 7 +%define %%src1 %1 +%define %%src2 %2 +%define %%result %3 +%define %%result_max %4 +%define %%tmp %5 +%define %%xtmp0 %6x +%define %%xtmp1 %7x +%define %%ytmp0 %6 +%define %%ytmp1 %7 + + mov %%tmp, 250 + cmp %%result_max, 250 + cmovg %%result_max, %%tmp + +%if (COMPARE_TYPE == 1) + compare_r %%src1, %%src2, %%result, %%result_max, %%tmp +%elif (COMPARE_TYPE == 2) + compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1 +%elif (COMPARE_TYPE == 3) + compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1 +%else +%error Unknown Compare type COMPARE_TYPE + % error +%endif +%endmacro + +; Assumes the buffer has at least 8 bytes +; Accumulates match length onto result +%macro compare_large 7 +%define %%src1 %1 +%define %%src2 %2 +%define %%result %3 
+%define %%result_max %4 +%define %%tmp %5 +%define %%xtmp0 %6x +%define %%xtmp1 %7x +%define %%ytmp0 %6 +%define %%ytmp1 %7 + +%if (COMPARE_TYPE == 1) + compare_r %%src1, %%src2, %%result, %%result_max, %%tmp +%elif (COMPARE_TYPE == 2) + compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1 +%elif (COMPARE_TYPE == 3) + compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1 +%else +%error Unknown Compare type COMPARE_TYPE + % error +%endif +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; compare size, src1, src2, result, tmp +%macro compare 5 +%define %%size %1 +%define %%src1 %2 +%define %%src2 %3 +%define %%result %4 +%define %%tmp %5 +%define %%tmp8 %5b ; tmp as a 8-bit register + + xor %%result, %%result + sub %%size, 7 + jle %%lab2 +%%loop1: + mov %%tmp, [%%src1 + %%result] + xor %%tmp, [%%src2 + %%result] + jnz %%miscompare + add %%result, 8 + sub %%size, 8 + jg %%loop1 +%%lab2: + ;; if we fall through from above, we have found no mismatches, + ;; %%size+7 is the number of bytes left to look at, and %%result is the + ;; number of bytes that have matched + add %%size, 7 + jle %%end +%%loop3: + mov %%tmp8, [%%src1 + %%result] + cmp %%tmp8, [%%src2 + %%result] + jne %%end + inc %%result + dec %%size + jg %%loop3 + jmp %%end +%%miscompare: + bsf %%tmp, %%tmp + shr %%tmp, 3 + add %%result, %%tmp +%%end: +%endm + +%endif ;UTILS_ASM |