path: root/src/isa-l/igzip/igzip_compare_types.asm
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:45:59 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:45:59 +0000
commit     19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree       42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/isa-l/igzip/igzip_compare_types.asm
parent     Initial commit. (diff)
download   ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
           ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds. (upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/isa-l/igzip/igzip_compare_types.asm')
-rw-r--r--  src/isa-l/igzip/igzip_compare_types.asm  452
1 file changed, 452 insertions, 0 deletions
diff --git a/src/isa-l/igzip/igzip_compare_types.asm b/src/isa-l/igzip/igzip_compare_types.asm
new file mode 100644
index 000000000..c5ab3169f
--- /dev/null
+++ b/src/isa-l/igzip/igzip_compare_types.asm
@@ -0,0 +1,452 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "options.asm"
+%include "stdmac.asm"
+
+%ifndef UTILS_ASM
+%define UTILS_ASM
+; compare macro
+
+;; sttni2 is faster, but it can't be debugged,
+;; so the following code is based on "mine5"
+
+;; compares 8 bytes at a time, using xor
+;; assumes the input buffer has size at least 8
+;; compare_r src1, src2, result, result_max, tmp
+%macro compare_r 5
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4
+%define %%tmp %5
+%define %%tmp16 %5w ; tmp as a 16-bit register
+
+ sub %%result_max, 16
+ cmp %%result, %%result_max
+ jg %%_by_8
+
+%%loop1:
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_8:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ jg %%_cmp_last
+
+	; compare the next 8 bytes
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+%%_cmp_last:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ je %%end
+
+ lea %%result, [%%result_max - 8]
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ jmp %%end
+
+%%miscompare_reg:
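+	; the xor result has a set bit at the first differing bit position;
+	; bsf finds it and shr by 3 converts the bit index to a byte offset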
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+%%end:
+%endm
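+
+;; Example invocation (illustrative sketch only; the registers below are
+;; arbitrary placeholder choices, not required by the macro):
+;;     compare_r	rsi, rdi, rax, rcx, r8
+;; advances rax while the bytes at [rsi + rax] and [rdi + rax] match, stopping
+;; at the first mismatch or once rax reaches rcx; r8 is clobbered as scratch.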
+
+;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
+;; assumes the input buffer has size at least 8
+;; compare_x src1, src2, result, result_max, tmp, xtmp1, xtmp2
+%macro compare_x 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4
+%define %%tmp %5
+%define %%tmp16 %5w ; tmp as a 16-bit register
+%define %%tmp32 %5d ; tmp as a 32-bit register
+%define %%xtmp %6
+%define %%xtmp2 %7
+
+ sub %%result_max, 32
+ cmp %%result, %%result_max
+ jg %%_by_16
+
+%%loop1:
+ MOVDQU %%xtmp, [%%src1 + %%result]
+ MOVDQU %%xtmp2, [%%src2 + %%result]
+ PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+ PMOVMSKB %%tmp32, %%xtmp
+ xor %%tmp, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+ MOVDQU %%xtmp, [%%src1 + %%result]
+ MOVDQU %%xtmp2, [%%src2 + %%result]
+ PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+ PMOVMSKB %%tmp32, %%xtmp
+ xor %%tmp, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_16:
+ add %%result_max, 16
+ cmp %%result, %%result_max
+ jg %%_by_8
+
+ MOVDQU %%xtmp, [%%src1 + %%result]
+ MOVDQU %%xtmp2, [%%src2 + %%result]
+ PCMPEQB %%xtmp, %%xtmp, %%xtmp2
+ PMOVMSKB %%tmp32, %%xtmp
+ xor %%tmp, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+%%_by_8:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ jg %%_cmp_last
+
+	; compare the next 8 bytes
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+%%_cmp_last:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ je %%end
+
+ lea %%result, [%%result_max - 8]
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ jmp %%end
+
+%%miscompare_reg:
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+ jmp %%end
+
+%%miscompare_vect:
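+	; after the xor with 0xFFFF, each set bit of %%tmp marks one
+	; mismatching byte, so the lowest set bit index is already the byte offset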
+ bsf %%tmp, %%tmp
+ add %%result, %%tmp
+%%end:
+%endm
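+
+;; Example invocation (illustrative sketch only; register choices are
+;; arbitrary and xmm1/xmm2 are assumed free as scratch):
+;;     compare_x	rsi, rdi, rax, rcx, r8, xmm1, xmm2
+;; same calling convention and result as compare_r, but compares 16 bytes per
+;; loop iteration.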
+
+;; compares 32 bytes at a time, using vpcmpeqb/vpmovmskb (AVX2)
+;; assumes the input buffer has size at least 8
+;; compare_y src1, src2, result, result_max, tmp, xtmp1, xtmp2
+%macro compare_y 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4
+%define %%tmp %5
+%define %%tmp16 %5w ; tmp as a 16-bit register
+%define %%tmp32 %5d ; tmp as a 32-bit register
+%define %%ytmp %6
+%define %%ytmp2 %7
+
+ sub %%result_max, 64
+ cmp %%result, %%result_max
+ jg %%_by_32
+
+%%loop1:
+ vmovdqu %%ytmp, [%%src1 + %%result]
+ vmovdqu %%ytmp2, [%%src2 + %%result]
+ vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+ vpmovmskb %%tmp, %%ytmp
+ xor %%tmp32, 0xFFFFFFFF
+ jnz %%miscompare_vect
+ add %%result, 32
+
+ vmovdqu %%ytmp, [%%src1 + %%result]
+ vmovdqu %%ytmp2, [%%src2 + %%result]
+ vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+ vpmovmskb %%tmp, %%ytmp
+ xor %%tmp32, 0xFFFFFFFF
+ jnz %%miscompare_vect
+ add %%result, 32
+
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_32:
+ add %%result_max, 32
+ cmp %%result, %%result_max
+ jg %%_by_16
+
+ vmovdqu %%ytmp, [%%src1 + %%result]
+ vmovdqu %%ytmp2, [%%src2 + %%result]
+ vpcmpeqb %%ytmp, %%ytmp, %%ytmp2
+ vpmovmskb %%tmp, %%ytmp
+ xor %%tmp32, 0xFFFFFFFF
+ jnz %%miscompare_vect
+ add %%result, 32
+
+%%_by_16:
+ add %%result_max, 16
+ cmp %%result, %%result_max
+ jg %%_by_8
+
+ vmovdqu %%ytmp %+ x, [%%src1 + %%result]
+ vmovdqu %%ytmp2 %+ x, [%%src2 + %%result]
+ vpcmpeqb %%ytmp %+ x, %%ytmp %+ x, %%ytmp2 %+ x
+ vpmovmskb %%tmp, %%ytmp %+ x
+ xor %%tmp32, 0xFFFF
+ jnz %%miscompare_vect
+ add %%result, 16
+
+%%_by_8:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ jg %%_cmp_last
+
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+
+%%_cmp_last:
+ add %%result_max, 8
+ cmp %%result, %%result_max
+ je %%end
+
+ lea %%result, [%%result_max - 8]
+
+	; compare the final (possibly overlapping) 8 bytes
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare_reg
+ add %%result, 8
+ jmp %%end
+
+%%miscompare_reg:
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+ jmp %%end
+
+%%miscompare_vect:
+ tzcnt %%tmp, %%tmp
+ add %%result, %%tmp
+%%end:
+%endm
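+
+;; Example invocation (illustrative sketch only, AVX2 path). The macro also
+;; uses the vector arguments with an x suffix (%%ytmp %+ x), so the example
+;; assumes the ymmNx -> xmmN aliases from reg_sizes.asm are in scope:
+;;     compare_y	rsi, rdi, rax, rcx, r8, ymm1, ymm2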
+
+;; compares 64 bytes at a time
+;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
+;; Clobbers result_max
+%macro compare_z 8
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3 ; Accumulator for match_length
+%define %%result_max %4
+%define %%tmp		%5	; tmp as a 64-bit register
+%define %%ktmp %6
+%define %%ztmp %7
+%define %%ztmp2 %8
+
+ sub %%result_max, 128
+ cmp %%result, %%result_max
+ jg %%_by_64
+
+%%loop1:
+ vmovdqu8 %%ztmp, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, 64
+
+ vmovdqu8 %%ztmp, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, 64
+
+ cmp %%result, %%result_max
+ jle %%loop1
+
+%%_by_64:
+ add %%result_max, 64
+ cmp %%result, %%result_max
+ jg %%_less_than_64
+
+ vmovdqu8 %%ztmp, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, 64
+
+%%_less_than_64:
+ add %%result_max, 64
+ sub %%result_max, %%result
+ jle %%end
+
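+	; build a mask with the low result_max bits set so the zero-masked
+	; loads below only touch the remaining tail bytes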
+ mov %%tmp, -1
+ bzhi %%tmp, %%tmp, %%result_max
+ kmovq %%ktmp, %%tmp
+
+ vmovdqu8 %%ztmp {%%ktmp}{z}, [%%src1 + %%result]
+ vmovdqu8 %%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
+ vpcmpb %%ktmp, %%ztmp, %%ztmp2, NEQ
+ ktestq %%ktmp, %%ktmp
+ jnz %%miscompare
+ add %%result, %%result_max
+
+ jmp %%end
+%%miscompare:
+ kmovq %%tmp, %%ktmp
+ tzcnt %%tmp, %%tmp
+ add %%result, %%tmp
+%%end:
+%endm
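+
+;; Example invocation (illustrative sketch only, AVX-512 path; register
+;; choices are arbitrary):
+;;     compare_z	rsi, rdi, rax, rcx, r8, k1, zmm1, zmm2
+;; as noted above, rcx (result_max) is clobbered.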
+
+%macro compare250 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4
+%define %%tmp %5
+%define %%xtmp0 %6x
+%define %%xtmp1 %7x
+%define %%ytmp0 %6
+%define %%ytmp1 %7
+
+ mov %%tmp, 250
+ cmp %%result_max, 250
+ cmovg %%result_max, %%tmp
+
+%if (COMPARE_TYPE == 1)
+ compare_r %%src1, %%src2, %%result, %%result_max, %%tmp
+%elif (COMPARE_TYPE == 2)
+ compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
+%elif (COMPARE_TYPE == 3)
+ compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
+%else
+%error Unknown Compare type COMPARE_TYPE
+ % error
+%endif
+%endmacro
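+
+;; Example invocation (illustrative sketch only; register choices are
+;; arbitrary, and for COMPARE_TYPE 2/3 the ymm arguments are assumed to have
+;; the x-suffixed xmm aliases from reg_sizes.asm available):
+;;     compare250	rsi, rdi, rax, rcx, r8, ymm1, ymm2
+;; rcx (result_max) is clamped to at most 250 before the compare is run.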
+
+; Assumes the buffer has at least 8 bytes
+; Accumulates match length onto result
+%macro compare_large 7
+%define %%src1 %1
+%define %%src2 %2
+%define %%result %3
+%define %%result_max %4
+%define %%tmp %5
+%define %%xtmp0 %6x
+%define %%xtmp1 %7x
+%define %%ytmp0 %6
+%define %%ytmp1 %7
+
+%if (COMPARE_TYPE == 1)
+ compare_r %%src1, %%src2, %%result, %%result_max, %%tmp
+%elif (COMPARE_TYPE == 2)
+ compare_x %%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
+%elif (COMPARE_TYPE == 3)
+ compare_y %%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
+%else
+%error Unknown Compare type COMPARE_TYPE
+ % error
+%endif
+%endmacro
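+
+;; Example invocation (illustrative sketch only; same argument conventions as
+;; the compare250 example above, but without the 250-byte clamp):
+;;     compare_large	rsi, rdi, rax, rcx, r8, ymm1, ymm2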
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; compare size, src1, src2, result, tmp
+%macro compare 5
+%define %%size %1
+%define %%src1 %2
+%define %%src2 %3
+%define %%result %4
+%define %%tmp %5
+%define %%tmp8		%5b	; tmp as an 8-bit register
+
+ xor %%result, %%result
+ sub %%size, 7
+ jle %%lab2
+%%loop1:
+ mov %%tmp, [%%src1 + %%result]
+ xor %%tmp, [%%src2 + %%result]
+ jnz %%miscompare
+ add %%result, 8
+ sub %%size, 8
+ jg %%loop1
+%%lab2:
+ ;; if we fall through from above, we have found no mismatches,
+ ;; %%size+7 is the number of bytes left to look at, and %%result is the
+ ;; number of bytes that have matched
+ add %%size, 7
+ jle %%end
+%%loop3:
+ mov %%tmp8, [%%src1 + %%result]
+ cmp %%tmp8, [%%src2 + %%result]
+ jne %%end
+ inc %%result
+ dec %%size
+ jg %%loop3
+ jmp %%end
+%%miscompare:
+ bsf %%tmp, %%tmp
+ shr %%tmp, 3
+ add %%result, %%tmp
+%%end:
+%endm
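+
+;; Example invocation (illustrative sketch only; register choices are
+;; arbitrary):
+;;     compare	rcx, rsi, rdi, rax, r8
+;; zeroes rax, then returns in rax the length of the common prefix of [rsi]
+;; and [rdi], comparing at most rcx bytes; rcx and r8 are clobbered.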
+
+%endif ;UTILS_ASM