diff options
Diffstat (limited to 'src/isa-l/igzip/stdmac.asm')
-rw-r--r-- | src/isa-l/igzip/stdmac.asm | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/src/isa-l/igzip/stdmac.asm b/src/isa-l/igzip/stdmac.asm new file mode 100644 index 00000000..500ee7fd --- /dev/null +++ b/src/isa-l/igzip/stdmac.asm @@ -0,0 +1,389 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%ifndef STDMAC_ASM +%define STDMAC_ASM +;; internal macro used by push_all +;; push args L to R +%macro push_all_ 1-* +%xdefine _PUSH_ALL_REGS_COUNT_ %0 +%rep %0 + push %1 + %rotate 1 +%endrep +%endmacro + +;; internal macro used by pop_all +;; pop args R to L +%macro pop_all_ 1-* +%rep %0 + %rotate -1 + pop %1 +%endrep +%endmacro + +%xdefine _PUSH_ALL_REGS_COUNT_ 0 +%xdefine _ALLOC_STACK_VAL_ 0 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; STACK_OFFSET +;; Number of bytes subtracted from stack due to PUSH_ALL and ALLOC_STACK +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define STACK_OFFSET (_PUSH_ALL_REGS_COUNT_ * 8 + _ALLOC_STACK_VAL_) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; PUSH_ALL reg1, reg2, ... +;; push args L to R, remember regs for pop_all +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro PUSH_ALL 1+ +%xdefine _PUSH_ALL_REGS_ %1 + push_all_ %1 +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; POP_ALL +;; push args from prev "push_all" R to L +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro POP_ALL 0 + pop_all_ _PUSH_ALL_REGS_ +%xdefine _PUSH_ALL_REGS_COUNT_ 0 +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALLOC_STACK n +;; subtract n from the stack pointer and remember the value for restore_stack +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro ALLOC_STACK 1 +%xdefine _ALLOC_STACK_VAL_ %1 + sub rsp, %1 +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; RESTORE_STACK +;; add n to the stack pointer, where n is the arg to the previous alloc_stack +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro RESTORE_STACK 0 + add rsp, _ALLOC_STACK_VAL_ +%xdefine _ALLOC_STACK_VAL_ 0 +%endmacro + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NOPN n +;; Create n bytes of NOP, using nops of up to 8 bytes each +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro NOPN 1 + + %assign %%i %1 + %rep 200 + %if (%%i < 9) + nopn %%i + %exitrep + %else + nopn 8 + %assign %%i (%%i - 8) + %endif + %endrep +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; nopn n +;; Create n bytes of NOP, where n is between 1 and 9 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro nopn 1 +%if (%1 == 1) + nop +%elif (%1 == 2) + db 0x66 + nop +%elif (%1 == 3) + db 0x0F + db 0x1F + db 0x00 +%elif (%1 == 4) + db 0x0F + db 0x1F + db 0x40 + db 0x00 +%elif (%1 == 5) + db 0x0F + db 0x1F + db 0x44 + db 0x00 + db 0x00 +%elif (%1 == 6) + db 0x66 + db 0x0F + db 0x1F + db 0x44 + db 0x00 + db 0x00 +%elif (%1 == 7) + db 0x0F + db 0x1F + db 0x80 + db 0x00 + db 0x00 + db 0x00 + db 0x00 +%elif (%1 == 8) + db 0x0F + db 0x1F + db 0x84 + db 0x00 + db 0x00 + db 0x00 + db 0x00 + db 0x00 +%elif (%1 == 9) + db 0x66 + db 0x0F + db 0x1F + db 0x84 + db 0x00 + db 0x00 + db 0x00 + db 0x00 + db 0x00 +%else +%error Invalid value to nopn +%endif +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rolx64 dst, src, amount +;; Emulate a rolx instruction using rorx, assuming data 64 bits wide +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro rolx64 3 + rorx %1, %2, (64-%3) +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rolx32 dst, src, amount +;; Emulate a rolx instruction using rorx, assuming data 32 bits wide +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro rolx32 3 + rorx %1, %2, (32-%3) +%endm + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Define a function void ssc(uint64_t x) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro DEF_SSC 0 +global ssc +ssc: + mov rax, rbx + mov rbx, rcx + db 0x64 + db 0x67 + nop + mov rbx, rax + ret +%endm + +;; Implement BZHI instruction on older architectures +;; Clobbers rcx, unless rcx is %%index +%macro BZHI 4 +%define %%dest %1 +%define %%src %2 +%define %%index %3 +%define %%tmp1 %4 + +%ifdef USE_HSWNI + bzhi %%dest, %%src, %%index +%else +%ifnidn %%index, rcx + mov rcx, %%index +%endif + mov %%tmp1, 1 + shl %%tmp1, cl + sub %%tmp1, 1 + +%ifnidn %%src, %%dest + mov %%dest, %%src +%endif + + and %%dest, %%tmp1 +%endif +%endm + +;; Implement shrx instruction on older architectures +;; Clobbers rcx, unless rcx is %%index +%macro SHRX 3 +%define %%dest %1 +%define %%src %2 +%define %%index %3 + +%ifdef USE_HSWNI + shrx %%dest, %%src, %%index +%else +%ifnidn %%src, %%dest + mov %%dest, %%src +%endif +%ifnidn %%index, rcx + mov rcx, %%index +%endif + shr %%dest, cl +%endif +%endm + +;; Implement shlx instruction on older architectures +;; Clobbers rcx, unless rcx is %%index +%macro SHLX 3 +%define %%dest %1 +%define %%src %2 +%define %%index %3 + +%ifdef USE_HSWNI + shlx %%dest, %%src, %%index +%else +%ifnidn %%src, %%dest + mov %%dest, %%src +%endif +%ifnidn %%index, rcx + mov rcx, %%index +%endif + shl %%dest, cl +%endif +%endm + +%macro MOVDQU 2 +%define %%dest %1 +%define %%src %2 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vmovdqu %%dest, %%src +%else + movdqu %%dest, %%src +%endif +%endm + +%macro MOVD 2 +%define %%dest %1 +%define %%src %2 +%if (ARCH == 02 || ARCH == 03 || ARCH == 04) + vmovd %%dest, %%src +%else + movd %%dest, %%src +%endif +%endm + +%macro MOVQ 2 +%define %%dest %1 +%define %%src %2 +%if (ARCH == 02 || ARCH == 03 || ARCH == 04) + vmovq %%dest, %%src +%else + movq %%dest, %%src +%endif +%endm + +%macro PINSRD 3 +%define %%dest %1 +%define %%src %2 +%define %%offset %3 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vpinsrd %%dest, %%src, %%offset +%else + pinsrd %%dest, %%src, %%offset +%endif +%endm + +%macro PEXTRD 3 +%define %%dest %1 +%define %%src %2 +%define %%offset %3 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vpextrd %%dest, %%src, %%offset +%else + pextrd %%dest, %%src, %%offset +%endif +%endm + +%macro PSRLDQ 2 +%define %%dest %1 +%define %%offset %2 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vpsrldq %%dest, %%offset +%else + psrldq %%dest, %%offset +%endif +%endm + +%macro PAND 3 +%define %%dest %1 +%define %%src1 %2 +%define %%src2 %3 +%if (ARCH == 02 || ARCH == 03 || ARCH == 04) + vpand %%dest, %%src1, %%src2 +%else +%ifnidn %%dest, %%src1 + movdqa %%dest, %%src1 +%endif + pand %%dest, %%src2 +%endif +%endm + +%macro PCMPEQB 3 +%define %%dest %1 +%define %%src1 %2 +%define %%src2 %3 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vpcmpeqb %%dest, %%src1, %%src2 +%else +%ifnidn %%dest, %%src1 + movdqa %%dest, %%src1 +%endif + pcmpeqb %%dest, %%src2 +%endif +%endm + +%macro PMOVMSKB 2 +%define %%dest %1 +%define %%src %2 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vpmovmskb %%dest, %%src +%else + pmovmskb %%dest, %%src +%endif +%endm + +%macro PXOR 3 +%define %%dest %1 +%define %%src1 %2 +%define %%src2 %3 +%if ((ARCH == 02) || (ARCH == 03) || (ARCH == 04)) + vpxor %%dest, %%src1, %%src2 +%else +%ifnidn %%dest, %%src1 + movdqa %%dest, %%src1 +%endif + pxor %%dest, %%src2 +%endif +%endm +%endif ;; ifndef STDMAC_ASM |