;;
;; Copyright (c) 2018, Intel Corporation
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are met:
;;
;; * Redistributions of source code must retain the above copyright notice,
;; this list of conditions and the following disclaimer.
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;; * Neither the name of Intel Corporation nor the names of its contributors
;; may be used to endorse or promote products derived from this software
;; without specific prior written permission.
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;
%ifndef _CONST_INC_
%define _CONST_INC_
;;; Tables used to insert a word into a SIMD register
extern len_shift_tab
extern len_mask_tab
extern shift_tab_16
;;; Table used to shift in the 0x80 byte for a padding prefix
extern padding_0x80_tab16
;;; Size of len_shift_tab (defined in the const.asm module);
;;; also the offset from len_shift_tab to len_mask_tab
%define len_tab_diff 128
;;; PINSRW_COMMON - insert a word into a 128-bit SIMD register
%macro PINSRW_COMMON 7
%define %%type      %1 ; [in] instruction type - sse or avx
%define %%dest      %2 ; [in/out] XMM reg to insert the word into
%define %%tmp_simd  %3 ; [clobbered] temporary XMM reg
%define %%tmp_gp    %4 ; [clobbered] temporary GP reg
%define %%idx       %5 ; [in] word index to insert the value at
%define %%val       %6 ; [in] word value to insert (immediate or GP reg)
%define %%scale_idx %7 ; [in] set to scale_x16 if idx is to be scaled x16

%ifidn %%scale_idx, scale_x16
        shl     %%idx, 4        ; scale idx up x16
%endif
%ifnum %%val
        ;; immediate value passed on
        mov     DWORD(%%tmp_gp), %%val
%ifidn %%type, sse
        movd    %%tmp_simd, DWORD(%%tmp_gp)
%else
        vmovd   %%tmp_simd, DWORD(%%tmp_gp)
%endif
%else
        ;; register name passed on
%ifidn %%type, sse
        movd    %%tmp_simd, DWORD(%%val)
%else
        vmovd   %%tmp_simd, DWORD(%%val)
%endif
%endif
        lea     %%tmp_gp, [rel len_shift_tab]
        ;; check type - SSE or AVX
%ifidn %%type, sse
        pshufb  %%tmp_simd, [%%tmp_gp + %%idx]
        pand    %%dest, [%%tmp_gp + len_tab_diff + %%idx]
        por     %%dest, %%tmp_simd
%else
        vpshufb %%tmp_simd, [%%tmp_gp + %%idx]
        vpand   %%dest, [%%tmp_gp + len_tab_diff + %%idx]
        vpor    %%dest, %%tmp_simd
%endif
%ifidn %%scale_idx, scale_x16
        shr     %%idx, 4        ; reset idx to its original value
%endif
%endmacro
;;; XPINSRW - invoke PINSRW_COMMON in its SSE variant
;;; (trailing comma: the caller's six arguments follow the 'sse' type argument)
%define XPINSRW  PINSRW_COMMON sse,
;;; XVPINSRW - invoke PINSRW_COMMON in its AVX variant
%define XVPINSRW PINSRW_COMMON avx,
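;;; Example usage - an illustrative sketch only; the register choices
;;; (xmm0, xmm1, rbx, rdx, rax) are hypothetical, not part of this file:
;;;
;;;     mov     rdx, 2          ; word index (lane) to update
;;;     ;; xmm0[word 2] = ax; clobbers xmm1 and rbx, preserves rdx
;;;     XVPINSRW xmm0, xmm1, rbx, rdx, rax, scale_x16
;;;
;;; With scale_x16 the index is multiplied by 16 to step through the
;;; 16-byte entries of len_shift_tab/len_mask_tab, then restored.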
;;; VPINSRW_M256 - insert a word into a 32-byte memory range
%macro VPINSRW_M256 8
%define %%mem_addr  %1 ; [in] 16-byte aligned memory address to insert the word at
%define %%tmp_simd1 %2 ; [clobbered] temporary XMM reg
%define %%tmp_simd2 %3 ; [clobbered] temporary XMM reg
%define %%tmp_gp    %4 ; [clobbered] temporary GP reg
%define %%offset    %5 ; [clobbered] GP reg used to store the 16-byte block offset
%define %%idx       %6 ; [in] word index to insert the value at (0..15, preserved)
%define %%val       %7 ; [in] word value to insert at idx
%define %%scale_idx %8 ; [in] set to scale_x16 if idx is to be scaled x16

        mov     %%offset, %%idx
        and     %%offset, 0x8   ; set offset to 0 or 8 words
        and     %%idx, 0x7      ; remove offset from idx
        vmovdqa %%tmp_simd1, [%%mem_addr + %%offset*2]
        XVPINSRW %%tmp_simd1, %%tmp_simd2, %%tmp_gp, %%idx, %%val, %%scale_idx
        vmovdqa [%%mem_addr + %%offset*2], %%tmp_simd1
        or      %%idx, %%offset ; restore original idx
%endmacro
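;;; Example usage - an illustrative sketch; the '_lens' displacement and all
;;; register choices below are hypothetical:
;;;
;;;     mov     rdx, 10         ; word index within the 32-byte range (0..15)
;;;     ;; word 10 of the 32-byte array at [rsp + _lens] = ax
;;;     VPINSRW_M256 rsp + _lens, xmm0, xmm1, rbx, rcx, rdx, rax, scale_x16
;;;
;;; The index is split into a 16-byte block offset (0 or 8 words) and a word
;;; index within that block, so only one XMM load and store is needed.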
;;; PSLB_COMMON - shift bytes in a 128-bit SIMD register
%macro PSLB_COMMON 6
%define %%type     %1 ; [in] instruction type - sse or avx
%define %%dir      %2 ; [in] shift direction - left or right
%define %%reg      %3 ; [in/out] XMM reg to byte-shift
%define %%num      %4 ; [in] GP reg containing the number of bytes to shift
%define %%shuf_tab %5 ; [out] XMM reg to store the shuffle table
%define %%tmp_gp   %6 ; [clobbered] temporary GP reg

        ;; load shift table into %%shuf_tab
        lea     %%tmp_gp, [rel shift_tab_16 + 16]
%ifidn %%dir, left
        sub     %%tmp_gp, %%num
%else
        add     %%tmp_gp, %%num
%endif
%ifidn %%type, sse
        movdqu  %%shuf_tab, [%%tmp_gp]
        pshufb  %%reg, %%shuf_tab
%else
        vmovdqu %%shuf_tab, [%%tmp_gp]
        vpshufb %%reg, %%shuf_tab
%endif
%endmacro
;;; XPSLLB - SSE left byte shift
%macro XPSLLB 4
        PSLB_COMMON sse, left, %1, %2, %3, %4
%endm

;;; XPSRLB - SSE right byte shift
%macro XPSRLB 4
        PSLB_COMMON sse, right, %1, %2, %3, %4
%endm

;;; XVPSLLB - AVX left byte shift
%macro XVPSLLB 4
        PSLB_COMMON avx, left, %1, %2, %3, %4
%endm

;;; XVPSRLB - AVX right byte shift
%macro XVPSRLB 4
        PSLB_COMMON avx, right, %1, %2, %3, %4
%endm
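;;; Example usage - an illustrative sketch with hypothetical registers:
;;;
;;;     mov     rax, 5                  ; variable byte count (0..16)
;;;     ;; shift xmm0 left by 5 bytes; xmm1 and rbx are clobbered
;;;     XVPSLLB xmm0, rax, xmm1, rbx
;;;
;;; Unlike PSLLDQ/VPSLLDQ, which only accept an immediate count, the
;;; shuffle-table approach supports a shift count held in a register;
;;; the vacated byte positions are expected to be zeroed by the table's
;;; high-bit-set entries (pshufb zeroing behavior).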
%endif ; end ifndef _CONST_INC_