summaryrefslogtreecommitdiffstats
path: root/media/libvpx/libvpx/vpx_dsp/mips/subtract_mmi.c
blob: 8bd7e6977cd07192820922fb4ff3ff7c0fb4bc7e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/asmdefs_mmi.h"

// Computes the per-pixel residual diff = src - pred for a rows x cols block,
// widening the 8-bit inputs to 16-bit outputs.
//
// Square blocks of size 4, 8 and 16 are handled with Loongson MMI inline
// assembly; every other shape (non-square, 32x32, 64x64, anything else) is
// forwarded unchanged to vpx_subtract_block_c().
//
// Parameters:
//   rows, cols   - block dimensions in pixels.
//   diff         - destination for the 16-bit differences.
//   diff_stride  - distance between consecutive diff rows, in int16_t
//                  elements (it is doubled below to obtain a byte stride).
//   src          - source pixels.
//   src_stride   - distance between consecutive src rows, in bytes.
//   pred         - prediction pixels.
//   pred_stride  - distance between consecutive pred rows, in bytes.
//
// All loads and stores in the assembly paths use the left/right instruction
// pairs (gs*lc1 / gs*rc1, or ulw on O32), so none of the three buffers is
// required to be aligned.
//
// NOTE(review): MMI_ADDU and mips_reg come from vpx_ports/asmdefs_mmi.h;
// presumably they are the pointer-width add and integer register type for the
// current ABI - confirm against that header.
void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
                            ptrdiff_t diff_stride, const uint8_t *src,
                            ptrdiff_t src_stride, const uint8_t *pred,
                            ptrdiff_t pred_stride) {
  // Backing storage for the asm output operands: ftmp[] is bound to
  // floating-point (MMI) registers, tmp[] to a general-purpose register.
  double ftmp[13];
  uint32_t tmp[1];

  if (rows == cols) {
    switch (rows) {
      case 4:
        // 4x4: fully unrolled. The four rows of src/pred (4 bytes each) are
        // loaded first into ftmp1..ftmp8, then each row is widened,
        // subtracted and stored as one 8-byte row of diff.
        __asm__ volatile(
            // ftmp0 = 0; used as the zero operand when widening bytes.
            "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
            // Row 0. Under the O32 ABI the 32-bit row is loaded through a
            // GPR (ulw) and then moved into the FPR (mtc1); otherwise the
            // gslwlc1/gslwrc1 pair loads straight into the FPR.
#if _MIPS_SIM == _ABIO32
            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
            "mtc1       %[tmp0],    %[ftmp1]                            \n\t"
            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
            "mtc1       %[tmp0],    %[ftmp2]                            \n\t"
#else
            "gslwlc1    %[ftmp1],   0x03(%[src])                        \n\t"
            "gslwrc1    %[ftmp1],   0x00(%[src])                        \n\t"
            "gslwlc1    %[ftmp2],   0x03(%[pred])                       \n\t"
            "gslwrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
#endif
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])

            // Row 1.
#if _MIPS_SIM == _ABIO32
            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
            "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
            "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
#else
            "gslwlc1    %[ftmp3],   0x03(%[src])                        \n\t"
            "gslwrc1    %[ftmp3],   0x00(%[src])                        \n\t"
            "gslwlc1    %[ftmp4],   0x03(%[pred])                       \n\t"
            "gslwrc1    %[ftmp4],   0x00(%[pred])                       \n\t"
#endif
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])

            // Row 2.
#if _MIPS_SIM == _ABIO32
            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
            "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
            "mtc1       %[tmp0],    %[ftmp6]                            \n\t"
#else
            "gslwlc1    %[ftmp5],   0x03(%[src])                        \n\t"
            "gslwrc1    %[ftmp5],   0x00(%[src])                        \n\t"
            "gslwlc1    %[ftmp6],   0x03(%[pred])                       \n\t"
            "gslwrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
#endif
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])

            // Row 3 (last row, so src/pred are not advanced afterwards).
#if _MIPS_SIM == _ABIO32
            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
            "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
            "mtc1       %[tmp0],    %[ftmp8]                            \n\t"
#else
            "gslwlc1    %[ftmp7],   0x03(%[src])                        \n\t"
            "gslwrc1    %[ftmp7],   0x00(%[src])                        \n\t"
            "gslwlc1    %[ftmp8],   0x03(%[pred])                       \n\t"
            "gslwrc1    %[ftmp8],   0x00(%[pred])                       \n\t"
#endif
            // Per row: interleave the low 4 bytes with zero to get four
            // 16-bit values, subtract pred from src, store 8 bytes to diff.
            // psubh is the non-saturating subtract; the 8x8/16x16 paths use
            // psubsh, but zero-extended 8-bit inputs give results in
            // [-255, 255], so the two produce the same values here.
            "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp10],  %[ftmp2],           %[ftmp0]        \n\t"
            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp10],  %[ftmp4],           %[ftmp0]        \n\t"
            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp10],  %[ftmp6],           %[ftmp0]        \n\t"
            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp10],  %[ftmp8],           %[ftmp0]        \n\t"
            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
            // Outputs: scratch registers are early-clobber ("=&"); the three
            // pointers are read-write ("+&r") because the template advances
            // them. tmp0 is only bound on O32, the only path that uses it.
            : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
              [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
              [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
              [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
              [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
              [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
#if _MIPS_SIM == _ABIO32
              [tmp0] "=&r"(tmp[0]),
#endif
              [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff)
            // Inputs: diff_stride is given in int16_t elements, so it is
            // doubled to obtain the byte step added to the diff pointer.
            : [src_stride] "r"((mips_reg)src_stride),
              [pred_stride] "r"((mips_reg)pred_stride),
              [diff_stride] "r"((mips_reg)(diff_stride * 2))
            // The template stores through diff, hence the memory clobber.
            : "memory");
        break;
      case 8:
        // 8x8: a counted loop of 2 iterations, each handling 4 rows. Every
        // row is one 8-byte load from src and pred and two 8-byte stores to
        // diff (low and high halves after widening).
        __asm__ volatile(
            // ftmp0 = 0 (zero operand for widening); tmp0 = loop counter.
            "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
            "li         %[tmp0],    0x02                                \n\t"
            "1:                                                         \n\t"
            // Load four consecutive rows of src/pred into ftmp1..ftmp8,
            // advancing both pointers by their strides after each row.
            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
            "gsldlc1    %[ftmp2],   0x07(%[pred])                       \n\t"
            "gsldrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])
            "gsldlc1    %[ftmp3],   0x07(%[src])                        \n\t"
            "gsldrc1    %[ftmp3],   0x00(%[src])                        \n\t"
            "gsldlc1    %[ftmp4],   0x07(%[pred])                       \n\t"
            "gsldrc1    %[ftmp4],   0x00(%[pred])                       \n\t"
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])
            "gsldlc1    %[ftmp5],   0x07(%[src])                        \n\t"
            "gsldrc1    %[ftmp5],   0x00(%[src])                        \n\t"
            "gsldlc1    %[ftmp6],   0x07(%[pred])                       \n\t"
            "gsldrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])
            "gsldlc1    %[ftmp7],   0x07(%[src])                        \n\t"
            "gsldrc1    %[ftmp7],   0x00(%[src])                        \n\t"
            "gsldlc1    %[ftmp8],   0x07(%[pred])                       \n\t"
            "gsldrc1    %[ftmp8],   0x00(%[pred])                       \n\t"
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])
            // Per row: widen the low and high 4 bytes of src and pred to
            // 16 bits, subtract (saturating form), and store the two
            // halves at byte offsets 0x00 and 0x08 of the diff row.
            "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp1],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp2],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp2],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp3],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp4],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp4],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp5],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp6],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp6],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp7],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp8],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp8],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            // Decrement the counter and repeat until it reaches zero.
            "addiu      %[tmp0],    %[tmp0],            -0x01           \n\t"
            "bnez       %[tmp0],    1b                                  \n\t"
            // Operands follow the same scheme as the 4x4 case; tmp0 is
            // always bound here because it holds the loop counter.
            : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
              [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
              [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
              [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
              [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
              [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
              [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
              [pred] "+&r"(pred), [diff] "+&r"(diff)
            : [pred_stride] "r"((mips_reg)pred_stride),
              [src_stride] "r"((mips_reg)src_stride),
              [diff_stride] "r"((mips_reg)(diff_stride * 2))
            : "memory");
        break;
      case 16:
        // 16x16: a counted loop of 8 iterations, each handling 2 rows. Every
        // row is two 8-byte loads from src and pred (offsets 0x00 and 0x08)
        // and four 8-byte stores to diff (offsets 0x00, 0x08, 0x10, 0x18).
        __asm__ volatile(
            // ftmp0 = 0 (zero operand for widening); tmp0 = loop counter.
            "pxor       %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
            "li         %[tmp0],    0x08                                \n\t"
            "1:                                                         \n\t"
            // First row: ftmp1/ftmp2 = left 8 pixels of src/pred,
            // ftmp3/ftmp4 = right 8 pixels.
            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
            "gsldlc1    %[ftmp2],   0x07(%[pred])                       \n\t"
            "gsldrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
            "gsldlc1    %[ftmp3],   0x0f(%[src])                        \n\t"
            "gsldrc1    %[ftmp3],   0x08(%[src])                        \n\t"
            "gsldlc1    %[ftmp4],   0x0f(%[pred])                       \n\t"
            "gsldrc1    %[ftmp4],   0x08(%[pred])                       \n\t"
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])
            // Second row: ftmp5/ftmp6 = left 8 pixels of src/pred,
            // ftmp7/ftmp8 = right 8 pixels.
            "gsldlc1    %[ftmp5],   0x07(%[src])                        \n\t"
            "gsldrc1    %[ftmp5],   0x00(%[src])                        \n\t"
            "gsldlc1    %[ftmp6],   0x07(%[pred])                       \n\t"
            "gsldrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
            "gsldlc1    %[ftmp7],   0x0f(%[src])                        \n\t"
            "gsldrc1    %[ftmp7],   0x08(%[src])                        \n\t"
            "gsldlc1    %[ftmp8],   0x0f(%[pred])                       \n\t"
            "gsldrc1    %[ftmp8],   0x08(%[pred])                       \n\t"
            MMI_ADDU(%[src], %[src], %[src_stride])
            MMI_ADDU(%[pred], %[pred], %[pred_stride])
            // First row, left 8 pixels -> diff bytes 0x00..0x0f.
            "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp1],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp2],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp2],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
            // First row, right 8 pixels -> diff bytes 0x10..0x1f.
            "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp3],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp4],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp4],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x17(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x10(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x1f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x18(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            // Second row, left 8 pixels -> diff bytes 0x00..0x0f.
            "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp5],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp6],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp6],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
            // Second row, right 8 pixels -> diff bytes 0x10..0x1f.
            "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp10],  %[ftmp7],           %[ftmp0]        \n\t"
            "punpcklbh  %[ftmp11],  %[ftmp8],           %[ftmp0]        \n\t"
            "punpckhbh  %[ftmp12],  %[ftmp8],           %[ftmp0]        \n\t"
            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
            "gssdlc1    %[ftmp9],   0x17(%[diff])                       \n\t"
            "gssdrc1    %[ftmp9],   0x10(%[diff])                       \n\t"
            "gssdlc1    %[ftmp10],  0x1f(%[diff])                       \n\t"
            "gssdrc1    %[ftmp10],  0x18(%[diff])                       \n\t"
            MMI_ADDU(%[diff], %[diff], %[diff_stride])
            // Decrement the counter and repeat until it reaches zero.
            "addiu      %[tmp0],    %[tmp0],            -0x01           \n\t"
            "bnez       %[tmp0],    1b                                  \n\t"
            // Same operand scheme as the 8x8 case.
            : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
              [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
              [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
              [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
              [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
              [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
              [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
              [pred] "+&r"(pred), [diff] "+&r"(diff)
            : [pred_stride] "r"((mips_reg)pred_stride),
              [src_stride] "r"((mips_reg)src_stride),
              [diff_stride] "r"((mips_reg)(diff_stride * 2))
            : "memory");
        break;
      // 32x32, 64x64 and any other square size have no MMI path here and
      // use the C implementation.
      case 32:
        vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
                             pred, pred_stride);
        break;
      case 64:
        vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
                             pred, pred_stride);
        break;
      default:
        vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
                             pred, pred_stride);
        break;
    }
  } else {
    // Non-square blocks always use the C implementation.
    vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred,
                         pred_stride);
  }
}