third_party/aom/av1/encoder/encodemv.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345

/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

#include "av1/common/common.h"
#include "av1/common/entropymode.h"

#include "av1/encoder/cost.h"
#include "av1/encoder/encodemv.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/bitops.h"

static void update_mv_component_stats(int comp, nmv_component *mvcomp,
                                      MvSubpelPrecision precision) {
  assert(comp != 0);
  int offset;
  const int sign = comp < 0;
  const int mag = sign ? -comp : comp;
  const int mv_class = av1_get_mv_class(mag - 1, &offset);
  const int d = offset >> 3;         // int mv data
  const int fr = (offset >> 1) & 3;  // fractional mv data
  const int hp = offset & 1;         // high precision mv data

  // Sign
  update_cdf(mvcomp->sign_cdf, sign, 2);

  // Class
  update_cdf(mvcomp->classes_cdf, mv_class, MV_CLASSES);

  // Integer bits
  if (mv_class == MV_CLASS_0) {
    update_cdf(mvcomp->class0_cdf, d, CLASS0_SIZE);
  } else {
    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
    for (int i = 0; i < n; ++i)
      update_cdf(mvcomp->bits_cdf[i], (d >> i) & 1, 2);
  }
  // Fractional bits
  if (precision > MV_SUBPEL_NONE) {
    aom_cdf_prob *fp_cdf =
        mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf;
    update_cdf(fp_cdf, fr, MV_FP_SIZE);
  }

  // High precision bit
  if (precision > MV_SUBPEL_LOW_PRECISION) {
    aom_cdf_prob *hp_cdf =
        mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf;
    update_cdf(hp_cdf, hp, 2);
  }
}

void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
                         MvSubpelPrecision precision) {
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);

  update_cdf(mvctx->joints_cdf, j, MV_JOINTS);

  if (mv_joint_vertical(j))
    update_mv_component_stats(diff.row, &mvctx->comps[0], precision);

  if (mv_joint_horizontal(j))
    update_mv_component_stats(diff.col, &mvctx->comps[1], precision);
}

static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
                                MvSubpelPrecision precision) {
  assert(comp != 0);
  int offset;
  const int sign = comp < 0;
  const int mag = sign ? -comp : comp;
  const int mv_class = av1_get_mv_class(mag - 1, &offset);
  const int d = offset >> 3;         // int mv data
  const int fr = (offset >> 1) & 3;  // fractional mv data
  const int hp = offset & 1;         // high precision mv data

  // Sign
  aom_write_symbol(w, sign, mvcomp->sign_cdf, 2);

  // Class
  aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);

  // Integer bits
  if (mv_class == MV_CLASS_0) {
    aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE);
  } else {
    int i;
    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
    for (i = 0; i < n; ++i)
      aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2);
  }
  // Fractional bits
  if (precision > MV_SUBPEL_NONE) {
    aom_write_symbol(
        w, fr,
        mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
        MV_FP_SIZE);
  }

  // High precision bit
  if (precision > MV_SUBPEL_LOW_PRECISION)
    aom_write_symbol(
        w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
        2);
}

/* TODO(siekyleb@amazon.com): This function writes MV_VALS ints or 128 KiB. This
 *   is more than most L1D caches and is a significant chunk of L2. Write
 *   SIMD that uses streaming writes to avoid loading all of that into L1, or
 *   just don't update the larger component costs every time this called
 *   (or both).
 */
void av1_build_nmv_component_cost_table(int *mvcost,
                                        const nmv_component *const mvcomp,
                                        MvSubpelPrecision precision) {
  int i, j, v, o, mantissa;
  int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
  int bits_cost[MV_OFFSET_BITS][2];
  int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 },
      fp_cost[MV_FP_SIZE] = { 0 };
  int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 };

  av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
  av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
  av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
    av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
  }

  if (precision > MV_SUBPEL_NONE) {
    for (i = 0; i < CLASS0_SIZE; ++i)
      av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
                               NULL);
    av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
  }

  if (precision > MV_SUBPEL_LOW_PRECISION) {
    av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
    av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
  }

  // Instead of accumulating the cost of each vector component's bits
  //   individually, compute the costs based on smaller vectors. Costs for
  //   [2^exp, 2 * 2^exp - 1] are calculated based on [0, 2^exp - 1]
  //   respectively. Offsets are maintained to swap both 1) class costs when
  //   treated as a complete vector component with the highest set bit when
  //   treated as a mantissa (significand) and 2) leading zeros to account for
  //   the current exponent.

  // Cost offsets
  int cost_swap[MV_OFFSET_BITS] = { 0 };
  // Delta to convert positive vector to negative vector costs
  int negate_sign = sign_cost[1] - sign_cost[0];

  // Initialize with offsets to swap the class costs with the costs of the
  //   highest set bit.
  for (i = 1; i < MV_OFFSET_BITS; ++i) {
    cost_swap[i] = bits_cost[i - 1][1];
    if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS];
  }

  // Seed the fractional costs onto the output (overwritten latter).
  for (o = 0; o < MV_FP_SIZE; ++o) {
    int hp;
    for (hp = 0; hp < 2; ++hp) {
      v = 2 * o + hp + 1;
      mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0];
    }
  }

  mvcost[0] = 0;
  // Fill the costs for each exponent's vectors, using the costs set in the
  //   previous exponents.
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
    const int exponent = (2 * MV_FP_SIZE) << i;

    int class = 0;
    if (i >= CLASS0_BITS) {
      class = class_cost[i - CLASS0_BITS + 1];
    }

    // Iterate through mantissas, keeping track of the location
    //   of the highest set bit for the mantissa.
    // To be clear: in the outer loop, the position of the highest set bit
    //   (exponent) is tracked and, in this loop, the highest set bit of the
    //   mantissa is tracked.
    mantissa = 0;
    for (j = 0; j <= i; ++j) {
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
        v = exponent + mantissa + 1;
        mvcost[v] = cost;
        mvcost[-v] = cost + negate_sign;
      }
      cost_swap[j] += bits_cost[i][0];
    }
  }

  // Special case to avoid buffer overrun
  {
    int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS;
    int class = class_cost[MV_CLASSES - 1];
    mantissa = 0;
    for (j = 0; j < MV_OFFSET_BITS; ++j) {
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
        v = exponent + mantissa + 1;
        mvcost[v] = cost;
        mvcost[-v] = cost + negate_sign;
      }
    }
    // At this point: mantissa = exponent >> 1

    // Manually calculate the final cost offset
    int cost_swap_hi =
        bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2];
    for (; mantissa < exponent - 1; ++mantissa) {
      int cost = mvcost[mantissa + 1] + class + cost_swap_hi;
      v = exponent + mantissa + 1;
      mvcost[v] = cost;
      mvcost[-v] = cost + negate_sign;
    }
  }

  // Fill costs for class0 vectors, overwriting previous placeholder values
  //   used for calculating the costs of the larger vectors.
  for (i = 0; i < CLASS0_SIZE; ++i) {
    const int top = i * 2 * MV_FP_SIZE;
    for (o = 0; o < MV_FP_SIZE; ++o) {
      int hp;
      int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i];
      for (hp = 0; hp < 2; ++hp) {
        v = top + 2 * o + hp + 1;
        mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0];
        mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1];
      }
    }
  }
}

void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
                   const MV *ref, nmv_context *mvctx, int usehp) {
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
  // If the mv_diff is zero, then we should have used near or nearest instead.
  assert(j != MV_JOINT_ZERO);
  if (cpi->common.features.cur_frame_force_integer_mv) {
    usehp = MV_SUBPEL_NONE;
  }
  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
  if (mv_joint_vertical(j))
    encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);

  if (mv_joint_horizontal(j))
    encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);

  // If auto_mv_step_size is enabled then keep track of the largest
  // motion vector component used.
  if (cpi->sf.mv_sf.auto_mv_step_size) {
    int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
    td->max_mv_magnitude = AOMMAX(maxv, td->max_mv_magnitude);
  }
}

void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
                   nmv_context *mvctx) {
  // DV and ref DV should not have sub-pel.
  assert((mv->col & 7) == 0);
  assert((mv->row & 7) == 0);
  assert((ref->col & 7) == 0);
  assert((ref->row & 7) == 0);
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);

  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
  if (mv_joint_vertical(j))
    encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE);

  if (mv_joint_horizontal(j))
    encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE);
}

void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
                              const nmv_context *ctx,
                              MvSubpelPrecision precision) {
  av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
  av1_build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
  av1_build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
}

int_mv av1_get_ref_mv_from_stack(int ref_idx,
                                 const MV_REFERENCE_FRAME *ref_frame,
                                 int ref_mv_idx,
                                 const MB_MODE_INFO_EXT *mbmi_ext) {
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const CANDIDATE_MV *curr_ref_mv_stack =
      mbmi_ext->ref_mv_stack[ref_frame_type];

  if (ref_frame[1] > INTRA_FRAME) {
    assert(ref_idx == 0 || ref_idx == 1);
    return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
                   : curr_ref_mv_stack[ref_mv_idx].this_mv;
  }

  assert(ref_idx == 0);
  return ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]
             ? curr_ref_mv_stack[ref_mv_idx].this_mv
             : mbmi_ext->global_mvs[ref_frame_type];
}

int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  int ref_mv_idx = mbmi->ref_mv_idx;
  if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
    assert(has_second_ref(mbmi));
    ref_mv_idx += 1;
  }
  return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
                                   &x->mbmi_ext);
}

void av1_find_best_ref_mvs_from_stack(int allow_hp,
                                      const MB_MODE_INFO_EXT *mbmi_ext,
                                      MV_REFERENCE_FRAME ref_frame,
                                      int_mv *nearest_mv, int_mv *near_mv,
                                      int is_integer) {
  const int ref_idx = 0;
  MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  *nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
  lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer);
  *near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext);
  lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer);
}