Diffstat:
-rw-r--r--  third_party/aom/av1/common/thread_common.h  345
1 file changed, 345 insertions, 0 deletions
diff --git a/third_party/aom/av1/common/thread_common.h b/third_party/aom/av1/common/thread_common.h
new file mode 100644
index 0000000000..675687dc98
--- /dev/null
+++ b/third_party/aom/av1/common/thread_common.h
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_COMMON_THREAD_COMMON_H_
+#define AOM_AV1_COMMON_THREAD_COMMON_H_
+
+#include "config/aom_config.h"
+
+#include "av1/common/av1_loopfilter.h"
+#include "av1/common/cdef.h"
+#include "aom_util/aom_thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct AV1Common;
+
+typedef struct AV1LfMTInfo {
+ int mi_row;
+ int plane;
+ int dir;
+ int lpf_opt_level;
+} AV1LfMTInfo;
+
+// Loopfilter row synchronization
+typedef struct AV1LfSyncData {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *mutex_[MAX_MB_PLANE];
+ pthread_cond_t *cond_[MAX_MB_PLANE];
+#endif
+ // Allocate memory to store the loop-filtered superblock index in each row.
+ int *cur_sb_col[MAX_MB_PLANE];
+ // The optimal sync_range for different resolution and platform should be
+ // determined by testing. Currently, it is chosen to be a power-of-2 number.
+ int sync_range;
+ int rows;
+
+ // Row-based parallel loopfilter data
+ LFWorkerData *lfdata;
+ int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *job_mutex;
+#endif
+ AV1LfMTInfo *job_queue;
+ int jobs_enqueued;
+ int jobs_dequeued;
+
+ // Initialized to false, set to true by the worker thread that encounters an
+ // error in order to abort the processing of other worker threads.
+ bool lf_mt_exit;
+} AV1LfSync;
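+
+// Illustrative sketch of the row synchronization that cur_sb_col, sync_range,
+// mutex_ and cond_ support (the real read/write helpers live in the
+// corresponding .c file; this is not the exact upstream code):
+//
+//   // Consumer: before filtering column c of row r, wait until the row above
+//   // is at least sync_range superblocks ahead.
+//   pthread_mutex_lock(&lf_sync->mutex_[plane][r - 1]);
+//   while (lf_sync->cur_sb_col[plane][r - 1] < c + lf_sync->sync_range)
+//     pthread_cond_wait(&lf_sync->cond_[plane][r - 1],
+//                       &lf_sync->mutex_[plane][r - 1]);
+//   pthread_mutex_unlock(&lf_sync->mutex_[plane][r - 1]);
+//
+//   // Producer: after filtering column c of row r, publish progress and wake
+//   // any waiters on this row.
+//   pthread_mutex_lock(&lf_sync->mutex_[plane][r]);
+//   lf_sync->cur_sb_col[plane][r] = c;
+//   pthread_cond_broadcast(&lf_sync->cond_[plane][r]);
+//   pthread_mutex_unlock(&lf_sync->mutex_[plane][r]);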
+
+typedef struct AV1LrMTInfo {
+ int v_start;
+ int v_end;
+ int lr_unit_row;
+ int plane;
+ int sync_mode;
+ int v_copy_start;
+ int v_copy_end;
+} AV1LrMTInfo;
+
+typedef struct LoopRestorationWorkerData {
+ int32_t *rst_tmpbuf;
+ void *rlbs;
+ void *lr_ctxt;
+ int do_extend_border;
+ struct aom_internal_error_info error_info;
+} LRWorkerData;
+
+// Looprestoration row synchronization
+typedef struct AV1LrSyncData {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *mutex_[MAX_MB_PLANE];
+ pthread_cond_t *cond_[MAX_MB_PLANE];
+#endif
+ // Allocate memory to store the loop-restoration block index in each row.
+ int *cur_sb_col[MAX_MB_PLANE];
+ // The optimal sync_range for different resolution and platform should be
+ // determined by testing. Currently, it is chosen to be a power-of-2 number.
+ int sync_range;
+ int rows;
+ int num_planes;
+
+ int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *job_mutex;
+#endif
+ // Row-based parallel loop restoration worker data
+ LRWorkerData *lrworkerdata;
+
+ AV1LrMTInfo *job_queue;
+ int jobs_enqueued;
+ int jobs_dequeued;
+ // Initialized to false, set to true by the worker thread that encounters
+ // an error in order to abort the processing of other worker threads.
+ bool lr_mt_exit;
+} AV1LrSync;
+
+typedef struct AV1CdefWorker {
+ AV1_COMMON *cm;
+ MACROBLOCKD *xd;
+ uint16_t *colbuf[MAX_MB_PLANE];
+ uint16_t *srcbuf;
+ uint16_t *linebuf[MAX_MB_PLANE];
+ cdef_init_fb_row_t cdef_init_fb_row_fn;
+ int do_extend_border;
+ struct aom_internal_error_info error_info;
+} AV1CdefWorkerData;
+
+typedef struct AV1CdefRowSync {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *row_mutex_;
+ pthread_cond_t *row_cond_;
+#endif // CONFIG_MULTITHREAD
+ int is_row_done;
+} AV1CdefRowSync;
+
+// Data related to CDEF multi-thread synchronization (shared by the CDEF
+// search and CDEF frame filtering paths).
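+// Workers pick up the next 64x64 unit to process under mutex_ by advancing
+// the shared (fbr, fbc) cursor; end_of_frame is set once every unit has been
+// handed out, and cdef_row_mt carries the per-row completion state.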
+typedef struct AV1CdefSyncData {
+#if CONFIG_MULTITHREAD
+ // Mutex lock used while dispatching jobs.
+ pthread_mutex_t *mutex_;
+#endif // CONFIG_MULTITHREAD
+ // Data related to CDEF row mt sync information
+ AV1CdefRowSync *cdef_row_mt;
+ // Flag to indicate all blocks are processed and end of frame is reached
+ int end_of_frame;
+ // Row index in units of 64x64 block
+ int fbr;
+ // Column index in units of 64x64 block
+ int fbc;
+ // Initialized to false, set to true by the worker thread that encounters
+ // an error in order to abort the processing of other worker threads.
+ bool cdef_mt_exit;
+} AV1CdefSync;
+
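+// Applies CDEF to the frame using num_workers threads; cdef_worker holds the
+// per-thread source/line/column buffers and cdef_sync coordinates the
+// dispatch of filter-block units.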
+void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
+ AV1CdefWorkerData *const cdef_worker,
+ AVxWorker *const workers, AV1CdefSync *const cdef_sync,
+ int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
+ int do_extend_border);
+void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ CdefBlockInfo *const fb_info,
+ uint16_t **const linebuf, uint16_t *const src,
+ struct AV1CdefSyncData *const cdef_sync, int fbr);
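+// Copies a vsize x hsize block of source pixels into the 16-bit buffer dst at
+// the given offsets and strides; the _lowbd/_highbd variants handle 8-bit and
+// high-bit-depth sources, and av1_cdef_copy_sb8_16 selects between them based
+// on the frame bit depth.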
+void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
+ int dstride, const uint8_t *src, int src_voffset,
+ int src_hoffset, int sstride, int vsize, int hsize);
+void av1_cdef_copy_sb8_16_lowbd(uint16_t *const dst, int dstride,
+ const uint8_t *src, int src_voffset,
+ int src_hoffset, int sstride, int vsize,
+ int hsize);
+void av1_cdef_copy_sb8_16_highbd(uint16_t *const dst, int dstride,
+ const uint8_t *src, int src_voffset,
+ int src_hoffset, int sstride, int vsize,
+ int hsize);
+void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
+ int num_workers);
+void av1_free_cdef_sync(AV1CdefSync *cdef_sync);
+
+// Deallocate loopfilter synchronization related mutex and data.
+void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
+void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
+ int width, int num_workers);
+
+void av1_set_vert_loop_filter_done(AV1_COMMON *cm, AV1LfSync *lf_sync,
+ int num_mis_in_lpf_unit_height_log2);
+
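+// Multi-threaded deblocking of planes [plane_start, plane_end) of frame,
+// dispatching row/plane/direction jobs to the given workers and tracking
+// progress in lf_sync.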
+void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
+ struct macroblockd *xd, int plane_start,
+ int plane_end, int partial_frame,
+ AVxWorker *workers, int num_workers,
+ AV1LfSync *lf_sync, int lpf_opt_level);
+
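+// Multi-threaded loop restoration of frame; lr_sync holds the job queue and
+// row synchronization state, and lr_ctxt points to the restoration filtering
+// context set up by the caller.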
+void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
+ struct AV1Common *cm,
+ int optimized_lr, AVxWorker *workers,
+ int num_workers, AV1LrSync *lr_sync,
+ void *lr_ctxt, int do_extend_border);
+void av1_loop_restoration_dealloc(AV1LrSync *lr_sync);
+void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
+ int num_workers, int num_rows_lr,
+ int num_planes, int width);
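+// Returns the additional top-right superblock delay that row synchronization
+// must honor when intra block copy (IntraBC) is allowed.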
+int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm);
+
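+// Worker-side routine: deblocks one loop-filter unit row (mi_row) of the
+// given plane in the given direction, using lf_sync for row ordering and
+// reporting failures through error_info.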
+void av1_thread_loop_filter_rows(
+ const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
+ struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
+ int dir, int lpf_opt_level, AV1LfSync *const lf_sync,
+ struct aom_internal_error_info *error_info,
+ AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf, int mib_size_log2);
+
+static AOM_FORCE_INLINE bool skip_loop_filter_plane(
+ const int planes_to_lf[MAX_MB_PLANE], int plane, int lpf_opt_level) {
+ // If LPF_PICK_METHOD is LPF_PICK_FROM_Q, we have the option to filter both
+ // chroma planes together
+ if (lpf_opt_level == 2) {
+ if (plane == AOM_PLANE_Y) {
+ return !planes_to_lf[plane];
+ }
+ if (plane == AOM_PLANE_U) {
+ // U and V are handled together
+ return !planes_to_lf[1] && !planes_to_lf[2];
+ }
+ assert(plane == AOM_PLANE_V);
+ if (plane == AOM_PLANE_V) {
+ // V is handled when U is filtered
+ return true;
+ }
+ }
+
+ // Normal operation mode
+ return !planes_to_lf[plane];
+}
+
+static AOM_INLINE void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
+ const int planes_to_lf[MAX_MB_PLANE],
+ int lpf_opt_level,
+ int num_mis_in_lpf_unit_height) {
+ int mi_row, plane, dir;
+ AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
+ lf_sync->jobs_enqueued = 0;
+ lf_sync->jobs_dequeued = 0;
+
+ // Launch all vertical jobs first, as they are blocking the horizontal ones.
+ // Launch top row jobs for all planes first, in case the output can be
+ // partially reconstructed row by row.
+ for (dir = 0; dir < 2; ++dir) {
+ for (mi_row = start; mi_row < stop; mi_row += num_mis_in_lpf_unit_height) {
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) {
+ continue;
+ }
+ if (!planes_to_lf[plane]) continue;
+ lf_job_queue->mi_row = mi_row;
+ lf_job_queue->plane = plane;
+ lf_job_queue->dir = dir;
+ lf_job_queue->lpf_opt_level = lpf_opt_level;
+ lf_job_queue++;
+ lf_sync->jobs_enqueued++;
+ }
+ }
+ }
+}
+
+static AOM_INLINE void loop_filter_frame_mt_init(
+ AV1_COMMON *cm, int start_mi_row, int end_mi_row,
+ const int planes_to_lf[MAX_MB_PLANE], int num_workers, AV1LfSync *lf_sync,
+ int lpf_opt_level, int num_mis_in_lpf_unit_height_log2) {
+ // Number of superblock rows
+ const int sb_rows =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2);
+
+ if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
+ num_workers > lf_sync->num_workers) {
+ av1_loop_filter_dealloc(lf_sync);
+ av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
+ }
+ lf_sync->lf_mt_exit = false;
+
+ // Initialize cur_sb_col to -1 for all SB rows.
+ for (int i = 0; i < MAX_MB_PLANE; i++) {
+ memset(lf_sync->cur_sb_col[i], -1,
+ sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
+ }
+
+ enqueue_lf_jobs(lf_sync, start_mi_row, end_mi_row, planes_to_lf,
+ lpf_opt_level, (1 << num_mis_in_lpf_unit_height_log2));
+}
+
+static AOM_INLINE AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
+ AV1LfMTInfo *cur_job_info = NULL;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(lf_sync->job_mutex);
+
+ if (!lf_sync->lf_mt_exit && lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) {
+ cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued;
+ lf_sync->jobs_dequeued++;
+ }
+
+ pthread_mutex_unlock(lf_sync->job_mutex);
+#else
+ (void)lf_sync;
+#endif
+
+ return cur_job_info;
+}
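+
+// Typical consumer pattern (illustrative sketch; the real worker hook lives in
+// the corresponding .c file and takes its arguments from LFWorkerData, with
+// additional error handling):
+//
+//   AV1LfMTInfo *job;
+//   while ((job = get_lf_job_info(lf_sync)) != NULL) {
+//     av1_thread_loop_filter_rows(frame_buffer, cm, planes, xd, job->mi_row,
+//                                 job->plane, job->dir, job->lpf_opt_level,
+//                                 lf_sync, error_info, params_buf, tx_buf,
+//                                 mib_size_log2);
+//   }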
+
+static AOM_INLINE void loop_filter_data_reset(LFWorkerData *lf_data,
+ YV12_BUFFER_CONFIG *frame_buffer,
+ struct AV1Common *cm,
+ MACROBLOCKD *xd) {
+ struct macroblockd_plane *pd = xd->plane;
+ lf_data->frame_buffer = frame_buffer;
+ lf_data->cm = cm;
+ lf_data->xd = xd;
+ for (int i = 0; i < MAX_MB_PLANE; i++) {
+ memcpy(&lf_data->planes[i].dst, &pd[i].dst, sizeof(lf_data->planes[i].dst));
+ lf_data->planes[i].subsampling_x = pd[i].subsampling_x;
+ lf_data->planes[i].subsampling_y = pd[i].subsampling_y;
+ }
+}
+
+static AOM_INLINE void set_planes_to_loop_filter(const struct loopfilter *lf,
+ int planes_to_lf[MAX_MB_PLANE],
+ int plane_start,
+ int plane_end) {
+ // For each luma and chroma plane, whether to filter it or not.
+ planes_to_lf[0] = (lf->filter_level[0] || lf->filter_level[1]) &&
+ plane_start <= 0 && 0 < plane_end;
+ planes_to_lf[1] = lf->filter_level_u && plane_start <= 1 && 1 < plane_end;
+ planes_to_lf[2] = lf->filter_level_v && plane_start <= 2 && 2 < plane_end;
+}
+
+static AOM_INLINE int check_planes_to_loop_filter(
+ const struct loopfilter *lf, int planes_to_lf[MAX_MB_PLANE],
+ int plane_start, int plane_end) {
+ set_planes_to_loop_filter(lf, planes_to_lf, plane_start, plane_end);
+ // If the luma plane is purposely not filtered, neither are the chroma
+ // planes.
+ if (!planes_to_lf[0] && plane_start <= 0 && 0 < plane_end) return 0;
+ // Early exit.
+ if (!planes_to_lf[0] && !planes_to_lf[1] && !planes_to_lf[2]) return 0;
+ return 1;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AOM_AV1_COMMON_THREAD_COMMON_H_