summaryrefslogtreecommitdiffstats
path: root/drivers/media/platform/mediatek/vcodec/decoder/vdec
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/media/platform/mediatek/vcodec/decoder/vdec')
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c2209
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_if.c494
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.c322
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.h277
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c444
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c848
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c1092
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c608
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c433
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c1017
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c2216
11 files changed, 9960 insertions, 0 deletions
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c
new file mode 100644
index 000000000..2b6a5adbc
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c
@@ -0,0 +1,2209 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ * Author: Xiaoyong Lu <xiaoyong.lu@mediatek.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_drv_base.h"
+#include "../vdec_drv_if.h"
+#include "../vdec_vpu_if.h"
+
+#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
+#define AV1_TILE_BUF_SIZE 64
+#define AV1_SCALE_SUBPEL_BITS 10
+#define AV1_REF_SCALE_SHIFT 14
+#define AV1_REF_NO_SCALE BIT(AV1_REF_SCALE_SHIFT)
+#define AV1_REF_INVALID_SCALE -1
+#define AV1_CDF_TABLE_BUFFER_SIZE 16384
+#define AV1_PRIMARY_REF_NONE 7
+
+#define AV1_INVALID_IDX -1
+
+#define AV1_DIV_ROUND_UP_POW2(value, n) \
+({ \
+ typeof(n) _n = n; \
+ typeof(value) _value = value; \
+ (_value + (BIT(_n) >> 1)) >> _n; \
+})
+
+#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \
+({ \
+ typeof(n) _n_ = n; \
+ typeof(value) _value_ = value; \
+ (((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \
+ : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
+})
+
+#define BIT_FLAG(x, bit) (!!((x)->flags & (bit)))
+#define SEGMENTATION_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEGMENTATION_FLAG_##name))
+#define QUANT_FLAG(x, name) (!!((x)->flags & V4L2_AV1_QUANTIZATION_FLAG_##name))
+#define SEQUENCE_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEQUENCE_FLAG_##name))
+#define FH_FLAG(x, name) (!!((x)->flags & V4L2_AV1_FRAME_FLAG_##name))
+
+#define MINQ 0
+#define MAXQ 255
+
+#define DIV_LUT_PREC_BITS 14
+#define DIV_LUT_BITS 8
+#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
+#define WARP_PARAM_REDUCE_BITS 6
+#define WARPEDMODEL_PREC_BITS 16
+
+#define SEG_LVL_ALT_Q 0
+#define SECONDARY_FILTER_STRENGTH_NUM_BITS 2
+
+static const short div_lut[DIV_LUT_NUM + 1] = {
+ 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
+ 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
+ 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
+ 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
+ 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
+ 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
+ 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
+ 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
+ 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
+ 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
+ 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
+ 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
+ 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
+ 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
+ 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
+ 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
+ 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
+ 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
+ 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
+ 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
+ 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
+ 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
+ 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
+ 8240, 8224, 8208, 8192,
+};
+
+/**
+ * struct vdec_av1_slice_init_vsi - VSI used to initialize instance
+ * @architecture: architecture type
+ * @reserved: reserved
+ * @core_vsi: for core vsi
+ * @cdf_table_addr: cdf table addr
+ * @cdf_table_size: cdf table size
+ * @iq_table_addr: iq table addr
+ * @iq_table_size: iq table size
+ * @vsi_size: share vsi structure size
+ */
+struct vdec_av1_slice_init_vsi {
+ u32 architecture;
+ u32 reserved;
+ u64 core_vsi;
+ u64 cdf_table_addr;
+ u32 cdf_table_size;
+ u64 iq_table_addr;
+ u32 iq_table_size;
+ u32 vsi_size;
+};
+
+/**
+ * struct vdec_av1_slice_mem - memory address and size
+ * @buf: dma_addr padding
+ * @dma_addr: buffer address
+ * @size: buffer size
+ * @dma_addr_end: buffer end address
+ * @padding: for padding
+ */
+struct vdec_av1_slice_mem {
+ union {
+ u64 buf;
+ dma_addr_t dma_addr;
+ };
+ union {
+ size_t size;
+ dma_addr_t dma_addr_end;
+ u64 padding;
+ };
+};
+
+/**
+ * struct vdec_av1_slice_state - decoding state
+ * @err : err type for decode
+ * @full : transcoded buffer is full or not
+ * @timeout : decode timeout or not
+ * @perf : performance enable
+ * @crc : hw checksum
+ * @out_size : hw output size
+ */
+struct vdec_av1_slice_state {
+ int err;
+ u32 full;
+ u32 timeout;
+ u32 perf;
+ u32 crc[16];
+ u32 out_size;
+};
+
+/*
+ * enum vdec_av1_slice_resolution_level - resolution level
+ */
+enum vdec_av1_slice_resolution_level {
+ AV1_RES_NONE,
+ AV1_RES_FHD,
+ AV1_RES_4K,
+ AV1_RES_8K,
+};
+
+/*
+ * enum vdec_av1_slice_frame_type - av1 frame type
+ */
+enum vdec_av1_slice_frame_type {
+ AV1_KEY_FRAME = 0,
+ AV1_INTER_FRAME,
+ AV1_INTRA_ONLY_FRAME,
+ AV1_SWITCH_FRAME,
+ AV1_FRAME_TYPES,
+};
+
+/*
+ * enum vdec_av1_slice_reference_mode - reference mode type
+ */
+enum vdec_av1_slice_reference_mode {
+ AV1_SINGLE_REFERENCE = 0,
+ AV1_COMPOUND_REFERENCE,
+ AV1_REFERENCE_MODE_SELECT,
+ AV1_REFERENCE_MODES,
+};
+
+/**
+ * struct vdec_av1_slice_tile_group - info for each tile
+ * @num_tiles: tile number
+ * @tile_size: input size for each tile
+ * @tile_start_offset: tile offset to input buffer
+ */
+struct vdec_av1_slice_tile_group {
+ u32 num_tiles;
+ u32 tile_size[V4L2_AV1_MAX_TILE_COUNT];
+ u32 tile_start_offset[V4L2_AV1_MAX_TILE_COUNT];
+};
+
+/**
+ * struct vdec_av1_slice_scale_factors - scale info for each ref frame
+ * @is_scaled: frame is scaled or not
+ * @x_scale: frame width scale coefficient
+ * @y_scale: frame height scale coefficient
+ * @x_step: width step for x_scale
+ * @y_step: height step for y_scale
+ */
+struct vdec_av1_slice_scale_factors {
+ u8 is_scaled;
+ int x_scale;
+ int y_scale;
+ int x_step;
+ int y_step;
+};
+
+/**
+ * struct vdec_av1_slice_frame_refs - ref frame info
+ * @ref_fb_idx: ref slot index
+ * @ref_map_idx: ref frame index
+ * @scale_factors: scale factors for each ref frame
+ */
+struct vdec_av1_slice_frame_refs {
+ int ref_fb_idx;
+ int ref_map_idx;
+ struct vdec_av1_slice_scale_factors scale_factors;
+};
+
+/**
+ * struct vdec_av1_slice_gm - AV1 Global Motion parameters
+ * @wmtype: The type of global motion transform used
+ * @wmmat: gm_params
+ * @alpha: alpha info
+ * @beta: beta info
+ * @gamma: gamma info
+ * @delta: delta info
+ * @invalid: is invalid or not
+ */
+struct vdec_av1_slice_gm {
+ int wmtype;
+ int wmmat[8];
+ short alpha;
+ short beta;
+ short gamma;
+ short delta;
+ char invalid;
+};
+
+/**
+ * struct vdec_av1_slice_sm - AV1 Skip Mode parameters
+ * @skip_mode_allowed: Skip Mode is allowed or not
+ * @skip_mode_present: specified that the skip_mode will be present or not
+ * @skip_mode_frame: specifies the frames to use for compound prediction
+ */
+struct vdec_av1_slice_sm {
+ u8 skip_mode_allowed;
+ u8 skip_mode_present;
+ int skip_mode_frame[2];
+};
+
+/**
+ * struct vdec_av1_slice_seg - AV1 Segmentation params
+ * @segmentation_enabled: this frame makes use of the segmentation tool or not
+ * @segmentation_update_map: segmentation map are updated during the decoding frame
+ * @segmentation_temporal_update:segmentation map are coded relative the existing segmentaion map
+ * @segmentation_update_data: new parameters are about to be specified for each segment
+ * @feature_data: specifies the feature data for a segment feature
+ * @feature_enabled_mask: the corresponding feature value is coded or not.
+ * @segid_preskip: segment id will be read before the skip syntax element.
+ * @last_active_segid: the highest numbered segment id that has some enabled feature
+ */
+struct vdec_av1_slice_seg {
+ u8 segmentation_enabled;
+ u8 segmentation_update_map;
+ u8 segmentation_temporal_update;
+ u8 segmentation_update_data;
+ int feature_data[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX];
+ u16 feature_enabled_mask[V4L2_AV1_MAX_SEGMENTS];
+ int segid_preskip;
+ int last_active_segid;
+};
+
+/**
+ * struct vdec_av1_slice_delta_q_lf - AV1 Loop Filter delta parameters
+ * @delta_q_present: specified whether quantizer index delta values are present
+ * @delta_q_res: specifies the left shift which should be applied to decoded quantizer index
+ * @delta_lf_present: specifies whether loop filter delta values are present
+ * @delta_lf_res: specifies the left shift which should be applied to decoded
+ * loop filter delta values
+ * @delta_lf_multi: specifies that separate loop filter deltas are sent for horizontal
+ * luma edges,vertical luma edges,the u edges, and the v edges.
+ */
+struct vdec_av1_slice_delta_q_lf {
+ u8 delta_q_present;
+ u8 delta_q_res;
+ u8 delta_lf_present;
+ u8 delta_lf_res;
+ u8 delta_lf_multi;
+};
+
+/**
+ * struct vdec_av1_slice_quantization - AV1 Quantization params
+ * @base_q_idx: indicates the base frame qindex. This is used for Y AC
+ * coefficients and as the base value for the other quantizers.
+ * @qindex: qindex
+ * @delta_qydc: indicates the Y DC quantizer relative to base_q_idx
+ * @delta_qudc: indicates the U DC quantizer relative to base_q_idx.
+ * @delta_quac: indicates the U AC quantizer relative to base_q_idx
+ * @delta_qvdc: indicates the V DC quantizer relative to base_q_idx
+ * @delta_qvac: indicates the V AC quantizer relative to base_q_idx
+ * @using_qmatrix: specifies that the quantizer matrix will be used to
+ * compute quantizers
+ * @qm_y: specifies the level in the quantizer matrix that should
+ * be used for luma plane decoding
+ * @qm_u: specifies the level in the quantizer matrix that should
+ * be used for chroma U plane decoding.
+ * @qm_v: specifies the level in the quantizer matrix that should be
+ * used for chroma V plane decoding
+ */
+struct vdec_av1_slice_quantization {
+ int base_q_idx;
+ int qindex[V4L2_AV1_MAX_SEGMENTS];
+ int delta_qydc;
+ int delta_qudc;
+ int delta_quac;
+ int delta_qvdc;
+ int delta_qvac;
+ u8 using_qmatrix;
+ u8 qm_y;
+ u8 qm_u;
+ u8 qm_v;
+};
+
+/**
+ * struct vdec_av1_slice_lr - AV1 Loop Restauration parameters
+ * @use_lr: whether to use loop restoration
+ * @use_chroma_lr: whether to use chroma loop restoration
+ * @frame_restoration_type: specifies the type of restoration used for each plane
+ * @loop_restoration_size: pecifies the size of loop restoration units in units
+ * of samples in the current plane
+ */
+struct vdec_av1_slice_lr {
+ u8 use_lr;
+ u8 use_chroma_lr;
+ u8 frame_restoration_type[V4L2_AV1_NUM_PLANES_MAX];
+ u32 loop_restoration_size[V4L2_AV1_NUM_PLANES_MAX];
+};
+
+/**
+ * struct vdec_av1_slice_loop_filter - AV1 Loop filter parameters
+ * @loop_filter_level: an array containing loop filter strength values.
+ * @loop_filter_ref_deltas: contains the adjustment needed for the filter
+ * level based on the chosen reference frame
+ * @loop_filter_mode_deltas: contains the adjustment needed for the filter
+ * level based on the chosen mode
+ * @loop_filter_sharpness: indicates the sharpness level. The loop_filter_level
+ * and loop_filter_sharpness together determine when
+ * a block edge is filtered, and by how much the
+ * filtering can change the sample values
+ * @loop_filter_delta_enabled: filetr level depends on the mode and reference
+ * frame used to predict a block
+ */
+struct vdec_av1_slice_loop_filter {
+ u8 loop_filter_level[4];
+ int loop_filter_ref_deltas[V4L2_AV1_TOTAL_REFS_PER_FRAME];
+ int loop_filter_mode_deltas[2];
+ u8 loop_filter_sharpness;
+ u8 loop_filter_delta_enabled;
+};
+
+/**
+ * struct vdec_av1_slice_cdef - AV1 CDEF parameters
+ * @cdef_damping: controls the amount of damping in the deringing filter
+ * @cdef_y_strength: specifies the strength of the primary filter and secondary filter
+ * @cdef_uv_strength: specifies the strength of the primary filter and secondary filter
+ * @cdef_bits: specifies the number of bits needed to specify which
+ * CDEF filter to apply
+ */
+struct vdec_av1_slice_cdef {
+ u8 cdef_damping;
+ u8 cdef_y_strength[8];
+ u8 cdef_uv_strength[8];
+ u8 cdef_bits;
+};
+
+/**
+ * struct vdec_av1_slice_mfmv - AV1 mfmv parameters
+ * @mfmv_valid_ref: mfmv_valid_ref
+ * @mfmv_dir: mfmv_dir
+ * @mfmv_ref_to_cur: mfmv_ref_to_cur
+ * @mfmv_ref_frame_idx: mfmv_ref_frame_idx
+ * @mfmv_count: mfmv_count
+ */
+struct vdec_av1_slice_mfmv {
+ u32 mfmv_valid_ref[3];
+ u32 mfmv_dir[3];
+ int mfmv_ref_to_cur[3];
+ int mfmv_ref_frame_idx[3];
+ int mfmv_count;
+};
+
+/**
+ * struct vdec_av1_slice_tile - AV1 Tile info
+ * @tile_cols: specifies the number of tiles across the frame
+ * @tile_rows: pecifies the number of tiles down the frame
+ * @mi_col_starts: an array specifying the start column
+ * @mi_row_starts: an array specifying the start row
+ * @context_update_tile_id: specifies which tile to use for the CDF update
+ * @uniform_tile_spacing_flag: tiles are uniformly spaced across the frame
+ * or the tile sizes are coded
+ */
+struct vdec_av1_slice_tile {
+ u8 tile_cols;
+ u8 tile_rows;
+ int mi_col_starts[V4L2_AV1_MAX_TILE_COLS + 1];
+ int mi_row_starts[V4L2_AV1_MAX_TILE_ROWS + 1];
+ u8 context_update_tile_id;
+ u8 uniform_tile_spacing_flag;
+};
+
+/**
+ * struct vdec_av1_slice_uncompressed_header - Represents an AV1 Frame Header OBU
+ * @use_ref_frame_mvs: use_ref_frame_mvs flag
+ * @order_hint: specifies OrderHintBits least significant bits of the expected
+ * @gm: global motion param
+ * @upscaled_width: the upscaled width
+ * @frame_width: frame's width
+ * @frame_height: frame's height
+ * @reduced_tx_set: frame is restricted to a reduced subset of the full
+ * set of transform types
+ * @tx_mode: specifies how the transform size is determined
+ * @uniform_tile_spacing_flag: tiles are uniformly spaced across the frame
+ * or the tile sizes are coded
+ * @interpolation_filter: specifies the filter selection used for performing inter prediction
+ * @allow_warped_motion: motion_mode may be present or not
+ * @is_motion_mode_switchable : euqlt to 0 specifies that only the SIMPLE motion mode will be used
+ * @reference_mode : frame reference mode selected
+ * @allow_high_precision_mv: specifies that motion vectors are specified to
+ * quarter pel precision or to eighth pel precision
+ * @allow_intra_bc: ubducates that intra block copy may be used in this frame
+ * @force_integer_mv: specifies motion vectors will always be integers or
+ * can contain fractional bits
+ * @allow_screen_content_tools: intra blocks may use palette encoding
+ * @error_resilient_mode: error resislent mode is enable/disable
+ * @frame_type: specifies the AV1 frame type
+ * @primary_ref_frame: specifies which reference frame contains the CDF values
+ * and other state that should be loaded at the start of the frame
+ * slots will be updated with the current frame after it is decoded
+ * @disable_frame_end_update_cdf:indicates the end of frame CDF update is disable or enable
+ * @disable_cdf_update: specified whether the CDF update in the symbol
+ * decoding process should be disables
+ * @skip_mode: av1 skip mode parameters
+ * @seg: av1 segmentaon parameters
+ * @delta_q_lf: av1 delta loop fileter
+ * @quant: av1 Quantization params
+ * @lr: av1 Loop Restauration parameters
+ * @superres_denom: the denominator for the upscaling ratio
+ * @loop_filter: av1 Loop filter parameters
+ * @cdef: av1 CDEF parameters
+ * @mfmv: av1 mfmv parameters
+ * @tile: av1 Tile info
+ * @frame_is_intra: intra frame
+ * @loss_less_array: loss less array
+ * @coded_loss_less: coded lsss less
+ * @mi_rows: size of mi unit in rows
+ * @mi_cols: size of mi unit in cols
+ */
+struct vdec_av1_slice_uncompressed_header {
+ u8 use_ref_frame_mvs;
+ int order_hint;
+ struct vdec_av1_slice_gm gm[V4L2_AV1_TOTAL_REFS_PER_FRAME];
+ u32 upscaled_width;
+ u32 frame_width;
+ u32 frame_height;
+ u8 reduced_tx_set;
+ u8 tx_mode;
+ u8 uniform_tile_spacing_flag;
+ u8 interpolation_filter;
+ u8 allow_warped_motion;
+ u8 is_motion_mode_switchable;
+ u8 reference_mode;
+ u8 allow_high_precision_mv;
+ u8 allow_intra_bc;
+ u8 force_integer_mv;
+ u8 allow_screen_content_tools;
+ u8 error_resilient_mode;
+ u8 frame_type;
+ u8 primary_ref_frame;
+ u8 disable_frame_end_update_cdf;
+ u32 disable_cdf_update;
+ struct vdec_av1_slice_sm skip_mode;
+ struct vdec_av1_slice_seg seg;
+ struct vdec_av1_slice_delta_q_lf delta_q_lf;
+ struct vdec_av1_slice_quantization quant;
+ struct vdec_av1_slice_lr lr;
+ u32 superres_denom;
+ struct vdec_av1_slice_loop_filter loop_filter;
+ struct vdec_av1_slice_cdef cdef;
+ struct vdec_av1_slice_mfmv mfmv;
+ struct vdec_av1_slice_tile tile;
+ u8 frame_is_intra;
+ u8 loss_less_array[V4L2_AV1_MAX_SEGMENTS];
+ u8 coded_loss_less;
+ u32 mi_rows;
+ u32 mi_cols;
+};
+
+/**
+ * struct vdec_av1_slice_seq_header - Represents an AV1 Sequence OBU
+ * @bitdepth: the bitdepth to use for the sequence
+ * @enable_superres: specifies whether the use_superres syntax element may be present
+ * @enable_filter_intra: specifies the use_filter_intra syntax element may be present
+ * @enable_intra_edge_filter: whether the intra edge filtering process should be enabled
+ * @enable_interintra_compound: specifies the mode info fo rinter blocks may
+ * contain the syntax element interintra
+ * @enable_masked_compound: specifies the mode info fo rinter blocks may
+ * contain the syntax element compound_type
+ * @enable_dual_filter: the inter prediction filter type may be specified independently
+ * @enable_jnt_comp: distance weights process may be used for inter prediction
+ * @mono_chrome: indicates the video does not contain U and V color planes
+ * @enable_order_hint: tools based on the values of order hints may be used
+ * @order_hint_bits: the number of bits used for the order_hint field at each frame
+ * @use_128x128_superblock: indicates superblocks contain 128*128 luma samples
+ * @subsampling_x: the chroma subsamling format
+ * @subsampling_y: the chroma subsamling format
+ * @max_frame_width: the maximum frame width for the frames represented by sequence
+ * @max_frame_height: the maximum frame height for the frames represented by sequence
+ */
+struct vdec_av1_slice_seq_header {
+ u8 bitdepth;
+ u8 enable_superres;
+ u8 enable_filter_intra;
+ u8 enable_intra_edge_filter;
+ u8 enable_interintra_compound;
+ u8 enable_masked_compound;
+ u8 enable_dual_filter;
+ u8 enable_jnt_comp;
+ u8 mono_chrome;
+ u8 enable_order_hint;
+ u8 order_hint_bits;
+ u8 use_128x128_superblock;
+ u8 subsampling_x;
+ u8 subsampling_y;
+ u32 max_frame_width;
+ u32 max_frame_height;
+};
+
+/**
+ * struct vdec_av1_slice_frame - Represents current Frame info
+ * @uh: uncompressed header info
+ * @seq: sequence header info
+ * @large_scale_tile: is large scale mode
+ * @cur_ts: current frame timestamp
+ * @prev_fb_idx: prev slot id
+ * @ref_frame_sign_bias: arrays for ref_frame sign bias
+ * @order_hints: arrays for ref_frame order hint
+ * @ref_frame_valid: arrays for valid ref_frame
+ * @ref_frame_map: map to slot frame info
+ * @frame_refs: ref_frame info
+ */
+struct vdec_av1_slice_frame {
+ struct vdec_av1_slice_uncompressed_header uh;
+ struct vdec_av1_slice_seq_header seq;
+ u8 large_scale_tile;
+ u64 cur_ts;
+ int prev_fb_idx;
+ u8 ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME];
+ u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
+ u32 ref_frame_valid[V4L2_AV1_REFS_PER_FRAME];
+ int ref_frame_map[V4L2_AV1_TOTAL_REFS_PER_FRAME];
+ struct vdec_av1_slice_frame_refs frame_refs[V4L2_AV1_REFS_PER_FRAME];
+};
+
+/**
+ * struct vdec_av1_slice_work_buffer - work buffer for lat
+ * @mv_addr: mv buffer memory info
+ * @cdf_addr: cdf buffer memory info
+ * @segid_addr: segid buffer memory info
+ */
+struct vdec_av1_slice_work_buffer {
+ struct vdec_av1_slice_mem mv_addr;
+ struct vdec_av1_slice_mem cdf_addr;
+ struct vdec_av1_slice_mem segid_addr;
+};
+
+/**
+ * struct vdec_av1_slice_frame_info - frame info for each slot
+ * @frame_type: frame type
+ * @frame_is_intra: is intra frame
+ * @order_hint: order hint
+ * @order_hints: referece frame order hint
+ * @upscaled_width: upscale width
+ * @pic_pitch: buffer pitch
+ * @frame_width: frane width
+ * @frame_height: frame height
+ * @mi_rows: rows in mode info
+ * @mi_cols: cols in mode info
+ * @ref_count: mark to reference frame counts
+ */
+struct vdec_av1_slice_frame_info {
+ u8 frame_type;
+ u8 frame_is_intra;
+ int order_hint;
+ u32 order_hints[V4L2_AV1_REFS_PER_FRAME];
+ u32 upscaled_width;
+ u32 pic_pitch;
+ u32 frame_width;
+ u32 frame_height;
+ u32 mi_rows;
+ u32 mi_cols;
+ int ref_count;
+};
+
+/**
+ * struct vdec_av1_slice_slot - slot info that needs to be saved in the global instance
+ * @frame_info: frame info for each slot
+ * @timestamp: time stamp info
+ */
+struct vdec_av1_slice_slot {
+ struct vdec_av1_slice_frame_info frame_info[AV1_MAX_FRAME_BUF_COUNT];
+ u64 timestamp[AV1_MAX_FRAME_BUF_COUNT];
+};
+
+/**
+ * struct vdec_av1_slice_fb - frame buffer for decoding
+ * @y: current y buffer address info
+ * @c: current c buffer address info
+ */
+struct vdec_av1_slice_fb {
+ struct vdec_av1_slice_mem y;
+ struct vdec_av1_slice_mem c;
+};
+
+/**
+ * struct vdec_av1_slice_vsi - exchange frame information between Main CPU and MicroP
+ * @bs: input buffer info
+ * @work_buffer: working buffe for hw
+ * @cdf_table: cdf_table buffer
+ * @cdf_tmp: cdf temp buffer
+ * @rd_mv: mv buffer for lat output , core input
+ * @ube: ube buffer
+ * @trans: transcoded buffer
+ * @err_map: err map buffer
+ * @row_info: row info buffer
+ * @fb: current y/c buffer
+ * @ref: ref y/c buffer
+ * @iq_table: iq table buffer
+ * @tile: tile buffer
+ * @slots: slots info for each frame
+ * @slot_id: current frame slot id
+ * @frame: current frame info
+ * @state: status after decode done
+ * @cur_lst_tile_id: tile id for large scale
+ */
+struct vdec_av1_slice_vsi {
+ /* lat */
+ struct vdec_av1_slice_mem bs;
+ struct vdec_av1_slice_work_buffer work_buffer[AV1_MAX_FRAME_BUF_COUNT];
+ struct vdec_av1_slice_mem cdf_table;
+ struct vdec_av1_slice_mem cdf_tmp;
+ /* LAT stage's output, Core stage's input */
+ struct vdec_av1_slice_mem rd_mv;
+ struct vdec_av1_slice_mem ube;
+ struct vdec_av1_slice_mem trans;
+ struct vdec_av1_slice_mem err_map;
+ struct vdec_av1_slice_mem row_info;
+ /* core */
+ struct vdec_av1_slice_fb fb;
+ struct vdec_av1_slice_fb ref[V4L2_AV1_REFS_PER_FRAME];
+ struct vdec_av1_slice_mem iq_table;
+ /* lat and core share*/
+ struct vdec_av1_slice_mem tile;
+ struct vdec_av1_slice_slot slots;
+ s8 slot_id;
+ struct vdec_av1_slice_frame frame;
+ struct vdec_av1_slice_state state;
+ u32 cur_lst_tile_id;
+};
+
+/**
+ * struct vdec_av1_slice_pfc - per-frame context that contains a local vsi.
+ * pass it from lat to core
+ * @vsi: local vsi. copy to/from remote vsi before/after decoding
+ * @ref_idx: reference buffer timestamp
+ * @seq: picture sequence
+ */
+struct vdec_av1_slice_pfc {
+ struct vdec_av1_slice_vsi vsi;
+ u64 ref_idx[V4L2_AV1_REFS_PER_FRAME];
+ int seq;
+};
+
+/**
+ * struct vdec_av1_slice_instance - represent one av1 instance
+ * @ctx: pointer to codec's context
+ * @vpu: VPU instance
+ * @iq_table: iq table buffer
+ * @cdf_table: cdf table buffer
+ * @mv: mv working buffer
+ * @cdf: cdf working buffer
+ * @seg: segmentation working buffer
+ * @cdf_temp: cdf temp buffer
+ * @tile: tile buffer
+ * @slots: slots info
+ * @tile_group: tile_group entry
+ * @level: level of current resolution
+ * @width: width of last picture
+ * @height: height of last picture
+ * @frame_type: frame_type of last picture
+ * @irq_enabled: irq to Main CPU or MicroP
+ * @inneracing_mode: is inneracing mode
+ * @init_vsi: vsi used for initialized AV1 instance
+ * @vsi: vsi used for decoding/flush ...
+ * @core_vsi: vsi used for Core stage
+ * @seq: global picture sequence
+ */
+struct vdec_av1_slice_instance {
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_vpu_inst vpu;
+
+ struct mtk_vcodec_mem iq_table;
+ struct mtk_vcodec_mem cdf_table;
+
+ struct mtk_vcodec_mem mv[AV1_MAX_FRAME_BUF_COUNT];
+ struct mtk_vcodec_mem cdf[AV1_MAX_FRAME_BUF_COUNT];
+ struct mtk_vcodec_mem seg[AV1_MAX_FRAME_BUF_COUNT];
+ struct mtk_vcodec_mem cdf_temp;
+ struct mtk_vcodec_mem tile;
+ struct vdec_av1_slice_slot slots;
+ struct vdec_av1_slice_tile_group tile_group;
+
+ /* for resolution change and get_pic_info */
+ enum vdec_av1_slice_resolution_level level;
+ u32 width;
+ u32 height;
+
+ u32 frame_type;
+ u32 irq_enabled;
+ u32 inneracing_mode;
+
+ /* MicroP vsi */
+ union {
+ struct vdec_av1_slice_init_vsi *init_vsi;
+ struct vdec_av1_slice_vsi *vsi;
+ };
+ struct vdec_av1_slice_vsi *core_vsi;
+ int seq;
+};
+
+static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf);
+
+static inline int vdec_av1_slice_get_msb(u32 n)
+{
+ if (n == 0)
+ return 0;
+ return 31 ^ __builtin_clz(n);
+}
+
+static inline bool vdec_av1_slice_need_scale(u32 ref_width, u32 ref_height,
+ u32 this_width, u32 this_height)
+{
+ return ((this_width << 1) >= ref_width) &&
+ ((this_height << 1) >= ref_height) &&
+ (this_width <= (ref_width << 4)) &&
+ (this_height <= (ref_height << 4));
+}
+
+static void *vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id)
+{
+ struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id);
+
+ if (!ctrl)
+ return ERR_PTR(-EINVAL);
+
+ return ctrl->p_cur.p;
+}
+
+static int vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance *instance)
+{
+ u8 *remote_cdf_table;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_av1_slice_init_vsi *vsi;
+ int ret;
+
+ ctx = instance->ctx;
+ vsi = instance->vpu.vsi;
+ remote_cdf_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
+ (u32)vsi->cdf_table_addr);
+ if (IS_ERR(remote_cdf_table)) {
+ mtk_vdec_err(ctx, "failed to map cdf table\n");
+ return PTR_ERR(remote_cdf_table);
+ }
+
+ mtk_vdec_debug(ctx, "map cdf table to 0x%p\n", remote_cdf_table);
+
+ if (instance->cdf_table.va)
+ mtk_vcodec_mem_free(ctx, &instance->cdf_table);
+ instance->cdf_table.size = vsi->cdf_table_size;
+
+ ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_table);
+ if (ret)
+ return ret;
+
+ memcpy(instance->cdf_table.va, remote_cdf_table, vsi->cdf_table_size);
+
+ return 0;
+}
+
+static int vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance *instance)
+{
+ u8 *remote_iq_table;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_av1_slice_init_vsi *vsi;
+ int ret;
+
+ ctx = instance->ctx;
+ vsi = instance->vpu.vsi;
+ remote_iq_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
+ (u32)vsi->iq_table_addr);
+ if (IS_ERR(remote_iq_table)) {
+ mtk_vdec_err(ctx, "failed to map iq table\n");
+ return PTR_ERR(remote_iq_table);
+ }
+
+ mtk_vdec_debug(ctx, "map iq table to 0x%p\n", remote_iq_table);
+
+ if (instance->iq_table.va)
+ mtk_vcodec_mem_free(ctx, &instance->iq_table);
+ instance->iq_table.size = vsi->iq_table_size;
+
+ ret = mtk_vcodec_mem_alloc(ctx, &instance->iq_table);
+ if (ret)
+ return ret;
+
+ memcpy(instance->iq_table.va, remote_iq_table, vsi->iq_table_size);
+
+ return 0;
+}
+
+static int vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi *vsi)
+{
+ struct vdec_av1_slice_slot *slots = &vsi->slots;
+ int new_slot_idx = AV1_INVALID_IDX;
+ int i;
+
+ for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
+ if (slots->frame_info[i].ref_count == 0) {
+ new_slot_idx = i;
+ break;
+ }
+ }
+
+ if (new_slot_idx != AV1_INVALID_IDX) {
+ slots->frame_info[new_slot_idx].ref_count++;
+ slots->timestamp[new_slot_idx] = vsi->frame.cur_ts;
+ }
+
+ return new_slot_idx;
+}
+
+static inline void vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info *frame_info)
+{
+ memset((void *)frame_info, 0, sizeof(struct vdec_av1_slice_frame_info));
+}
+
+static void vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot *slots, int fb_idx)
+{
+ struct vdec_av1_slice_frame_info *frame_info = slots->frame_info;
+
+ frame_info[fb_idx].ref_count--;
+ if (frame_info[fb_idx].ref_count < 0) {
+ frame_info[fb_idx].ref_count = 0;
+ pr_err(MTK_DBG_V4L2_STR "av1_error: %s() fb_idx %d decrease ref_count error\n",
+ __func__, fb_idx);
+ }
+
+ vdec_av1_slice_clear_fb(&frame_info[fb_idx]);
+}
+
+static void vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot *slots,
+ struct vdec_av1_slice_frame *frame,
+ struct v4l2_ctrl_av1_frame *ctrl_fh)
+{
+ int slot_id, ref_id;
+
+ for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++)
+ frame->ref_frame_map[ref_id] = AV1_INVALID_IDX;
+
+ for (slot_id = 0; slot_id < AV1_MAX_FRAME_BUF_COUNT; slot_id++) {
+ u64 timestamp = slots->timestamp[slot_id];
+ bool ref_used = false;
+
+ /* ignored unused slots */
+ if (slots->frame_info[slot_id].ref_count == 0)
+ continue;
+
+ for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
+ if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) {
+ frame->ref_frame_map[ref_id] = slot_id;
+ ref_used = true;
+ }
+ }
+
+ if (!ref_used)
+ vdec_av1_slice_decrease_ref_count(slots, slot_id);
+ }
+}
+
+static void vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_vsi *vsi,
+ struct v4l2_ctrl_av1_frame *ctrl_fh)
+{
+ struct vdec_av1_slice_frame_info *cur_frame_info;
+ struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
+ int ref_id;
+
+ memcpy(&vsi->slots, &instance->slots, sizeof(instance->slots));
+ vdec_av1_slice_cleanup_slots(&vsi->slots, &vsi->frame, ctrl_fh);
+ vsi->slot_id = vdec_av1_slice_get_new_slot(vsi);
+
+ if (vsi->slot_id == AV1_INVALID_IDX) {
+ mtk_v4l2_vdec_err(instance->ctx, "warning:av1 get invalid index slot\n");
+ vsi->slot_id = 0;
+ }
+ cur_frame_info = &vsi->slots.frame_info[vsi->slot_id];
+ cur_frame_info->frame_type = uh->frame_type;
+ cur_frame_info->frame_is_intra = ((uh->frame_type == AV1_INTRA_ONLY_FRAME) ||
+ (uh->frame_type == AV1_KEY_FRAME));
+ cur_frame_info->order_hint = uh->order_hint;
+ cur_frame_info->upscaled_width = uh->upscaled_width;
+ cur_frame_info->pic_pitch = 0;
+ cur_frame_info->frame_width = uh->frame_width;
+ cur_frame_info->frame_height = uh->frame_height;
+ cur_frame_info->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
+ cur_frame_info->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
+
+ /* ensure current frame is properly mapped if referenced */
+ for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) {
+ u64 timestamp = vsi->slots.timestamp[vsi->slot_id];
+
+ if (ctrl_fh->reference_frame_ts[ref_id] == timestamp)
+ vsi->frame.ref_frame_map[ref_id] = vsi->slot_id;
+ }
+}
+
+static int vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_vsi *vsi)
+{
+ struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
+ enum vdec_av1_slice_resolution_level level;
+ u32 max_sb_w, max_sb_h, max_w, max_h, w, h;
+ int i, ret;
+
+ w = vsi->frame.uh.frame_width;
+ h = vsi->frame.uh.frame_height;
+
+ if (w > VCODEC_DEC_4K_CODED_WIDTH || h > VCODEC_DEC_4K_CODED_HEIGHT)
+ /* 8K */
+ return -EINVAL;
+
+ if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
+ /* 4K */
+ level = AV1_RES_4K;
+ max_w = VCODEC_DEC_4K_CODED_WIDTH;
+ max_h = VCODEC_DEC_4K_CODED_HEIGHT;
+ } else {
+ /* FHD */
+ level = AV1_RES_FHD;
+ max_w = MTK_VDEC_MAX_W;
+ max_h = MTK_VDEC_MAX_H;
+ }
+
+ if (level == instance->level)
+ return 0;
+
+ mtk_vdec_debug(ctx, "resolution level changed from %u to %u, %ux%u",
+ instance->level, level, w, h);
+
+ max_sb_w = DIV_ROUND_UP(max_w, 128);
+ max_sb_h = DIV_ROUND_UP(max_h, 128);
+
+ for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
+ if (instance->mv[i].va)
+ mtk_vcodec_mem_free(ctx, &instance->mv[i]);
+ instance->mv[i].size = max_sb_w * max_sb_h * SZ_1K;
+ ret = mtk_vcodec_mem_alloc(ctx, &instance->mv[i]);
+ if (ret)
+ goto err;
+
+ if (instance->seg[i].va)
+ mtk_vcodec_mem_free(ctx, &instance->seg[i]);
+ instance->seg[i].size = max_sb_w * max_sb_h * 512;
+ ret = mtk_vcodec_mem_alloc(ctx, &instance->seg[i]);
+ if (ret)
+ goto err;
+
+ if (instance->cdf[i].va)
+ mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
+ instance->cdf[i].size = AV1_CDF_TABLE_BUFFER_SIZE;
+ ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf[i]);
+ if (ret)
+ goto err;
+ }
+
+ if (!instance->cdf_temp.va) {
+ instance->cdf_temp.size = (SZ_1K * 16 * 100);
+ ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_temp);
+ if (ret)
+ goto err;
+ vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
+ vsi->cdf_tmp.size = instance->cdf_temp.size;
+ }
+
+ if (instance->tile.va)
+ mtk_vcodec_mem_free(ctx, &instance->tile);
+
+ instance->tile.size = AV1_TILE_BUF_SIZE * V4L2_AV1_MAX_TILE_COUNT;
+ ret = mtk_vcodec_mem_alloc(ctx, &instance->tile);
+ if (ret)
+ goto err;
+
+ instance->level = level;
+ return 0;
+
+err:
+ instance->level = AV1_RES_NONE;
+ return ret;
+}
+
+static void vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance *instance)
+{
+ struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(instance->mv); i++)
+ mtk_vcodec_mem_free(ctx, &instance->mv[i]);
+
+ for (i = 0; i < ARRAY_SIZE(instance->seg); i++)
+ mtk_vcodec_mem_free(ctx, &instance->seg[i]);
+
+ for (i = 0; i < ARRAY_SIZE(instance->cdf); i++)
+ mtk_vcodec_mem_free(ctx, &instance->cdf[i]);
+
+ mtk_vcodec_mem_free(ctx, &instance->tile);
+ mtk_vcodec_mem_free(ctx, &instance->cdf_temp);
+ mtk_vcodec_mem_free(ctx, &instance->cdf_table);
+ mtk_vcodec_mem_free(ctx, &instance->iq_table);
+
+ instance->level = AV1_RES_NONE;
+}
+
+static inline void vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi *vsi,
+ struct vdec_av1_slice_vsi *remote_vsi)
+{
+ memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
+ memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
+}
+
+static inline void vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi *vsi,
+ struct vdec_av1_slice_vsi *remote_vsi)
+{
+ memcpy(remote_vsi, vsi, sizeof(*vsi));
+}
+
+static int vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_vsi *vsi,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct vb2_v4l2_buffer *src;
+ struct vb2_v4l2_buffer *dst;
+
+ src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
+ if (!src)
+ return -EINVAL;
+
+ lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
+ dst = &lat_buf->ts_info;
+ v4l2_m2m_buf_copy_metadata(src, dst, true);
+ vsi->frame.cur_ts = dst->vb2_buf.timestamp;
+
+ return 0;
+}
+
+static short vdec_av1_slice_resolve_divisor_32(u32 D, short *shift)
+{
+ int f;
+ int e;
+
+ *shift = vdec_av1_slice_get_msb(D);
+ /* e is obtained from D after resetting the most significant 1 bit. */
+ e = D - ((u32)1 << *shift);
+ /* Get the most significant DIV_LUT_BITS (8) bits of e into f */
+ if (*shift > DIV_LUT_BITS)
+ f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
+ else
+ f = e << (DIV_LUT_BITS - *shift);
+ if (f > DIV_LUT_NUM)
+ return -1;
+ *shift += DIV_LUT_PREC_BITS;
+ /* Use f as lookup into the precomputed table of multipliers */
+ return div_lut[f];
+}
+
+static void vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm *gm_params)
+{
+ const int *mat = gm_params->wmmat;
+ short shift;
+ short y;
+ long long gv, dv;
+
+ if (gm_params->wmmat[2] <= 0)
+ return;
+
+ gm_params->alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
+ gm_params->beta = clamp_val(mat[3], S16_MIN, S16_MAX);
+
+ y = vdec_av1_slice_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
+
+ gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
+ gm_params->gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift),
+ S16_MIN, S16_MAX);
+
+ dv = ((long long)mat[3] * mat[4]) * y;
+ gm_params->delta = clamp_val(mat[5] - (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) -
+ (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
+
+ gm_params->alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->alpha, WARP_PARAM_REDUCE_BITS) *
+ (1 << WARP_PARAM_REDUCE_BITS);
+ gm_params->beta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->beta, WARP_PARAM_REDUCE_BITS) *
+ (1 << WARP_PARAM_REDUCE_BITS);
+ gm_params->gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->gamma, WARP_PARAM_REDUCE_BITS) *
+ (1 << WARP_PARAM_REDUCE_BITS);
+ gm_params->delta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->delta, WARP_PARAM_REDUCE_BITS) *
+ (1 << WARP_PARAM_REDUCE_BITS);
+}
+
+static void vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm *gm,
+ struct v4l2_av1_global_motion *ctrl_gm)
+{
+ u32 i, j;
+
+ for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
+ gm[i].wmtype = ctrl_gm->type[i];
+ for (j = 0; j < 6; j++)
+ gm[i].wmmat[j] = ctrl_gm->params[i][j];
+
+ gm[i].invalid = !!(ctrl_gm->invalid & BIT(i));
+ gm[i].alpha = 0;
+ gm[i].beta = 0;
+ gm[i].gamma = 0;
+ gm[i].delta = 0;
+ if (gm[i].wmtype <= V4L2_AV1_WARP_MODEL_AFFINE)
+ vdec_av1_slice_get_shear_params(&gm[i]);
+ }
+}
+
+static void vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg *seg,
+ struct v4l2_av1_segmentation *ctrl_seg)
+{
+ u32 i, j;
+
+ seg->segmentation_enabled = SEGMENTATION_FLAG(ctrl_seg, ENABLED);
+ seg->segmentation_update_map = SEGMENTATION_FLAG(ctrl_seg, UPDATE_MAP);
+ seg->segmentation_temporal_update = SEGMENTATION_FLAG(ctrl_seg, TEMPORAL_UPDATE);
+ seg->segmentation_update_data = SEGMENTATION_FLAG(ctrl_seg, UPDATE_DATA);
+ seg->segid_preskip = SEGMENTATION_FLAG(ctrl_seg, SEG_ID_PRE_SKIP);
+ seg->last_active_segid = ctrl_seg->last_active_seg_id;
+
+ for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
+ seg->feature_enabled_mask[i] = ctrl_seg->feature_enabled[i];
+ for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++)
+ seg->feature_data[i][j] = ctrl_seg->feature_data[i][j];
+ }
+}
+
+static void vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization *quant,
+ struct v4l2_av1_quantization *ctrl_quant)
+{
+ quant->base_q_idx = ctrl_quant->base_q_idx;
+ quant->delta_qydc = ctrl_quant->delta_q_y_dc;
+ quant->delta_qudc = ctrl_quant->delta_q_u_dc;
+ quant->delta_quac = ctrl_quant->delta_q_u_ac;
+ quant->delta_qvdc = ctrl_quant->delta_q_v_dc;
+ quant->delta_qvac = ctrl_quant->delta_q_v_ac;
+ quant->qm_y = ctrl_quant->qm_y;
+ quant->qm_u = ctrl_quant->qm_u;
+ quant->qm_v = ctrl_quant->qm_v;
+ quant->using_qmatrix = QUANT_FLAG(ctrl_quant, USING_QMATRIX);
+}
+
+static int vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header *uh,
+ int segmentation_id)
+{
+ struct vdec_av1_slice_seg *seg = &uh->seg;
+ struct vdec_av1_slice_quantization *quant = &uh->quant;
+ int data = 0, qindex = 0;
+
+ if (seg->segmentation_enabled &&
+ (seg->feature_enabled_mask[segmentation_id] & BIT(SEG_LVL_ALT_Q))) {
+ data = seg->feature_data[segmentation_id][SEG_LVL_ALT_Q];
+ qindex = quant->base_q_idx + data;
+ return clamp_val(qindex, 0, MAXQ);
+ }
+
+ return quant->base_q_idx;
+}
+
+static void vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr *lr,
+ struct v4l2_av1_loop_restoration *ctrl_lr)
+{
+ int i;
+
+ lr->use_lr = 0;
+ lr->use_chroma_lr = 0;
+ for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
+ lr->frame_restoration_type[i] = ctrl_lr->frame_restoration_type[i];
+ lr->loop_restoration_size[i] = ctrl_lr->loop_restoration_size[i];
+ if (lr->frame_restoration_type[i]) {
+ lr->use_lr = 1;
+ if (i > 0)
+ lr->use_chroma_lr = 1;
+ }
+ }
+}
+
+static void vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter *lf,
+ struct v4l2_av1_loop_filter *ctrl_lf)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lf->loop_filter_level); i++)
+ lf->loop_filter_level[i] = ctrl_lf->level[i];
+
+ for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
+ lf->loop_filter_ref_deltas[i] = ctrl_lf->ref_deltas[i];
+
+ for (i = 0; i < ARRAY_SIZE(lf->loop_filter_mode_deltas); i++)
+ lf->loop_filter_mode_deltas[i] = ctrl_lf->mode_deltas[i];
+
+ lf->loop_filter_sharpness = ctrl_lf->sharpness;
+ lf->loop_filter_delta_enabled =
+ BIT_FLAG(ctrl_lf, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED);
+}
+
+static void vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef *cdef,
+ struct v4l2_av1_cdef *ctrl_cdef)
+{
+ int i;
+
+ cdef->cdef_damping = ctrl_cdef->damping_minus_3 + 3;
+ cdef->cdef_bits = ctrl_cdef->bits;
+
+ for (i = 0; i < V4L2_AV1_CDEF_MAX; i++) {
+ if (ctrl_cdef->y_sec_strength[i] == 4)
+ ctrl_cdef->y_sec_strength[i] -= 1;
+
+ if (ctrl_cdef->uv_sec_strength[i] == 4)
+ ctrl_cdef->uv_sec_strength[i] -= 1;
+
+ cdef->cdef_y_strength[i] =
+ ctrl_cdef->y_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
+ ctrl_cdef->y_sec_strength[i];
+ cdef->cdef_uv_strength[i] =
+ ctrl_cdef->uv_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS |
+ ctrl_cdef->uv_sec_strength[i];
+ }
+}
+
+static void vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header *seq,
+ struct v4l2_ctrl_av1_sequence *ctrl_seq)
+{
+ seq->bitdepth = ctrl_seq->bit_depth;
+ seq->max_frame_width = ctrl_seq->max_frame_width_minus_1 + 1;
+ seq->max_frame_height = ctrl_seq->max_frame_height_minus_1 + 1;
+ seq->enable_superres = SEQUENCE_FLAG(ctrl_seq, ENABLE_SUPERRES);
+ seq->enable_filter_intra = SEQUENCE_FLAG(ctrl_seq, ENABLE_FILTER_INTRA);
+ seq->enable_intra_edge_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTRA_EDGE_FILTER);
+ seq->enable_interintra_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTERINTRA_COMPOUND);
+ seq->enable_masked_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_MASKED_COMPOUND);
+ seq->enable_dual_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_DUAL_FILTER);
+ seq->enable_jnt_comp = SEQUENCE_FLAG(ctrl_seq, ENABLE_JNT_COMP);
+ seq->mono_chrome = SEQUENCE_FLAG(ctrl_seq, MONO_CHROME);
+ seq->enable_order_hint = SEQUENCE_FLAG(ctrl_seq, ENABLE_ORDER_HINT);
+ seq->order_hint_bits = ctrl_seq->order_hint_bits;
+ seq->use_128x128_superblock = SEQUENCE_FLAG(ctrl_seq, USE_128X128_SUPERBLOCK);
+ seq->subsampling_x = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_X);
+ seq->subsampling_y = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_Y);
+}
+
+static void vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame *frame,
+ struct v4l2_av1_tile_info *ctrl_tile)
+{
+ struct vdec_av1_slice_seq_header *seq = &frame->seq;
+ struct vdec_av1_slice_tile *tile = &frame->uh.tile;
+ u32 mib_size_log2 = seq->use_128x128_superblock ? 5 : 4;
+ int i;
+
+ tile->tile_cols = ctrl_tile->tile_cols;
+ tile->tile_rows = ctrl_tile->tile_rows;
+ tile->context_update_tile_id = ctrl_tile->context_update_tile_id;
+ tile->uniform_tile_spacing_flag =
+ BIT_FLAG(ctrl_tile, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
+
+ for (i = 0; i < tile->tile_cols + 1; i++)
+ tile->mi_col_starts[i] =
+ ALIGN(ctrl_tile->mi_col_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
+
+ for (i = 0; i < tile->tile_rows + 1; i++)
+ tile->mi_row_starts[i] =
+ ALIGN(ctrl_tile->mi_row_starts[i], BIT(mib_size_log2)) >> mib_size_log2;
+}
+
+static void vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_frame *frame,
+ struct v4l2_ctrl_av1_frame *ctrl_fh)
+{
+ struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
+ int i;
+
+ uh->use_ref_frame_mvs = FH_FLAG(ctrl_fh, USE_REF_FRAME_MVS);
+ uh->order_hint = ctrl_fh->order_hint;
+ vdec_av1_slice_setup_gm(uh->gm, &ctrl_fh->global_motion);
+ uh->upscaled_width = ctrl_fh->upscaled_width;
+ uh->frame_width = ctrl_fh->frame_width_minus_1 + 1;
+ uh->frame_height = ctrl_fh->frame_height_minus_1 + 1;
+ uh->mi_cols = ((uh->frame_width + 7) >> 3) << 1;
+ uh->mi_rows = ((uh->frame_height + 7) >> 3) << 1;
+ uh->reduced_tx_set = FH_FLAG(ctrl_fh, REDUCED_TX_SET);
+ uh->tx_mode = ctrl_fh->tx_mode;
+ uh->uniform_tile_spacing_flag =
+ BIT_FLAG(&ctrl_fh->tile_info, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING);
+ uh->interpolation_filter = ctrl_fh->interpolation_filter;
+ uh->allow_warped_motion = FH_FLAG(ctrl_fh, ALLOW_WARPED_MOTION);
+ uh->is_motion_mode_switchable = FH_FLAG(ctrl_fh, IS_MOTION_MODE_SWITCHABLE);
+ uh->frame_type = ctrl_fh->frame_type;
+ uh->frame_is_intra = (uh->frame_type == V4L2_AV1_INTRA_ONLY_FRAME ||
+ uh->frame_type == V4L2_AV1_KEY_FRAME);
+
+ if (!uh->frame_is_intra && FH_FLAG(ctrl_fh, REFERENCE_SELECT))
+ uh->reference_mode = AV1_REFERENCE_MODE_SELECT;
+ else
+ uh->reference_mode = AV1_SINGLE_REFERENCE;
+
+ uh->allow_high_precision_mv = FH_FLAG(ctrl_fh, ALLOW_HIGH_PRECISION_MV);
+ uh->allow_intra_bc = FH_FLAG(ctrl_fh, ALLOW_INTRABC);
+ uh->force_integer_mv = FH_FLAG(ctrl_fh, FORCE_INTEGER_MV);
+ uh->allow_screen_content_tools = FH_FLAG(ctrl_fh, ALLOW_SCREEN_CONTENT_TOOLS);
+ uh->error_resilient_mode = FH_FLAG(ctrl_fh, ERROR_RESILIENT_MODE);
+ uh->primary_ref_frame = ctrl_fh->primary_ref_frame;
+ uh->disable_frame_end_update_cdf =
+ FH_FLAG(ctrl_fh, DISABLE_FRAME_END_UPDATE_CDF);
+ uh->disable_cdf_update = FH_FLAG(ctrl_fh, DISABLE_CDF_UPDATE);
+ uh->skip_mode.skip_mode_present = FH_FLAG(ctrl_fh, SKIP_MODE_PRESENT);
+ uh->skip_mode.skip_mode_frame[0] =
+ ctrl_fh->skip_mode_frame[0] - V4L2_AV1_REF_LAST_FRAME;
+ uh->skip_mode.skip_mode_frame[1] =
+ ctrl_fh->skip_mode_frame[1] - V4L2_AV1_REF_LAST_FRAME;
+ uh->skip_mode.skip_mode_allowed = ctrl_fh->skip_mode_frame[0] ? 1 : 0;
+
+ vdec_av1_slice_setup_seg(&uh->seg, &ctrl_fh->segmentation);
+ uh->delta_q_lf.delta_q_present = QUANT_FLAG(&ctrl_fh->quantization, DELTA_Q_PRESENT);
+ uh->delta_q_lf.delta_q_res = 1 << ctrl_fh->quantization.delta_q_res;
+ uh->delta_q_lf.delta_lf_present =
+ BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT);
+ uh->delta_q_lf.delta_lf_res = ctrl_fh->loop_filter.delta_lf_res;
+ uh->delta_q_lf.delta_lf_multi =
+ BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI);
+ vdec_av1_slice_setup_quant(&uh->quant, &ctrl_fh->quantization);
+
+ uh->coded_loss_less = 1;
+ for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
+ uh->quant.qindex[i] = vdec_av1_slice_get_qindex(uh, i);
+ uh->loss_less_array[i] =
+ (uh->quant.qindex[i] == 0 && uh->quant.delta_qydc == 0 &&
+ uh->quant.delta_quac == 0 && uh->quant.delta_qudc == 0 &&
+ uh->quant.delta_qvac == 0 && uh->quant.delta_qvdc == 0);
+
+ if (!uh->loss_less_array[i])
+ uh->coded_loss_less = 0;
+ }
+
+ vdec_av1_slice_setup_lr(&uh->lr, &ctrl_fh->loop_restoration);
+ uh->superres_denom = ctrl_fh->superres_denom;
+ vdec_av1_slice_setup_lf(&uh->loop_filter, &ctrl_fh->loop_filter);
+ vdec_av1_slice_setup_cdef(&uh->cdef, &ctrl_fh->cdef);
+ vdec_av1_slice_setup_tile(frame, &ctrl_fh->tile_info);
+}
+
+static int vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_vsi *vsi)
+{
+ struct v4l2_ctrl_av1_tile_group_entry *ctrl_tge;
+ struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
+ struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
+ struct vdec_av1_slice_tile *tile = &uh->tile;
+ struct v4l2_ctrl *ctrl;
+ u32 tge_size;
+ int i;
+
+ ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
+ if (!ctrl)
+ return -EINVAL;
+
+ tge_size = ctrl->elems;
+ ctrl_tge = (struct v4l2_ctrl_av1_tile_group_entry *)ctrl->p_cur.p;
+
+ tile_group->num_tiles = tile->tile_cols * tile->tile_rows;
+
+ if (tile_group->num_tiles != tge_size ||
+ tile_group->num_tiles > V4L2_AV1_MAX_TILE_COUNT) {
+ mtk_vdec_err(instance->ctx, "invalid tge_size %d, tile_num:%d\n",
+ tge_size, tile_group->num_tiles);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < tge_size; i++) {
+ if (i != ctrl_tge[i].tile_row * vsi->frame.uh.tile.tile_cols +
+ ctrl_tge[i].tile_col) {
+ mtk_vdec_err(instance->ctx, "invalid tge info %d, %d %d %d\n",
+ i, ctrl_tge[i].tile_row, ctrl_tge[i].tile_col,
+ vsi->frame.uh.tile.tile_rows);
+ return -EINVAL;
+ }
+ tile_group->tile_size[i] = ctrl_tge[i].tile_size;
+ tile_group->tile_start_offset[i] = ctrl_tge[i].tile_offset;
+ }
+
+ return 0;
+}
+
+static inline void vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi *vsi)
+{
+ memset(&vsi->state, 0, sizeof(vsi->state));
+}
+
+static void vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs *frame_ref,
+ struct vdec_av1_slice_frame_info *ref_frame_info,
+ struct vdec_av1_slice_uncompressed_header *uh)
+{
+ struct vdec_av1_slice_scale_factors *scale_factors = &frame_ref->scale_factors;
+ u32 ref_upscaled_width = ref_frame_info->upscaled_width;
+ u32 ref_frame_height = ref_frame_info->frame_height;
+ u32 frame_width = uh->frame_width;
+ u32 frame_height = uh->frame_height;
+
+ if (!vdec_av1_slice_need_scale(ref_upscaled_width, ref_frame_height,
+ frame_width, frame_height)) {
+ scale_factors->x_scale = -1;
+ scale_factors->y_scale = -1;
+ scale_factors->is_scaled = 0;
+ return;
+ }
+
+ scale_factors->x_scale =
+ ((ref_upscaled_width << AV1_REF_SCALE_SHIFT) + (frame_width >> 1)) / frame_width;
+ scale_factors->y_scale =
+ ((ref_frame_height << AV1_REF_SCALE_SHIFT) + (frame_height >> 1)) / frame_height;
+ scale_factors->is_scaled =
+ (scale_factors->x_scale != AV1_REF_INVALID_SCALE) &&
+ (scale_factors->y_scale != AV1_REF_INVALID_SCALE) &&
+ (scale_factors->x_scale != AV1_REF_NO_SCALE ||
+ scale_factors->y_scale != AV1_REF_NO_SCALE);
+ scale_factors->x_step =
+ AV1_DIV_ROUND_UP_POW2(scale_factors->x_scale,
+ AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
+ scale_factors->y_step =
+ AV1_DIV_ROUND_UP_POW2(scale_factors->y_scale,
+ AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS);
+}
+
+static unsigned char vdec_av1_slice_get_sign_bias(int a,
+ int b,
+ u8 enable_order_hint,
+ u8 order_hint_bits)
+{
+ int diff = 0;
+ int m = 0;
+ unsigned char result = 0;
+
+ if (!enable_order_hint)
+ return 0;
+
+ diff = a - b;
+ m = 1 << (order_hint_bits - 1);
+ diff = (diff & (m - 1)) - (diff & m);
+
+ if (diff > 0)
+ result = 1;
+
+ return result;
+}
+
+static void vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc *pfc,
+ struct v4l2_ctrl_av1_frame *ctrl_fh)
+{
+ struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
+ struct vdec_av1_slice_frame *frame = &vsi->frame;
+ struct vdec_av1_slice_slot *slots = &vsi->slots;
+ struct vdec_av1_slice_uncompressed_header *uh = &frame->uh;
+ struct vdec_av1_slice_seq_header *seq = &frame->seq;
+ struct vdec_av1_slice_frame_info *cur_frame_info =
+ &slots->frame_info[vsi->slot_id];
+ struct vdec_av1_slice_frame_info *frame_info;
+ int i, slot_id;
+
+ if (uh->frame_is_intra)
+ return;
+
+ for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
+ int ref_idx = ctrl_fh->ref_frame_idx[i];
+
+ pfc->ref_idx[i] = ctrl_fh->reference_frame_ts[ref_idx];
+ slot_id = frame->ref_frame_map[ref_idx];
+ frame_info = &slots->frame_info[slot_id];
+ if (slot_id == AV1_INVALID_IDX) {
+ pr_err(MTK_DBG_V4L2_STR "cannot match reference[%d] 0x%llx\n", i,
+ ctrl_fh->reference_frame_ts[ref_idx]);
+ frame->order_hints[i] = 0;
+ frame->ref_frame_valid[i] = 0;
+ continue;
+ }
+
+ frame->frame_refs[i].ref_fb_idx = slot_id;
+ vdec_av1_slice_setup_scale_factors(&frame->frame_refs[i],
+ frame_info, uh);
+ if (!seq->enable_order_hint)
+ frame->ref_frame_sign_bias[i + 1] = 0;
+ else
+ frame->ref_frame_sign_bias[i + 1] =
+ vdec_av1_slice_get_sign_bias(frame_info->order_hint,
+ uh->order_hint,
+ seq->enable_order_hint,
+ seq->order_hint_bits);
+
+ frame->order_hints[i] = ctrl_fh->order_hints[i + 1];
+ cur_frame_info->order_hints[i] = frame->order_hints[i];
+ frame->ref_frame_valid[i] = 1;
+ }
+}
+
+static void vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi *vsi)
+{
+ struct vdec_av1_slice_frame *frame = &vsi->frame;
+
+ if (frame->uh.primary_ref_frame == AV1_PRIMARY_REF_NONE)
+ frame->prev_fb_idx = AV1_INVALID_IDX;
+ else
+ frame->prev_fb_idx = frame->frame_refs[frame->uh.primary_ref_frame].ref_fb_idx;
+}
+
+static inline void vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_frame *frame)
+{
+ frame->large_scale_tile = 0;
+}
+
+static int vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_pfc *pfc)
+{
+ struct v4l2_ctrl_av1_frame *ctrl_fh;
+ struct v4l2_ctrl_av1_sequence *ctrl_seq;
+ struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
+ int ret = 0;
+
+ /* frame header */
+ ctrl_fh = (struct v4l2_ctrl_av1_frame *)
+ vdec_av1_get_ctrl_ptr(instance->ctx,
+ V4L2_CID_STATELESS_AV1_FRAME);
+ if (IS_ERR(ctrl_fh))
+ return PTR_ERR(ctrl_fh);
+
+ ctrl_seq = (struct v4l2_ctrl_av1_sequence *)
+ vdec_av1_get_ctrl_ptr(instance->ctx,
+ V4L2_CID_STATELESS_AV1_SEQUENCE);
+ if (IS_ERR(ctrl_seq))
+ return PTR_ERR(ctrl_seq);
+
+ /* setup vsi information */
+ vdec_av1_slice_setup_seq(&vsi->frame.seq, ctrl_seq);
+ vdec_av1_slice_setup_uh(instance, &vsi->frame, ctrl_fh);
+ vdec_av1_slice_setup_operating_mode(instance, &vsi->frame);
+
+ vdec_av1_slice_setup_state(vsi);
+ vdec_av1_slice_setup_slot(instance, vsi, ctrl_fh);
+ vdec_av1_slice_setup_ref(pfc, ctrl_fh);
+ vdec_av1_slice_get_previous(vsi);
+
+ pfc->seq = instance->seq;
+ instance->seq++;
+
+ return ret;
+}
+
+static void vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_vsi *vsi,
+ struct mtk_vcodec_mem *bs,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct vdec_av1_slice_work_buffer *work_buffer;
+ int i;
+
+ vsi->bs.dma_addr = bs->dma_addr;
+ vsi->bs.size = bs->size;
+
+ vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
+ vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
+ vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
+ /* used to store trans end */
+ vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
+ vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
+ vsi->err_map.size = lat_buf->wdma_err_addr.size;
+ vsi->rd_mv.dma_addr = lat_buf->rd_mv_addr.dma_addr;
+ vsi->rd_mv.size = lat_buf->rd_mv_addr.size;
+
+ vsi->row_info.buf = 0;
+ vsi->row_info.size = 0;
+
+ work_buffer = vsi->work_buffer;
+
+ for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
+ work_buffer[i].mv_addr.buf = instance->mv[i].dma_addr;
+ work_buffer[i].mv_addr.size = instance->mv[i].size;
+ work_buffer[i].segid_addr.buf = instance->seg[i].dma_addr;
+ work_buffer[i].segid_addr.size = instance->seg[i].size;
+ work_buffer[i].cdf_addr.buf = instance->cdf[i].dma_addr;
+ work_buffer[i].cdf_addr.size = instance->cdf[i].size;
+ }
+
+ vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr;
+ vsi->cdf_tmp.size = instance->cdf_temp.size;
+
+ vsi->tile.buf = instance->tile.dma_addr;
+ vsi->tile.size = instance->tile.size;
+ memcpy(lat_buf->tile_addr.va, instance->tile.va, 64 * instance->tile_group.num_tiles);
+
+ vsi->cdf_table.buf = instance->cdf_table.dma_addr;
+ vsi->cdf_table.size = instance->cdf_table.size;
+ vsi->iq_table.buf = instance->iq_table.dma_addr;
+ vsi->iq_table.size = instance->iq_table.size;
+}
+
+static void vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_vsi *vsi)
+{
+ struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
+ struct mtk_vcodec_mem *buf;
+
+ /* reset segment buffer */
+ if (uh->primary_ref_frame == AV1_PRIMARY_REF_NONE || !uh->seg.segmentation_enabled) {
+ mtk_vdec_debug(instance->ctx, "reset seg %d\n", vsi->slot_id);
+ if (vsi->slot_id != AV1_INVALID_IDX) {
+ buf = &instance->seg[vsi->slot_id];
+ memset(buf->va, 0, buf->size);
+ }
+ }
+}
+
+static void vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_vsi *vsi,
+ struct mtk_vcodec_mem *bs)
+{
+ struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group;
+ struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh;
+ struct vdec_av1_slice_tile *tile = &uh->tile;
+ u32 tile_num, tile_row, tile_col;
+ u32 allow_update_cdf = 0;
+ u32 sb_boundary_x_m1 = 0, sb_boundary_y_m1 = 0;
+ int tile_info_base;
+ u64 tile_buf_pa;
+ u32 *tile_info_buf = instance->tile.va;
+ u64 pa = (u64)bs->dma_addr;
+
+ if (uh->disable_cdf_update == 0)
+ allow_update_cdf = 1;
+
+ for (tile_num = 0; tile_num < tile_group->num_tiles; tile_num++) {
+ /* each uint32 takes place of 4 bytes */
+ tile_info_base = (AV1_TILE_BUF_SIZE * tile_num) >> 2;
+ tile_row = tile_num / tile->tile_cols;
+ tile_col = tile_num % tile->tile_cols;
+ tile_info_buf[tile_info_base + 0] = (tile_group->tile_size[tile_num] << 3);
+ tile_buf_pa = pa + tile_group->tile_start_offset[tile_num];
+
+ /* save av1 tile high 4bits(bit 32-35) address in lower 4 bits position
+ * and clear original for hw requirement.
+ */
+ tile_info_buf[tile_info_base + 1] = (tile_buf_pa & 0xFFFFFFF0ull) |
+ ((tile_buf_pa & 0xF00000000ull) >> 32);
+ tile_info_buf[tile_info_base + 2] = (tile_buf_pa & 0xFull) << 3;
+
+ sb_boundary_x_m1 =
+ (tile->mi_col_starts[tile_col + 1] - tile->mi_col_starts[tile_col] - 1) &
+ 0x3f;
+ sb_boundary_y_m1 =
+ (tile->mi_row_starts[tile_row + 1] - tile->mi_row_starts[tile_row] - 1) &
+ 0x1ff;
+
+ tile_info_buf[tile_info_base + 3] = (sb_boundary_y_m1 << 7) | sb_boundary_x_m1;
+ tile_info_buf[tile_info_base + 4] = ((allow_update_cdf << 18) | (1 << 16));
+
+ if (tile_num == tile->context_update_tile_id &&
+ uh->disable_frame_end_update_cdf == 0)
+ tile_info_buf[tile_info_base + 4] |= (1 << 17);
+
+ mtk_vdec_debug(instance->ctx, "// tile buf %d pos(%dx%d) offset 0x%x\n",
+ tile_num, tile_row, tile_col, tile_info_base);
+ mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
+ tile_info_buf[tile_info_base + 0],
+ tile_info_buf[tile_info_base + 1],
+ tile_info_buf[tile_info_base + 2],
+ tile_info_buf[tile_info_base + 3]);
+ mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n",
+ tile_info_buf[tile_info_base + 4],
+ tile_info_buf[tile_info_base + 5],
+ tile_info_buf[tile_info_base + 6],
+ tile_info_buf[tile_info_base + 7]);
+ }
+}
+
+static int vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance *instance,
+ struct mtk_vcodec_mem *bs,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_av1_slice_pfc *pfc)
+{
+ struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
+ int ret;
+
+ ret = vdec_av1_slice_setup_lat_from_src_buf(instance, vsi, lat_buf);
+ if (ret)
+ return ret;
+
+ ret = vdec_av1_slice_setup_pfc(instance, pfc);
+ if (ret)
+ return ret;
+
+ ret = vdec_av1_slice_setup_tile_group(instance, vsi);
+ if (ret)
+ return ret;
+
+ ret = vdec_av1_slice_alloc_working_buffer(instance, vsi);
+ if (ret)
+ return ret;
+
+ vdec_av1_slice_setup_seg_buffer(instance, vsi);
+ vdec_av1_slice_setup_tile_buffer(instance, vsi, bs);
+ vdec_av1_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
+
+ return 0;
+}
+
+static int vdec_av1_slice_update_lat(struct vdec_av1_slice_instance *instance,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_av1_slice_pfc *pfc)
+{
+ struct vdec_av1_slice_vsi *vsi;
+
+ vsi = &pfc->vsi;
+ mtk_vdec_debug(instance->ctx, "frame %u LAT CRC 0x%08x, output size is %d\n",
+ pfc->seq, vsi->state.crc[0], vsi->state.out_size);
+
+ /* buffer full, need to re-decode */
+ if (vsi->state.full) {
+ /* buffer not enough */
+ if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == vsi->ube.size)
+ return -ENOMEM;
+ return -EAGAIN;
+ }
+
+ instance->width = vsi->frame.uh.upscaled_width;
+ instance->height = vsi->frame.uh.frame_height;
+ instance->frame_type = vsi->frame.uh.frame_type;
+
+ return 0;
+}
+
+static int vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance *instance,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct vb2_v4l2_buffer *dst;
+
+ dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
+ if (!dst)
+ return -EINVAL;
+
+ v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
+
+ return 0;
+}
+
+static int vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance *instance,
+ struct vdec_av1_slice_pfc *pfc,
+ struct vdec_av1_slice_vsi *vsi,
+ struct vdec_fb *fb,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct vb2_buffer *vb;
+ struct vb2_queue *vq;
+ int w, h, plane, size;
+ int i;
+
+ plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
+ w = vsi->frame.uh.upscaled_width;
+ h = vsi->frame.uh.frame_height;
+ size = ALIGN(w, VCODEC_DEC_ALIGNED_64) * ALIGN(h, VCODEC_DEC_ALIGNED_64);
+
+ /* frame buffer */
+ vsi->fb.y.dma_addr = fb->base_y.dma_addr;
+ if (plane == 1)
+ vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
+ else
+ vsi->fb.c.dma_addr = fb->base_c.dma_addr;
+
+ /* reference buffers */
+ vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ if (!vq)
+ return -EINVAL;
+
+ /* get current output buffer */
+ vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
+ if (!vb)
+ return -EINVAL;
+
+ /* get buffer address from vb2buf */
+ for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
+ struct vdec_av1_slice_fb *vref = &vsi->ref[i];
+
+ vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
+ if (!vb) {
+ memset(vref, 0, sizeof(*vref));
+ continue;
+ }
+
+ vref->y.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
+ if (plane == 1)
+ vref->c.dma_addr = vref->y.dma_addr + size;
+ else
+ vref->c.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1);
+ }
+ vsi->tile.dma_addr = lat_buf->tile_addr.dma_addr;
+ vsi->tile.size = lat_buf->tile_addr.size;
+
+ return 0;
+}
+
+static int vdec_av1_slice_setup_core(struct vdec_av1_slice_instance *instance,
+ struct vdec_fb *fb,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_av1_slice_pfc *pfc)
+{
+ struct vdec_av1_slice_vsi *vsi = &pfc->vsi;
+ int ret;
+
+ ret = vdec_av1_slice_setup_core_to_dst_buf(instance, lat_buf);
+ if (ret)
+ return ret;
+
+ ret = vdec_av1_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int vdec_av1_slice_update_core(struct vdec_av1_slice_instance *instance,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_av1_slice_pfc *pfc)
+{
+ struct vdec_av1_slice_vsi *vsi = instance->core_vsi;
+
+ mtk_vdec_debug(instance->ctx, "frame %u Y_CRC %08x %08x %08x %08x\n",
+ pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
+ vsi->state.crc[2], vsi->state.crc[3]);
+ mtk_vdec_debug(instance->ctx, "frame %u C_CRC %08x %08x %08x %08x\n",
+ pfc->seq, vsi->state.crc[8], vsi->state.crc[9],
+ vsi->state.crc[10], vsi->state.crc[11]);
+
+ return 0;
+}
+
+static int vdec_av1_slice_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_av1_slice_instance *instance;
+ struct vdec_av1_slice_init_vsi *vsi;
+ int ret;
+
+ instance = kzalloc(sizeof(*instance), GFP_KERNEL);
+ if (!instance)
+ return -ENOMEM;
+
+ instance->ctx = ctx;
+ instance->vpu.id = SCP_IPI_VDEC_LAT;
+ instance->vpu.core_id = SCP_IPI_VDEC_CORE;
+ instance->vpu.ctx = ctx;
+ instance->vpu.codec_type = ctx->current_codec;
+
+ ret = vpu_dec_init(&instance->vpu);
+ if (ret) {
+ mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret);
+ goto error_vpu_init;
+ }
+
+ /* init vsi and global flags */
+ vsi = instance->vpu.vsi;
+ if (!vsi) {
+ mtk_vdec_err(ctx, "failed to get AV1 vsi\n");
+ ret = -EINVAL;
+ goto error_vsi;
+ }
+ instance->init_vsi = vsi;
+ instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, (u32)vsi->core_vsi);
+
+ if (!instance->core_vsi) {
+ mtk_vdec_err(ctx, "failed to get AV1 core vsi\n");
+ ret = -EINVAL;
+ goto error_vsi;
+ }
+
+ if (vsi->vsi_size != sizeof(struct vdec_av1_slice_vsi))
+ mtk_vdec_err(ctx, "remote vsi size 0x%x mismatch! expected: 0x%zx\n",
+ vsi->vsi_size, sizeof(struct vdec_av1_slice_vsi));
+
+ instance->irq_enabled = 1;
+ instance->inneracing_mode = IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability);
+
+ mtk_vdec_debug(ctx, "vsi 0x%p core_vsi 0x%llx 0x%p, inneracing_mode %d\n",
+ vsi, vsi->core_vsi, instance->core_vsi, instance->inneracing_mode);
+
+ ret = vdec_av1_slice_init_cdf_table(instance);
+ if (ret)
+ goto error_vsi;
+
+ ret = vdec_av1_slice_init_iq_table(instance);
+ if (ret)
+ goto error_vsi;
+
+ ctx->drv_handle = instance;
+
+ return 0;
+error_vsi:
+ vpu_dec_deinit(&instance->vpu);
+error_vpu_init:
+ kfree(instance);
+
+ return ret;
+}
+
+static void vdec_av1_slice_deinit(void *h_vdec)
+{
+ struct vdec_av1_slice_instance *instance = h_vdec;
+
+ if (!instance)
+ return;
+ mtk_vdec_debug(instance->ctx, "h_vdec 0x%p\n", h_vdec);
+ vpu_dec_deinit(&instance->vpu);
+ vdec_av1_slice_free_working_buffer(instance);
+ vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
+ kfree(instance);
+}
+
+static int vdec_av1_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_av1_slice_instance *instance = h_vdec;
+ int i;
+
+ mtk_vdec_debug(instance->ctx, "flush ...\n");
+
+ vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);
+
+ for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++)
+ vdec_av1_slice_clear_fb(&instance->slots.frame_info[i]);
+
+ return vpu_dec_reset(&instance->vpu);
+}
+
+static void vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance *instance)
+{
+ struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
+ u32 data[3];
+
+ mtk_vdec_debug(ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h);
+
+ data[0] = ctx->picinfo.pic_w;
+ data[1] = ctx->picinfo.pic_h;
+ data[2] = ctx->capture_fourcc;
+ vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
+
+ ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
+ ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
+}
+
+static inline void vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance *instance,
+ u32 *dpb_sz)
+{
+ /* refer av1 specification */
+ *dpb_sz = V4L2_AV1_TOTAL_REFS_PER_FRAME + 1;
+}
+
+static void vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance *instance,
+ struct v4l2_rect *cr)
+{
+ struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
+
+ cr->left = 0;
+ cr->top = 0;
+ cr->width = ctx->picinfo.pic_w;
+ cr->height = ctx->picinfo.pic_h;
+
+ mtk_vdec_debug(ctx, "l=%d, t=%d, w=%d, h=%d\n",
+ cr->left, cr->top, cr->width, cr->height);
+}
+
+static int vdec_av1_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
+{
+ struct vdec_av1_slice_instance *instance = h_vdec;
+
+ switch (type) {
+ case GET_PARAM_PIC_INFO:
+ vdec_av1_slice_get_pic_info(instance);
+ break;
+ case GET_PARAM_DPB_SIZE:
+ vdec_av1_slice_get_dpb_size(instance, out);
+ break;
+ case GET_PARAM_CROP_INFO:
+ vdec_av1_slice_get_crop_info(instance, out);
+ break;
+ default:
+ mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vdec_av1_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_av1_slice_instance *instance = h_vdec;
+ struct vdec_lat_buf *lat_buf;
+ struct vdec_av1_slice_pfc *pfc;
+ struct vdec_av1_slice_vsi *vsi;
+ struct mtk_vcodec_dec_ctx *ctx;
+ int ret;
+
+ if (!instance || !instance->ctx)
+ return -EINVAL;
+
+ ctx = instance->ctx;
+ /* init msgQ for the first time */
+ if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
+ vdec_av1_slice_core_decode, sizeof(*pfc))) {
+ mtk_vdec_err(ctx, "failed to init AV1 msg queue\n");
+ return -ENOMEM;
+ }
+
+ /* bs NULL means flush decoder */
+ if (!bs)
+ return vdec_av1_slice_flush(h_vdec, bs, fb, res_chg);
+
+ lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx);
+ if (!lat_buf) {
+ mtk_vdec_err(ctx, "failed to get AV1 lat buf\n");
+ return -EAGAIN;
+ }
+ pfc = (struct vdec_av1_slice_pfc *)lat_buf->private_data;
+ if (!pfc) {
+ ret = -EINVAL;
+ goto err_free_fb_out;
+ }
+ vsi = &pfc->vsi;
+
+ ret = vdec_av1_slice_setup_lat(instance, bs, lat_buf, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "failed to setup AV1 lat ret %d\n", ret);
+ goto err_free_fb_out;
+ }
+
+ vdec_av1_slice_vsi_to_remote(vsi, instance->vsi);
+ ret = vpu_dec_start(&instance->vpu, NULL, 0);
+ if (ret) {
+ mtk_vdec_err(ctx, "failed to dec AV1 ret %d\n", ret);
+ goto err_free_fb_out;
+ }
+ if (instance->inneracing_mode)
+ vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
+
+ if (instance->irq_enabled) {
+ ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS,
+ MTK_VDEC_LAT0);
+ /* update remote vsi if decode timeout */
+ if (ret) {
+ mtk_vdec_err(ctx, "AV1 Frame %d decode timeout %d\n", pfc->seq, ret);
+ WRITE_ONCE(instance->vsi->state.timeout, 1);
+ }
+ vpu_dec_end(&instance->vpu);
+ }
+
+ vdec_av1_slice_vsi_from_remote(vsi, instance->vsi);
+ ret = vdec_av1_slice_update_lat(instance, lat_buf, pfc);
+
+ /* LAT trans full, re-decode */
+ if (ret == -EAGAIN) {
+ mtk_vdec_err(ctx, "AV1 Frame %d trans full\n", pfc->seq);
+ if (!instance->inneracing_mode)
+ vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
+ return 0;
+ }
+
+ /* LAT trans full, no more UBE or decode timeout */
+ if (ret == -ENOMEM || vsi->state.timeout) {
+ mtk_vdec_err(ctx, "AV1 Frame %d insufficient buffer or timeout\n", pfc->seq);
+ if (!instance->inneracing_mode)
+ vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
+ return -EBUSY;
+ }
+ vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
+ mtk_vdec_debug(ctx, "lat dma 1 0x%pad 0x%pad\n",
+ &pfc->vsi.trans.dma_addr, &pfc->vsi.trans.dma_addr_end);
+
+ vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);
+
+ if (!instance->inneracing_mode)
+ vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
+ memcpy(&instance->slots, &vsi->slots, sizeof(instance->slots));
+
+ return 0;
+
+err_free_fb_out:
+ vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
+
+ if (pfc)
+ mtk_vdec_err(ctx, "slice dec number: %d err: %d", pfc->seq, ret);
+
+ return ret;
+}
+
+static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf)
+{
+ struct vdec_av1_slice_instance *instance;
+ struct vdec_av1_slice_pfc *pfc;
+ struct mtk_vcodec_dec_ctx *ctx = NULL;
+ struct vdec_fb *fb = NULL;
+ int ret = -EINVAL;
+
+ if (!lat_buf)
+ return -EINVAL;
+
+ pfc = lat_buf->private_data;
+ ctx = lat_buf->ctx;
+ if (!pfc || !ctx)
+ return -EINVAL;
+
+ instance = ctx->drv_handle;
+ if (!instance)
+ goto err;
+
+ fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
+ if (!fb) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ ret = vdec_av1_slice_setup_core(instance, fb, lat_buf, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "vdec_av1_slice_setup_core\n");
+ goto err;
+ }
+ vdec_av1_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
+ ret = vpu_dec_core(&instance->vpu);
+ if (ret) {
+ mtk_vdec_err(ctx, "vpu_dec_core\n");
+ goto err;
+ }
+
+ if (instance->irq_enabled) {
+ ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS,
+ MTK_VDEC_CORE);
+ /* update remote vsi if decode timeout */
+ if (ret) {
+ mtk_vdec_err(ctx, "AV1 frame %d core timeout\n", pfc->seq);
+ WRITE_ONCE(instance->vsi->state.timeout, 1);
+ }
+ vpu_dec_core_end(&instance->vpu);
+ }
+
+ ret = vdec_av1_slice_update_core(instance, lat_buf, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "vdec_av1_slice_update_core\n");
+ goto err;
+ }
+
+ mtk_vdec_debug(ctx, "core dma_addr_end 0x%pad\n",
+ &instance->core_vsi->trans.dma_addr_end);
+ vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, instance->core_vsi->trans.dma_addr_end);
+
+ ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
+
+ return 0;
+
+err:
+ /* always update read pointer */
+ vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
+
+ if (fb)
+ ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
+
+ return ret;
+}
+
+const struct vdec_common_if vdec_av1_slice_lat_if = {
+ .init = vdec_av1_slice_init,
+ .decode = vdec_av1_slice_lat_decode,
+ .get_param = vdec_av1_slice_get_param,
+ .deinit = vdec_av1_slice_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_if.c
new file mode 100644
index 000000000..bf7dffe60
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_if.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016 MediaTek Inc.
+ * Author: PC Chen <pc.chen@mediatek.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "../vdec_drv_if.h"
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_vpu_if.h"
+#include "../vdec_drv_base.h"
+
+#define NAL_NON_IDR_SLICE 0x01
+#define NAL_IDR_SLICE 0x05
+#define NAL_H264_PPS 0x08
+#define NAL_TYPE(value) ((value) & 0x1F)
+
+#define BUF_PREDICTION_SZ (32 * 1024)
+
+#define MB_UNIT_LEN 16
+
+/* motion vector size (bytes) for every macro block */
+#define HW_MB_STORE_SZ 64
+
+#define H264_MAX_FB_NUM 17
+#define HDR_PARSING_BUF_SZ 1024
+
+#define DEC_ERR_RET(ret) ((ret) >> 16)
+#define H264_ERR_NOT_VALID 3
+
+/**
+ * struct h264_fb - h264 decode frame buffer information
+ * @vdec_fb_va : virtual address of struct vdec_fb
+ * @y_fb_dma : dma address of Y frame buffer (luma)
+ * @c_fb_dma : dma address of C frame buffer (chroma)
+ * @poc : picture order count of frame buffer
+ * @reserved : for 8 bytes alignment
+ */
+struct h264_fb {
+ uint64_t vdec_fb_va;
+ uint64_t y_fb_dma;
+ uint64_t c_fb_dma;
+ int32_t poc;
+ uint32_t reserved;
+};
+
+/**
+ * struct h264_ring_fb_list - ring frame buffer list
+ * @fb_list : frame buffer array
+ * @read_idx : read index
+ * @write_idx : write index
+ * @count : buffer count in list
+ * @reserved : for 8 bytes alignment
+ */
+struct h264_ring_fb_list {
+ struct h264_fb fb_list[H264_MAX_FB_NUM];
+ unsigned int read_idx;
+ unsigned int write_idx;
+ unsigned int count;
+ unsigned int reserved;
+};
+
+/**
+ * struct vdec_h264_dec_info - decode information
+ * @dpb_sz : decoding picture buffer size
+ * @resolution_changed : resolution change happen
+ * @realloc_mv_buf : flag to notify driver to re-allocate mv buffer
+ * @reserved : for 8 bytes alignment
+ * @bs_dma : Input bit-stream buffer dma address
+ * @y_fb_dma : Y frame buffer dma address
+ * @c_fb_dma : C frame buffer dma address
+ * @vdec_fb_va : VDEC frame buffer struct virtual address
+ */
+struct vdec_h264_dec_info {
+ uint32_t dpb_sz;
+ uint32_t resolution_changed;
+ uint32_t realloc_mv_buf;
+ uint32_t reserved;
+ uint64_t bs_dma;
+ uint64_t y_fb_dma;
+ uint64_t c_fb_dma;
+ uint64_t vdec_fb_va;
+};
+
+/**
+ * struct vdec_h264_vsi - shared memory for decode information exchange
+ * between VPU and Host.
+ * The memory is allocated by VPU then mapping to Host
+ * in vpu_dec_init() and freed in vpu_dec_deinit()
+ * by VPU.
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
+ * @hdr_buf : Header parsing buffer (AP-W, VPU-R)
+ * @pred_buf_dma : HW working predication buffer dma address (AP-W, VPU-R)
+ * @mv_buf_dma : HW working motion vector buffer dma address (AP-W, VPU-R)
+ * @list_free : free frame buffer ring list (AP-W/R, VPU-W)
+ * @list_disp : display frame buffer ring list (AP-R, VPU-W)
+ * @dec : decode information (AP-R, VPU-W)
+ * @pic : picture information (AP-R, VPU-W)
+ * @crop : crop information (AP-R, VPU-W)
+ */
+struct vdec_h264_vsi {
+ unsigned char hdr_buf[HDR_PARSING_BUF_SZ];
+ uint64_t pred_buf_dma;
+ uint64_t mv_buf_dma[H264_MAX_FB_NUM];
+ struct h264_ring_fb_list list_free;
+ struct h264_ring_fb_list list_disp;
+ struct vdec_h264_dec_info dec;
+ struct vdec_pic_info pic;
+ struct v4l2_rect crop;
+};
+
+/**
+ * struct vdec_h264_inst - h264 decoder instance
+ * @num_nalu : how many nalus be decoded
+ * @ctx : point to mtk_vcodec_dec_ctx
+ * @pred_buf : HW working predication buffer
+ * @mv_buf : HW working motion vector buffer
+ * @vpu : VPU instance
+ * @vsi : VPU shared information
+ */
+struct vdec_h264_inst {
+ unsigned int num_nalu;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct mtk_vcodec_mem pred_buf;
+ struct mtk_vcodec_mem mv_buf[H264_MAX_FB_NUM];
+ struct vdec_vpu_inst vpu;
+ struct vdec_h264_vsi *vsi;
+};
+
+static unsigned int get_mv_buf_size(unsigned int width, unsigned int height)
+{
+ return HW_MB_STORE_SZ * (width/MB_UNIT_LEN) * (height/MB_UNIT_LEN);
+}
+
+static int allocate_predication_buf(struct vdec_h264_inst *inst)
+{
+ int err = 0;
+
+ inst->pred_buf.size = BUF_PREDICTION_SZ;
+ err = mtk_vcodec_mem_alloc(inst->ctx, &inst->pred_buf);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "failed to allocate ppl buf");
+ return err;
+ }
+
+ inst->vsi->pred_buf_dma = inst->pred_buf.dma_addr;
+ return 0;
+}
+
+static void free_predication_buf(struct vdec_h264_inst *inst)
+{
+ struct mtk_vcodec_mem *mem = NULL;
+
+ inst->vsi->pred_buf_dma = 0;
+ mem = &inst->pred_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+}
+
+static int alloc_mv_buf(struct vdec_h264_inst *inst, struct vdec_pic_info *pic)
+{
+ int i;
+ int err;
+ struct mtk_vcodec_mem *mem = NULL;
+ unsigned int buf_sz = get_mv_buf_size(pic->buf_w, pic->buf_h);
+
+ for (i = 0; i < H264_MAX_FB_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ mem->size = buf_sz;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "failed to allocate mv buf");
+ return err;
+ }
+ inst->vsi->mv_buf_dma[i] = mem->dma_addr;
+ }
+
+ return 0;
+}
+
+static void free_mv_buf(struct vdec_h264_inst *inst)
+{
+ int i;
+ struct mtk_vcodec_mem *mem = NULL;
+
+ for (i = 0; i < H264_MAX_FB_NUM; i++) {
+ inst->vsi->mv_buf_dma[i] = 0;
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ }
+}
+
+static int check_list_validity(struct vdec_h264_inst *inst, bool disp_list)
+{
+ struct h264_ring_fb_list *list;
+
+ list = disp_list ? &inst->vsi->list_disp : &inst->vsi->list_free;
+
+ if (list->count > H264_MAX_FB_NUM ||
+ list->read_idx >= H264_MAX_FB_NUM ||
+ list->write_idx >= H264_MAX_FB_NUM) {
+ mtk_vdec_err(inst->ctx, "%s list err: cnt=%d r_idx=%d w_idx=%d",
+ disp_list ? "disp" : "free", list->count,
+ list->read_idx, list->write_idx);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void put_fb_to_free(struct vdec_h264_inst *inst, struct vdec_fb *fb)
+{
+ struct h264_ring_fb_list *list;
+
+ if (fb) {
+ if (check_list_validity(inst, false))
+ return;
+
+ list = &inst->vsi->list_free;
+ if (list->count == H264_MAX_FB_NUM) {
+ mtk_vdec_err(inst->ctx, "[FB] put fb free_list full");
+ return;
+ }
+
+ mtk_vdec_debug(inst->ctx, "[FB] put fb into free_list @(%p, %llx)",
+ fb->base_y.va, (u64)fb->base_y.dma_addr);
+
+ list->fb_list[list->write_idx].vdec_fb_va = (u64)(uintptr_t)fb;
+ list->write_idx = (list->write_idx == H264_MAX_FB_NUM - 1) ?
+ 0 : list->write_idx + 1;
+ list->count++;
+ }
+}
+
+static void get_pic_info(struct vdec_h264_inst *inst,
+ struct vdec_pic_info *pic)
+{
+ *pic = inst->vsi->pic;
+ mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)",
+ pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h);
+ mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)", pic->fb_sz[0], pic->fb_sz[1]);
+}
+
+static void get_crop_info(struct vdec_h264_inst *inst, struct v4l2_rect *cr)
+{
+ cr->left = inst->vsi->crop.left;
+ cr->top = inst->vsi->crop.top;
+ cr->width = inst->vsi->crop.width;
+ cr->height = inst->vsi->crop.height;
+
+ mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d", cr->left, cr->top,
+ cr->width, cr->height);
+}
+
+static void get_dpb_size(struct vdec_h264_inst *inst, unsigned int *dpb_sz)
+{
+ *dpb_sz = inst->vsi->dec.dpb_sz;
+ mtk_vdec_debug(inst->ctx, "sz=%d", *dpb_sz);
+}
+
+static int vdec_h264_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_h264_inst *inst = NULL;
+ int err;
+
+ inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->ctx = ctx;
+
+ inst->vpu.id = IPI_VDEC_H264;
+ inst->vpu.ctx = ctx;
+
+ err = vpu_dec_init(&inst->vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "vdec_h264 init err=%d", err);
+ goto error_free_inst;
+ }
+
+ inst->vsi = (struct vdec_h264_vsi *)inst->vpu.vsi;
+ err = allocate_predication_buf(inst);
+ if (err)
+ goto error_deinit;
+
+ mtk_vdec_debug(ctx, "H264 Instance >> %p", inst);
+
+ ctx->drv_handle = inst;
+ return 0;
+
+error_deinit:
+ vpu_dec_deinit(&inst->vpu);
+
+error_free_inst:
+ kfree(inst);
+ return err;
+}
+
+static void vdec_h264_deinit(void *h_vdec)
+{
+ struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec;
+
+ vpu_dec_deinit(&inst->vpu);
+ free_predication_buf(inst);
+ free_mv_buf(inst);
+
+ kfree(inst);
+}
+
+static int find_start_code(unsigned char *data, unsigned int data_sz)
+{
+ if (data_sz > 3 && data[0] == 0 && data[1] == 0 && data[2] == 1)
+ return 3;
+
+ if (data_sz > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 &&
+ data[3] == 1)
+ return 4;
+
+ return -1;
+}
+
+static int vdec_h264_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+ int nal_start_idx = 0;
+ int err = 0;
+ unsigned int nal_start;
+ unsigned int nal_type;
+ unsigned char *buf;
+ unsigned int buf_sz;
+ unsigned int data[2];
+ uint64_t vdec_fb_va = (u64)(uintptr_t)fb;
+ uint64_t y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0;
+ uint64_t c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0;
+
+ mtk_vdec_debug(inst->ctx, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p",
+ ++inst->num_nalu, y_fb_dma, c_fb_dma, fb);
+
+ /* bs NULL means flush decoder */
+ if (bs == NULL)
+ return vpu_dec_reset(vpu);
+
+ buf = (unsigned char *)bs->va;
+ buf_sz = bs->size;
+ nal_start_idx = find_start_code(buf, buf_sz);
+ if (nal_start_idx < 0) {
+ mtk_vdec_err(inst->ctx, "invalid nal start code");
+ err = -EIO;
+ goto err_free_fb_out;
+ }
+
+ nal_start = buf[nal_start_idx];
+ nal_type = NAL_TYPE(buf[nal_start_idx]);
+ mtk_vdec_debug(inst->ctx, "\n + NALU[%d] type %d +\n", inst->num_nalu,
+ nal_type);
+
+ if (nal_type == NAL_H264_PPS) {
+ buf_sz -= nal_start_idx;
+ if (buf_sz > HDR_PARSING_BUF_SZ) {
+ err = -EILSEQ;
+ goto err_free_fb_out;
+ }
+ memcpy(inst->vsi->hdr_buf, buf + nal_start_idx, buf_sz);
+ }
+
+ inst->vsi->dec.bs_dma = (uint64_t)bs->dma_addr;
+ inst->vsi->dec.y_fb_dma = y_fb_dma;
+ inst->vsi->dec.c_fb_dma = c_fb_dma;
+ inst->vsi->dec.vdec_fb_va = vdec_fb_va;
+
+ data[0] = buf_sz;
+ data[1] = nal_start;
+ err = vpu_dec_start(vpu, data, 2);
+ if (err) {
+ if (err > 0 && (DEC_ERR_RET(err) == H264_ERR_NOT_VALID)) {
+ mtk_vdec_err(inst->ctx, "- error bitstream - err = %d -", err);
+ err = -EIO;
+ }
+ goto err_free_fb_out;
+ }
+
+ *res_chg = inst->vsi->dec.resolution_changed;
+ if (*res_chg) {
+ struct vdec_pic_info pic;
+
+ mtk_vdec_debug(inst->ctx, "- resolution changed -");
+ get_pic_info(inst, &pic);
+
+ if (inst->vsi->dec.realloc_mv_buf) {
+ err = alloc_mv_buf(inst, &pic);
+ if (err)
+ goto err_free_fb_out;
+ }
+ }
+
+ if (nal_type == NAL_NON_IDR_SLICE || nal_type == NAL_IDR_SLICE) {
+ /* wait decoder done interrupt */
+ err = mtk_vcodec_wait_for_done_ctx(inst->ctx,
+ MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, 0);
+ if (err)
+ goto err_free_fb_out;
+
+ vpu_dec_end(vpu);
+ }
+
+ mtk_vdec_debug(inst->ctx, "\n - NALU[%d] type=%d -\n", inst->num_nalu, nal_type);
+ return 0;
+
+err_free_fb_out:
+ put_fb_to_free(inst, fb);
+ mtk_vdec_err(inst->ctx, "\n - NALU[%d] err=%d -\n", inst->num_nalu, err);
+ return err;
+}
+
+static void vdec_h264_get_fb(struct vdec_h264_inst *inst,
+ struct h264_ring_fb_list *list,
+ bool disp_list, struct vdec_fb **out_fb)
+{
+ struct vdec_fb *fb;
+
+ if (check_list_validity(inst, disp_list))
+ return;
+
+ if (list->count == 0) {
+ mtk_vdec_debug(inst->ctx, "[FB] there is no %s fb", disp_list ? "disp" : "free");
+ *out_fb = NULL;
+ return;
+ }
+
+ fb = (struct vdec_fb *)
+ (uintptr_t)list->fb_list[list->read_idx].vdec_fb_va;
+ fb->status |= (disp_list ? FB_ST_DISPLAY : FB_ST_FREE);
+
+ *out_fb = fb;
+ mtk_vdec_debug(inst->ctx, "[FB] get %s fb st=%d poc=%d %llx",
+ disp_list ? "disp" : "free",
+ fb->status, list->fb_list[list->read_idx].poc,
+ list->fb_list[list->read_idx].vdec_fb_va);
+
+ list->read_idx = (list->read_idx == H264_MAX_FB_NUM - 1) ?
+ 0 : list->read_idx + 1;
+ list->count--;
+}
+
+static int vdec_h264_get_param(void *h_vdec, enum vdec_get_param_type type,
+ void *out)
+{
+ struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec;
+
+ switch (type) {
+ case GET_PARAM_DISP_FRAME_BUFFER:
+ vdec_h264_get_fb(inst, &inst->vsi->list_disp, true, out);
+ break;
+
+ case GET_PARAM_FREE_FRAME_BUFFER:
+ vdec_h264_get_fb(inst, &inst->vsi->list_free, false, out);
+ break;
+
+ case GET_PARAM_PIC_INFO:
+ get_pic_info(inst, out);
+ break;
+
+ case GET_PARAM_DPB_SIZE:
+ get_dpb_size(inst, out);
+ break;
+
+ case GET_PARAM_CROP_INFO:
+ get_crop_info(inst, out);
+ break;
+
+ default:
+ mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+const struct vdec_common_if vdec_h264_if = {
+ .init = vdec_h264_init,
+ .decode = vdec_h264_decode,
+ .get_param = vdec_h264_get_param,
+ .deinit = vdec_h264_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.c
new file mode 100644
index 000000000..5ca20d75d
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Yunfei Dong <yunfei.dong@mediatek.com>
+ */
+
+#include "vdec_h264_req_common.h"
+
+/* get used parameters for sps/pps */
+#define GET_MTK_VDEC_FLAG(cond, flag) \
+ { dst_param->cond = ((src_param->flags & flag) ? (1) : (0)); }
+#define GET_MTK_VDEC_PARAM(param) \
+ { dst_param->param = src_param->param; }
+
+void mtk_vdec_h264_get_ref_list(u8 *ref_list,
+ const struct v4l2_h264_reference *v4l2_ref_list,
+ int num_valid)
+{
+ u32 i;
+
+ /*
+ * TODO The firmware does not support field decoding. Future
+ * implementation must use v4l2_ref_list[i].fields to obtain
+ * the reference field parity.
+ */
+
+ for (i = 0; i < num_valid; i++)
+ ref_list[i] = v4l2_ref_list[i].index;
+
+ /*
+ * The firmware expects unused reflist entries to have the value 0x20.
+ */
+ memset(&ref_list[num_valid], 0x20, 32 - num_valid);
+}
+
+void *mtk_vdec_h264_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id)
+{
+ struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id);
+
+ if (!ctrl)
+ return ERR_PTR(-EINVAL);
+
+ return ctrl->p_cur.p;
+}
+
+void mtk_vdec_h264_fill_dpb_info(struct mtk_vcodec_dec_ctx *ctx,
+ struct slice_api_h264_decode_param *decode_params,
+ struct mtk_h264_dpb_info *h264_dpb_info)
+{
+ const struct slice_h264_dpb_entry *dpb;
+ struct vb2_queue *vq;
+ struct vb2_buffer *vb;
+ struct vb2_v4l2_buffer *vb2_v4l2;
+ int index;
+
+ vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+
+ for (index = 0; index < V4L2_H264_NUM_DPB_ENTRIES; index++) {
+ dpb = &decode_params->dpb[index];
+ if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) {
+ h264_dpb_info[index].reference_flag = 0;
+ continue;
+ }
+
+ vb = vb2_find_buffer(vq, dpb->reference_ts);
+ if (!vb) {
+ dev_err(&ctx->dev->plat_dev->dev,
+ "Reference invalid: dpb_index(%d) reference_ts(%lld)",
+ index, dpb->reference_ts);
+ continue;
+ }
+
+ /* 1 for short term reference, 2 for long term reference */
+ if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
+ h264_dpb_info[index].reference_flag = 1;
+ else
+ h264_dpb_info[index].reference_flag = 2;
+
+ vb2_v4l2 = container_of(vb, struct vb2_v4l2_buffer, vb2_buf);
+ h264_dpb_info[index].field = vb2_v4l2->field;
+
+ h264_dpb_info[index].y_dma_addr =
+ vb2_dma_contig_plane_dma_addr(vb, 0);
+ if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2)
+ h264_dpb_info[index].c_dma_addr =
+ vb2_dma_contig_plane_dma_addr(vb, 1);
+ else
+ h264_dpb_info[index].c_dma_addr =
+ h264_dpb_info[index].y_dma_addr +
+ ctx->picinfo.fb_sz[0];
+ }
+}
+
+void mtk_vdec_h264_copy_sps_params(struct mtk_h264_sps_param *dst_param,
+ const struct v4l2_ctrl_h264_sps *src_param)
+{
+ GET_MTK_VDEC_PARAM(chroma_format_idc);
+ GET_MTK_VDEC_PARAM(bit_depth_luma_minus8);
+ GET_MTK_VDEC_PARAM(bit_depth_chroma_minus8);
+ GET_MTK_VDEC_PARAM(log2_max_frame_num_minus4);
+ GET_MTK_VDEC_PARAM(pic_order_cnt_type);
+ GET_MTK_VDEC_PARAM(log2_max_pic_order_cnt_lsb_minus4);
+ GET_MTK_VDEC_PARAM(max_num_ref_frames);
+ GET_MTK_VDEC_PARAM(pic_width_in_mbs_minus1);
+ GET_MTK_VDEC_PARAM(pic_height_in_map_units_minus1);
+
+ GET_MTK_VDEC_FLAG(separate_colour_plane_flag,
+ V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE);
+ GET_MTK_VDEC_FLAG(qpprime_y_zero_transform_bypass_flag,
+ V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS);
+ GET_MTK_VDEC_FLAG(delta_pic_order_always_zero_flag,
+ V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO);
+ GET_MTK_VDEC_FLAG(frame_mbs_only_flag,
+ V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY);
+ GET_MTK_VDEC_FLAG(mb_adaptive_frame_field_flag,
+ V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD);
+ GET_MTK_VDEC_FLAG(direct_8x8_inference_flag,
+ V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE);
+}
+
+void mtk_vdec_h264_copy_pps_params(struct mtk_h264_pps_param *dst_param,
+ const struct v4l2_ctrl_h264_pps *src_param)
+{
+ GET_MTK_VDEC_PARAM(num_ref_idx_l0_default_active_minus1);
+ GET_MTK_VDEC_PARAM(num_ref_idx_l1_default_active_minus1);
+ GET_MTK_VDEC_PARAM(weighted_bipred_idc);
+ GET_MTK_VDEC_PARAM(pic_init_qp_minus26);
+ GET_MTK_VDEC_PARAM(chroma_qp_index_offset);
+ GET_MTK_VDEC_PARAM(second_chroma_qp_index_offset);
+
+ GET_MTK_VDEC_FLAG(entropy_coding_mode_flag,
+ V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE);
+ GET_MTK_VDEC_FLAG(pic_order_present_flag,
+ V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT);
+ GET_MTK_VDEC_FLAG(weighted_pred_flag,
+ V4L2_H264_PPS_FLAG_WEIGHTED_PRED);
+ GET_MTK_VDEC_FLAG(deblocking_filter_control_present_flag,
+ V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT);
+ GET_MTK_VDEC_FLAG(constrained_intra_pred_flag,
+ V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED);
+ GET_MTK_VDEC_FLAG(redundant_pic_cnt_present_flag,
+ V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT);
+ GET_MTK_VDEC_FLAG(transform_8x8_mode_flag,
+ V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE);
+ GET_MTK_VDEC_FLAG(scaling_matrix_present_flag,
+ V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT);
+}
+
+void mtk_vdec_h264_copy_slice_hd_params(struct mtk_h264_slice_hd_param *dst_param,
+ const struct v4l2_ctrl_h264_slice_params *src_param,
+ const struct v4l2_ctrl_h264_decode_params *dec_param)
+{
+ int temp;
+
+ GET_MTK_VDEC_PARAM(first_mb_in_slice);
+ GET_MTK_VDEC_PARAM(slice_type);
+ GET_MTK_VDEC_PARAM(cabac_init_idc);
+ GET_MTK_VDEC_PARAM(slice_qp_delta);
+ GET_MTK_VDEC_PARAM(disable_deblocking_filter_idc);
+ GET_MTK_VDEC_PARAM(slice_alpha_c0_offset_div2);
+ GET_MTK_VDEC_PARAM(slice_beta_offset_div2);
+ GET_MTK_VDEC_PARAM(num_ref_idx_l0_active_minus1);
+ GET_MTK_VDEC_PARAM(num_ref_idx_l1_active_minus1);
+
+ dst_param->frame_num = dec_param->frame_num;
+ dst_param->pic_order_cnt_lsb = dec_param->pic_order_cnt_lsb;
+
+ dst_param->delta_pic_order_cnt_bottom =
+ dec_param->delta_pic_order_cnt_bottom;
+ dst_param->delta_pic_order_cnt0 =
+ dec_param->delta_pic_order_cnt0;
+ dst_param->delta_pic_order_cnt1 =
+ dec_param->delta_pic_order_cnt1;
+
+ temp = dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC;
+ dst_param->field_pic_flag = temp ? 1 : 0;
+
+ temp = dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD;
+ dst_param->bottom_field_flag = temp ? 1 : 0;
+
+ GET_MTK_VDEC_FLAG(direct_spatial_mv_pred_flag,
+ V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED);
+}
+
+void mtk_vdec_h264_copy_scaling_matrix(struct slice_api_h264_scaling_matrix *dst_matrix,
+ const struct v4l2_ctrl_h264_scaling_matrix *src_matrix)
+{
+ memcpy(dst_matrix->scaling_list_4x4, src_matrix->scaling_list_4x4,
+ sizeof(dst_matrix->scaling_list_4x4));
+
+ memcpy(dst_matrix->scaling_list_8x8, src_matrix->scaling_list_8x8,
+ sizeof(dst_matrix->scaling_list_8x8));
+}
+
+void
+mtk_vdec_h264_copy_decode_params(struct slice_api_h264_decode_param *dst_params,
+ const struct v4l2_ctrl_h264_decode_params *src_params,
+ const struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES])
+{
+ struct slice_h264_dpb_entry *dst_entry;
+ const struct v4l2_h264_dpb_entry *src_entry;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dst_params->dpb); i++) {
+ dst_entry = &dst_params->dpb[i];
+ src_entry = &dpb[i];
+
+ dst_entry->reference_ts = src_entry->reference_ts;
+ dst_entry->frame_num = src_entry->frame_num;
+ dst_entry->pic_num = src_entry->pic_num;
+ dst_entry->top_field_order_cnt = src_entry->top_field_order_cnt;
+ dst_entry->bottom_field_order_cnt =
+ src_entry->bottom_field_order_cnt;
+ dst_entry->flags = src_entry->flags;
+ }
+
+ /* num_slices is a leftover from the old H.264 support and is ignored
+ * by the firmware.
+ */
+ dst_params->num_slices = 0;
+ dst_params->nal_ref_idc = src_params->nal_ref_idc;
+ dst_params->top_field_order_cnt = src_params->top_field_order_cnt;
+ dst_params->bottom_field_order_cnt = src_params->bottom_field_order_cnt;
+ dst_params->flags = src_params->flags;
+}
+
+static bool mtk_vdec_h264_dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
+ const struct v4l2_h264_dpb_entry *b)
+{
+ return a->top_field_order_cnt == b->top_field_order_cnt &&
+ a->bottom_field_order_cnt == b->bottom_field_order_cnt;
+}
+
+/*
+ * Move DPB entries of dec_param that refer to a frame already existing in dpb
+ * into the already existing slot in dpb, and move other entries into new slots.
+ *
+ * This function is an adaptation of the similarly-named function in
+ * hantro_h264.c.
+ */
+void mtk_vdec_h264_update_dpb(const struct v4l2_ctrl_h264_decode_params *dec_param,
+ struct v4l2_h264_dpb_entry *dpb)
+{
+ DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
+ DECLARE_BITMAP(in_use, ARRAY_SIZE(dec_param->dpb)) = { 0, };
+ DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
+ unsigned int i, j;
+
+ /* Disable all entries by default, and mark the ones in use. */
+ for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
+ if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
+ set_bit(i, in_use);
+ dpb[i].flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
+ }
+
+ /* Try to match new DPB entries with existing ones by their POCs. */
+ for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
+ const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
+
+ if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
+ continue;
+
+ /*
+ * To cut off some comparisons, iterate only on target DPB
+ * entries were already used.
+ */
+ for_each_set_bit(j, in_use, ARRAY_SIZE(dec_param->dpb)) {
+ struct v4l2_h264_dpb_entry *cdpb;
+
+ cdpb = &dpb[j];
+ if (!mtk_vdec_h264_dpb_entry_match(cdpb, ndpb))
+ continue;
+
+ *cdpb = *ndpb;
+ set_bit(j, used);
+ /* Don't reiterate on this one. */
+ clear_bit(j, in_use);
+ break;
+ }
+
+ if (j == ARRAY_SIZE(dec_param->dpb))
+ set_bit(i, new);
+ }
+
+ /* For entries that could not be matched, use remaining free slots. */
+ for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
+ const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
+ struct v4l2_h264_dpb_entry *cdpb;
+
+ /*
+ * Both arrays are of the same sizes, so there is no way
+ * we can end up with no space in target array, unless
+ * something is buggy.
+ */
+ j = find_first_zero_bit(used, ARRAY_SIZE(dec_param->dpb));
+ if (WARN_ON(j >= ARRAY_SIZE(dec_param->dpb)))
+ return;
+
+ cdpb = &dpb[j];
+ *cdpb = *ndpb;
+ set_bit(j, used);
+ }
+}
+
+unsigned int mtk_vdec_h264_get_mv_buf_size(unsigned int width, unsigned int height)
+{
+ int unit_size = (width / MB_UNIT_LEN) * (height / MB_UNIT_LEN) + 8;
+
+ return HW_MB_STORE_SZ * unit_size;
+}
+
+int mtk_vdec_h264_find_start_code(unsigned char *data, unsigned int data_sz)
+{
+ if (data_sz > 3 && data[0] == 0 && data[1] == 0 && data[2] == 1)
+ return 3;
+
+ if (data_sz > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 &&
+ data[3] == 1)
+ return 4;
+
+ return -1;
+}
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.h b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.h
new file mode 100644
index 000000000..ac82be336
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.h
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Yunfei Dong <yunfei.dong@mediatek.com>
+ */
+
+#ifndef _VDEC_H264_REQ_COMMON_H_
+#define _VDEC_H264_REQ_COMMON_H_
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <media/v4l2-h264.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "../mtk_vcodec_dec_drv.h"
+
+#define NAL_NON_IDR_SLICE 0x01
+#define NAL_IDR_SLICE 0x05
+#define NAL_TYPE(value) ((value) & 0x1F)
+
+#define BUF_PREDICTION_SZ (64 * 4096)
+#define MB_UNIT_LEN 16
+
+/* motion vector size (bytes) for every macro block */
+#define HW_MB_STORE_SZ 64
+
+#define H264_MAX_MV_NUM 32
+
+/**
+ * struct mtk_h264_dpb_info - h264 dpb information
+ *
+ * @y_dma_addr: Y bitstream physical address
+ * @c_dma_addr: CbCr bitstream physical address
+ * @reference_flag: reference picture flag (short/long term reference picture)
+ * @field: field picture flag
+ */
+struct mtk_h264_dpb_info {
+ dma_addr_t y_dma_addr;
+ dma_addr_t c_dma_addr;
+ int reference_flag;
+ int field;
+};
+
+/*
+ * struct mtk_h264_sps_param - parameters for sps
+ */
+struct mtk_h264_sps_param {
+ unsigned char chroma_format_idc;
+ unsigned char bit_depth_luma_minus8;
+ unsigned char bit_depth_chroma_minus8;
+ unsigned char log2_max_frame_num_minus4;
+ unsigned char pic_order_cnt_type;
+ unsigned char log2_max_pic_order_cnt_lsb_minus4;
+ unsigned char max_num_ref_frames;
+ unsigned char separate_colour_plane_flag;
+ unsigned short pic_width_in_mbs_minus1;
+ unsigned short pic_height_in_map_units_minus1;
+ unsigned int max_frame_nums;
+ unsigned char qpprime_y_zero_transform_bypass_flag;
+ unsigned char delta_pic_order_always_zero_flag;
+ unsigned char frame_mbs_only_flag;
+ unsigned char mb_adaptive_frame_field_flag;
+ unsigned char direct_8x8_inference_flag;
+ unsigned char reserved[3];
+};
+
+/*
+ * struct mtk_h264_pps_param - parameters for pps
+ */
+struct mtk_h264_pps_param {
+ unsigned char num_ref_idx_l0_default_active_minus1;
+ unsigned char num_ref_idx_l1_default_active_minus1;
+ unsigned char weighted_bipred_idc;
+ char pic_init_qp_minus26;
+ char chroma_qp_index_offset;
+ char second_chroma_qp_index_offset;
+ unsigned char entropy_coding_mode_flag;
+ unsigned char pic_order_present_flag;
+ unsigned char deblocking_filter_control_present_flag;
+ unsigned char constrained_intra_pred_flag;
+ unsigned char weighted_pred_flag;
+ unsigned char redundant_pic_cnt_present_flag;
+ unsigned char transform_8x8_mode_flag;
+ unsigned char scaling_matrix_present_flag;
+ unsigned char reserved[2];
+};
+
+/*
+ * struct mtk_h264_slice_hd_param - parameters for slice header
+ */
+struct mtk_h264_slice_hd_param {
+ unsigned int first_mb_in_slice;
+ unsigned int field_pic_flag;
+ unsigned int slice_type;
+ unsigned int frame_num;
+ int pic_order_cnt_lsb;
+ int delta_pic_order_cnt_bottom;
+ unsigned int bottom_field_flag;
+ unsigned int direct_spatial_mv_pred_flag;
+ int delta_pic_order_cnt0;
+ int delta_pic_order_cnt1;
+ unsigned int cabac_init_idc;
+ int slice_qp_delta;
+ unsigned int disable_deblocking_filter_idc;
+ int slice_alpha_c0_offset_div2;
+ int slice_beta_offset_div2;
+ unsigned int num_ref_idx_l0_active_minus1;
+ unsigned int num_ref_idx_l1_active_minus1;
+ unsigned int reserved;
+};
+
+/*
+ * struct slice_api_h264_scaling_matrix - parameters for scaling list
+ */
+struct slice_api_h264_scaling_matrix {
+ unsigned char scaling_list_4x4[6][16];
+ unsigned char scaling_list_8x8[6][64];
+};
+
+/*
+ * struct slice_h264_dpb_entry - each dpb information
+ */
+struct slice_h264_dpb_entry {
+ unsigned long long reference_ts;
+ unsigned short frame_num;
+ unsigned short pic_num;
+ /* Note that field is indicated by v4l2_buffer.field */
+ int top_field_order_cnt;
+ int bottom_field_order_cnt;
+ unsigned int flags;
+};
+
+/*
+ * struct slice_api_h264_decode_param - parameters for decode.
+ */
+struct slice_api_h264_decode_param {
+ struct slice_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES];
+ unsigned short num_slices;
+ unsigned short nal_ref_idc;
+ unsigned char ref_pic_list_p0[32];
+ unsigned char ref_pic_list_b0[32];
+ unsigned char ref_pic_list_b1[32];
+ int top_field_order_cnt;
+ int bottom_field_order_cnt;
+ unsigned int flags;
+};
+
+/**
+ * struct h264_fb - h264 decode frame buffer information
+ *
+ * @vdec_fb_va: virtual address of struct vdec_fb
+ * @y_fb_dma: dma address of Y frame buffer (luma)
+ * @c_fb_dma: dma address of C frame buffer (chroma)
+ * @poc: picture order count of frame buffer
+ * @reserved: for 8 bytes alignment
+ */
+struct h264_fb {
+ u64 vdec_fb_va;
+ u64 y_fb_dma;
+ u64 c_fb_dma;
+ s32 poc;
+ u32 reserved;
+};
+
+/**
+ * mtk_vdec_h264_get_ref_list - translate V4L2 reference list
+ *
+ * @ref_list: Mediatek reference picture list
+ * @v4l2_ref_list: V4L2 reference picture list
+ * @num_valid: used reference number
+ */
+void mtk_vdec_h264_get_ref_list(u8 *ref_list,
+ const struct v4l2_h264_reference *v4l2_ref_list,
+ int num_valid);
+
+/**
+ * mtk_vdec_h264_get_ctrl_ptr - get each CID contrl address.
+ *
+ * @ctx: v4l2 ctx
+ * @id: CID control ID
+ *
+ * Return: returns CID ctrl address.
+ */
+void *mtk_vdec_h264_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id);
+
+/**
+ * mtk_vdec_h264_fill_dpb_info - get each CID contrl address.
+ *
+ * @ctx: v4l2 ctx
+ * @decode_params: slice decode params
+ * @h264_dpb_info: dpb buffer information
+ */
+void mtk_vdec_h264_fill_dpb_info(struct mtk_vcodec_dec_ctx *ctx,
+ struct slice_api_h264_decode_param *decode_params,
+ struct mtk_h264_dpb_info *h264_dpb_info);
+
+/**
+ * mtk_vdec_h264_copy_sps_params - get sps params.
+ *
+ * @dst_param: sps params for hw decoder
+ * @src_param: sps params from user driver
+ */
+void mtk_vdec_h264_copy_sps_params(struct mtk_h264_sps_param *dst_param,
+ const struct v4l2_ctrl_h264_sps *src_param);
+
+/**
+ * mtk_vdec_h264_copy_pps_params - get pps params.
+ *
+ * @dst_param: pps params for hw decoder
+ * @src_param: pps params from user driver
+ */
+void mtk_vdec_h264_copy_pps_params(struct mtk_h264_pps_param *dst_param,
+ const struct v4l2_ctrl_h264_pps *src_param);
+
+/**
+ * mtk_vdec_h264_copy_slice_hd_params - get slice header params.
+ *
+ * @dst_param: slice params for hw decoder
+ * @src_param: slice params from user driver
+ * @dec_param: decode params from user driver
+ */
+void mtk_vdec_h264_copy_slice_hd_params(struct mtk_h264_slice_hd_param *dst_param,
+ const struct v4l2_ctrl_h264_slice_params *src_param,
+ const struct v4l2_ctrl_h264_decode_params *dec_param);
+
+/**
+ * mtk_vdec_h264_copy_scaling_matrix - get each CID contrl address.
+ *
+ * @dst_matrix: scaling list params for hw decoder
+ * @src_matrix: scaling list params from user driver
+ */
+void mtk_vdec_h264_copy_scaling_matrix(struct slice_api_h264_scaling_matrix *dst_matrix,
+ const struct v4l2_ctrl_h264_scaling_matrix *src_matrix);
+
+/**
+ * mtk_vdec_h264_copy_decode_params - get decode params.
+ *
+ * @dst_params: dst params for hw decoder
+ * @src_params: decode params from user driver
+ * @dpb: dpb information
+ */
+void
+mtk_vdec_h264_copy_decode_params(struct slice_api_h264_decode_param *dst_params,
+ const struct v4l2_ctrl_h264_decode_params *src_params,
+ const struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES]);
+
+/**
+ * mtk_vdec_h264_update_dpb - updata dpb list.
+ *
+ * @dec_param: v4l2 control decode params
+ * @dpb: dpb entry informaton
+ */
+void mtk_vdec_h264_update_dpb(const struct v4l2_ctrl_h264_decode_params *dec_param,
+ struct v4l2_h264_dpb_entry *dpb);
+
+/**
+ * mtk_vdec_h264_find_start_code - find h264 start code using sofeware.
+ *
+ * @data: input buffer address
+ * @data_sz: input buffer size
+ *
+ * Return: returns start code position.
+ */
+int mtk_vdec_h264_find_start_code(unsigned char *data, unsigned int data_sz);
+
+/**
+ * mtk_vdec_h264_get_mv_buf_size - get mv buffer size.
+ *
+ * @width: picture width
+ * @height: picture height
+ *
+ * Return: returns mv buffer size.
+ */
+unsigned int mtk_vdec_h264_get_mv_buf_size(unsigned int width, unsigned int height);
+
+#endif
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c
new file mode 100644
index 000000000..5600f1df6
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/v4l2-h264.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_drv_base.h"
+#include "../vdec_drv_if.h"
+#include "../vdec_vpu_if.h"
+#include "vdec_h264_req_common.h"
+
+/*
+ * struct mtk_h264_dec_slice_param - parameters for decode current frame
+ */
+struct mtk_h264_dec_slice_param {
+ struct mtk_h264_sps_param sps;
+ struct mtk_h264_pps_param pps;
+ struct slice_api_h264_scaling_matrix scaling_matrix;
+ struct slice_api_h264_decode_param decode_params;
+ struct mtk_h264_dpb_info h264_dpb_info[16];
+};
+
+/**
+ * struct vdec_h264_dec_info - decode information
+ * @dpb_sz : decoding picture buffer size
+ * @resolution_changed : resoltion change happen
+ * @realloc_mv_buf : flag to notify driver to re-allocate mv buffer
+ * @cap_num_planes : number planes of capture buffer
+ * @bs_dma : Input bit-stream buffer dma address
+ * @y_fb_dma : Y frame buffer dma address
+ * @c_fb_dma : C frame buffer dma address
+ * @vdec_fb_va : VDEC frame buffer struct virtual address
+ */
+struct vdec_h264_dec_info {
+ u32 dpb_sz;
+ u32 resolution_changed;
+ u32 realloc_mv_buf;
+ u32 cap_num_planes;
+ u64 bs_dma;
+ u64 y_fb_dma;
+ u64 c_fb_dma;
+ u64 vdec_fb_va;
+};
+
+/**
+ * struct vdec_h264_vsi - shared memory for decode information exchange
+ * between VPU and Host.
+ * The memory is allocated by VPU then mapping to Host
+ * in vpu_dec_init() and freed in vpu_dec_deinit()
+ * by VPU.
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
+ * @pred_buf_dma : HW working predication buffer dma address (AP-W, VPU-R)
+ * @mv_buf_dma : HW working motion vector buffer dma address (AP-W, VPU-R)
+ * @dec : decode information (AP-R, VPU-W)
+ * @pic : picture information (AP-R, VPU-W)
+ * @crop : crop information (AP-R, VPU-W)
+ * @h264_slice_params : the parameters that hardware use to decode
+ */
+struct vdec_h264_vsi {
+ u64 pred_buf_dma;
+ u64 mv_buf_dma[H264_MAX_MV_NUM];
+ struct vdec_h264_dec_info dec;
+ struct vdec_pic_info pic;
+ struct v4l2_rect crop;
+ struct mtk_h264_dec_slice_param h264_slice_params;
+};
+
+/**
+ * struct vdec_h264_slice_inst - h264 decoder instance
+ * @num_nalu : how many nalus be decoded
+ * @ctx : point to mtk_vcodec_dec_ctx
+ * @pred_buf : HW working predication buffer
+ * @mv_buf : HW working motion vector buffer
+ * @vpu : VPU instance
+ * @vsi_ctx : Local VSI data for this decoding context
+ * @h264_slice_param : the parameters that hardware use to decode
+ * @dpb : decoded picture buffer used to store reference buffer information
+ */
+struct vdec_h264_slice_inst {
+ unsigned int num_nalu;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct mtk_vcodec_mem pred_buf;
+ struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM];
+ struct vdec_vpu_inst vpu;
+ struct vdec_h264_vsi vsi_ctx;
+ struct mtk_h264_dec_slice_param h264_slice_param;
+
+ struct v4l2_h264_dpb_entry dpb[16];
+};
+
+static int get_vdec_decode_parameters(struct vdec_h264_slice_inst *inst)
+{
+ const struct v4l2_ctrl_h264_decode_params *dec_params;
+ const struct v4l2_ctrl_h264_sps *sps;
+ const struct v4l2_ctrl_h264_pps *pps;
+ const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix;
+ struct mtk_h264_dec_slice_param *slice_param = &inst->h264_slice_param;
+ struct v4l2_h264_reflist_builder reflist_builder;
+ struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN];
+ u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0;
+ u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0;
+ u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1;
+
+ dec_params =
+ mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
+ if (IS_ERR(dec_params))
+ return PTR_ERR(dec_params);
+
+ sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS);
+ if (IS_ERR(sps))
+ return PTR_ERR(sps);
+
+ pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS);
+ if (IS_ERR(pps))
+ return PTR_ERR(pps);
+
+ scaling_matrix =
+ mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
+ if (IS_ERR(scaling_matrix))
+ return PTR_ERR(scaling_matrix);
+
+ mtk_vdec_h264_update_dpb(dec_params, inst->dpb);
+
+ mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps);
+ mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps);
+ mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, scaling_matrix);
+ mtk_vdec_h264_copy_decode_params(&slice_param->decode_params,
+ dec_params, inst->dpb);
+ mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params,
+ slice_param->h264_dpb_info);
+
+ /* Build the reference lists */
+ v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps,
+ inst->dpb);
+ v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist);
+ v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist,
+ v4l2_b1_reflist);
+
+ /* Adapt the built lists to the firmware's expectations */
+ mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid);
+ mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid);
+ mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid);
+
+ memcpy(&inst->vsi_ctx.h264_slice_params, slice_param,
+ sizeof(inst->vsi_ctx.h264_slice_params));
+
+ return 0;
+}
+
+static int allocate_predication_buf(struct vdec_h264_slice_inst *inst)
+{
+ int err;
+
+ inst->pred_buf.size = BUF_PREDICTION_SZ;
+ err = mtk_vcodec_mem_alloc(inst->ctx, &inst->pred_buf);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "failed to allocate ppl buf");
+ return err;
+ }
+
+ inst->vsi_ctx.pred_buf_dma = inst->pred_buf.dma_addr;
+ return 0;
+}
+
+static void free_predication_buf(struct vdec_h264_slice_inst *inst)
+{
+ struct mtk_vcodec_mem *mem = &inst->pred_buf;
+
+ inst->vsi_ctx.pred_buf_dma = 0;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+}
+
+static int alloc_mv_buf(struct vdec_h264_slice_inst *inst,
+ struct vdec_pic_info *pic)
+{
+ int i;
+ int err;
+ struct mtk_vcodec_mem *mem = NULL;
+ unsigned int buf_sz = mtk_vdec_h264_get_mv_buf_size(pic->buf_w, pic->buf_h);
+
+ mtk_v4l2_vdec_dbg(3, inst->ctx, "size = 0x%x", buf_sz);
+ for (i = 0; i < H264_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ mem->size = buf_sz;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "failed to allocate mv buf");
+ return err;
+ }
+ inst->vsi_ctx.mv_buf_dma[i] = mem->dma_addr;
+ }
+
+ return 0;
+}
+
+static void free_mv_buf(struct vdec_h264_slice_inst *inst)
+{
+ int i;
+ struct mtk_vcodec_mem *mem;
+
+ for (i = 0; i < H264_MAX_MV_NUM; i++) {
+ inst->vsi_ctx.mv_buf_dma[i] = 0;
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ }
+}
+
+static void get_pic_info(struct vdec_h264_slice_inst *inst,
+ struct vdec_pic_info *pic)
+{
+ struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
+
+ ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.fb_sz[0] = ctx->picinfo.buf_w * ctx->picinfo.buf_h;
+ ctx->picinfo.fb_sz[1] = ctx->picinfo.fb_sz[0] >> 1;
+ inst->vsi_ctx.dec.cap_num_planes =
+ ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
+
+ *pic = ctx->picinfo;
+ mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)",
+ ctx->picinfo.pic_w, ctx->picinfo.pic_h,
+ ctx->picinfo.buf_w, ctx->picinfo.buf_h);
+ mtk_vdec_debug(inst->ctx, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0],
+ ctx->picinfo.fb_sz[1]);
+
+ if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w ||
+ ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) {
+ inst->vsi_ctx.dec.resolution_changed = true;
+ if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w ||
+ ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h)
+ inst->vsi_ctx.dec.realloc_mv_buf = true;
+
+ mtk_v4l2_vdec_dbg(1, inst->ctx, "ResChg: (%d %d) : old(%d, %d) -> new(%d, %d)",
+ inst->vsi_ctx.dec.resolution_changed,
+ inst->vsi_ctx.dec.realloc_mv_buf,
+ ctx->last_decoded_picinfo.pic_w,
+ ctx->last_decoded_picinfo.pic_h,
+ ctx->picinfo.pic_w, ctx->picinfo.pic_h);
+ }
+}
+
+static void get_crop_info(struct vdec_h264_slice_inst *inst, struct v4l2_rect *cr)
+{
+ cr->left = inst->vsi_ctx.crop.left;
+ cr->top = inst->vsi_ctx.crop.top;
+ cr->width = inst->vsi_ctx.crop.width;
+ cr->height = inst->vsi_ctx.crop.height;
+
+ mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d",
+ cr->left, cr->top, cr->width, cr->height);
+}
+
+static void get_dpb_size(struct vdec_h264_slice_inst *inst, unsigned int *dpb_sz)
+{
+ *dpb_sz = inst->vsi_ctx.dec.dpb_sz;
+ mtk_vdec_debug(inst->ctx, "sz=%d", *dpb_sz);
+}
+
+static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_h264_slice_inst *inst;
+ int err;
+
+ inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->ctx = ctx;
+
+ inst->vpu.id = SCP_IPI_VDEC_H264;
+ inst->vpu.ctx = ctx;
+
+ err = vpu_dec_init(&inst->vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "vdec_h264 init err=%d", err);
+ goto error_free_inst;
+ }
+
+ memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx));
+ inst->vsi_ctx.dec.resolution_changed = true;
+ inst->vsi_ctx.dec.realloc_mv_buf = true;
+
+ err = allocate_predication_buf(inst);
+ if (err)
+ goto error_deinit;
+
+ mtk_vdec_debug(ctx, "struct size = %zu,%zu,%zu,%zu\n",
+ sizeof(struct mtk_h264_sps_param),
+ sizeof(struct mtk_h264_pps_param),
+ sizeof(struct mtk_h264_dec_slice_param),
+ sizeof(struct mtk_h264_dpb_info));
+
+ mtk_vdec_debug(ctx, "H264 Instance >> %p", inst);
+
+ ctx->drv_handle = inst;
+ return 0;
+
+error_deinit:
+ vpu_dec_deinit(&inst->vpu);
+
+error_free_inst:
+ kfree(inst);
+ return err;
+}
+
+static void vdec_h264_slice_deinit(void *h_vdec)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+
+ vpu_dec_deinit(&inst->vpu);
+ free_predication_buf(inst);
+ free_mv_buf(inst);
+
+ kfree(inst);
+}
+
+static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *unused, bool *res_chg)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+ const struct v4l2_ctrl_h264_decode_params *dec_params =
+ mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+ struct mtk_video_dec_buf *src_buf_info;
+ struct mtk_video_dec_buf *dst_buf_info;
+ struct vdec_fb *fb;
+ u32 data[2];
+ u64 y_fb_dma;
+ u64 c_fb_dma;
+ int err;
+
+ inst->num_nalu++;
+ /* bs NULL means flush decoder */
+ if (!bs)
+ return vpu_dec_reset(vpu);
+
+ fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx);
+ src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer);
+ dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer);
+
+ y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0;
+ c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0;
+
+ mtk_vdec_debug(inst->ctx, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p",
+ inst->num_nalu, y_fb_dma, c_fb_dma, fb);
+
+ inst->vsi_ctx.dec.bs_dma = (uint64_t)bs->dma_addr;
+ inst->vsi_ctx.dec.y_fb_dma = y_fb_dma;
+ inst->vsi_ctx.dec.c_fb_dma = c_fb_dma;
+ inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb;
+
+ v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb,
+ &dst_buf_info->m2m_buf.vb, true);
+ err = get_vdec_decode_parameters(inst);
+ if (err)
+ goto err_free_fb_out;
+
+ data[0] = bs->size;
+ /*
+ * Reconstruct the first byte of the NAL unit, as the firmware requests
+ * that information to be passed even though it is present in the stream
+ * itself...
+ */
+ data[1] = (dec_params->nal_ref_idc << 5) |
+ ((dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC)
+ ? 0x5 : 0x1);
+
+ *res_chg = inst->vsi_ctx.dec.resolution_changed;
+ if (*res_chg) {
+ mtk_vdec_debug(inst->ctx, "- resolution changed -");
+ if (inst->vsi_ctx.dec.realloc_mv_buf) {
+ err = alloc_mv_buf(inst, &inst->ctx->picinfo);
+ inst->vsi_ctx.dec.realloc_mv_buf = false;
+ if (err)
+ goto err_free_fb_out;
+ }
+ *res_chg = false;
+ }
+
+ memcpy(inst->vpu.vsi, &inst->vsi_ctx, sizeof(inst->vsi_ctx));
+ err = vpu_dec_start(vpu, data, 2);
+ if (err)
+ goto err_free_fb_out;
+
+ /* wait decoder done interrupt */
+ err = mtk_vcodec_wait_for_done_ctx(inst->ctx,
+ MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, 0);
+ if (err)
+ goto err_free_fb_out;
+ vpu_dec_end(vpu);
+
+ memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx));
+ mtk_vdec_debug(inst->ctx, "\n - NALU[%d]", inst->num_nalu);
+ return 0;
+
+err_free_fb_out:
+ mtk_vdec_err(inst->ctx, "\n - NALU[%d] err=%d -\n", inst->num_nalu, err);
+ return err;
+}
+
+static int vdec_h264_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+
+ switch (type) {
+ case GET_PARAM_PIC_INFO:
+ get_pic_info(inst, out);
+ break;
+
+ case GET_PARAM_DPB_SIZE:
+ get_dpb_size(inst, out);
+ break;
+
+ case GET_PARAM_CROP_INFO:
+ get_crop_info(inst, out);
+ break;
+
+ default:
+ mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+const struct vdec_common_if vdec_h264_slice_if = {
+ .init = vdec_h264_slice_init,
+ .decode = vdec_h264_slice_decode,
+ .get_param = vdec_h264_slice_get_param,
+ .deinit = vdec_h264_slice_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
new file mode 100644
index 000000000..0e741e0dc
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
@@ -0,0 +1,848 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Yunfei Dong <yunfei.dong@mediatek.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <media/v4l2-h264.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_drv_base.h"
+#include "../vdec_drv_if.h"
+#include "../vdec_vpu_if.h"
+#include "vdec_h264_req_common.h"
+
+/**
+ * enum vdec_h264_core_dec_err_type - core decode error type
+ *
+ * @TRANS_BUFFER_FULL: trans buffer is full
+ * @SLICE_HEADER_FULL: slice header buffer is full
+ */
+enum vdec_h264_core_dec_err_type {
+ TRANS_BUFFER_FULL = 1,
+ SLICE_HEADER_FULL,
+};
+
+/**
+ * struct vdec_h264_slice_lat_dec_param - parameters for decode current frame
+ *
+ * @sps: h264 sps syntax parameters
+ * @pps: h264 pps syntax parameters
+ * @slice_header: h264 slice header syntax parameters
+ * @scaling_matrix: h264 scaling list parameters
+ * @decode_params: decoder parameters of each frame used for hardware decode
+ * @h264_dpb_info: dpb reference list
+ */
+struct vdec_h264_slice_lat_dec_param {
+ struct mtk_h264_sps_param sps;
+ struct mtk_h264_pps_param pps;
+ struct mtk_h264_slice_hd_param slice_header;
+ struct slice_api_h264_scaling_matrix scaling_matrix;
+ struct slice_api_h264_decode_param decode_params;
+ struct mtk_h264_dpb_info h264_dpb_info[V4L2_H264_NUM_DPB_ENTRIES];
+};
+
+/**
+ * struct vdec_h264_slice_info - decode information
+ *
+ * @nal_info: nal info of current picture
+ * @timeout: Decode timeout: 1 timeout, 0 no timeount
+ * @bs_buf_size: bitstream size
+ * @bs_buf_addr: bitstream buffer dma address
+ * @y_fb_dma: Y frame buffer dma address
+ * @c_fb_dma: C frame buffer dma address
+ * @vdec_fb_va: VDEC frame buffer struct virtual address
+ * @crc: Used to check whether hardware's status is right
+ */
+struct vdec_h264_slice_info {
+ u16 nal_info;
+ u16 timeout;
+ u32 bs_buf_size;
+ u64 bs_buf_addr;
+ u64 y_fb_dma;
+ u64 c_fb_dma;
+ u64 vdec_fb_va;
+ u32 crc[8];
+};
+
+/**
+ * struct vdec_h264_slice_vsi - shared memory for decode information exchange
+ * between SCP and Host.
+ *
+ * @wdma_err_addr: wdma error dma address
+ * @wdma_start_addr: wdma start dma address
+ * @wdma_end_addr: wdma end dma address
+ * @slice_bc_start_addr: slice bc start dma address
+ * @slice_bc_end_addr: slice bc end dma address
+ * @row_info_start_addr: row info start dma address
+ * @row_info_end_addr: row info end dma address
+ * @trans_start: trans start dma address
+ * @trans_end: trans end dma address
+ * @wdma_end_addr_offset: wdma end address offset
+ *
+ * @mv_buf_dma: HW working motion vector buffer
+ * dma address (AP-W, VPU-R)
+ * @dec: decode information (AP-R, VPU-W)
+ * @h264_slice_params: decode parameters for hw used
+ */
+struct vdec_h264_slice_vsi {
+ /* LAT dec addr */
+ u64 wdma_err_addr;
+ u64 wdma_start_addr;
+ u64 wdma_end_addr;
+ u64 slice_bc_start_addr;
+ u64 slice_bc_end_addr;
+ u64 row_info_start_addr;
+ u64 row_info_end_addr;
+ u64 trans_start;
+ u64 trans_end;
+ u64 wdma_end_addr_offset;
+
+ u64 mv_buf_dma[H264_MAX_MV_NUM];
+ struct vdec_h264_slice_info dec;
+ struct vdec_h264_slice_lat_dec_param h264_slice_params;
+};
+
+/**
+ * struct vdec_h264_slice_share_info - shared information used to exchange
+ * message between lat and core
+ *
+ * @sps: sequence header information from user space
+ * @dec_params: decoder params from user space
+ * @h264_slice_params: decoder params used for hardware
+ * @trans_start: trans start dma address
+ * @trans_end: trans end dma address
+ * @nal_info: nal info of current picture
+ */
+struct vdec_h264_slice_share_info {
+ struct v4l2_ctrl_h264_sps sps;
+ struct v4l2_ctrl_h264_decode_params dec_params;
+ struct vdec_h264_slice_lat_dec_param h264_slice_params;
+ u64 trans_start;
+ u64 trans_end;
+ u16 nal_info;
+};
+
+/**
+ * struct vdec_h264_slice_inst - h264 decoder instance
+ *
+ * @slice_dec_num: how many picture be decoded
+ * @ctx: point to mtk_vcodec_dec_ctx
+ * @pred_buf: HW working predication buffer
+ * @mv_buf: HW working motion vector buffer
+ * @vpu: VPU instance
+ * @vsi: vsi used for lat
+ * @vsi_core: vsi used for core
+ *
+ * @vsi_ctx: Local VSI data for this decoding context
+ * @h264_slice_param: the parameters that hardware use to decode
+ *
+ * @resolution_changed:resolution changed
+ * @realloc_mv_buf: reallocate mv buffer
+ * @cap_num_planes: number of capture queue plane
+ *
+ * @dpb: decoded picture buffer used to store reference
+ * buffer information
+ *@is_field_bitstream: is field bitstream
+ */
+struct vdec_h264_slice_inst {
+ unsigned int slice_dec_num;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct mtk_vcodec_mem pred_buf;
+ struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM];
+ struct vdec_vpu_inst vpu;
+ struct vdec_h264_slice_vsi *vsi;
+ struct vdec_h264_slice_vsi *vsi_core;
+
+ struct vdec_h264_slice_vsi vsi_ctx;
+ struct vdec_h264_slice_lat_dec_param h264_slice_param;
+
+ unsigned int resolution_changed;
+ unsigned int realloc_mv_buf;
+ unsigned int cap_num_planes;
+
+ struct v4l2_h264_dpb_entry dpb[16];
+ bool is_field_bitstream;
+};
+
+static int vdec_h264_slice_fill_decode_parameters(struct vdec_h264_slice_inst *inst,
+ struct vdec_h264_slice_share_info *share_info)
+{
+ struct vdec_h264_slice_lat_dec_param *slice_param = &inst->vsi->h264_slice_params;
+ const struct v4l2_ctrl_h264_decode_params *dec_params;
+ const struct v4l2_ctrl_h264_scaling_matrix *src_matrix;
+ const struct v4l2_ctrl_h264_sps *sps;
+ const struct v4l2_ctrl_h264_pps *pps;
+
+ dec_params =
+ mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
+ if (IS_ERR(dec_params))
+ return PTR_ERR(dec_params);
+
+ src_matrix =
+ mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
+ if (IS_ERR(src_matrix))
+ return PTR_ERR(src_matrix);
+
+ sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS);
+ if (IS_ERR(sps))
+ return PTR_ERR(sps);
+
+ pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS);
+ if (IS_ERR(pps))
+ return PTR_ERR(pps);
+
+ if (dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) {
+ mtk_vdec_err(inst->ctx, "No support for H.264 field decoding.");
+ inst->is_field_bitstream = true;
+ return -EINVAL;
+ }
+
+ mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps);
+ mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps);
+ mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, src_matrix);
+
+ memcpy(&share_info->sps, sps, sizeof(*sps));
+ memcpy(&share_info->dec_params, dec_params, sizeof(*dec_params));
+
+ return 0;
+}
+
+static int get_vdec_sig_decode_parameters(struct vdec_h264_slice_inst *inst)
+{
+ const struct v4l2_ctrl_h264_decode_params *dec_params;
+ const struct v4l2_ctrl_h264_sps *sps;
+ const struct v4l2_ctrl_h264_pps *pps;
+ const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix;
+ struct vdec_h264_slice_lat_dec_param *slice_param = &inst->h264_slice_param;
+ struct v4l2_h264_reflist_builder reflist_builder;
+ struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN];
+ u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0;
+ u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0;
+ u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1;
+
+ dec_params =
+ mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
+ if (IS_ERR(dec_params))
+ return PTR_ERR(dec_params);
+
+ sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS);
+ if (IS_ERR(sps))
+ return PTR_ERR(sps);
+
+ pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS);
+ if (IS_ERR(pps))
+ return PTR_ERR(pps);
+
+ scaling_matrix =
+ mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
+ if (IS_ERR(scaling_matrix))
+ return PTR_ERR(scaling_matrix);
+
+ mtk_vdec_h264_update_dpb(dec_params, inst->dpb);
+
+ mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps);
+ mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps);
+ mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, scaling_matrix);
+
+ mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, dec_params, inst->dpb);
+ mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params,
+ slice_param->h264_dpb_info);
+
+ /* Build the reference lists */
+ v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps, inst->dpb);
+ v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist);
+ v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, v4l2_b1_reflist);
+
+ /* Adapt the built lists to the firmware's expectations */
+ mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid);
+ mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid);
+ mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid);
+
+ memcpy(&inst->vsi_ctx.h264_slice_params, slice_param,
+ sizeof(inst->vsi_ctx.h264_slice_params));
+
+ return 0;
+}
+
+static void vdec_h264_slice_fill_decode_reflist(struct vdec_h264_slice_inst *inst,
+ struct vdec_h264_slice_lat_dec_param *slice_param,
+ struct vdec_h264_slice_share_info *share_info)
+{
+ struct v4l2_ctrl_h264_decode_params *dec_params = &share_info->dec_params;
+ struct v4l2_ctrl_h264_sps *sps = &share_info->sps;
+ struct v4l2_h264_reflist_builder reflist_builder;
+ struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN];
+ u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0;
+ u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0;
+ u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1;
+
+ mtk_vdec_h264_update_dpb(dec_params, inst->dpb);
+
+ mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, dec_params,
+ inst->dpb);
+ mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params,
+ slice_param->h264_dpb_info);
+
+ mtk_v4l2_vdec_dbg(3, inst->ctx, "cur poc = %d\n", dec_params->bottom_field_order_cnt);
+ /* Build the reference lists */
+ v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps,
+ inst->dpb);
+ v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist);
+ v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, v4l2_b1_reflist);
+
+ /* Adapt the built lists to the firmware's expectations */
+ mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid);
+ mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid);
+ mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid);
+}
+
+static int vdec_h264_slice_alloc_mv_buf(struct vdec_h264_slice_inst *inst,
+ struct vdec_pic_info *pic)
+{
+ unsigned int buf_sz = mtk_vdec_h264_get_mv_buf_size(pic->buf_w, pic->buf_h);
+ struct mtk_vcodec_mem *mem;
+ int i, err;
+
+ mtk_v4l2_vdec_dbg(3, inst->ctx, "size = 0x%x", buf_sz);
+ for (i = 0; i < H264_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ mem->size = buf_sz;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "failed to allocate mv buf");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void vdec_h264_slice_free_mv_buf(struct vdec_h264_slice_inst *inst)
+{
+ int i;
+ struct mtk_vcodec_mem *mem;
+
+ for (i = 0; i < H264_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ }
+}
+
+static void vdec_h264_slice_get_pic_info(struct vdec_h264_slice_inst *inst)
+{
+ struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
+ u32 data[3];
+
+ data[0] = ctx->picinfo.pic_w;
+ data[1] = ctx->picinfo.pic_h;
+ data[2] = ctx->capture_fourcc;
+ vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO);
+
+ ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0];
+ ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1];
+ inst->cap_num_planes =
+ ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
+
+ mtk_vdec_debug(ctx, "pic(%d, %d), buf(%d, %d)",
+ ctx->picinfo.pic_w, ctx->picinfo.pic_h,
+ ctx->picinfo.buf_w, ctx->picinfo.buf_h);
+ mtk_vdec_debug(ctx, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0],
+ ctx->picinfo.fb_sz[1]);
+
+ if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w ||
+ ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) {
+ inst->resolution_changed = true;
+ if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w ||
+ ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h)
+ inst->realloc_mv_buf = true;
+
+ mtk_v4l2_vdec_dbg(1, inst->ctx, "resChg: (%d %d) : old(%d, %d) -> new(%d, %d)",
+ inst->resolution_changed,
+ inst->realloc_mv_buf,
+ ctx->last_decoded_picinfo.pic_w,
+ ctx->last_decoded_picinfo.pic_h,
+ ctx->picinfo.pic_w, ctx->picinfo.pic_h);
+ }
+}
+
+static void vdec_h264_slice_get_crop_info(struct vdec_h264_slice_inst *inst,
+ struct v4l2_rect *cr)
+{
+ cr->left = 0;
+ cr->top = 0;
+ cr->width = inst->ctx->picinfo.pic_w;
+ cr->height = inst->ctx->picinfo.pic_h;
+
+ mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d",
+ cr->left, cr->top, cr->width, cr->height);
+}
+
+static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_h264_slice_inst *inst;
+ int err, vsi_size;
+
+ inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->ctx = ctx;
+
+ inst->vpu.id = SCP_IPI_VDEC_LAT;
+ inst->vpu.core_id = SCP_IPI_VDEC_CORE;
+ inst->vpu.ctx = ctx;
+ inst->vpu.codec_type = ctx->current_codec;
+ inst->vpu.capture_type = ctx->capture_fourcc;
+
+ err = vpu_dec_init(&inst->vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "vdec_h264 init err=%d", err);
+ goto error_free_inst;
+ }
+
+ vsi_size = round_up(sizeof(struct vdec_h264_slice_vsi), VCODEC_DEC_ALIGNED_64);
+ inst->vsi = inst->vpu.vsi;
+ inst->vsi_core =
+ (struct vdec_h264_slice_vsi *)(((char *)inst->vpu.vsi) + vsi_size);
+ inst->resolution_changed = true;
+ inst->realloc_mv_buf = true;
+
+ mtk_vdec_debug(ctx, "lat struct size = %d,%d,%d,%d vsi: %d\n",
+ (int)sizeof(struct mtk_h264_sps_param),
+ (int)sizeof(struct mtk_h264_pps_param),
+ (int)sizeof(struct vdec_h264_slice_lat_dec_param),
+ (int)sizeof(struct mtk_h264_dpb_info),
+ vsi_size);
+ mtk_vdec_debug(ctx, "lat H264 instance >> %p, codec_type = 0x%x",
+ inst, inst->vpu.codec_type);
+
+ ctx->drv_handle = inst;
+ return 0;
+
+error_free_inst:
+ kfree(inst);
+ return err;
+}
+
+static void vdec_h264_slice_deinit(void *h_vdec)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+
+ vpu_dec_deinit(&inst->vpu);
+ vdec_h264_slice_free_mv_buf(inst);
+ vdec_msg_queue_deinit(&inst->ctx->msg_queue, inst->ctx);
+
+ kfree(inst);
+}
+
+static int vdec_h264_slice_core_decode(struct vdec_lat_buf *lat_buf)
+{
+ struct vdec_fb *fb;
+ u64 vdec_fb_va;
+ u64 y_fb_dma, c_fb_dma;
+ int err, timeout, i;
+ struct mtk_vcodec_dec_ctx *ctx = lat_buf->ctx;
+ struct vdec_h264_slice_inst *inst = ctx->drv_handle;
+ struct vb2_v4l2_buffer *vb2_v4l2;
+ struct vdec_h264_slice_share_info *share_info = lat_buf->private_data;
+ struct mtk_vcodec_mem *mem;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+
+ mtk_vdec_debug(ctx, "[h264-core] vdec_h264 core decode");
+ memcpy(&inst->vsi_core->h264_slice_params, &share_info->h264_slice_params,
+ sizeof(share_info->h264_slice_params));
+
+ fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
+ if (!fb) {
+ err = -EBUSY;
+ mtk_vdec_err(ctx, "fb buffer is NULL");
+ goto vdec_dec_end;
+ }
+
+ vdec_fb_va = (unsigned long)fb;
+ y_fb_dma = (u64)fb->base_y.dma_addr;
+ if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1)
+ c_fb_dma =
+ y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h;
+ else
+ c_fb_dma = (u64)fb->base_c.dma_addr;
+
+ mtk_vdec_debug(ctx, "[h264-core] y/c addr = 0x%llx 0x%llx", y_fb_dma, c_fb_dma);
+
+ inst->vsi_core->dec.y_fb_dma = y_fb_dma;
+ inst->vsi_core->dec.c_fb_dma = c_fb_dma;
+ inst->vsi_core->dec.vdec_fb_va = vdec_fb_va;
+ inst->vsi_core->dec.nal_info = share_info->nal_info;
+ inst->vsi_core->wdma_start_addr =
+ lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
+ inst->vsi_core->wdma_end_addr =
+ lat_buf->ctx->msg_queue.wdma_addr.dma_addr +
+ lat_buf->ctx->msg_queue.wdma_addr.size;
+ inst->vsi_core->wdma_err_addr = lat_buf->wdma_err_addr.dma_addr;
+ inst->vsi_core->slice_bc_start_addr = lat_buf->slice_bc_addr.dma_addr;
+ inst->vsi_core->slice_bc_end_addr = lat_buf->slice_bc_addr.dma_addr +
+ lat_buf->slice_bc_addr.size;
+ inst->vsi_core->trans_start = share_info->trans_start;
+ inst->vsi_core->trans_end = share_info->trans_end;
+ for (i = 0; i < H264_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ inst->vsi_core->mv_buf_dma[i] = mem->dma_addr;
+ }
+
+ vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
+ v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, vb2_v4l2, true);
+
+ vdec_h264_slice_fill_decode_reflist(inst, &inst->vsi_core->h264_slice_params,
+ share_info);
+
+ err = vpu_dec_core(vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "core decode err=%d", err);
+ goto vdec_dec_end;
+ }
+
+ /* wait decoder done interrupt */
+ timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
+ if (timeout)
+ mtk_vdec_err(ctx, "core decode timeout: pic_%d", ctx->decoded_frame_cnt);
+ inst->vsi_core->dec.timeout = !!timeout;
+
+ vpu_dec_core_end(vpu);
+ mtk_vdec_debug(ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x",
+ ctx->decoded_frame_cnt,
+ inst->vsi_core->dec.crc[0], inst->vsi_core->dec.crc[1],
+ inst->vsi_core->dec.crc[2], inst->vsi_core->dec.crc[3],
+ inst->vsi_core->dec.crc[4], inst->vsi_core->dec.crc[5],
+ inst->vsi_core->dec.crc[6], inst->vsi_core->dec.crc[7]);
+
+vdec_dec_end:
+ vdec_msg_queue_update_ube_rptr(&lat_buf->ctx->msg_queue, share_info->trans_end);
+ ctx->dev->vdec_pdata->cap_to_disp(ctx, !!err, lat_buf->src_buf_req);
+ mtk_vdec_debug(ctx, "core decode done err=%d", err);
+ ctx->decoded_frame_cnt++;
+ return 0;
+}
+
+static void vdec_h264_insert_startcode(struct mtk_vcodec_dec_dev *vcodec_dev, unsigned char *buf,
+ size_t *bs_size, struct mtk_h264_pps_param *pps)
+{
+ struct device *dev = &vcodec_dev->plat_dev->dev;
+
+ /* Need to add pending data at the end of bitstream when bs_sz is small than
+ * 20 bytes for cavlc bitstream, or lat will decode fail. This pending data is
+ * useful for mt8192 and mt8195 platform.
+ *
+ * cavlc bitstream when entropy_coding_mode_flag is false.
+ */
+ if (pps->entropy_coding_mode_flag || *bs_size > 20 ||
+ !(of_device_is_compatible(dev->of_node, "mediatek,mt8192-vcodec-dec") ||
+ of_device_is_compatible(dev->of_node, "mediatek,mt8195-vcodec-dec")))
+ return;
+
+ buf[*bs_size] = 0;
+ buf[*bs_size + 1] = 0;
+ buf[*bs_size + 2] = 1;
+ buf[*bs_size + 3] = 0xff;
+ (*bs_size) += 4;
+}
+
+static int vdec_h264_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+ struct mtk_video_dec_buf *src_buf_info;
+ int nal_start_idx, err, timeout = 0, i;
+ unsigned int data[2];
+ struct vdec_lat_buf *lat_buf;
+ struct vdec_h264_slice_share_info *share_info;
+ unsigned char *buf;
+ struct mtk_vcodec_mem *mem;
+
+ if (vdec_msg_queue_init(&inst->ctx->msg_queue, inst->ctx,
+ vdec_h264_slice_core_decode,
+ sizeof(*share_info)))
+ return -ENOMEM;
+
+ /* bs NULL means flush decoder */
+ if (!bs) {
+ vdec_msg_queue_wait_lat_buf_full(&inst->ctx->msg_queue);
+ return vpu_dec_reset(vpu);
+ }
+
+ if (inst->is_field_bitstream)
+ return -EINVAL;
+
+ lat_buf = vdec_msg_queue_dqbuf(&inst->ctx->msg_queue.lat_ctx);
+ if (!lat_buf) {
+ mtk_vdec_debug(inst->ctx, "failed to get lat buffer");
+ return -EAGAIN;
+ }
+ share_info = lat_buf->private_data;
+ src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer);
+
+ buf = (unsigned char *)bs->va;
+ nal_start_idx = mtk_vdec_h264_find_start_code(buf, bs->size);
+ if (nal_start_idx < 0) {
+ err = -EINVAL;
+ goto err_free_fb_out;
+ }
+
+ inst->vsi->dec.nal_info = buf[nal_start_idx];
+ lat_buf->src_buf_req = src_buf_info->m2m_buf.vb.vb2_buf.req_obj.req;
+ v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, &lat_buf->ts_info, true);
+
+ err = vdec_h264_slice_fill_decode_parameters(inst, share_info);
+ if (err)
+ goto err_free_fb_out;
+
+ vdec_h264_insert_startcode(inst->ctx->dev, buf, &bs->size,
+ &share_info->h264_slice_params.pps);
+
+ inst->vsi->dec.bs_buf_addr = (uint64_t)bs->dma_addr;
+ inst->vsi->dec.bs_buf_size = bs->size;
+
+ *res_chg = inst->resolution_changed;
+ if (inst->resolution_changed) {
+ mtk_vdec_debug(inst->ctx, "- resolution changed -");
+ if (inst->realloc_mv_buf) {
+ err = vdec_h264_slice_alloc_mv_buf(inst, &inst->ctx->picinfo);
+ inst->realloc_mv_buf = false;
+ if (err)
+ goto err_free_fb_out;
+ }
+ inst->resolution_changed = false;
+ }
+ for (i = 0; i < H264_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ inst->vsi->mv_buf_dma[i] = mem->dma_addr;
+ }
+ inst->vsi->wdma_start_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
+ inst->vsi->wdma_end_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr +
+ lat_buf->ctx->msg_queue.wdma_addr.size;
+ inst->vsi->wdma_err_addr = lat_buf->wdma_err_addr.dma_addr;
+ inst->vsi->slice_bc_start_addr = lat_buf->slice_bc_addr.dma_addr;
+ inst->vsi->slice_bc_end_addr = lat_buf->slice_bc_addr.dma_addr +
+ lat_buf->slice_bc_addr.size;
+
+ inst->vsi->trans_end = inst->ctx->msg_queue.wdma_rptr_addr;
+ inst->vsi->trans_start = inst->ctx->msg_queue.wdma_wptr_addr;
+ mtk_vdec_debug(inst->ctx, "lat:trans(0x%llx 0x%llx) err:0x%llx",
+ inst->vsi->wdma_start_addr,
+ inst->vsi->wdma_end_addr,
+ inst->vsi->wdma_err_addr);
+
+ mtk_vdec_debug(inst->ctx, "slice(0x%llx 0x%llx) rprt((0x%llx 0x%llx))",
+ inst->vsi->slice_bc_start_addr,
+ inst->vsi->slice_bc_end_addr,
+ inst->vsi->trans_start,
+ inst->vsi->trans_end);
+ err = vpu_dec_start(vpu, data, 2);
+ if (err) {
+ mtk_vdec_debug(inst->ctx, "lat decode err: %d", err);
+ goto err_free_fb_out;
+ }
+
+ share_info->trans_end = inst->ctx->msg_queue.wdma_addr.dma_addr +
+ inst->vsi->wdma_end_addr_offset;
+ share_info->trans_start = inst->ctx->msg_queue.wdma_wptr_addr;
+ share_info->nal_info = inst->vsi->dec.nal_info;
+
+ if (IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) {
+ memcpy(&share_info->h264_slice_params, &inst->vsi->h264_slice_params,
+ sizeof(share_info->h264_slice_params));
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf);
+ }
+
+ /* wait decoder done interrupt */
+ timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
+ if (timeout)
+ mtk_vdec_err(inst->ctx, "lat decode timeout: pic_%d", inst->slice_dec_num);
+ inst->vsi->dec.timeout = !!timeout;
+
+ err = vpu_dec_end(vpu);
+ if (err == SLICE_HEADER_FULL || err == TRANS_BUFFER_FULL) {
+ if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability))
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf);
+ inst->slice_dec_num++;
+ mtk_vdec_err(inst->ctx, "lat dec fail: pic_%d err:%d", inst->slice_dec_num, err);
+ return -EINVAL;
+ }
+
+ share_info->trans_end = inst->ctx->msg_queue.wdma_addr.dma_addr +
+ inst->vsi->wdma_end_addr_offset;
+ vdec_msg_queue_update_ube_wptr(&lat_buf->ctx->msg_queue, share_info->trans_end);
+
+ if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) {
+ memcpy(&share_info->h264_slice_params, &inst->vsi->h264_slice_params,
+ sizeof(share_info->h264_slice_params));
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf);
+ }
+ mtk_vdec_debug(inst->ctx, "dec num: %d lat crc: 0x%x 0x%x 0x%x", inst->slice_dec_num,
+ inst->vsi->dec.crc[0], inst->vsi->dec.crc[1], inst->vsi->dec.crc[2]);
+
+ inst->slice_dec_num++;
+ return 0;
+err_free_fb_out:
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf);
+ mtk_vdec_err(inst->ctx, "slice dec number: %d err: %d", inst->slice_dec_num, err);
+ return err;
+}
+
+static int vdec_h264_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *unused, bool *res_chg)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+ struct mtk_video_dec_buf *src_buf_info, *dst_buf_info;
+ struct vdec_fb *fb;
+ unsigned char *buf;
+ unsigned int data[2], i;
+ u64 y_fb_dma, c_fb_dma;
+ struct mtk_vcodec_mem *mem;
+ int err, nal_start_idx;
+
+ /* bs NULL means flush decoder */
+ if (!bs)
+ return vpu_dec_reset(vpu);
+
+ fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx);
+ src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer);
+ dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer);
+
+ y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0;
+ c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0;
+ mtk_vdec_debug(inst->ctx, "[h264-dec] [%d] y_dma=%llx c_dma=%llx",
+ inst->ctx->decoded_frame_cnt, y_fb_dma, c_fb_dma);
+
+ inst->vsi_ctx.dec.bs_buf_addr = (u64)bs->dma_addr;
+ inst->vsi_ctx.dec.bs_buf_size = bs->size;
+ inst->vsi_ctx.dec.y_fb_dma = y_fb_dma;
+ inst->vsi_ctx.dec.c_fb_dma = c_fb_dma;
+ inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb;
+
+ v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb,
+ &dst_buf_info->m2m_buf.vb, true);
+ err = get_vdec_sig_decode_parameters(inst);
+ if (err)
+ goto err_free_fb_out;
+
+ buf = (unsigned char *)bs->va;
+ nal_start_idx = mtk_vdec_h264_find_start_code(buf, bs->size);
+ if (nal_start_idx < 0) {
+ err = -EINVAL;
+ goto err_free_fb_out;
+ }
+ inst->vsi_ctx.dec.nal_info = buf[nal_start_idx];
+
+ *res_chg = inst->resolution_changed;
+ if (inst->resolution_changed) {
+ mtk_vdec_debug(inst->ctx, "- resolution changed -");
+ if (inst->realloc_mv_buf) {
+ err = vdec_h264_slice_alloc_mv_buf(inst, &inst->ctx->picinfo);
+ inst->realloc_mv_buf = false;
+ if (err)
+ goto err_free_fb_out;
+ }
+ inst->resolution_changed = false;
+
+ for (i = 0; i < H264_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ inst->vsi_ctx.mv_buf_dma[i] = mem->dma_addr;
+ }
+ }
+
+ memcpy(inst->vpu.vsi, &inst->vsi_ctx, sizeof(inst->vsi_ctx));
+ err = vpu_dec_start(vpu, data, 2);
+ if (err)
+ goto err_free_fb_out;
+
+ /* wait decoder done interrupt */
+ err = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
+ if (err)
+ mtk_vdec_err(inst->ctx, "decode timeout: pic_%d", inst->ctx->decoded_frame_cnt);
+
+ inst->vsi->dec.timeout = !!err;
+ err = vpu_dec_end(vpu);
+ if (err)
+ goto err_free_fb_out;
+
+ memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx));
+ mtk_vdec_debug(inst->ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x",
+ inst->ctx->decoded_frame_cnt,
+ inst->vsi_ctx.dec.crc[0], inst->vsi_ctx.dec.crc[1],
+ inst->vsi_ctx.dec.crc[2], inst->vsi_ctx.dec.crc[3],
+ inst->vsi_ctx.dec.crc[4], inst->vsi_ctx.dec.crc[5],
+ inst->vsi_ctx.dec.crc[6], inst->vsi_ctx.dec.crc[7]);
+
+ inst->ctx->decoded_frame_cnt++;
+ return 0;
+
+err_free_fb_out:
+ mtk_vdec_err(inst->ctx, "dec frame number: %d err: %d", inst->ctx->decoded_frame_cnt, err);
+ return err;
+}
+
+static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *unused, bool *res_chg)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+ int ret;
+
+ if (!h_vdec)
+ return -EINVAL;
+
+ if (inst->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE)
+ ret = vdec_h264_slice_single_decode(h_vdec, bs, unused, res_chg);
+ else
+ ret = vdec_h264_slice_lat_decode(h_vdec, bs, unused, res_chg);
+
+ return ret;
+}
+
+static int vdec_h264_slice_get_param(void *h_vdec, enum vdec_get_param_type type,
+ void *out)
+{
+ struct vdec_h264_slice_inst *inst = h_vdec;
+
+ switch (type) {
+ case GET_PARAM_PIC_INFO:
+ vdec_h264_slice_get_pic_info(inst);
+ break;
+ case GET_PARAM_DPB_SIZE:
+ *(unsigned int *)out = 6;
+ break;
+ case GET_PARAM_CROP_INFO:
+ vdec_h264_slice_get_crop_info(inst, out);
+ break;
+ default:
+ mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+const struct vdec_common_if vdec_h264_slice_multi_if = {
+ .init = vdec_h264_slice_init,
+ .decode = vdec_h264_slice_decode,
+ .get_param = vdec_h264_slice_get_param,
+ .deinit = vdec_h264_slice_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
new file mode 100644
index 000000000..06ed47df6
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
@@ -0,0 +1,1092 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ * Author: Yunfei Dong <yunfei.dong@mediatek.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_drv_base.h"
+#include "../vdec_drv_if.h"
+#include "../vdec_vpu_if.h"
+
+/* the size used to store hevc wrap information */
+#define VDEC_HEVC_WRAP_SZ (532 * SZ_1K)
+
+#define HEVC_MAX_MV_NUM 32
+
+/* get used parameters for sps/pps */
+#define GET_HEVC_VDEC_FLAG(cond, flag) \
+ { dst_param->cond = ((src_param->flags & (flag)) ? (1) : (0)); }
+#define GET_HEVC_VDEC_PARAM(param) \
+ { dst_param->param = src_param->param; }
+
+/**
+ * enum vdec_hevc_core_dec_err_type - core decode error type
+ *
+ * @TRANS_BUFFER_FULL: trans buffer is full
+ * @SLICE_HEADER_FULL: slice header buffer is full
+ */
+enum vdec_hevc_core_dec_err_type {
+ TRANS_BUFFER_FULL = 1,
+ SLICE_HEADER_FULL,
+};
+
+/**
+ * struct mtk_hevc_dpb_info - hevc dpb information
+ *
+ * @y_dma_addr: Y plane physical address
+ * @c_dma_addr: CbCr plane physical address
+ * @reference_flag: reference picture flag (short/long term reference picture)
+ * @field: field picture flag
+ */
+struct mtk_hevc_dpb_info {
+ dma_addr_t y_dma_addr;
+ dma_addr_t c_dma_addr;
+ int reference_flag;
+ int field;
+};
+
+/*
+ * struct mtk_hevc_sps_param - parameters for sps
+ */
+struct mtk_hevc_sps_param {
+ unsigned char video_parameter_set_id;
+ unsigned char seq_parameter_set_id;
+ unsigned short pic_width_in_luma_samples;
+ unsigned short pic_height_in_luma_samples;
+ unsigned char bit_depth_luma_minus8;
+ unsigned char bit_depth_chroma_minus8;
+ unsigned char log2_max_pic_order_cnt_lsb_minus4;
+ unsigned char sps_max_dec_pic_buffering_minus1;
+ unsigned char sps_max_num_reorder_pics;
+ unsigned char sps_max_latency_increase_plus1;
+ unsigned char log2_min_luma_coding_block_size_minus3;
+ unsigned char log2_diff_max_min_luma_coding_block_size;
+ unsigned char log2_min_luma_transform_block_size_minus2;
+ unsigned char log2_diff_max_min_luma_transform_block_size;
+ unsigned char max_transform_hierarchy_depth_inter;
+ unsigned char max_transform_hierarchy_depth_intra;
+ unsigned char pcm_sample_bit_depth_luma_minus1;
+ unsigned char pcm_sample_bit_depth_chroma_minus1;
+ unsigned char log2_min_pcm_luma_coding_block_size_minus3;
+ unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
+ unsigned char num_short_term_ref_pic_sets;
+ unsigned char num_long_term_ref_pics_sps;
+ unsigned char chroma_format_idc;
+ unsigned char sps_max_sub_layers_minus1;
+ unsigned char separate_colour_plane;
+ unsigned char scaling_list_enabled;
+ unsigned char amp_enabled;
+ unsigned char sample_adaptive_offset;
+ unsigned char pcm_enabled;
+ unsigned char pcm_loop_filter_disabled;
+ unsigned char long_term_ref_pics_enabled;
+ unsigned char sps_temporal_mvp_enabled;
+ unsigned char strong_intra_smoothing_enabled;
+ unsigned char reserved[5];
+};
+
+/*
+ * struct mtk_hevc_pps_param - parameters for pps
+ */
+struct mtk_hevc_pps_param {
+ unsigned char pic_parameter_set_id;
+ unsigned char num_extra_slice_header_bits;
+ unsigned char num_ref_idx_l0_default_active_minus1;
+ unsigned char num_ref_idx_l1_default_active_minus1;
+ char init_qp_minus26;
+ unsigned char diff_cu_qp_delta_depth;
+ char pps_cb_qp_offset;
+ char pps_cr_qp_offset;
+ unsigned char num_tile_columns_minus1;
+ unsigned char num_tile_rows_minus1;
+ unsigned char column_width_minus1[20];
+ unsigned char row_height_minus1[22];
+ char pps_beta_offset_div2;
+ char pps_tc_offset_div2;
+ unsigned char log2_parallel_merge_level_minus2;
+ char dependent_slice_segment_enabled;
+ char output_flag_present;
+ char sign_data_hiding_enabled;
+ char cabac_init_present;
+ char constrained_intra_pred;
+ char transform_skip_enabled;
+ char cu_qp_delta_enabled;
+ char pps_slice_chroma_qp_offsets_present;
+ char weighted_pred;
+ char weighted_bipred;
+ char transquant_bypass_enabled;
+ char pps_flag_tiles_enabled;
+ char entropy_coding_sync_enabled;
+ char loop_filter_across_tiles_enabled;
+ char pps_loop_filter_across_slices_enabled;
+ char deblocking_filter_override_enabled;
+ char pps_disable_deflocking_filter;
+ char lists_modification_present;
+ char slice_segment_header_extersion_present;
+ char deblocking_filter_control_present;
+ char uniform_spacing;
+ char reserved[6];
+};
+
+/*
+ * struct mtk_hevc_slice_header_param - parameters for slice header
+ */
+struct mtk_hevc_slice_header_param {
+ unsigned int slice_type;
+ unsigned int num_active_ref_layer_pics;
+ int slice_qp;
+ int slice_qp_delta_cb;
+ int slice_qp_delta_cr;
+ int num_ref_idx[3];
+ unsigned int col_ref_idx;
+ unsigned int five_minus_max_num_merge_cand;
+ int slice_deblocking_filter_beta_offset_div2;
+ int slice_deblocking_filter_tc_offset_div2;
+ unsigned char sao_enable_flag;
+ unsigned char sao_enable_flag_chroma;
+ unsigned char cabac_init_flag;
+ unsigned char slice_tmvp_flags_present;
+ unsigned char col_from_l0_flag;
+ unsigned char mvd_l1_zero_flag;
+ unsigned char slice_loop_filter_across_slices_enabled_flag;
+ unsigned char deblocking_filter_disable_flag;
+ unsigned int slice_reg0;
+ unsigned int slice_reg1;
+ unsigned int slice_reg2;
+ unsigned int num_rps_curr_temp_list;
+ unsigned int ref_list_mode;
+ int str_num_delta_pocs;
+ int str_num_negtive_pos_pics;
+ int num_long_term;
+ int num_long_term_sps;
+ unsigned int max_cu_width;
+ unsigned int max_cu_height;
+ unsigned int num_entry_point_offsets;
+ unsigned int last_lcu_x_in_tile[17];
+ unsigned int last_lcu_y_in_tile[17];
+ unsigned char nal_unit_type;
+};
+
+/*
+ * struct slice_api_hevc_scaling_matrix - parameters for scaling list
+ */
+struct slice_api_hevc_scaling_matrix {
+ unsigned char scaling_list_4x4[6][16];
+ unsigned char scaling_list_8x8[6][64];
+ unsigned char scaling_list_16x16[6][64];
+ unsigned char scaling_list_32x32[2][64];
+ unsigned char scaling_list_dc_coef_16x16[6];
+ unsigned char scaling_list_dc_coef_32x32[2];
+};
+
+/*
+ * struct slice_hevc_dpb_entry - each dpb information
+ */
+struct slice_hevc_dpb_entry {
+ u64 timestamp;
+ unsigned char flags;
+ unsigned char field_pic;
+ int pic_order_cnt_val;
+};
+
+/*
+ * struct slice_api_hevc_decode_param - parameters for decode.
+ */
+struct slice_api_hevc_decode_param {
+ struct slice_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+ int pic_order_cnt_val;
+ unsigned short short_term_ref_pic_set_size;
+ unsigned short long_term_ref_pic_set_size;
+ unsigned char num_active_dpb_entries;
+ unsigned char num_poc_st_curr_before;
+ unsigned char num_poc_st_curr_after;
+ unsigned char num_poc_lt_curr;
+ unsigned char poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+ unsigned char poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+ unsigned char poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+ unsigned char num_delta_pocs_of_ref_rps_idx;
+ int flags;
+};
+
+/**
+ * struct hevc_fb - hevc decode frame buffer information
+ *
+ * @vdec_fb_va: virtual address of struct vdec_fb
+ * @y_fb_dma: dma address of Y frame buffer (luma)
+ * @c_fb_dma: dma address of C frame buffer (chroma)
+ * @poc: picture order count of frame buffer
+ * @reserved: for 8 bytes alignment
+ */
+struct hevc_fb {
+ u64 vdec_fb_va;
+ u64 y_fb_dma;
+ u64 c_fb_dma;
+ s32 poc;
+ u32 reserved;
+};
+
+/**
+ * struct vdec_hevc_slice_lat_dec_param - parameters for decode current frame
+ *
+ * @sps: hevc sps syntax parameters
+ * @pps: hevc pps syntax parameters
+ * @slice_header: hevc slice header syntax parameters
+ * @scaling_matrix: hevc scaling list parameters
+ * @decode_params: decoder parameters of each frame used for hardware decode
+ * @hevc_dpb_info: dpb reference list
+ */
+struct vdec_hevc_slice_lat_dec_param {
+ struct mtk_hevc_sps_param sps;
+ struct mtk_hevc_pps_param pps;
+ struct mtk_hevc_slice_header_param slice_header;
+ struct slice_api_hevc_scaling_matrix scaling_matrix;
+ struct slice_api_hevc_decode_param decode_params;
+ struct mtk_hevc_dpb_info hevc_dpb_info[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+};
+
+/**
+ * struct vdec_hevc_slice_info - decode information
+ *
+ * @wdma_end_addr_offset: wdma end address offset
+ * @timeout: Decode timeout: 1 timeout, 0 no timeount
+ * @vdec_fb_va: VDEC frame buffer struct virtual address
+ * @crc: Used to check whether hardware's status is right
+ */
+struct vdec_hevc_slice_info {
+ u64 wdma_end_addr_offset;
+ u64 timeout;
+ u64 vdec_fb_va;
+ u32 crc[8];
+};
+
+/*
+ * struct vdec_hevc_slice_mem - memory address and size
+ */
+struct vdec_hevc_slice_mem {
+ union {
+ u64 buf;
+ dma_addr_t dma_addr;
+ };
+ union {
+ size_t size;
+ dma_addr_t dma_addr_end;
+ u64 padding;
+ };
+};
+
+/**
+ * struct vdec_hevc_slice_fb - frame buffer for decoding
+ * @y: current y buffer address info
+ * @c: current c buffer address info
+ */
+struct vdec_hevc_slice_fb {
+ struct vdec_hevc_slice_mem y;
+ struct vdec_hevc_slice_mem c;
+};
+
+/**
+ * struct vdec_hevc_slice_vsi - shared memory for decode information exchange
+ * between SCP and Host.
+ *
+ * @bs: input buffer info
+ *
+ * @ube: ube buffer
+ * @trans: transcoded buffer
+ * @err_map: err map buffer
+ * @slice_bc: slice bc buffer
+ * @wrap: temp buffer
+ *
+ * @fb: current y/c buffer
+ * @mv_buf_dma: HW working motion vector buffer
+ * @dec: decode information (AP-R, VPU-W)
+ * @hevc_slice_params: decode parameters for hw used
+ */
+struct vdec_hevc_slice_vsi {
+ /* used in LAT stage */
+ struct vdec_hevc_slice_mem bs;
+
+ struct vdec_hevc_slice_mem ube;
+ struct vdec_hevc_slice_mem trans;
+ struct vdec_hevc_slice_mem err_map;
+ struct vdec_hevc_slice_mem slice_bc;
+ struct vdec_hevc_slice_mem wrap;
+
+ struct vdec_hevc_slice_fb fb;
+ struct vdec_hevc_slice_mem mv_buf_dma[HEVC_MAX_MV_NUM];
+ struct vdec_hevc_slice_info dec;
+ struct vdec_hevc_slice_lat_dec_param hevc_slice_params;
+};
+
+/**
+ * struct vdec_hevc_slice_share_info - shared information used to exchange
+ * message between lat and core
+ *
+ * @sps: sequence header information from user space
+ * @dec_params: decoder params from user space
+ * @hevc_slice_params: decoder params used for hardware
+ * @trans: trans buffer dma address
+ */
+struct vdec_hevc_slice_share_info {
+ struct v4l2_ctrl_hevc_sps sps;
+ struct v4l2_ctrl_hevc_decode_params dec_params;
+ struct vdec_hevc_slice_lat_dec_param hevc_slice_params;
+ struct vdec_hevc_slice_mem trans;
+};
+
+/**
+ * struct vdec_hevc_slice_inst - hevc decoder instance
+ *
+ * @slice_dec_num: how many picture be decoded
+ * @ctx: point to mtk_vcodec_dec_ctx
+ * @mv_buf: HW working motion vector buffer
+ * @vpu: VPU instance
+ * @vsi: vsi used for lat
+ * @vsi_core: vsi used for core
+ * @wrap_addr: wrap address used for hevc
+ *
+ * @hevc_slice_param: the parameters that hardware use to decode
+ *
+ * @resolution_changed: resolution changed
+ * @realloc_mv_buf: reallocate mv buffer
+ * @cap_num_planes: number of capture queue plane
+ */
+struct vdec_hevc_slice_inst {
+ unsigned int slice_dec_num;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct mtk_vcodec_mem mv_buf[HEVC_MAX_MV_NUM];
+ struct vdec_vpu_inst vpu;
+ struct vdec_hevc_slice_vsi *vsi;
+ struct vdec_hevc_slice_vsi *vsi_core;
+ struct mtk_vcodec_mem wrap_addr;
+
+ struct vdec_hevc_slice_lat_dec_param hevc_slice_param;
+
+ unsigned int resolution_changed;
+ unsigned int realloc_mv_buf;
+ unsigned int cap_num_planes;
+};
+
+static unsigned int vdec_hevc_get_mv_buf_size(unsigned int width, unsigned int height)
+{
+ const unsigned int unit_size = (width / 16) * (height / 16) + 8;
+
+ return 64 * unit_size;
+}
+
+static void *vdec_hevc_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id)
+{
+ struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id);
+
+ if (!ctrl)
+ return ERR_PTR(-EINVAL);
+
+ return ctrl->p_cur.p;
+}
+
+static void vdec_hevc_fill_dpb_info(struct mtk_vcodec_dec_ctx *ctx,
+ struct slice_api_hevc_decode_param *decode_params,
+ struct mtk_hevc_dpb_info *hevc_dpb_info)
+{
+ const struct slice_hevc_dpb_entry *dpb;
+ struct vb2_queue *vq;
+ struct vb2_buffer *vb;
+ int index;
+
+ vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ for (index = 0; index < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; index++) {
+ dpb = &decode_params->dpb[index];
+ if (index >= decode_params->num_active_dpb_entries)
+ continue;
+
+ vb = vb2_find_buffer(vq, dpb->timestamp);
+ if (!vb) {
+ dev_err(&ctx->dev->plat_dev->dev,
+ "Reference invalid: dpb_index(%d) timestamp(%lld)",
+ index, dpb->timestamp);
+ continue;
+ }
+
+ hevc_dpb_info[index].field = dpb->field_pic;
+
+ hevc_dpb_info[index].y_dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
+ if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2)
+ hevc_dpb_info[index].c_dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1);
+ else
+ hevc_dpb_info[index].c_dma_addr =
+ hevc_dpb_info[index].y_dma_addr + ctx->picinfo.fb_sz[0];
+ }
+}
+
+static void vdec_hevc_copy_sps_params(struct mtk_hevc_sps_param *dst_param,
+ const struct v4l2_ctrl_hevc_sps *src_param)
+{
+ GET_HEVC_VDEC_PARAM(video_parameter_set_id);
+ GET_HEVC_VDEC_PARAM(seq_parameter_set_id);
+ GET_HEVC_VDEC_PARAM(pic_width_in_luma_samples);
+ GET_HEVC_VDEC_PARAM(pic_height_in_luma_samples);
+ GET_HEVC_VDEC_PARAM(bit_depth_luma_minus8);
+ GET_HEVC_VDEC_PARAM(bit_depth_chroma_minus8);
+ GET_HEVC_VDEC_PARAM(log2_max_pic_order_cnt_lsb_minus4);
+ GET_HEVC_VDEC_PARAM(sps_max_dec_pic_buffering_minus1);
+ GET_HEVC_VDEC_PARAM(sps_max_num_reorder_pics);
+ GET_HEVC_VDEC_PARAM(sps_max_latency_increase_plus1);
+ GET_HEVC_VDEC_PARAM(log2_min_luma_coding_block_size_minus3);
+ GET_HEVC_VDEC_PARAM(log2_diff_max_min_luma_coding_block_size);
+ GET_HEVC_VDEC_PARAM(log2_min_luma_transform_block_size_minus2);
+ GET_HEVC_VDEC_PARAM(log2_diff_max_min_luma_transform_block_size);
+ GET_HEVC_VDEC_PARAM(max_transform_hierarchy_depth_inter);
+ GET_HEVC_VDEC_PARAM(max_transform_hierarchy_depth_intra);
+ GET_HEVC_VDEC_PARAM(pcm_sample_bit_depth_luma_minus1);
+ GET_HEVC_VDEC_PARAM(pcm_sample_bit_depth_chroma_minus1);
+ GET_HEVC_VDEC_PARAM(log2_min_pcm_luma_coding_block_size_minus3);
+ GET_HEVC_VDEC_PARAM(log2_diff_max_min_pcm_luma_coding_block_size);
+ GET_HEVC_VDEC_PARAM(num_short_term_ref_pic_sets);
+ GET_HEVC_VDEC_PARAM(num_long_term_ref_pics_sps);
+ GET_HEVC_VDEC_PARAM(chroma_format_idc);
+ GET_HEVC_VDEC_PARAM(sps_max_sub_layers_minus1);
+
+ GET_HEVC_VDEC_FLAG(separate_colour_plane,
+ V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE);
+ GET_HEVC_VDEC_FLAG(scaling_list_enabled,
+ V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED);
+ GET_HEVC_VDEC_FLAG(amp_enabled,
+ V4L2_HEVC_SPS_FLAG_AMP_ENABLED);
+ GET_HEVC_VDEC_FLAG(sample_adaptive_offset,
+ V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET);
+ GET_HEVC_VDEC_FLAG(pcm_enabled,
+ V4L2_HEVC_SPS_FLAG_PCM_ENABLED);
+ GET_HEVC_VDEC_FLAG(pcm_loop_filter_disabled,
+ V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED);
+ GET_HEVC_VDEC_FLAG(long_term_ref_pics_enabled,
+ V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT);
+ GET_HEVC_VDEC_FLAG(sps_temporal_mvp_enabled,
+ V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED);
+ GET_HEVC_VDEC_FLAG(strong_intra_smoothing_enabled,
+ V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED);
+}
+
+static void vdec_hevc_copy_pps_params(struct mtk_hevc_pps_param *dst_param,
+ const struct v4l2_ctrl_hevc_pps *src_param)
+{
+ int i;
+
+ GET_HEVC_VDEC_PARAM(pic_parameter_set_id);
+ GET_HEVC_VDEC_PARAM(num_extra_slice_header_bits);
+ GET_HEVC_VDEC_PARAM(num_ref_idx_l0_default_active_minus1);
+ GET_HEVC_VDEC_PARAM(num_ref_idx_l1_default_active_minus1);
+ GET_HEVC_VDEC_PARAM(init_qp_minus26);
+ GET_HEVC_VDEC_PARAM(diff_cu_qp_delta_depth);
+ GET_HEVC_VDEC_PARAM(pps_cb_qp_offset);
+ GET_HEVC_VDEC_PARAM(pps_cr_qp_offset);
+ GET_HEVC_VDEC_PARAM(num_tile_columns_minus1);
+ GET_HEVC_VDEC_PARAM(num_tile_rows_minus1);
+ GET_HEVC_VDEC_PARAM(init_qp_minus26);
+ GET_HEVC_VDEC_PARAM(diff_cu_qp_delta_depth);
+ GET_HEVC_VDEC_PARAM(pic_parameter_set_id);
+ GET_HEVC_VDEC_PARAM(num_extra_slice_header_bits);
+ GET_HEVC_VDEC_PARAM(num_ref_idx_l0_default_active_minus1);
+ GET_HEVC_VDEC_PARAM(num_ref_idx_l1_default_active_minus1);
+ GET_HEVC_VDEC_PARAM(pps_beta_offset_div2);
+ GET_HEVC_VDEC_PARAM(pps_tc_offset_div2);
+ GET_HEVC_VDEC_PARAM(log2_parallel_merge_level_minus2);
+
+ for (i = 0; i < ARRAY_SIZE(src_param->column_width_minus1); i++)
+ GET_HEVC_VDEC_PARAM(column_width_minus1[i]);
+ for (i = 0; i < ARRAY_SIZE(src_param->row_height_minus1); i++)
+ GET_HEVC_VDEC_PARAM(row_height_minus1[i]);
+
+ GET_HEVC_VDEC_FLAG(dependent_slice_segment_enabled,
+ V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED);
+ GET_HEVC_VDEC_FLAG(output_flag_present,
+ V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT);
+ GET_HEVC_VDEC_FLAG(sign_data_hiding_enabled,
+ V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED);
+ GET_HEVC_VDEC_FLAG(cabac_init_present,
+ V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT);
+ GET_HEVC_VDEC_FLAG(constrained_intra_pred,
+ V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED);
+ GET_HEVC_VDEC_FLAG(transform_skip_enabled,
+ V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED);
+ GET_HEVC_VDEC_FLAG(cu_qp_delta_enabled,
+ V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED);
+ GET_HEVC_VDEC_FLAG(pps_slice_chroma_qp_offsets_present,
+ V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT);
+ GET_HEVC_VDEC_FLAG(weighted_pred,
+ V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED);
+ GET_HEVC_VDEC_FLAG(weighted_bipred,
+ V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED);
+ GET_HEVC_VDEC_FLAG(transquant_bypass_enabled,
+ V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED);
+ GET_HEVC_VDEC_FLAG(pps_flag_tiles_enabled,
+ V4L2_HEVC_PPS_FLAG_TILES_ENABLED);
+ GET_HEVC_VDEC_FLAG(entropy_coding_sync_enabled,
+ V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED);
+ GET_HEVC_VDEC_FLAG(loop_filter_across_tiles_enabled,
+ V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED);
+ GET_HEVC_VDEC_FLAG(pps_loop_filter_across_slices_enabled,
+ V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED);
+ GET_HEVC_VDEC_FLAG(deblocking_filter_override_enabled,
+ V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED);
+ GET_HEVC_VDEC_FLAG(pps_disable_deflocking_filter,
+ V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER);
+ GET_HEVC_VDEC_FLAG(lists_modification_present,
+ V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT);
+ GET_HEVC_VDEC_FLAG(slice_segment_header_extersion_present,
+ V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT);
+ GET_HEVC_VDEC_FLAG(deblocking_filter_control_present,
+ V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT);
+ GET_HEVC_VDEC_FLAG(uniform_spacing,
+ V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING);
+}
+
+static void vdec_hevc_copy_scaling_matrix(struct slice_api_hevc_scaling_matrix *dst_matrix,
+ const struct v4l2_ctrl_hevc_scaling_matrix *src_matrix)
+{
+ memcpy(dst_matrix, src_matrix, sizeof(*src_matrix));
+}
+
+static void
+vdec_hevc_copy_decode_params(struct slice_api_hevc_decode_param *dst_param,
+ const struct v4l2_ctrl_hevc_decode_params *src_param,
+ const struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX])
+{
+ struct slice_hevc_dpb_entry *dst_entry;
+ const struct v4l2_hevc_dpb_entry *src_entry;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dst_param->dpb); i++) {
+ dst_entry = &dst_param->dpb[i];
+ src_entry = &dpb[i];
+
+ dst_entry->timestamp = src_entry->timestamp;
+ dst_entry->flags = src_entry->flags;
+ dst_entry->field_pic = src_entry->field_pic;
+ dst_entry->pic_order_cnt_val = src_entry->pic_order_cnt_val;
+
+ GET_HEVC_VDEC_PARAM(poc_st_curr_before[i]);
+ GET_HEVC_VDEC_PARAM(poc_st_curr_after[i]);
+ GET_HEVC_VDEC_PARAM(poc_lt_curr[i]);
+ }
+
+ GET_HEVC_VDEC_PARAM(pic_order_cnt_val);
+ GET_HEVC_VDEC_PARAM(short_term_ref_pic_set_size);
+ GET_HEVC_VDEC_PARAM(long_term_ref_pic_set_size);
+ GET_HEVC_VDEC_PARAM(num_active_dpb_entries);
+ GET_HEVC_VDEC_PARAM(num_poc_st_curr_before);
+ GET_HEVC_VDEC_PARAM(num_poc_st_curr_after);
+ GET_HEVC_VDEC_PARAM(num_delta_pocs_of_ref_rps_idx);
+ GET_HEVC_VDEC_PARAM(num_poc_lt_curr);
+ GET_HEVC_VDEC_PARAM(flags);
+}
+
+static int vdec_hevc_slice_fill_decode_parameters(struct vdec_hevc_slice_inst *inst,
+ struct vdec_hevc_slice_share_info *share_info)
+{
+ struct vdec_hevc_slice_lat_dec_param *slice_param = &inst->vsi->hevc_slice_params;
+ const struct v4l2_ctrl_hevc_decode_params *dec_params;
+ const struct v4l2_ctrl_hevc_scaling_matrix *src_matrix;
+ const struct v4l2_ctrl_hevc_sps *sps;
+ const struct v4l2_ctrl_hevc_pps *pps;
+
+ dec_params =
+ vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS);
+ if (IS_ERR(dec_params))
+ return PTR_ERR(dec_params);
+
+ src_matrix =
+ vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX);
+ if (IS_ERR(src_matrix))
+ return PTR_ERR(src_matrix);
+
+ sps = vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_SPS);
+ if (IS_ERR(sps))
+ return PTR_ERR(sps);
+
+ pps = vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_PPS);
+ if (IS_ERR(pps))
+ return PTR_ERR(pps);
+
+ vdec_hevc_copy_sps_params(&slice_param->sps, sps);
+ vdec_hevc_copy_pps_params(&slice_param->pps, pps);
+ vdec_hevc_copy_scaling_matrix(&slice_param->scaling_matrix, src_matrix);
+
+ memcpy(&share_info->sps, sps, sizeof(*sps));
+ memcpy(&share_info->dec_params, dec_params, sizeof(*dec_params));
+
+ slice_param->decode_params.num_poc_st_curr_before = dec_params->num_poc_st_curr_before;
+ slice_param->decode_params.num_poc_st_curr_after = dec_params->num_poc_st_curr_after;
+ slice_param->decode_params.num_poc_lt_curr = dec_params->num_poc_lt_curr;
+ slice_param->decode_params.num_delta_pocs_of_ref_rps_idx =
+ dec_params->num_delta_pocs_of_ref_rps_idx;
+
+ return 0;
+}
+
+static void vdec_hevc_slice_fill_decode_reflist(struct vdec_hevc_slice_inst *inst,
+ struct vdec_hevc_slice_lat_dec_param *slice_param,
+ struct vdec_hevc_slice_share_info *share_info)
+{
+ struct v4l2_ctrl_hevc_decode_params *dec_params = &share_info->dec_params;
+
+ vdec_hevc_copy_decode_params(&slice_param->decode_params, dec_params,
+ share_info->dec_params.dpb);
+
+ vdec_hevc_fill_dpb_info(inst->ctx, &slice_param->decode_params,
+ slice_param->hevc_dpb_info);
+}
+
+static int vdec_hevc_slice_alloc_mv_buf(struct vdec_hevc_slice_inst *inst,
+ struct vdec_pic_info *pic)
+{
+ unsigned int buf_sz = vdec_hevc_get_mv_buf_size(pic->buf_w, pic->buf_h);
+ struct mtk_vcodec_mem *mem;
+ int i, err;
+
+ mtk_v4l2_vdec_dbg(3, inst->ctx, "allocate mv buffer size = 0x%x", buf_sz);
+ for (i = 0; i < HEVC_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ mem->size = buf_sz;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "failed to allocate mv buf");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void vdec_hevc_slice_free_mv_buf(struct vdec_hevc_slice_inst *inst)
+{
+ int i;
+ struct mtk_vcodec_mem *mem;
+
+ for (i = 0; i < HEVC_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ }
+}
+
+static void vdec_hevc_slice_get_pic_info(struct vdec_hevc_slice_inst *inst)
+{
+ struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
+ u32 data[3];
+
+ data[0] = ctx->picinfo.pic_w;
+ data[1] = ctx->picinfo.pic_h;
+ data[2] = ctx->capture_fourcc;
+ vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO);
+
+ ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64);
+ ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0];
+ ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1];
+ inst->cap_num_planes =
+ ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
+
+ mtk_vdec_debug(ctx, "pic(%d, %d), buf(%d, %d)",
+ ctx->picinfo.pic_w, ctx->picinfo.pic_h,
+ ctx->picinfo.buf_w, ctx->picinfo.buf_h);
+ mtk_vdec_debug(ctx, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0],
+ ctx->picinfo.fb_sz[1]);
+
+ if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w ||
+ ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) {
+ inst->resolution_changed = true;
+ if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w ||
+ ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h)
+ inst->realloc_mv_buf = true;
+
+ mtk_v4l2_vdec_dbg(1, inst->ctx, "resChg: (%d %d) : old(%d, %d) -> new(%d, %d)",
+ inst->resolution_changed,
+ inst->realloc_mv_buf,
+ ctx->last_decoded_picinfo.pic_w,
+ ctx->last_decoded_picinfo.pic_h,
+ ctx->picinfo.pic_w, ctx->picinfo.pic_h);
+ }
+}
+
+static void vdec_hevc_slice_get_crop_info(struct vdec_hevc_slice_inst *inst,
+ struct v4l2_rect *cr)
+{
+ cr->left = 0;
+ cr->top = 0;
+ cr->width = inst->ctx->picinfo.pic_w;
+ cr->height = inst->ctx->picinfo.pic_h;
+
+ mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d",
+ cr->left, cr->top, cr->width, cr->height);
+}
+
+static int vdec_hevc_slice_setup_lat_buffer(struct vdec_hevc_slice_inst *inst,
+ struct mtk_vcodec_mem *bs,
+ struct vdec_lat_buf *lat_buf,
+ bool *res_chg)
+{
+ struct mtk_vcodec_mem *mem;
+ struct mtk_video_dec_buf *src_buf_info;
+ struct vdec_hevc_slice_share_info *share_info;
+ int i, err;
+
+ inst->vsi->bs.dma_addr = (u64)bs->dma_addr;
+ inst->vsi->bs.size = bs->size;
+
+ src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer);
+ lat_buf->src_buf_req = src_buf_info->m2m_buf.vb.vb2_buf.req_obj.req;
+ v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, &lat_buf->ts_info, true);
+
+ *res_chg = inst->resolution_changed;
+ if (inst->resolution_changed) {
+ mtk_vdec_debug(inst->ctx, "- resolution changed -");
+ if (inst->realloc_mv_buf) {
+ err = vdec_hevc_slice_alloc_mv_buf(inst, &inst->ctx->picinfo);
+ inst->realloc_mv_buf = false;
+ if (err)
+ return err;
+ }
+ inst->resolution_changed = false;
+ }
+
+ for (i = 0; i < HEVC_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ inst->vsi->mv_buf_dma[i].dma_addr = mem->dma_addr;
+ inst->vsi->mv_buf_dma[i].size = mem->size;
+ }
+
+ inst->vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
+ inst->vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
+
+ inst->vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
+ inst->vsi->err_map.size = lat_buf->wdma_err_addr.size;
+
+ inst->vsi->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr;
+ inst->vsi->slice_bc.size = lat_buf->slice_bc_addr.size;
+
+ inst->vsi->trans.dma_addr_end = inst->ctx->msg_queue.wdma_rptr_addr;
+ inst->vsi->trans.dma_addr = inst->ctx->msg_queue.wdma_wptr_addr;
+
+ share_info = lat_buf->private_data;
+ share_info->trans.dma_addr = inst->vsi->trans.dma_addr;
+ share_info->trans.dma_addr_end = inst->vsi->trans.dma_addr_end;
+
+ mtk_vdec_debug(inst->ctx, "lat: ube addr/size(0x%llx 0x%llx) err:0x%llx",
+ inst->vsi->ube.buf,
+ inst->vsi->ube.padding,
+ inst->vsi->err_map.buf);
+
+ mtk_vdec_debug(inst->ctx, "slice addr/size(0x%llx 0x%llx) trans start/end((0x%llx 0x%llx))",
+ inst->vsi->slice_bc.buf,
+ inst->vsi->slice_bc.padding,
+ inst->vsi->trans.buf,
+ inst->vsi->trans.padding);
+
+ return 0;
+}
+
+static int vdec_hevc_slice_setup_core_buffer(struct vdec_hevc_slice_inst *inst,
+ struct vdec_hevc_slice_share_info *share_info,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct mtk_vcodec_mem *mem;
+ struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
+ struct vb2_v4l2_buffer *vb2_v4l2;
+ struct vdec_fb *fb;
+ u64 y_fb_dma, c_fb_dma;
+ int i;
+
+ fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
+ if (!fb) {
+ mtk_vdec_err(inst->ctx, "fb buffer is NULL");
+ return -EBUSY;
+ }
+
+ y_fb_dma = (u64)fb->base_y.dma_addr;
+ if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1)
+ c_fb_dma =
+ y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h;
+ else
+ c_fb_dma = (u64)fb->base_c.dma_addr;
+
+ mtk_vdec_debug(inst->ctx, "[hevc-core] y/c addr = 0x%llx 0x%llx", y_fb_dma, c_fb_dma);
+
+ inst->vsi_core->fb.y.dma_addr = y_fb_dma;
+ inst->vsi_core->fb.y.size = ctx->picinfo.fb_sz[0];
+ inst->vsi_core->fb.c.dma_addr = c_fb_dma;
+ inst->vsi_core->fb.y.size = ctx->picinfo.fb_sz[1];
+
+ inst->vsi_core->dec.vdec_fb_va = (unsigned long)fb;
+
+ inst->vsi_core->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
+ inst->vsi_core->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
+
+ inst->vsi_core->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
+ inst->vsi_core->err_map.size = lat_buf->wdma_err_addr.size;
+
+ inst->vsi_core->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr;
+ inst->vsi_core->slice_bc.size = lat_buf->slice_bc_addr.size;
+
+ inst->vsi_core->trans.dma_addr = share_info->trans.dma_addr;
+ inst->vsi_core->trans.dma_addr_end = share_info->trans.dma_addr_end;
+
+ inst->vsi_core->wrap.dma_addr = inst->wrap_addr.dma_addr;
+ inst->vsi_core->wrap.size = inst->wrap_addr.size;
+
+ for (i = 0; i < HEVC_MAX_MV_NUM; i++) {
+ mem = &inst->mv_buf[i];
+ inst->vsi_core->mv_buf_dma[i].dma_addr = mem->dma_addr;
+ inst->vsi_core->mv_buf_dma[i].size = mem->size;
+ }
+
+ vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
+ v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, vb2_v4l2, true);
+
+ return 0;
+}
+
+static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_hevc_slice_inst *inst;
+ int err, vsi_size;
+
+ inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->ctx = ctx;
+
+ inst->vpu.id = SCP_IPI_VDEC_LAT;
+ inst->vpu.core_id = SCP_IPI_VDEC_CORE;
+ inst->vpu.ctx = ctx;
+ inst->vpu.codec_type = ctx->current_codec;
+ inst->vpu.capture_type = ctx->capture_fourcc;
+
+ ctx->drv_handle = inst;
+ err = vpu_dec_init(&inst->vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "vdec_hevc init err=%d", err);
+ goto error_free_inst;
+ }
+
+ vsi_size = round_up(sizeof(struct vdec_hevc_slice_vsi), VCODEC_DEC_ALIGNED_64);
+ inst->vsi = inst->vpu.vsi;
+ inst->vsi_core =
+ (struct vdec_hevc_slice_vsi *)(((char *)inst->vpu.vsi) + vsi_size);
+
+ inst->resolution_changed = true;
+ inst->realloc_mv_buf = true;
+
+ inst->wrap_addr.size = VDEC_HEVC_WRAP_SZ;
+ err = mtk_vcodec_mem_alloc(ctx, &inst->wrap_addr);
+ if (err)
+ goto error_free_inst;
+
+ mtk_vdec_debug(ctx, "lat struct size = %d,%d,%d,%d vsi: %d\n",
+ (int)sizeof(struct mtk_hevc_sps_param),
+ (int)sizeof(struct mtk_hevc_pps_param),
+ (int)sizeof(struct vdec_hevc_slice_lat_dec_param),
+ (int)sizeof(struct mtk_hevc_dpb_info),
+ vsi_size);
+ mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x",
+ inst, inst->vpu.codec_type);
+
+ return 0;
+error_free_inst:
+ kfree(inst);
+ return err;
+}
+
+static void vdec_hevc_slice_deinit(void *h_vdec)
+{
+ struct vdec_hevc_slice_inst *inst = h_vdec;
+ struct mtk_vcodec_mem *mem;
+
+ vpu_dec_deinit(&inst->vpu);
+ vdec_hevc_slice_free_mv_buf(inst);
+
+ mem = &inst->wrap_addr;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+
+ vdec_msg_queue_deinit(&inst->ctx->msg_queue, inst->ctx);
+ kfree(inst);
+}
+
+static int vdec_hevc_slice_core_decode(struct vdec_lat_buf *lat_buf)
+{
+ int err, timeout;
+ struct mtk_vcodec_dec_ctx *ctx = lat_buf->ctx;
+ struct vdec_hevc_slice_inst *inst = ctx->drv_handle;
+ struct vdec_hevc_slice_share_info *share_info = lat_buf->private_data;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+
+ mtk_vdec_debug(ctx, "[hevc-core] vdec_hevc core decode");
+ memcpy(&inst->vsi_core->hevc_slice_params, &share_info->hevc_slice_params,
+ sizeof(share_info->hevc_slice_params));
+
+ err = vdec_hevc_slice_setup_core_buffer(inst, share_info, lat_buf);
+ if (err)
+ goto vdec_dec_end;
+
+ vdec_hevc_slice_fill_decode_reflist(inst, &inst->vsi_core->hevc_slice_params,
+ share_info);
+ err = vpu_dec_core(vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "core decode err=%d", err);
+ goto vdec_dec_end;
+ }
+
+ /* wait decoder done interrupt */
+ timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
+ if (timeout)
+ mtk_vdec_err(ctx, "core decode timeout: pic_%d", ctx->decoded_frame_cnt);
+ inst->vsi_core->dec.timeout = !!timeout;
+
+ vpu_dec_core_end(vpu);
+ mtk_vdec_debug(ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x",
+ ctx->decoded_frame_cnt,
+ inst->vsi_core->dec.crc[0], inst->vsi_core->dec.crc[1],
+ inst->vsi_core->dec.crc[2], inst->vsi_core->dec.crc[3],
+ inst->vsi_core->dec.crc[4], inst->vsi_core->dec.crc[5],
+ inst->vsi_core->dec.crc[6], inst->vsi_core->dec.crc[7]);
+
+vdec_dec_end:
+ vdec_msg_queue_update_ube_rptr(&lat_buf->ctx->msg_queue, share_info->trans.dma_addr_end);
+ ctx->dev->vdec_pdata->cap_to_disp(ctx, !!err, lat_buf->src_buf_req);
+ mtk_vdec_debug(ctx, "core decode done err=%d", err);
+ ctx->decoded_frame_cnt++;
+ return 0;
+}
+
+static int vdec_hevc_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_hevc_slice_inst *inst = h_vdec;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+ int err, timeout = 0;
+ unsigned int data[2];
+ struct vdec_lat_buf *lat_buf;
+ struct vdec_hevc_slice_share_info *share_info;
+
+ if (vdec_msg_queue_init(&inst->ctx->msg_queue, inst->ctx,
+ vdec_hevc_slice_core_decode,
+ sizeof(*share_info)))
+ return -ENOMEM;
+
+ /* bs NULL means flush decoder */
+ if (!bs) {
+ vdec_msg_queue_wait_lat_buf_full(&inst->ctx->msg_queue);
+ return vpu_dec_reset(vpu);
+ }
+
+ lat_buf = vdec_msg_queue_dqbuf(&inst->ctx->msg_queue.lat_ctx);
+ if (!lat_buf) {
+ mtk_vdec_debug(inst->ctx, "failed to get lat buffer");
+ return -EAGAIN;
+ }
+
+ share_info = lat_buf->private_data;
+ err = vdec_hevc_slice_fill_decode_parameters(inst, share_info);
+ if (err)
+ goto err_free_fb_out;
+
+ err = vdec_hevc_slice_setup_lat_buffer(inst, bs, lat_buf, res_chg);
+ if (err)
+ goto err_free_fb_out;
+
+ err = vpu_dec_start(vpu, data, 2);
+ if (err) {
+ mtk_vdec_debug(inst->ctx, "lat decode err: %d", err);
+ goto err_free_fb_out;
+ }
+
+ if (IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) {
+ memcpy(&share_info->hevc_slice_params, &inst->vsi->hevc_slice_params,
+ sizeof(share_info->hevc_slice_params));
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf);
+ }
+
+ /* wait decoder done interrupt */
+ timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
+ if (timeout)
+ mtk_vdec_err(inst->ctx, "lat decode timeout: pic_%d", inst->slice_dec_num);
+ inst->vsi->dec.timeout = !!timeout;
+
+ err = vpu_dec_end(vpu);
+ if (err == SLICE_HEADER_FULL || err == TRANS_BUFFER_FULL) {
+ if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability))
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf);
+ inst->slice_dec_num++;
+ mtk_vdec_err(inst->ctx, "lat dec fail: pic_%d err:%d", inst->slice_dec_num, err);
+ return -EINVAL;
+ }
+
+ share_info->trans.dma_addr_end = inst->ctx->msg_queue.wdma_addr.dma_addr +
+ inst->vsi->dec.wdma_end_addr_offset;
+ vdec_msg_queue_update_ube_wptr(&lat_buf->ctx->msg_queue, share_info->trans.dma_addr_end);
+
+ if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) {
+ memcpy(&share_info->hevc_slice_params, &inst->vsi->hevc_slice_params,
+ sizeof(share_info->hevc_slice_params));
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf);
+ }
+ mtk_vdec_debug(inst->ctx, "dec num: %d lat crc: 0x%x 0x%x 0x%x", inst->slice_dec_num,
+ inst->vsi->dec.crc[0], inst->vsi->dec.crc[1], inst->vsi->dec.crc[2]);
+
+ inst->slice_dec_num++;
+ return 0;
+err_free_fb_out:
+ vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf);
+ mtk_vdec_err(inst->ctx, "slice dec number: %d err: %d", inst->slice_dec_num, err);
+ return err;
+}
+
+static int vdec_hevc_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *unused, bool *res_chg)
+{
+ struct vdec_hevc_slice_inst *inst = h_vdec;
+
+ if (!h_vdec || inst->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE)
+ return -EINVAL;
+
+ return vdec_hevc_slice_lat_decode(h_vdec, bs, unused, res_chg);
+}
+
+static int vdec_hevc_slice_get_param(void *h_vdec, enum vdec_get_param_type type,
+ void *out)
+{
+ struct vdec_hevc_slice_inst *inst = h_vdec;
+
+ switch (type) {
+ case GET_PARAM_PIC_INFO:
+ vdec_hevc_slice_get_pic_info(inst);
+ break;
+ case GET_PARAM_DPB_SIZE:
+ *(unsigned int *)out = 6;
+ break;
+ case GET_PARAM_CROP_INFO:
+ vdec_hevc_slice_get_crop_info(inst, out);
+ break;
+ default:
+ mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+const struct vdec_common_if vdec_hevc_slice_multi_if = {
+ .init = vdec_hevc_slice_init,
+ .decode = vdec_hevc_slice_decode,
+ .get_param = vdec_hevc_slice_get_param,
+ .deinit = vdec_hevc_slice_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
new file mode 100644
index 000000000..19407f9bc
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
@@ -0,0 +1,608 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016 MediaTek Inc.
+ * Author: Jungchang Tsao <jungchang.tsao@mediatek.com>
+ * PC Chen <pc.chen@mediatek.com>
+ */
+
+#include <linux/slab.h>
+#include "../vdec_drv_if.h"
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_vpu_if.h"
+#include "../vdec_drv_base.h"
+
+/* Decoding picture buffer size (3 reference frames plus current frame) */
+#define VP8_DPB_SIZE 4
+
+/* HW working buffer size (bytes) */
+#define VP8_WORKING_BUF_SZ (45 * 4096)
+
+/* HW control register address */
+#define VP8_SEGID_DRAM_ADDR 0x3c
+#define VP8_HW_VLD_ADDR 0x93C
+#define VP8_HW_VLD_VALUE 0x940
+#define VP8_BSASET 0x100
+#define VP8_BSDSET 0x104
+#define VP8_RW_CKEN_SET 0x0
+#define VP8_RW_DCM_CON 0x18
+#define VP8_WO_VLD_SRST 0x108
+#define VP8_RW_MISC_SYS_SEL 0x84
+#define VP8_RW_MISC_SPEC_CON 0xC8
+#define VP8_WO_VLD_SRST 0x108
+#define VP8_RW_VP8_CTRL 0xA4
+#define VP8_RW_MISC_DCM_CON 0xEC
+#define VP8_RW_MISC_SRST 0xF4
+#define VP8_RW_MISC_FUNC_CON 0xCC
+
+#define VP8_MAX_FRM_BUF_NUM 5
+#define VP8_MAX_FRM_BUF_NODE_NUM (VP8_MAX_FRM_BUF_NUM * 2)
+
+/* required buffer size (bytes) to store decode information */
+#define VP8_HW_SEGMENT_DATA_SZ 272
+#define VP8_HW_SEGMENT_UINT 4
+
+#define VP8_DEC_TABLE_PROC_LOOP 96
+#define VP8_DEC_TABLE_UNIT 3
+#define VP8_DEC_TABLE_SZ 300
+#define VP8_DEC_TABLE_OFFSET 2
+#define VP8_DEC_TABLE_RW_UNIT 4
+
+/**
+ * struct vdec_vp8_dec_info - decode misc information
+ * @working_buf_dma : working buffer dma address
+ * @prev_y_dma : previous decoded frame buffer Y plane address
+ * @cur_y_fb_dma : current plane Y frame buffer dma address
+ * @cur_c_fb_dma : current plane C frame buffer dma address
+ * @bs_dma : bitstream dma address
+ * @bs_sz : bitstream size
+ * @resolution_changed: resolution change flag 1 - changed, 0 - not change
+ * @show_frame : display this frame or not
+ * @wait_key_frame : wait key frame coming
+ */
+struct vdec_vp8_dec_info {
+ uint64_t working_buf_dma;
+ uint64_t prev_y_dma;
+ uint64_t cur_y_fb_dma;
+ uint64_t cur_c_fb_dma;
+ uint64_t bs_dma;
+ uint32_t bs_sz;
+ uint32_t resolution_changed;
+ uint32_t show_frame;
+ uint32_t wait_key_frame;
+};
+
+/**
+ * struct vdec_vp8_vsi - VPU shared information
+ * @dec : decoding information
+ * @pic : picture information
+ * @dec_table : decoder coefficient table
+ * @segment_buf : segmentation buffer
+ * @load_data : flag to indicate reload decode data
+ */
+struct vdec_vp8_vsi {
+ struct vdec_vp8_dec_info dec;
+ struct vdec_pic_info pic;
+ uint32_t dec_table[VP8_DEC_TABLE_SZ];
+ uint32_t segment_buf[VP8_HW_SEGMENT_DATA_SZ][VP8_HW_SEGMENT_UINT];
+ uint32_t load_data;
+};
+
+/**
+ * struct vdec_vp8_hw_reg_base - HW register base
+ * @misc : base address for misc
+ * @ld : base address for ld
+ * @top : base address for top
+ * @cm : base address for cm
+ * @hwd : base address for hwd
+ * @hwb : base address for hwb
+ */
+struct vdec_vp8_hw_reg_base {
+ void __iomem *misc;
+ void __iomem *ld;
+ void __iomem *top;
+ void __iomem *cm;
+ void __iomem *hwd;
+ void __iomem *hwb;
+};
+
+/**
+ * struct vdec_vp8_vpu_inst - VPU instance for VP8 decode
+ * @wq_hd : Wait queue to wait VPU message ack
+ * @signaled : 1 - Host has received ack message from VPU, 0 - not receive
+ * @failure : VPU execution result status 0 - success, others - fail
+ * @inst_addr : VPU decoder instance address
+ */
+struct vdec_vp8_vpu_inst {
+ wait_queue_head_t wq_hd;
+ int signaled;
+ int failure;
+ uint32_t inst_addr;
+};
+
+/* frame buffer (fb) list
+ * [available_fb_node_list] - decode fb are initialized to 0 and populated in
+ * [fb_use_list] - fb is set after decode and is moved to this list
+ * [fb_free_list] - fb is not needed for reference will be moved from
+ * [fb_use_list] to [fb_free_list] and
+ * once user remove fb from [fb_free_list],
+ * it is circulated back to [available_fb_node_list]
+ * [fb_disp_list] - fb is set after decode and is moved to this list
+ * once user remove fb from [fb_disp_list] it is
+ * circulated back to [available_fb_node_list]
+ */
+
+/**
+ * struct vdec_vp8_inst - VP8 decoder instance
+ * @cur_fb : current frame buffer
+ * @dec_fb : decode frame buffer node
+ * @available_fb_node_list : list to store available frame buffer node
+ * @fb_use_list : list to store frame buffer in use
+ * @fb_free_list : list to store free frame buffer
+ * @fb_disp_list : list to store display ready frame buffer
+ * @working_buf : HW decoder working buffer
+ * @reg_base : HW register base address
+ * @frm_cnt : decode frame count
+ * @ctx : V4L2 context
+ * @vpu : VPU instance for decoder
+ * @vsi : VPU share information
+ */
+struct vdec_vp8_inst {
+ struct vdec_fb *cur_fb;
+ struct vdec_fb_node dec_fb[VP8_MAX_FRM_BUF_NODE_NUM];
+ struct list_head available_fb_node_list;
+ struct list_head fb_use_list;
+ struct list_head fb_free_list;
+ struct list_head fb_disp_list;
+ struct mtk_vcodec_mem working_buf;
+ struct vdec_vp8_hw_reg_base reg_base;
+ unsigned int frm_cnt;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_vpu_inst vpu;
+ struct vdec_vp8_vsi *vsi;
+};
+
+static void get_hw_reg_base(struct vdec_vp8_inst *inst)
+{
+ void __iomem **reg_base = inst->ctx->dev->reg_base;
+
+ inst->reg_base.top = mtk_vcodec_get_reg_addr(reg_base, VDEC_TOP);
+ inst->reg_base.cm = mtk_vcodec_get_reg_addr(reg_base, VDEC_CM);
+ inst->reg_base.hwd = mtk_vcodec_get_reg_addr(reg_base, VDEC_HWD);
+ inst->reg_base.misc = mtk_vcodec_get_reg_addr(reg_base, VDEC_MISC);
+ inst->reg_base.ld = mtk_vcodec_get_reg_addr(reg_base, VDEC_LD);
+ inst->reg_base.hwb = mtk_vcodec_get_reg_addr(reg_base, VDEC_HWB);
+}
+
+static void write_hw_segmentation_data(struct vdec_vp8_inst *inst)
+{
+ int i, j;
+ u32 seg_id_addr;
+ u32 val;
+ void __iomem *cm = inst->reg_base.cm;
+ struct vdec_vp8_vsi *vsi = inst->vsi;
+
+ seg_id_addr = readl(inst->reg_base.top + VP8_SEGID_DRAM_ADDR) >> 4;
+
+ for (i = 0; i < ARRAY_SIZE(vsi->segment_buf); i++) {
+ for (j = ARRAY_SIZE(vsi->segment_buf[i]) - 1; j >= 0; j--) {
+ val = (1 << 16) + ((seg_id_addr + i) << 2) + j;
+ writel(val, cm + VP8_HW_VLD_ADDR);
+
+ val = vsi->segment_buf[i][j];
+ writel(val, cm + VP8_HW_VLD_VALUE);
+ }
+ }
+}
+
+static void read_hw_segmentation_data(struct vdec_vp8_inst *inst)
+{
+ int i, j;
+ u32 seg_id_addr;
+ u32 val;
+ void __iomem *cm = inst->reg_base.cm;
+ struct vdec_vp8_vsi *vsi = inst->vsi;
+
+ seg_id_addr = readl(inst->reg_base.top + VP8_SEGID_DRAM_ADDR) >> 4;
+
+ for (i = 0; i < ARRAY_SIZE(vsi->segment_buf); i++) {
+ for (j = ARRAY_SIZE(vsi->segment_buf[i]) - 1; j >= 0; j--) {
+ val = ((seg_id_addr + i) << 2) + j;
+ writel(val, cm + VP8_HW_VLD_ADDR);
+
+ val = readl(cm + VP8_HW_VLD_VALUE);
+ vsi->segment_buf[i][j] = val;
+ }
+ }
+}
+
+/* reset HW and enable HW read/write data function */
+static void enable_hw_rw_function(struct vdec_vp8_inst *inst)
+{
+ u32 val = 0;
+ void __iomem *misc = inst->reg_base.misc;
+ void __iomem *ld = inst->reg_base.ld;
+ void __iomem *hwb = inst->reg_base.hwb;
+ void __iomem *hwd = inst->reg_base.hwd;
+
+ mtk_vcodec_write_vdecsys(inst->ctx, VP8_RW_CKEN_SET, 0x1);
+ writel(0x101, ld + VP8_WO_VLD_SRST);
+ writel(0x101, hwb + VP8_WO_VLD_SRST);
+
+ mtk_vcodec_write_vdecsys(inst->ctx, 0, 0x1);
+ val = readl(misc + VP8_RW_MISC_SRST);
+ writel((val & 0xFFFFFFFE), misc + VP8_RW_MISC_SRST);
+
+ writel(0x1, misc + VP8_RW_MISC_SYS_SEL);
+ writel(0x17F, misc + VP8_RW_MISC_SPEC_CON);
+ writel(0x71201100, misc + VP8_RW_MISC_FUNC_CON);
+ writel(0x0, ld + VP8_WO_VLD_SRST);
+ writel(0x0, hwb + VP8_WO_VLD_SRST);
+ mtk_vcodec_write_vdecsys(inst->ctx, VP8_RW_DCM_CON, 0x1);
+ writel(0x1, misc + VP8_RW_MISC_DCM_CON);
+ writel(0x1, hwd + VP8_RW_VP8_CTRL);
+}
+
+static void store_dec_table(struct vdec_vp8_inst *inst)
+{
+ int i, j;
+ u32 addr = 0, val = 0;
+ void __iomem *hwd = inst->reg_base.hwd;
+ u32 *p = &inst->vsi->dec_table[VP8_DEC_TABLE_OFFSET];
+
+ for (i = 0; i < VP8_DEC_TABLE_PROC_LOOP; i++) {
+ writel(addr, hwd + VP8_BSASET);
+ for (j = 0; j < VP8_DEC_TABLE_UNIT ; j++) {
+ val = *p++;
+ writel(val, hwd + VP8_BSDSET);
+ }
+ addr += VP8_DEC_TABLE_RW_UNIT;
+ }
+}
+
+static void load_dec_table(struct vdec_vp8_inst *inst)
+{
+ int i;
+ u32 addr = 0;
+ u32 *p = &inst->vsi->dec_table[VP8_DEC_TABLE_OFFSET];
+ void __iomem *hwd = inst->reg_base.hwd;
+
+ for (i = 0; i < VP8_DEC_TABLE_PROC_LOOP; i++) {
+ writel(addr, hwd + VP8_BSASET);
+ /* read total 11 bytes */
+ *p++ = readl(hwd + VP8_BSDSET);
+ *p++ = readl(hwd + VP8_BSDSET);
+ *p++ = readl(hwd + VP8_BSDSET) & 0xFFFFFF;
+ addr += VP8_DEC_TABLE_RW_UNIT;
+ }
+}
+
+static void get_pic_info(struct vdec_vp8_inst *inst, struct vdec_pic_info *pic)
+{
+ *pic = inst->vsi->pic;
+
+ mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)",
+ pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h);
+ mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)",
+ pic->fb_sz[0], pic->fb_sz[1]);
+}
+
+static void vp8_dec_finish(struct vdec_vp8_inst *inst)
+{
+ struct vdec_fb_node *node;
+ uint64_t prev_y_dma = inst->vsi->dec.prev_y_dma;
+
+ mtk_vdec_debug(inst->ctx, "prev fb base dma=%llx", prev_y_dma);
+
+ /* put last decode ok frame to fb_free_list */
+ if (prev_y_dma != 0) {
+ list_for_each_entry(node, &inst->fb_use_list, list) {
+ struct vdec_fb *fb = (struct vdec_fb *)node->fb;
+
+ if (prev_y_dma == (uint64_t)fb->base_y.dma_addr) {
+ list_move_tail(&node->list,
+ &inst->fb_free_list);
+ break;
+ }
+ }
+ }
+
+ /* available_fb_node_list -> fb_use_list */
+ node = list_first_entry(&inst->available_fb_node_list,
+ struct vdec_fb_node, list);
+ node->fb = inst->cur_fb;
+ list_move_tail(&node->list, &inst->fb_use_list);
+
+ /* available_fb_node_list -> fb_disp_list */
+ if (inst->vsi->dec.show_frame) {
+ node = list_first_entry(&inst->available_fb_node_list,
+ struct vdec_fb_node, list);
+ node->fb = inst->cur_fb;
+ list_move_tail(&node->list, &inst->fb_disp_list);
+ }
+}
+
+static void move_fb_list_use_to_free(struct vdec_vp8_inst *inst)
+{
+ struct vdec_fb_node *node, *tmp;
+
+ list_for_each_entry_safe(node, tmp, &inst->fb_use_list, list)
+ list_move_tail(&node->list, &inst->fb_free_list);
+}
+
+static void init_list(struct vdec_vp8_inst *inst)
+{
+ int i;
+
+ INIT_LIST_HEAD(&inst->available_fb_node_list);
+ INIT_LIST_HEAD(&inst->fb_use_list);
+ INIT_LIST_HEAD(&inst->fb_free_list);
+ INIT_LIST_HEAD(&inst->fb_disp_list);
+
+ for (i = 0; i < ARRAY_SIZE(inst->dec_fb); i++) {
+ INIT_LIST_HEAD(&inst->dec_fb[i].list);
+ inst->dec_fb[i].fb = NULL;
+ list_add_tail(&inst->dec_fb[i].list,
+ &inst->available_fb_node_list);
+ }
+}
+
+static void add_fb_to_free_list(struct vdec_vp8_inst *inst, void *fb)
+{
+ struct vdec_fb_node *node;
+
+ if (fb) {
+ node = list_first_entry(&inst->available_fb_node_list,
+ struct vdec_fb_node, list);
+ node->fb = fb;
+ list_move_tail(&node->list, &inst->fb_free_list);
+ }
+}
+
+static int alloc_working_buf(struct vdec_vp8_inst *inst)
+{
+ int err;
+ struct mtk_vcodec_mem *mem = &inst->working_buf;
+
+ mem->size = VP8_WORKING_BUF_SZ;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "Cannot allocate working buffer");
+ return err;
+ }
+
+ inst->vsi->dec.working_buf_dma = (uint64_t)mem->dma_addr;
+ return 0;
+}
+
+static void free_working_buf(struct vdec_vp8_inst *inst)
+{
+ struct mtk_vcodec_mem *mem = &inst->working_buf;
+
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+
+ inst->vsi->dec.working_buf_dma = 0;
+}
+
+static int vdec_vp8_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_vp8_inst *inst;
+ int err;
+
+ inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->ctx = ctx;
+
+ inst->vpu.id = IPI_VDEC_VP8;
+ inst->vpu.ctx = ctx;
+
+ err = vpu_dec_init(&inst->vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "vdec_vp8 init err=%d", err);
+ goto error_free_inst;
+ }
+
+ inst->vsi = (struct vdec_vp8_vsi *)inst->vpu.vsi;
+ init_list(inst);
+ err = alloc_working_buf(inst);
+ if (err)
+ goto error_deinit;
+
+ get_hw_reg_base(inst);
+ mtk_vdec_debug(ctx, "VP8 Instance >> %p", inst);
+
+ ctx->drv_handle = inst;
+ return 0;
+
+error_deinit:
+ vpu_dec_deinit(&inst->vpu);
+error_free_inst:
+ kfree(inst);
+ return err;
+}
+
+static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec;
+ struct vdec_vp8_dec_info *dec = &inst->vsi->dec;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+ unsigned char *bs_va;
+ unsigned int data;
+ int err = 0;
+ uint64_t y_fb_dma;
+ uint64_t c_fb_dma;
+
+ /* bs NULL means flush decoder */
+ if (bs == NULL) {
+ move_fb_list_use_to_free(inst);
+ return vpu_dec_reset(vpu);
+ }
+
+ y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0;
+ c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0;
+
+ mtk_vdec_debug(inst->ctx, "+ [%d] FB y_dma=%llx c_dma=%llx fb=%p",
+ inst->frm_cnt, y_fb_dma, c_fb_dma, fb);
+
+ inst->cur_fb = fb;
+ dec->bs_dma = (unsigned long)bs->dma_addr;
+ dec->bs_sz = bs->size;
+ dec->cur_y_fb_dma = y_fb_dma;
+ dec->cur_c_fb_dma = c_fb_dma;
+
+ mtk_vdec_debug(inst->ctx, "\n + FRAME[%d] +\n", inst->frm_cnt);
+
+ write_hw_segmentation_data(inst);
+ enable_hw_rw_function(inst);
+ store_dec_table(inst);
+
+ bs_va = (unsigned char *)bs->va;
+
+ /* retrieve width/hight and scale info from header */
+ data = (*(bs_va + 9) << 24) | (*(bs_va + 8) << 16) |
+ (*(bs_va + 7) << 8) | *(bs_va + 6);
+ err = vpu_dec_start(vpu, &data, 1);
+ if (err) {
+ add_fb_to_free_list(inst, fb);
+ if (dec->wait_key_frame) {
+ mtk_vdec_debug(inst->ctx, "wait key frame !");
+ return 0;
+ }
+
+ goto error;
+ }
+
+ if (dec->resolution_changed) {
+ mtk_vdec_debug(inst->ctx, "- resolution_changed -");
+ *res_chg = true;
+ add_fb_to_free_list(inst, fb);
+ return 0;
+ }
+
+ /* wait decoder done interrupt */
+ mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, 0);
+
+ if (inst->vsi->load_data)
+ load_dec_table(inst);
+
+ vp8_dec_finish(inst);
+ read_hw_segmentation_data(inst);
+
+ err = vpu_dec_end(vpu);
+ if (err)
+ goto error;
+
+ mtk_vdec_debug(inst->ctx, "\n - FRAME[%d] - show=%d\n", inst->frm_cnt, dec->show_frame);
+ inst->frm_cnt++;
+ *res_chg = false;
+ return 0;
+
+error:
+ mtk_vdec_err(inst->ctx, "\n - FRAME[%d] - err=%d\n", inst->frm_cnt, err);
+ return err;
+}
+
+static void get_disp_fb(struct vdec_vp8_inst *inst, struct vdec_fb **out_fb)
+{
+ struct vdec_fb_node *node;
+ struct vdec_fb *fb;
+
+ node = list_first_entry_or_null(&inst->fb_disp_list,
+ struct vdec_fb_node, list);
+ if (node) {
+ list_move_tail(&node->list, &inst->available_fb_node_list);
+ fb = (struct vdec_fb *)node->fb;
+ fb->status |= FB_ST_DISPLAY;
+ mtk_vdec_debug(inst->ctx, "[FB] get disp fb %p st=%d", node->fb, fb->status);
+ } else {
+ fb = NULL;
+ mtk_vdec_debug(inst->ctx, "[FB] there is no disp fb");
+ }
+
+ *out_fb = fb;
+}
+
+static void get_free_fb(struct vdec_vp8_inst *inst, struct vdec_fb **out_fb)
+{
+ struct vdec_fb_node *node;
+ struct vdec_fb *fb;
+
+ node = list_first_entry_or_null(&inst->fb_free_list,
+ struct vdec_fb_node, list);
+ if (node) {
+ list_move_tail(&node->list, &inst->available_fb_node_list);
+ fb = (struct vdec_fb *)node->fb;
+ fb->status |= FB_ST_FREE;
+ mtk_vdec_debug(inst->ctx, "[FB] get free fb %p st=%d", node->fb, fb->status);
+ } else {
+ fb = NULL;
+ mtk_vdec_debug(inst->ctx, "[FB] there is no free fb");
+ }
+
+ *out_fb = fb;
+}
+
+static void get_crop_info(struct vdec_vp8_inst *inst, struct v4l2_rect *cr)
+{
+ cr->left = 0;
+ cr->top = 0;
+ cr->width = inst->vsi->pic.pic_w;
+ cr->height = inst->vsi->pic.pic_h;
+ mtk_vdec_debug(inst->ctx, "get crop info l=%d, t=%d, w=%d, h=%d",
+ cr->left, cr->top, cr->width, cr->height);
+}
+
+static int vdec_vp8_get_param(void *h_vdec, enum vdec_get_param_type type,
+ void *out)
+{
+ struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec;
+
+ switch (type) {
+ case GET_PARAM_DISP_FRAME_BUFFER:
+ get_disp_fb(inst, out);
+ break;
+
+ case GET_PARAM_FREE_FRAME_BUFFER:
+ get_free_fb(inst, out);
+ break;
+
+ case GET_PARAM_PIC_INFO:
+ get_pic_info(inst, out);
+ break;
+
+ case GET_PARAM_CROP_INFO:
+ get_crop_info(inst, out);
+ break;
+
+ case GET_PARAM_DPB_SIZE:
+ *((unsigned int *)out) = VP8_DPB_SIZE;
+ break;
+
+ default:
+ mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vdec_vp8_deinit(void *h_vdec)
+{
+ struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec;
+
+ vpu_dec_deinit(&inst->vpu);
+ free_working_buf(inst);
+ kfree(inst);
+}
+
+const struct vdec_common_if vdec_vp8_if = {
+ .init = vdec_vp8_init,
+ .decode = vdec_vp8_decode,
+ .get_param = vdec_vp8_get_param,
+ .deinit = vdec_vp8_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c
new file mode 100644
index 000000000..f64b21c07
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: Yunfei Dong <yunfei.dong@mediatek.com>
+ */
+
+#include <linux/slab.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-dma-contig.h>
+#include <uapi/linux/v4l2-controls.h>
+
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_drv_base.h"
+#include "../vdec_drv_if.h"
+#include "../vdec_vpu_if.h"
+
+/* Decoding picture buffer size (3 reference frames plus current frame) */
+#define VP8_DPB_SIZE 4
+
+/* HW working buffer size (bytes) */
+#define VP8_SEG_ID_SZ SZ_256K
+#define VP8_PP_WRAPY_SZ SZ_64K
+#define VP8_PP_WRAPC_SZ SZ_64K
+#define VP8_VLD_PRED_SZ SZ_64K
+
+/**
+ * struct vdec_vp8_slice_info - decode misc information
+ *
+ * @vld_wrapper_dma: vld wrapper dma address
+ * @seg_id_buf_dma: seg id dma address
+ * @wrap_y_dma: wrap y dma address
+ * @wrap_c_dma: wrap y dma address
+ * @cur_y_fb_dma: current plane Y frame buffer dma address
+ * @cur_c_fb_dma: current plane C frame buffer dma address
+ * @bs_dma: bitstream dma address
+ * @bs_sz: bitstream size
+ * @resolution_changed:resolution change flag 1 - changed, 0 - not change
+ * @frame_header_type: current frame header type
+ * @wait_key_frame: wait key frame coming
+ * @crc: used to check whether hardware's status is right
+ * @reserved: reserved, currently unused
+ */
+struct vdec_vp8_slice_info {
+ u64 vld_wrapper_dma;
+ u64 seg_id_buf_dma;
+ u64 wrap_y_dma;
+ u64 wrap_c_dma;
+ u64 cur_y_fb_dma;
+ u64 cur_c_fb_dma;
+ u64 bs_dma;
+ u32 bs_sz;
+ u32 resolution_changed;
+ u32 frame_header_type;
+ u32 crc[8];
+ u32 reserved;
+};
+
+/**
+ * struct vdec_vp8_slice_dpb_info - vp8 reference information
+ *
+ * @y_dma_addr: Y bitstream physical address
+ * @c_dma_addr: CbCr bitstream physical address
+ * @reference_flag: reference picture flag
+ * @reserved: 64bit align
+ */
+struct vdec_vp8_slice_dpb_info {
+ dma_addr_t y_dma_addr;
+ dma_addr_t c_dma_addr;
+ int reference_flag;
+ int reserved;
+};
+
+/**
+ * struct vdec_vp8_slice_vsi - VPU shared information
+ *
+ * @dec: decoding information
+ * @pic: picture information
+ * @vp8_dpb_info: reference buffer information
+ */
+struct vdec_vp8_slice_vsi {
+ struct vdec_vp8_slice_info dec;
+ struct vdec_pic_info pic;
+ struct vdec_vp8_slice_dpb_info vp8_dpb_info[3];
+};
+
+/**
+ * struct vdec_vp8_slice_inst - VP8 decoder instance
+ *
+ * @seg_id_buf: seg buffer
+ * @wrap_y_buf: wrapper y buffer
+ * @wrap_c_buf: wrapper c buffer
+ * @vld_wrapper_buf: vld wrapper buffer
+ * @ctx: V4L2 context
+ * @vpu: VPU instance for decoder
+ * @vsi: VPU share information
+ */
+struct vdec_vp8_slice_inst {
+ struct mtk_vcodec_mem seg_id_buf;
+ struct mtk_vcodec_mem wrap_y_buf;
+ struct mtk_vcodec_mem wrap_c_buf;
+ struct mtk_vcodec_mem vld_wrapper_buf;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_vpu_inst vpu;
+ struct vdec_vp8_slice_vsi *vsi;
+};
+
+static void *vdec_vp8_slice_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id)
+{
+ struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id);
+
+ if (!ctrl)
+ return ERR_PTR(-EINVAL);
+
+ return ctrl->p_cur.p;
+}
+
+static void vdec_vp8_slice_get_pic_info(struct vdec_vp8_slice_inst *inst)
+{
+ struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
+ unsigned int data[3];
+
+ data[0] = ctx->picinfo.pic_w;
+ data[1] = ctx->picinfo.pic_h;
+ data[2] = ctx->capture_fourcc;
+ vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO);
+
+ ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64);
+ ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64);
+ ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0];
+ ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1];
+
+ inst->vsi->pic.pic_w = ctx->picinfo.pic_w;
+ inst->vsi->pic.pic_h = ctx->picinfo.pic_h;
+ inst->vsi->pic.buf_w = ctx->picinfo.buf_w;
+ inst->vsi->pic.buf_h = ctx->picinfo.buf_h;
+ inst->vsi->pic.fb_sz[0] = ctx->picinfo.fb_sz[0];
+ inst->vsi->pic.fb_sz[1] = ctx->picinfo.fb_sz[1];
+ mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)",
+ ctx->picinfo.pic_w, ctx->picinfo.pic_h,
+ ctx->picinfo.buf_w, ctx->picinfo.buf_h);
+ mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)",
+ ctx->picinfo.fb_sz[0], ctx->picinfo.fb_sz[1]);
+}
+
+static int vdec_vp8_slice_alloc_working_buf(struct vdec_vp8_slice_inst *inst)
+{
+ int err;
+ struct mtk_vcodec_mem *mem;
+
+ mem = &inst->seg_id_buf;
+ mem->size = VP8_SEG_ID_SZ;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "Cannot allocate working buffer");
+ return err;
+ }
+ inst->vsi->dec.seg_id_buf_dma = (u64)mem->dma_addr;
+
+ mem = &inst->wrap_y_buf;
+ mem->size = VP8_PP_WRAPY_SZ;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "cannot allocate WRAP Y buffer");
+ return err;
+ }
+ inst->vsi->dec.wrap_y_dma = (u64)mem->dma_addr;
+
+ mem = &inst->wrap_c_buf;
+ mem->size = VP8_PP_WRAPC_SZ;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "cannot allocate WRAP C buffer");
+ return err;
+ }
+ inst->vsi->dec.wrap_c_dma = (u64)mem->dma_addr;
+
+ mem = &inst->vld_wrapper_buf;
+ mem->size = VP8_VLD_PRED_SZ;
+ err = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (err) {
+ mtk_vdec_err(inst->ctx, "cannot allocate vld wrapper buffer");
+ return err;
+ }
+ inst->vsi->dec.vld_wrapper_dma = (u64)mem->dma_addr;
+
+ return 0;
+}
+
+static void vdec_vp8_slice_free_working_buf(struct vdec_vp8_slice_inst *inst)
+{
+ struct mtk_vcodec_mem *mem;
+
+ mem = &inst->seg_id_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ inst->vsi->dec.seg_id_buf_dma = 0;
+
+ mem = &inst->wrap_y_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ inst->vsi->dec.wrap_y_dma = 0;
+
+ mem = &inst->wrap_c_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ inst->vsi->dec.wrap_c_dma = 0;
+
+ mem = &inst->vld_wrapper_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+ inst->vsi->dec.vld_wrapper_dma = 0;
+}
+
+static u64 vdec_vp8_slice_get_ref_by_ts(const struct v4l2_ctrl_vp8_frame *frame_header,
+ int index)
+{
+ switch (index) {
+ case 0:
+ return frame_header->last_frame_ts;
+ case 1:
+ return frame_header->golden_frame_ts;
+ case 2:
+ return frame_header->alt_frame_ts;
+ default:
+ break;
+ }
+
+ return -1;
+}
+
+static int vdec_vp8_slice_get_decode_parameters(struct vdec_vp8_slice_inst *inst)
+{
+ const struct v4l2_ctrl_vp8_frame *frame_header;
+ struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
+ struct vb2_queue *vq;
+ struct vb2_buffer *vb;
+ u64 referenct_ts;
+ int index;
+
+ frame_header = vdec_vp8_slice_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_VP8_FRAME);
+ if (IS_ERR(frame_header))
+ return PTR_ERR(frame_header);
+
+ vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ for (index = 0; index < 3; index++) {
+ referenct_ts = vdec_vp8_slice_get_ref_by_ts(frame_header, index);
+ vb = vb2_find_buffer(vq, referenct_ts);
+ if (!vb) {
+ if (!V4L2_VP8_FRAME_IS_KEY_FRAME(frame_header))
+ mtk_vdec_err(inst->ctx, "reference invalid: index(%d) ts(%lld)",
+ index, referenct_ts);
+ inst->vsi->vp8_dpb_info[index].reference_flag = 0;
+ continue;
+ }
+ inst->vsi->vp8_dpb_info[index].reference_flag = 1;
+
+ inst->vsi->vp8_dpb_info[index].y_dma_addr =
+ vb2_dma_contig_plane_dma_addr(vb, 0);
+ if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2)
+ inst->vsi->vp8_dpb_info[index].c_dma_addr =
+ vb2_dma_contig_plane_dma_addr(vb, 1);
+ else
+ inst->vsi->vp8_dpb_info[index].c_dma_addr =
+ inst->vsi->vp8_dpb_info[index].y_dma_addr +
+ ctx->picinfo.fb_sz[0];
+ }
+
+ inst->vsi->dec.frame_header_type = frame_header->flags >> 1;
+
+ return 0;
+}
+
+static int vdec_vp8_slice_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_vp8_slice_inst *inst;
+ int err;
+
+ inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->ctx = ctx;
+
+ inst->vpu.id = SCP_IPI_VDEC_LAT;
+ inst->vpu.core_id = SCP_IPI_VDEC_CORE;
+ inst->vpu.ctx = ctx;
+ inst->vpu.codec_type = ctx->current_codec;
+ inst->vpu.capture_type = ctx->capture_fourcc;
+
+ err = vpu_dec_init(&inst->vpu);
+ if (err) {
+ mtk_vdec_err(ctx, "vdec_vp8 init err=%d", err);
+ goto error_free_inst;
+ }
+
+ inst->vsi = inst->vpu.vsi;
+ err = vdec_vp8_slice_alloc_working_buf(inst);
+ if (err)
+ goto error_deinit;
+
+ mtk_vdec_debug(ctx, "vp8 struct size = %d vsi: %d\n",
+ (int)sizeof(struct v4l2_ctrl_vp8_frame),
+ (int)sizeof(struct vdec_vp8_slice_vsi));
+ mtk_vdec_debug(ctx, "vp8:%p, codec_type = 0x%x vsi: 0x%p",
+ inst, inst->vpu.codec_type, inst->vpu.vsi);
+
+ ctx->drv_handle = inst;
+ return 0;
+
+error_deinit:
+ vpu_dec_deinit(&inst->vpu);
+error_free_inst:
+ kfree(inst);
+ return err;
+}
+
+static int vdec_vp8_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_vp8_slice_inst *inst = h_vdec;
+ struct vdec_vpu_inst *vpu = &inst->vpu;
+ struct mtk_video_dec_buf *src_buf_info, *dst_buf_info;
+ unsigned int data;
+ u64 y_fb_dma, c_fb_dma;
+ int err, timeout;
+
+ /* Resolution changes are never initiated by us */
+ *res_chg = false;
+
+ /* bs NULL means flush decoder */
+ if (!bs)
+ return vpu_dec_reset(vpu);
+
+ src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer);
+
+ fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx);
+ dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer);
+
+ y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0;
+ if (inst->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1)
+ c_fb_dma = y_fb_dma +
+ inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h;
+ else
+ c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0;
+
+ inst->vsi->dec.bs_dma = (u64)bs->dma_addr;
+ inst->vsi->dec.bs_sz = bs->size;
+ inst->vsi->dec.cur_y_fb_dma = y_fb_dma;
+ inst->vsi->dec.cur_c_fb_dma = c_fb_dma;
+
+ mtk_vdec_debug(inst->ctx, "frame[%d] bs(%zu 0x%llx) y/c(0x%llx 0x%llx)",
+ inst->ctx->decoded_frame_cnt,
+ bs->size, (u64)bs->dma_addr,
+ y_fb_dma, c_fb_dma);
+
+ v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb,
+ &dst_buf_info->m2m_buf.vb, true);
+
+ err = vdec_vp8_slice_get_decode_parameters(inst);
+ if (err)
+ goto error;
+
+ err = vpu_dec_start(vpu, &data, 1);
+ if (err) {
+ mtk_vdec_debug(inst->ctx, "vp8 dec start err!");
+ goto error;
+ }
+
+ if (inst->vsi->dec.resolution_changed) {
+ mtk_vdec_debug(inst->ctx, "- resolution_changed -");
+ *res_chg = true;
+ return 0;
+ }
+
+ /* wait decode done interrupt */
+ timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED,
+ 50, MTK_VDEC_CORE);
+
+ err = vpu_dec_end(vpu);
+ if (err || timeout)
+ mtk_vdec_debug(inst->ctx, "vp8 dec error timeout:%d err: %d pic_%d",
+ timeout, err, inst->ctx->decoded_frame_cnt);
+
+ mtk_vdec_debug(inst->ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x",
+ inst->ctx->decoded_frame_cnt,
+ inst->vsi->dec.crc[0], inst->vsi->dec.crc[1],
+ inst->vsi->dec.crc[2], inst->vsi->dec.crc[3],
+ inst->vsi->dec.crc[4], inst->vsi->dec.crc[5],
+ inst->vsi->dec.crc[6], inst->vsi->dec.crc[7]);
+
+ inst->ctx->decoded_frame_cnt++;
+error:
+ return err;
+}
+
+static int vdec_vp8_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
+{
+ struct vdec_vp8_slice_inst *inst = h_vdec;
+
+ switch (type) {
+ case GET_PARAM_PIC_INFO:
+ vdec_vp8_slice_get_pic_info(inst);
+ break;
+ case GET_PARAM_CROP_INFO:
+ mtk_vdec_debug(inst->ctx, "No need to get vp8 crop information.");
+ break;
+ case GET_PARAM_DPB_SIZE:
+ *((unsigned int *)out) = VP8_DPB_SIZE;
+ break;
+ default:
+ mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void vdec_vp8_slice_deinit(void *h_vdec)
+{
+ struct vdec_vp8_slice_inst *inst = h_vdec;
+
+ vpu_dec_deinit(&inst->vpu);
+ vdec_vp8_slice_free_working_buf(inst);
+ kfree(inst);
+}
+
+const struct vdec_common_if vdec_vp8_slice_if = {
+ .init = vdec_vp8_slice_init,
+ .decode = vdec_vp8_slice_decode,
+ .get_param = vdec_vp8_slice_get_param,
+ .deinit = vdec_vp8_slice_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
new file mode 100644
index 000000000..55355fa70
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
@@ -0,0 +1,1017 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016 MediaTek Inc.
+ * Author: Daniel Hsiao <daniel.hsiao@mediatek.com>
+ * Kai-Sean Yang <kai-sean.yang@mediatek.com>
+ * Tiffany Lin <tiffany.lin@mediatek.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_drv_base.h"
+#include "../vdec_vpu_if.h"
+
+#define VP9_SUPER_FRAME_BS_SZ 64
+#define MAX_VP9_DPB_SIZE 9
+
+#define REFS_PER_FRAME 3
+#define MAX_NUM_REF_FRAMES 8
+#define VP9_MAX_FRM_BUF_NUM 9
+#define VP9_MAX_FRM_BUF_NODE_NUM (VP9_MAX_FRM_BUF_NUM * 2)
+#define VP9_SEG_ID_SZ 0x12000
+
+/**
+ * struct vp9_dram_buf - contains buffer info for vpu
+ * @va : cpu address
+ * @pa : iova address
+ * @sz : buffer size
+ * @padding : for 64 bytes alignment
+ */
+struct vp9_dram_buf {
+ unsigned long va;
+ unsigned long pa;
+ unsigned int sz;
+ unsigned int padding;
+};
+
+/**
+ * struct vp9_fb_info - contains frame buffer info
+ * @fb : frmae buffer
+ * @reserved : reserved field used by vpu
+ */
+struct vp9_fb_info {
+ struct vdec_fb *fb;
+ unsigned int reserved[32];
+};
+
+/**
+ * struct vp9_ref_cnt_buf - contains reference buffer information
+ * @buf : referenced frame buffer
+ * @ref_cnt : referenced frame buffer's reference count.
+ * When reference count=0, remove it from reference list
+ */
+struct vp9_ref_cnt_buf {
+ struct vp9_fb_info buf;
+ unsigned int ref_cnt;
+};
+
+/**
+ * struct vp9_ref_buf - contains current frame's reference buffer information
+ * @buf : reference buffer
+ * @idx : reference buffer index to frm_bufs
+ * @reserved : reserved field used by vpu
+ */
+struct vp9_ref_buf {
+ struct vp9_fb_info *buf;
+ unsigned int idx;
+ unsigned int reserved[6];
+};
+
+/**
+ * struct vp9_sf_ref_fb - contains frame buffer info
+ * @fb : super frame reference frame buffer
+ * @used : this reference frame info entry is used
+ * @padding : for 64 bytes size align
+ */
+struct vp9_sf_ref_fb {
+ struct vdec_fb fb;
+ int used;
+ int padding;
+};
+
+/*
+ * struct vdec_vp9_vsi - shared buffer between host and VPU firmware
+ * AP-W/R : AP is writer/reader on this item
+ * VPU-W/R: VPU is write/reader on this item
+ * @sf_bs_buf : super frame backup buffer (AP-W, VPU-R)
+ * @sf_ref_fb : record supoer frame reference buffer information
+ * (AP-R/W, VPU-R/W)
+ * @sf_next_ref_fb_idx : next available super frame (AP-W, VPU-R)
+ * @sf_frm_cnt : super frame count, filled by vpu (AP-R, VPU-W)
+ * @sf_frm_offset : super frame offset, filled by vpu (AP-R, VPU-W)
+ * @sf_frm_sz : super frame size, filled by vpu (AP-R, VPU-W)
+ * @sf_frm_idx : current super frame (AP-R, VPU-W)
+ * @sf_init : inform super frame info already parsed by vpu (AP-R, VPU-W)
+ * @fb : capture buffer (AP-W, VPU-R)
+ * @bs : bs buffer (AP-W, VPU-R)
+ * @cur_fb : current show capture buffer (AP-R/W, VPU-R/W)
+ * @pic_w : picture width (AP-R, VPU-W)
+ * @pic_h : picture height (AP-R, VPU-W)
+ * @buf_w : codec width (AP-R, VPU-W)
+ * @buf_h : coded height (AP-R, VPU-W)
+ * @buf_sz_y_bs : ufo compressed y plane size (AP-R, VPU-W)
+ * @buf_sz_c_bs : ufo compressed cbcr plane size (AP-R, VPU-W)
+ * @buf_len_sz_y : size used to store y plane ufo info (AP-R, VPU-W)
+ * @buf_len_sz_c : size used to store cbcr plane ufo info (AP-R, VPU-W)
+
+ * @profile : profile sparsed from vpu (AP-R, VPU-W)
+ * @show_frame : [BIT(0)] display this frame or not (AP-R, VPU-W)
+ * [BIT(1)] reset segment data or not (AP-R, VPU-W)
+ * [BIT(2)] trig decoder hardware or not (AP-R, VPU-W)
+ * [BIT(3)] ask VPU to set bits(0~4) accordingly (AP-W, VPU-R)
+ * [BIT(4)] do not reset segment data before every frame (AP-R, VPU-W)
+ * @show_existing_frame : inform this frame is show existing frame
+ * (AP-R, VPU-W)
+ * @frm_to_show_idx : index to show frame (AP-R, VPU-W)
+
+ * @refresh_frm_flags : indicate when frame need to refine reference count
+ * (AP-R, VPU-W)
+ * @resolution_changed : resolution change in this frame (AP-R, VPU-W)
+
+ * @frm_bufs : maintain reference buffer info (AP-R/W, VPU-R/W)
+ * @ref_frm_map : maintain reference buffer map info (AP-R/W, VPU-R/W)
+ * @new_fb_idx : index to frm_bufs array (AP-R, VPU-W)
+ * @frm_num : decoded frame number, include sub-frame count (AP-R, VPU-W)
+ * @mv_buf : motion vector working buffer (AP-W, VPU-R)
+ * @frm_refs : maintain three reference buffer info (AP-R/W, VPU-R/W)
+ * @seg_id_buf : segmentation map working buffer (AP-W, VPU-R)
+ */
+struct vdec_vp9_vsi {
+ unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ];
+ struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1];
+ int sf_next_ref_fb_idx;
+ unsigned int sf_frm_cnt;
+ unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1];
+ unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1];
+ unsigned int sf_frm_idx;
+ unsigned int sf_init;
+ struct vdec_fb fb;
+ struct mtk_vcodec_mem bs;
+ struct vdec_fb cur_fb;
+ unsigned int pic_w;
+ unsigned int pic_h;
+ unsigned int buf_w;
+ unsigned int buf_h;
+ unsigned int buf_sz_y_bs;
+ unsigned int buf_sz_c_bs;
+ unsigned int buf_len_sz_y;
+ unsigned int buf_len_sz_c;
+ unsigned int profile;
+ unsigned int show_frame;
+ unsigned int show_existing_frame;
+ unsigned int frm_to_show_idx;
+ unsigned int refresh_frm_flags;
+ unsigned int resolution_changed;
+
+ struct vp9_ref_cnt_buf frm_bufs[VP9_MAX_FRM_BUF_NUM];
+ int ref_frm_map[MAX_NUM_REF_FRAMES];
+ unsigned int new_fb_idx;
+ unsigned int frm_num;
+ struct vp9_dram_buf mv_buf;
+
+ struct vp9_ref_buf frm_refs[REFS_PER_FRAME];
+ struct vp9_dram_buf seg_id_buf;
+
+};
+
+/*
+ * struct vdec_vp9_inst - vp9 decode instance
+ * @mv_buf : working buffer for mv
+ * @seg_id_buf : working buffer for segmentation map
+ * @dec_fb : vdec_fb node to link fb to different fb_xxx_list
+ * @available_fb_node_list : current available vdec_fb node
+ * @fb_use_list : current used or referenced vdec_fb
+ * @fb_free_list : current available to free vdec_fb
+ * @fb_disp_list : current available to display vdec_fb
+ * @cur_fb : current frame buffer
+ * @ctx : current decode context
+ * @vpu : vpu instance information
+ * @vsi : shared buffer between host and VPU firmware
+ * @total_frm_cnt : total frame count, it do not include sub-frames in super
+ * frame
+ * @mem : instance memory information
+ */
+struct vdec_vp9_inst {
+ struct mtk_vcodec_mem mv_buf;
+ struct mtk_vcodec_mem seg_id_buf;
+
+ struct vdec_fb_node dec_fb[VP9_MAX_FRM_BUF_NODE_NUM];
+ struct list_head available_fb_node_list;
+ struct list_head fb_use_list;
+ struct list_head fb_free_list;
+ struct list_head fb_disp_list;
+ struct vdec_fb *cur_fb;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_vpu_inst vpu;
+ struct vdec_vp9_vsi *vsi;
+ unsigned int total_frm_cnt;
+ struct mtk_vcodec_mem mem;
+};
+
+static bool vp9_is_sf_ref_fb(struct vdec_vp9_inst *inst, struct vdec_fb *fb)
+{
+ int i;
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+
+ for (i = 0; i < ARRAY_SIZE(vsi->sf_ref_fb); i++) {
+ if (fb == &vsi->sf_ref_fb[i].fb)
+ return true;
+ }
+ return false;
+}
+
+static struct vdec_fb *vp9_rm_from_fb_use_list(struct vdec_vp9_inst
+ *inst, void *addr)
+{
+ struct vdec_fb *fb = NULL;
+ struct vdec_fb_node *node;
+
+ list_for_each_entry(node, &inst->fb_use_list, list) {
+ fb = (struct vdec_fb *)node->fb;
+ if (fb->base_y.va == addr) {
+ list_move_tail(&node->list,
+ &inst->available_fb_node_list);
+ return fb;
+ }
+ }
+
+ return NULL;
+}
+
+static void vp9_add_to_fb_free_list(struct vdec_vp9_inst *inst,
+ struct vdec_fb *fb)
+{
+ struct vdec_fb_node *node;
+
+ if (fb) {
+ node = list_first_entry_or_null(&inst->available_fb_node_list,
+ struct vdec_fb_node, list);
+
+ if (node) {
+ node->fb = fb;
+ list_move_tail(&node->list, &inst->fb_free_list);
+ }
+ } else {
+ mtk_vdec_debug(inst->ctx, "No free fb node");
+ }
+}
+
+static void vp9_free_sf_ref_fb(struct vdec_fb *fb)
+{
+ struct vp9_sf_ref_fb *sf_ref_fb =
+ container_of(fb, struct vp9_sf_ref_fb, fb);
+
+ sf_ref_fb->used = 0;
+}
+
+static void vp9_ref_cnt_fb(struct vdec_vp9_inst *inst, int *idx,
+ int new_idx)
+{
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+ int ref_idx = *idx;
+
+ if (ref_idx >= 0 && vsi->frm_bufs[ref_idx].ref_cnt > 0) {
+ vsi->frm_bufs[ref_idx].ref_cnt--;
+
+ if (vsi->frm_bufs[ref_idx].ref_cnt == 0) {
+ if (!vp9_is_sf_ref_fb(inst,
+ vsi->frm_bufs[ref_idx].buf.fb)) {
+ struct vdec_fb *fb;
+
+ fb = vp9_rm_from_fb_use_list(inst,
+ vsi->frm_bufs[ref_idx].buf.fb->base_y.va);
+ vp9_add_to_fb_free_list(inst, fb);
+ } else
+ vp9_free_sf_ref_fb(
+ vsi->frm_bufs[ref_idx].buf.fb);
+ }
+ }
+
+ *idx = new_idx;
+ vsi->frm_bufs[new_idx].ref_cnt++;
+}
+
+static void vp9_free_all_sf_ref_fb(struct vdec_vp9_inst *inst)
+{
+ int i;
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+
+ for (i = 0; i < ARRAY_SIZE(vsi->sf_ref_fb); i++) {
+ if (vsi->sf_ref_fb[i].fb.base_y.va) {
+ mtk_vcodec_mem_free(inst->ctx,
+ &vsi->sf_ref_fb[i].fb.base_y);
+ mtk_vcodec_mem_free(inst->ctx,
+ &vsi->sf_ref_fb[i].fb.base_c);
+ vsi->sf_ref_fb[i].used = 0;
+ }
+ }
+}
+
+/* For each sub-frame except the last one, the driver will dynamically
+ * allocate reference buffer by calling vp9_get_sf_ref_fb()
+ * The last sub-frame will use the original fb provided by the
+ * vp9_dec_decode() interface
+ */
+static int vp9_get_sf_ref_fb(struct vdec_vp9_inst *inst)
+{
+ int idx;
+ struct mtk_vcodec_mem *mem_basy_y;
+ struct mtk_vcodec_mem *mem_basy_c;
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+
+ for (idx = 0;
+ idx < ARRAY_SIZE(vsi->sf_ref_fb);
+ idx++) {
+ if (vsi->sf_ref_fb[idx].fb.base_y.va &&
+ vsi->sf_ref_fb[idx].used == 0) {
+ return idx;
+ }
+ }
+
+ for (idx = 0;
+ idx < ARRAY_SIZE(vsi->sf_ref_fb);
+ idx++) {
+ if (vsi->sf_ref_fb[idx].fb.base_y.va == NULL)
+ break;
+ }
+
+ if (idx == ARRAY_SIZE(vsi->sf_ref_fb)) {
+ mtk_vdec_err(inst->ctx, "List Full");
+ return -1;
+ }
+
+ mem_basy_y = &vsi->sf_ref_fb[idx].fb.base_y;
+ mem_basy_y->size = vsi->buf_sz_y_bs +
+ vsi->buf_len_sz_y;
+
+ if (mtk_vcodec_mem_alloc(inst->ctx, mem_basy_y)) {
+ mtk_vdec_err(inst->ctx, "Cannot allocate sf_ref_buf y_buf");
+ return -1;
+ }
+
+ mem_basy_c = &vsi->sf_ref_fb[idx].fb.base_c;
+ mem_basy_c->size = vsi->buf_sz_c_bs +
+ vsi->buf_len_sz_c;
+
+ if (mtk_vcodec_mem_alloc(inst->ctx, mem_basy_c)) {
+ mtk_vdec_err(inst->ctx, "Cannot allocate sf_ref_fb c_buf");
+ return -1;
+ }
+ vsi->sf_ref_fb[idx].used = 0;
+
+ return idx;
+}
+
+static bool vp9_alloc_work_buf(struct vdec_vp9_inst *inst)
+{
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+ int result;
+ struct mtk_vcodec_mem *mem;
+
+ unsigned int max_pic_w;
+ unsigned int max_pic_h;
+
+
+ if (!(inst->ctx->dev->dec_capability &
+ VCODEC_CAPABILITY_4K_DISABLED)) {
+ max_pic_w = VCODEC_DEC_4K_CODED_WIDTH;
+ max_pic_h = VCODEC_DEC_4K_CODED_HEIGHT;
+ } else {
+ max_pic_w = MTK_VDEC_MAX_W;
+ max_pic_h = MTK_VDEC_MAX_H;
+ }
+
+ if ((vsi->pic_w > max_pic_w) ||
+ (vsi->pic_h > max_pic_h)) {
+ mtk_vdec_err(inst->ctx, "Invalid w/h %d/%d", vsi->pic_w, vsi->pic_h);
+ return false;
+ }
+
+ mtk_vdec_debug(inst->ctx, "BUF CHG(%d): w/h/sb_w/sb_h=%d/%d/%d/%d",
+ vsi->resolution_changed, vsi->pic_w,
+ vsi->pic_h, vsi->buf_w, vsi->buf_h);
+
+ mem = &inst->mv_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+
+ mem->size = ((vsi->buf_w / 64) *
+ (vsi->buf_h / 64) + 2) * 36 * 16;
+ result = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (result) {
+ mem->size = 0;
+ mtk_vdec_err(inst->ctx, "Cannot allocate mv_buf");
+ return false;
+ }
+ /* Set the va again */
+ vsi->mv_buf.va = (unsigned long)mem->va;
+ vsi->mv_buf.pa = (unsigned long)mem->dma_addr;
+ vsi->mv_buf.sz = (unsigned int)mem->size;
+
+
+ mem = &inst->seg_id_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+
+ mem->size = VP9_SEG_ID_SZ;
+ result = mtk_vcodec_mem_alloc(inst->ctx, mem);
+ if (result) {
+ mem->size = 0;
+ mtk_vdec_err(inst->ctx, "Cannot allocate seg_id_buf");
+ return false;
+ }
+ /* Set the va again */
+ vsi->seg_id_buf.va = (unsigned long)mem->va;
+ vsi->seg_id_buf.pa = (unsigned long)mem->dma_addr;
+ vsi->seg_id_buf.sz = (unsigned int)mem->size;
+
+
+ vp9_free_all_sf_ref_fb(inst);
+ vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
+
+ return true;
+}
+
+static bool vp9_add_to_fb_disp_list(struct vdec_vp9_inst *inst,
+ struct vdec_fb *fb)
+{
+ struct vdec_fb_node *node;
+
+ if (!fb) {
+ mtk_vdec_err(inst->ctx, "fb == NULL");
+ return false;
+ }
+
+ node = list_first_entry_or_null(&inst->available_fb_node_list,
+ struct vdec_fb_node, list);
+ if (node) {
+ node->fb = fb;
+ list_move_tail(&node->list, &inst->fb_disp_list);
+ } else {
+ mtk_vdec_err(inst->ctx, "No available fb node");
+ return false;
+ }
+
+ return true;
+}
+
+/* If any buffer updating is signaled it should be done here. */
+static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst)
+{
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+ struct vp9_fb_info *frm_to_show;
+ int ref_index = 0, mask;
+
+ for (mask = vsi->refresh_frm_flags; mask; mask >>= 1) {
+ if (mask & 1)
+ vp9_ref_cnt_fb(inst, &vsi->ref_frm_map[ref_index],
+ vsi->new_fb_idx);
+ ++ref_index;
+ }
+
+ frm_to_show = &vsi->frm_bufs[vsi->new_fb_idx].buf;
+ vsi->frm_bufs[vsi->new_fb_idx].ref_cnt--;
+
+ if (frm_to_show->fb != inst->cur_fb) {
+ /* This frame is show exist frame and no decode output
+ * copy frame data from frm_to_show to current CAPTURE
+ * buffer
+ */
+ if ((frm_to_show->fb != NULL) &&
+ (inst->cur_fb->base_y.size >=
+ frm_to_show->fb->base_y.size) &&
+ (inst->cur_fb->base_c.size >=
+ frm_to_show->fb->base_c.size)) {
+ memcpy((void *)inst->cur_fb->base_y.va,
+ (void *)frm_to_show->fb->base_y.va,
+ frm_to_show->fb->base_y.size);
+ memcpy((void *)inst->cur_fb->base_c.va,
+ (void *)frm_to_show->fb->base_c.va,
+ frm_to_show->fb->base_c.size);
+ } else {
+ /* After resolution change case, current CAPTURE buffer
+ * may have less buffer size than frm_to_show buffer
+ * size
+ */
+ if (frm_to_show->fb != NULL)
+ mtk_vdec_err(inst->ctx,
+ "base_y.size=%zu, frm_to_show: base_y.size=%zu",
+ inst->cur_fb->base_y.size,
+ frm_to_show->fb->base_y.size);
+ }
+ if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) {
+ if (vsi->show_frame & BIT(0))
+ vp9_add_to_fb_disp_list(inst, inst->cur_fb);
+ }
+ } else {
+ if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) {
+ if (vsi->show_frame & BIT(0))
+ vp9_add_to_fb_disp_list(inst, frm_to_show->fb);
+ }
+ }
+
+ /* when ref_cnt ==0, move this fb to fb_free_list. v4l2 driver will
+ * clean fb_free_list
+ */
+ if (vsi->frm_bufs[vsi->new_fb_idx].ref_cnt == 0) {
+ if (!vp9_is_sf_ref_fb(
+ inst, vsi->frm_bufs[vsi->new_fb_idx].buf.fb)) {
+ struct vdec_fb *fb;
+
+ fb = vp9_rm_from_fb_use_list(inst,
+ vsi->frm_bufs[vsi->new_fb_idx].buf.fb->base_y.va);
+
+ vp9_add_to_fb_free_list(inst, fb);
+ } else {
+ vp9_free_sf_ref_fb(
+ vsi->frm_bufs[vsi->new_fb_idx].buf.fb);
+ }
+ }
+
+ /* if this super frame and it is not last sub-frame, get next fb for
+ * sub-frame decode
+ */
+ if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1)
+ vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
+}
+
+static bool vp9_wait_dec_end(struct vdec_vp9_inst *inst)
+{
+ struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
+
+ mtk_vcodec_wait_for_done_ctx(inst->ctx,
+ MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, 0);
+
+ if (ctx->irq_status & MTK_VDEC_IRQ_STATUS_DEC_SUCCESS)
+ return true;
+ else
+ return false;
+}
+
+static struct vdec_vp9_inst *vp9_alloc_inst(struct mtk_vcodec_dec_ctx *ctx)
+{
+ int result;
+ struct mtk_vcodec_mem mem;
+ struct vdec_vp9_inst *inst;
+
+ memset(&mem, 0, sizeof(mem));
+ mem.size = sizeof(struct vdec_vp9_inst);
+ result = mtk_vcodec_mem_alloc(ctx, &mem);
+ if (result)
+ return NULL;
+
+ inst = mem.va;
+ inst->mem = mem;
+
+ return inst;
+}
+
+static void vp9_free_inst(struct vdec_vp9_inst *inst)
+{
+ struct mtk_vcodec_mem mem;
+
+ mem = inst->mem;
+ if (mem.va)
+ mtk_vcodec_mem_free(inst->ctx, &mem);
+}
+
+static bool vp9_decode_end_proc(struct vdec_vp9_inst *inst)
+{
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+ bool ret = false;
+
+ if (!vsi->show_existing_frame) {
+ ret = vp9_wait_dec_end(inst);
+ if (!ret) {
+ mtk_vdec_err(inst->ctx, "Decode failed, Decode Timeout @[%d]",
+ vsi->frm_num);
+ return false;
+ }
+
+ if (vpu_dec_end(&inst->vpu)) {
+ mtk_vdec_err(inst->ctx, "vp9_dec_vpu_end failed");
+ return false;
+ }
+ mtk_vdec_debug(inst->ctx, "Decode Ok @%d (%d/%d)", vsi->frm_num,
+ vsi->pic_w, vsi->pic_h);
+ } else {
+ mtk_vdec_debug(inst->ctx, "Decode Ok @%d (show_existing_frame)", vsi->frm_num);
+ }
+
+ vp9_swap_frm_bufs(inst);
+ vsi->frm_num++;
+ return true;
+}
+
+static bool vp9_is_last_sub_frm(struct vdec_vp9_inst *inst)
+{
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+
+ if (vsi->sf_frm_cnt <= 0 || vsi->sf_frm_idx == vsi->sf_frm_cnt)
+ return true;
+
+ return false;
+}
+
+static struct vdec_fb *vp9_rm_from_fb_disp_list(struct vdec_vp9_inst *inst)
+{
+ struct vdec_fb_node *node;
+ struct vdec_fb *fb = NULL;
+
+ node = list_first_entry_or_null(&inst->fb_disp_list,
+ struct vdec_fb_node, list);
+ if (node) {
+ fb = (struct vdec_fb *)node->fb;
+ fb->status |= FB_ST_DISPLAY;
+ list_move_tail(&node->list, &inst->available_fb_node_list);
+ mtk_vdec_debug(inst->ctx, "[FB] get disp fb %p st=%d", node->fb, fb->status);
+ } else
+ mtk_vdec_debug(inst->ctx, "[FB] there is no disp fb");
+
+ return fb;
+}
+
+static bool vp9_add_to_fb_use_list(struct vdec_vp9_inst *inst,
+ struct vdec_fb *fb)
+{
+ struct vdec_fb_node *node;
+
+ if (!fb) {
+ mtk_vdec_debug(inst->ctx, "fb == NULL");
+ return false;
+ }
+
+ node = list_first_entry_or_null(&inst->available_fb_node_list,
+ struct vdec_fb_node, list);
+ if (node) {
+ node->fb = fb;
+ list_move_tail(&node->list, &inst->fb_use_list);
+ } else {
+ mtk_vdec_err(inst->ctx, "No free fb node");
+ return false;
+ }
+ return true;
+}
+
+static void vp9_reset(struct vdec_vp9_inst *inst)
+{
+ struct vdec_fb_node *node, *tmp;
+
+ list_for_each_entry_safe(node, tmp, &inst->fb_use_list, list)
+ list_move_tail(&node->list, &inst->fb_free_list);
+
+ vp9_free_all_sf_ref_fb(inst);
+ inst->vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
+
+ if (vpu_dec_reset(&inst->vpu))
+ mtk_vdec_err(inst->ctx, "vp9_dec_vpu_reset failed");
+
+ /* Set the va again, since vpu_dec_reset will clear mv_buf in vpu */
+ inst->vsi->mv_buf.va = (unsigned long)inst->mv_buf.va;
+ inst->vsi->mv_buf.pa = (unsigned long)inst->mv_buf.dma_addr;
+ inst->vsi->mv_buf.sz = (unsigned long)inst->mv_buf.size;
+
+ /* Set the va again, since vpu_dec_reset will clear seg_id_buf in vpu */
+ inst->vsi->seg_id_buf.va = (unsigned long)inst->seg_id_buf.va;
+ inst->vsi->seg_id_buf.pa = (unsigned long)inst->seg_id_buf.dma_addr;
+ inst->vsi->seg_id_buf.sz = (unsigned long)inst->seg_id_buf.size;
+
+}
+
+static void init_all_fb_lists(struct vdec_vp9_inst *inst)
+{
+ int i;
+
+ INIT_LIST_HEAD(&inst->available_fb_node_list);
+ INIT_LIST_HEAD(&inst->fb_use_list);
+ INIT_LIST_HEAD(&inst->fb_free_list);
+ INIT_LIST_HEAD(&inst->fb_disp_list);
+
+ for (i = 0; i < ARRAY_SIZE(inst->dec_fb); i++) {
+ INIT_LIST_HEAD(&inst->dec_fb[i].list);
+ inst->dec_fb[i].fb = NULL;
+ list_add_tail(&inst->dec_fb[i].list,
+ &inst->available_fb_node_list);
+ }
+}
+
+static void get_pic_info(struct vdec_vp9_inst *inst, struct vdec_pic_info *pic)
+{
+ pic->fb_sz[0] = inst->vsi->buf_sz_y_bs + inst->vsi->buf_len_sz_y;
+ pic->fb_sz[1] = inst->vsi->buf_sz_c_bs + inst->vsi->buf_len_sz_c;
+
+ pic->pic_w = inst->vsi->pic_w;
+ pic->pic_h = inst->vsi->pic_h;
+ pic->buf_w = inst->vsi->buf_w;
+ pic->buf_h = inst->vsi->buf_h;
+
+ mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)",
+ pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h);
+ mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)", pic->fb_sz[0], pic->fb_sz[1]);
+}
+
+static void get_disp_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb)
+{
+
+ *out_fb = vp9_rm_from_fb_disp_list(inst);
+ if (*out_fb)
+ (*out_fb)->status |= FB_ST_DISPLAY;
+}
+
+static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb)
+{
+ struct vdec_fb_node *node;
+ struct vdec_fb *fb = NULL;
+
+ node = list_first_entry_or_null(&inst->fb_free_list,
+ struct vdec_fb_node, list);
+ if (node) {
+ list_move_tail(&node->list, &inst->available_fb_node_list);
+ fb = (struct vdec_fb *)node->fb;
+ fb->status |= FB_ST_FREE;
+ mtk_vdec_debug(inst->ctx, "[FB] get free fb %p st=%d", node->fb, fb->status);
+ } else {
+ mtk_vdec_debug(inst->ctx, "[FB] there is no free fb");
+ }
+
+ *out_fb = fb;
+}
+
+static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst,
+ struct vdec_vp9_vsi *vsi) {
+ if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) {
+ mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx);
+ return -EIO;
+ }
+ if (vsi->frm_to_show_idx >= VP9_MAX_FRM_BUF_NUM) {
+ mtk_vdec_err(inst->ctx, "Invalid vsi->frm_to_show_idx=%u.", vsi->frm_to_show_idx);
+ return -EIO;
+ }
+ if (vsi->new_fb_idx >= VP9_MAX_FRM_BUF_NUM) {
+ mtk_vdec_err(inst->ctx, "Invalid vsi->new_fb_idx=%u.", vsi->new_fb_idx);
+ return -EIO;
+ }
+ return 0;
+}
+
+static void vdec_vp9_deinit(void *h_vdec)
+{
+ struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec;
+ struct mtk_vcodec_mem *mem;
+ int ret = 0;
+
+ ret = vpu_dec_deinit(&inst->vpu);
+ if (ret)
+ mtk_vdec_err(inst->ctx, "vpu_dec_deinit failed");
+
+ mem = &inst->mv_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+
+ mem = &inst->seg_id_buf;
+ if (mem->va)
+ mtk_vcodec_mem_free(inst->ctx, mem);
+
+ vp9_free_all_sf_ref_fb(inst);
+ vp9_free_inst(inst);
+}
+
+static int vdec_vp9_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_vp9_inst *inst;
+
+ inst = vp9_alloc_inst(ctx);
+ if (!inst)
+ return -ENOMEM;
+
+ inst->total_frm_cnt = 0;
+ inst->ctx = ctx;
+
+ inst->vpu.id = IPI_VDEC_VP9;
+ inst->vpu.ctx = ctx;
+
+ if (vpu_dec_init(&inst->vpu)) {
+ mtk_vdec_err(inst->ctx, "vp9_dec_vpu_init failed");
+ goto err_deinit_inst;
+ }
+
+ inst->vsi = (struct vdec_vp9_vsi *)inst->vpu.vsi;
+
+ inst->vsi->show_frame |= BIT(3);
+
+ init_all_fb_lists(inst);
+
+ ctx->drv_handle = inst;
+ return 0;
+
+err_deinit_inst:
+ vp9_free_inst(inst);
+
+ return -EINVAL;
+}
+
+static int vdec_vp9_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ int ret = 0;
+ struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec;
+ struct vdec_vp9_vsi *vsi = inst->vsi;
+ u32 data[3];
+ int i;
+
+ *res_chg = false;
+
+ if ((bs == NULL) && (fb == NULL)) {
+ mtk_vdec_debug(inst->ctx, "[EOS]");
+ vp9_reset(inst);
+ return ret;
+ }
+
+ if (bs == NULL) {
+ mtk_vdec_err(inst->ctx, "bs == NULL");
+ return -EINVAL;
+ }
+
+ mtk_vdec_debug(inst->ctx, "Input BS Size = %zu", bs->size);
+
+ while (1) {
+ struct vdec_fb *cur_fb = NULL;
+
+ data[0] = *((unsigned int *)bs->va);
+ data[1] = *((unsigned int *)(bs->va + 4));
+ data[2] = *((unsigned int *)(bs->va + 8));
+
+ vsi->bs = *bs;
+
+ if (fb)
+ vsi->fb = *fb;
+
+ if (!vsi->sf_init) {
+ unsigned int sf_bs_sz;
+ unsigned int sf_bs_off;
+ unsigned char *sf_bs_src;
+ unsigned char *sf_bs_dst;
+
+ sf_bs_sz = bs->size > VP9_SUPER_FRAME_BS_SZ ?
+ VP9_SUPER_FRAME_BS_SZ : bs->size;
+ sf_bs_off = VP9_SUPER_FRAME_BS_SZ - sf_bs_sz;
+ sf_bs_src = bs->va + bs->size - sf_bs_sz;
+ sf_bs_dst = vsi->sf_bs_buf + sf_bs_off;
+ memcpy(sf_bs_dst, sf_bs_src, sf_bs_sz);
+ } else {
+ if ((vsi->sf_frm_cnt > 0) &&
+ (vsi->sf_frm_idx < vsi->sf_frm_cnt)) {
+ unsigned int idx = vsi->sf_frm_idx;
+
+ memcpy((void *)bs->va,
+ (void *)(bs->va +
+ vsi->sf_frm_offset[idx]),
+ vsi->sf_frm_sz[idx]);
+ }
+ }
+
+ if (!(vsi->show_frame & BIT(4)))
+ memset(inst->seg_id_buf.va, 0, inst->seg_id_buf.size);
+
+ ret = vpu_dec_start(&inst->vpu, data, 3);
+ if (ret) {
+ mtk_vdec_err(inst->ctx, "vpu_dec_start failed");
+ goto DECODE_ERROR;
+ }
+
+ if (vsi->show_frame & BIT(1)) {
+ memset(inst->seg_id_buf.va, 0, inst->seg_id_buf.size);
+
+ if (vsi->show_frame & BIT(2)) {
+ ret = vpu_dec_start(&inst->vpu, NULL, 0);
+ if (ret) {
+ mtk_vdec_err(inst->ctx, "vpu trig decoder failed");
+ goto DECODE_ERROR;
+ }
+ }
+ }
+
+ ret = validate_vsi_array_indexes(inst, vsi);
+ if (ret) {
+ mtk_vdec_err(inst->ctx, "Invalid values from VPU.");
+ goto DECODE_ERROR;
+ }
+
+ if (vsi->resolution_changed) {
+ if (!vp9_alloc_work_buf(inst)) {
+ ret = -EIO;
+ goto DECODE_ERROR;
+ }
+ }
+
+ if (vsi->sf_frm_cnt > 0) {
+ cur_fb = &vsi->sf_ref_fb[vsi->sf_next_ref_fb_idx].fb;
+
+ if (vsi->sf_frm_idx < vsi->sf_frm_cnt)
+ inst->cur_fb = cur_fb;
+ else
+ inst->cur_fb = fb;
+ } else {
+ inst->cur_fb = fb;
+ }
+
+ vsi->frm_bufs[vsi->new_fb_idx].buf.fb = inst->cur_fb;
+ if (!vp9_is_sf_ref_fb(inst, inst->cur_fb))
+ vp9_add_to_fb_use_list(inst, inst->cur_fb);
+
+ mtk_vdec_debug(inst->ctx, "[#pic %d]", vsi->frm_num);
+
+ if (vsi->show_existing_frame)
+ mtk_vdec_debug(inst->ctx,
+ "drv->new_fb_idx=%d, drv->frm_to_show_idx=%d",
+ vsi->new_fb_idx, vsi->frm_to_show_idx);
+
+ if (vsi->show_existing_frame && (vsi->frm_to_show_idx <
+ VP9_MAX_FRM_BUF_NUM)) {
+ mtk_vdec_debug(inst->ctx,
+ "Skip Decode drv->new_fb_idx=%d, drv->frm_to_show_idx=%d",
+ vsi->new_fb_idx, vsi->frm_to_show_idx);
+
+ vp9_ref_cnt_fb(inst, &vsi->new_fb_idx,
+ vsi->frm_to_show_idx);
+ }
+
+ /* VPU assign the buffer pointer in its address space,
+ * reassign here
+ */
+ for (i = 0; i < ARRAY_SIZE(vsi->frm_refs); i++) {
+ unsigned int idx = vsi->frm_refs[i].idx;
+
+ vsi->frm_refs[i].buf = &vsi->frm_bufs[idx].buf;
+ }
+
+ if (vsi->resolution_changed) {
+ *res_chg = true;
+ mtk_vdec_debug(inst->ctx, "VDEC_ST_RESOLUTION_CHANGED");
+
+ ret = 0;
+ goto DECODE_ERROR;
+ }
+
+ if (!vp9_decode_end_proc(inst)) {
+ mtk_vdec_err(inst->ctx, "vp9_decode_end_proc");
+ ret = -EINVAL;
+ goto DECODE_ERROR;
+ }
+
+ if (vp9_is_last_sub_frm(inst))
+ break;
+
+ }
+ inst->total_frm_cnt++;
+
+DECODE_ERROR:
+ if (ret < 0)
+ vp9_add_to_fb_free_list(inst, fb);
+
+ return ret;
+}
+
+static void get_crop_info(struct vdec_vp9_inst *inst, struct v4l2_rect *cr)
+{
+ cr->left = 0;
+ cr->top = 0;
+ cr->width = inst->vsi->pic_w;
+ cr->height = inst->vsi->pic_h;
+ mtk_vdec_debug(inst->ctx, "get crop info l=%d, t=%d, w=%d, h=%d\n",
+ cr->left, cr->top, cr->width, cr->height);
+}
+
+static int vdec_vp9_get_param(void *h_vdec, enum vdec_get_param_type type,
+ void *out)
+{
+ struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec;
+ int ret = 0;
+
+ switch (type) {
+ case GET_PARAM_DISP_FRAME_BUFFER:
+ get_disp_fb(inst, out);
+ break;
+ case GET_PARAM_FREE_FRAME_BUFFER:
+ get_free_fb(inst, out);
+ break;
+ case GET_PARAM_PIC_INFO:
+ get_pic_info(inst, out);
+ break;
+ case GET_PARAM_DPB_SIZE:
+ *((unsigned int *)out) = MAX_VP9_DPB_SIZE;
+ break;
+ case GET_PARAM_CROP_INFO:
+ get_crop_info(inst, out);
+ break;
+ default:
+ mtk_vdec_err(inst->ctx, "not supported param type %d", type);
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+const struct vdec_common_if vdec_vp9_if = {
+ .init = vdec_vp9_init,
+ .decode = vdec_vp9_decode,
+ .get_param = vdec_vp9_get_param,
+ .deinit = vdec_vp9_deinit,
+};
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
new file mode 100644
index 000000000..e393e3e66
--- /dev/null
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
@@ -0,0 +1,2216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Author: George Sun <george.sun@mediatek.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <media/videobuf2-dma-contig.h>
+#include <media/v4l2-vp9.h>
+
+#include "../mtk_vcodec_dec.h"
+#include "../../common/mtk_vcodec_intr.h"
+#include "../vdec_drv_base.h"
+#include "../vdec_drv_if.h"
+#include "../vdec_vpu_if.h"
+
+/* reset_frame_context defined in VP9 spec */
+#define VP9_RESET_FRAME_CONTEXT_NONE0 0
+#define VP9_RESET_FRAME_CONTEXT_NONE1 1
+#define VP9_RESET_FRAME_CONTEXT_SPEC 2
+#define VP9_RESET_FRAME_CONTEXT_ALL 3
+
+#define VP9_TILE_BUF_SIZE 4096
+#define VP9_PROB_BUF_SIZE 2560
+#define VP9_COUNTS_BUF_SIZE 16384
+
+#define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x))
+#define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x))
+#define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x))
+#define VP9_BAND_6(band) ((band) == 0 ? 3 : 6)
+
+/*
+ * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint
+ */
+struct vdec_vp9_slice_frame_ctx {
+ struct {
+ u8 probs[6][3];
+ u8 padding[2];
+ } coef_probs[4][2][2][6];
+
+ u8 y_mode_prob[4][16];
+ u8 switch_interp_prob[4][16];
+ u8 seg[32]; /* ignore */
+ u8 comp_inter_prob[16];
+ u8 comp_ref_prob[16];
+ u8 single_ref_prob[5][2];
+ u8 single_ref_prob_padding[6];
+
+ u8 joint[3];
+ u8 joint_padding[13];
+ struct {
+ u8 sign;
+ u8 classes[10];
+ u8 padding[5];
+ } sign_classes[2];
+ struct {
+ u8 class0[1];
+ u8 bits[10];
+ u8 padding[5];
+ } class0_bits[2];
+ struct {
+ u8 class0_fp[2][3];
+ u8 fp[3];
+ u8 class0_hp;
+ u8 hp;
+ u8 padding[5];
+ } class0_fp_hp[2];
+
+ u8 uv_mode_prob[10][16];
+ u8 uv_mode_prob_padding[2][16];
+
+ u8 partition_prob[16][4];
+
+ u8 inter_mode_probs[7][4];
+ u8 skip_probs[4];
+
+ u8 tx_p8x8[2][4];
+ u8 tx_p16x16[2][4];
+ u8 tx_p32x32[2][4];
+ u8 intra_inter_prob[8];
+};
+
+/*
+ * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint
+ */
+struct vdec_vp9_slice_frame_counts {
+ union {
+ struct {
+ u32 band_0[3];
+ u32 padding0[1];
+ u32 band_1_5[5][6];
+ u32 padding1[2];
+ } eob_branch[4][2][2];
+ u32 eob_branch_space[256 * 4];
+ };
+
+ struct {
+ u32 band_0[3][4];
+ u32 band_1_5[5][6][4];
+ } coef_probs[4][2][2];
+
+ u32 intra_inter[4][2];
+ u32 comp_inter[5][2];
+ u32 comp_inter_padding[2];
+ u32 comp_ref[5][2];
+ u32 comp_ref_padding[2];
+ u32 single_ref[5][2][2];
+ u32 inter_mode[7][4];
+ u32 y_mode[4][12];
+ u32 uv_mode[10][10];
+ u32 partition[16][4];
+ u32 switchable_interp[4][4];
+
+ u32 tx_p8x8[2][2];
+ u32 tx_p16x16[2][4];
+ u32 tx_p32x32[2][4];
+
+ u32 skip[3][4];
+
+ u32 joint[4];
+
+ struct {
+ u32 sign[2];
+ u32 class0[2];
+ u32 classes[12];
+ u32 bits[10][2];
+ u32 padding[4];
+ u32 class0_fp[2][4];
+ u32 fp[4];
+ u32 class0_hp[2];
+ u32 hp[2];
+ } mvcomp[2];
+
+ u32 reserved[126][4];
+};
+
+/**
+ * struct vdec_vp9_slice_counts_map - vp9 counts tables to map
+ * v4l2_vp9_frame_symbol_counts
+ * @skip: skip counts.
+ * @y_mode: Y prediction mode counts.
+ * @filter: interpolation filter counts.
+ * @mv_joint: motion vector joint counts.
+ * @sign: motion vector sign counts.
+ * @classes: motion vector class counts.
+ * @class0: motion vector class0 bit counts.
+ * @bits: motion vector bits counts.
+ * @class0_fp: motion vector class0 fractional bit counts.
+ * @fp: motion vector fractional bit counts.
+ * @class0_hp: motion vector class0 high precision fractional bit counts.
+ * @hp: motion vector high precision fractional bit counts.
+ */
+struct vdec_vp9_slice_counts_map {
+ u32 skip[3][2];
+ u32 y_mode[4][10];
+ u32 filter[4][3];
+ u32 sign[2][2];
+ u32 classes[2][11];
+ u32 class0[2][2];
+ u32 bits[2][10][2];
+ u32 class0_fp[2][2][4];
+ u32 fp[2][4];
+ u32 class0_hp[2][2];
+ u32 hp[2][2];
+};
+
+/*
+ * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax
+ * used for decoding
+ */
+struct vdec_vp9_slice_uncompressed_header {
+ u8 profile;
+ u8 last_frame_type;
+ u8 frame_type;
+
+ u8 last_show_frame;
+ u8 show_frame;
+ u8 error_resilient_mode;
+
+ u8 bit_depth;
+ u8 padding0[1];
+ u16 last_frame_width;
+ u16 last_frame_height;
+ u16 frame_width;
+ u16 frame_height;
+
+ u8 intra_only;
+ u8 reset_frame_context;
+ u8 ref_frame_sign_bias[4];
+ u8 allow_high_precision_mv;
+ u8 interpolation_filter;
+
+ u8 refresh_frame_context;
+ u8 frame_parallel_decoding_mode;
+ u8 frame_context_idx;
+
+ /* loop_filter_params */
+ u8 loop_filter_level;
+ u8 loop_filter_sharpness;
+ u8 loop_filter_delta_enabled;
+ s8 loop_filter_ref_deltas[4];
+ s8 loop_filter_mode_deltas[2];
+
+ /* quantization_params */
+ u8 base_q_idx;
+ s8 delta_q_y_dc;
+ s8 delta_q_uv_dc;
+ s8 delta_q_uv_ac;
+
+ /* segmentation_params */
+ u8 segmentation_enabled;
+ u8 segmentation_update_map;
+ u8 segmentation_tree_probs[7];
+ u8 padding1[1];
+ u8 segmentation_temporal_udpate;
+ u8 segmentation_pred_prob[3];
+ u8 segmentation_update_data;
+ u8 segmentation_abs_or_delta_update;
+ u8 feature_enabled[8];
+ s16 feature_value[8][4];
+
+ /* tile_info */
+ u8 tile_cols_log2;
+ u8 tile_rows_log2;
+ u8 padding2[2];
+
+ u16 uncompressed_header_size;
+ u16 header_size_in_bytes;
+
+ /* LAT OUT, CORE IN */
+ u32 dequant[8][4];
+};
+
+/*
+ * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax
+ * used for decoding.
+ */
+struct vdec_vp9_slice_compressed_header {
+ u8 tx_mode;
+ u8 ref_mode;
+ u8 comp_fixed_ref;
+ u8 comp_var_ref[2];
+ u8 padding[3];
+};
+
+/*
+ * struct vdec_vp9_slice_tiles - vp9 tile syntax
+ */
+struct vdec_vp9_slice_tiles {
+ u32 size[4][64];
+ u32 mi_rows[4];
+ u32 mi_cols[64];
+ u8 actual_rows;
+ u8 padding[7];
+};
+
+/*
+ * struct vdec_vp9_slice_reference - vp9 reference frame information
+ */
+struct vdec_vp9_slice_reference {
+ u16 frame_width;
+ u16 frame_height;
+ u8 bit_depth;
+ u8 subsampling_x;
+ u8 subsampling_y;
+ u8 padding;
+};
+
+/*
+ * struct vdec_vp9_slice_frame - vp9 syntax used for decoding
+ */
+struct vdec_vp9_slice_frame {
+ struct vdec_vp9_slice_uncompressed_header uh;
+ struct vdec_vp9_slice_compressed_header ch;
+ struct vdec_vp9_slice_tiles tiles;
+ struct vdec_vp9_slice_reference ref[3];
+};
+
+/*
+ * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance
+ */
+struct vdec_vp9_slice_init_vsi {
+ unsigned int architecture;
+ unsigned int reserved;
+ u64 core_vsi;
+ /* default frame context's position in MicroP */
+ u64 default_frame_ctx;
+};
+
+/*
+ * struct vdec_vp9_slice_mem - memory address and size
+ */
+struct vdec_vp9_slice_mem {
+ union {
+ u64 buf;
+ dma_addr_t dma_addr;
+ };
+ union {
+ size_t size;
+ dma_addr_t dma_addr_end;
+ u64 padding;
+ };
+};
+
+/*
+ * struct vdec_vp9_slice_bs - input buffer for decoding
+ */
+struct vdec_vp9_slice_bs {
+ struct vdec_vp9_slice_mem buf;
+ struct vdec_vp9_slice_mem frame;
+};
+
+/*
+ * struct vdec_vp9_slice_fb - frame buffer for decoding
+ */
+struct vdec_vp9_slice_fb {
+ struct vdec_vp9_slice_mem y;
+ struct vdec_vp9_slice_mem c;
+};
+
+/*
+ * struct vdec_vp9_slice_state - decoding state
+ */
+struct vdec_vp9_slice_state {
+ int err;
+ unsigned int full;
+ unsigned int timeout;
+ unsigned int perf;
+
+ unsigned int crc[12];
+};
+
+/**
+ * struct vdec_vp9_slice_vsi - exchange decoding information
+ * between Main CPU and MicroP
+ *
+ * @bs: input buffer
+ * @fb: output buffer
+ * @ref: 3 reference buffers
+ * @mv: mv working buffer
+ * @seg: segmentation working buffer
+ * @tile: tile buffer
+ * @prob: prob table buffer, used to set/update prob table
+ * @counts: counts table buffer, used to update prob table
+ * @ube: general buffer
+ * @trans: trans buffer position in general buffer
+ * @err_map: error buffer
+ * @row_info: row info buffer
+ * @frame: decoding syntax
+ * @state: decoding state
+ */
+struct vdec_vp9_slice_vsi {
+ /* used in LAT stage */
+ struct vdec_vp9_slice_bs bs;
+ /* used in Core stage */
+ struct vdec_vp9_slice_fb fb;
+ struct vdec_vp9_slice_fb ref[3];
+
+ struct vdec_vp9_slice_mem mv[2];
+ struct vdec_vp9_slice_mem seg[2];
+ struct vdec_vp9_slice_mem tile;
+ struct vdec_vp9_slice_mem prob;
+ struct vdec_vp9_slice_mem counts;
+
+ /* LAT stage's output, Core stage's input */
+ struct vdec_vp9_slice_mem ube;
+ struct vdec_vp9_slice_mem trans;
+ struct vdec_vp9_slice_mem err_map;
+ struct vdec_vp9_slice_mem row_info;
+
+ /* decoding parameters */
+ struct vdec_vp9_slice_frame frame;
+
+ struct vdec_vp9_slice_state state;
+};
+
+/**
+ * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi.
+ * pass it from lat to core
+ *
+ * @vsi: local vsi. copy to/from remote vsi before/after decoding
+ * @ref_idx: reference buffer index
+ * @seq: picture sequence
+ * @state: decoding state
+ */
+struct vdec_vp9_slice_pfc {
+ struct vdec_vp9_slice_vsi vsi;
+
+ u64 ref_idx[3];
+
+ int seq;
+
+ /* LAT/Core CRC */
+ struct vdec_vp9_slice_state state[2];
+};
+
+/*
+ * enum vdec_vp9_slice_resolution_level
+ */
+enum vdec_vp9_slice_resolution_level {
+ VP9_RES_NONE,
+ VP9_RES_FHD,
+ VP9_RES_4K,
+ VP9_RES_8K,
+};
+
+/*
+ * struct vdec_vp9_slice_ref - picture's width & height should kept
+ * for later decoding as reference picture
+ */
+struct vdec_vp9_slice_ref {
+ unsigned int width;
+ unsigned int height;
+};
+
+/**
+ * struct vdec_vp9_slice_instance - represent one vp9 instance
+ *
+ * @ctx: pointer to codec's context
+ * @vpu: VPU instance
+ * @seq: global picture sequence
+ * @level: level of current resolution
+ * @width: width of last picture
+ * @height: height of last picture
+ * @frame_type: frame_type of last picture
+ * @irq: irq to Main CPU or MicroP
+ * @show_frame: show_frame of last picture
+ * @dpb: picture information (width/height) for reference
+ * @mv: mv working buffer
+ * @seg: segmentation working buffer
+ * @tile: tile buffer
+ * @prob: prob table buffer, used to set/update prob table
+ * @counts: counts table buffer, used to update prob table
+ * @frame_ctx: 4 frame context according to VP9 Spec
+ * @frame_ctx_helper: 4 frame context according to newest kernel spec
+ * @dirty: state of each frame context
+ * @init_vsi: vsi used for initialized VP9 instance
+ * @vsi: vsi used for decoding/flush ...
+ * @core_vsi: vsi used for Core stage
+ *
+ * @sc_pfc: per frame context single core
+ * @counts_map: used map to counts_helper
+ * @counts_helper: counts table according to newest kernel spec
+ */
+struct vdec_vp9_slice_instance {
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_vpu_inst vpu;
+
+ int seq;
+
+ enum vdec_vp9_slice_resolution_level level;
+
+ /* for resolution change and get_pic_info */
+ unsigned int width;
+ unsigned int height;
+
+ /* for last_frame_type */
+ unsigned int frame_type;
+ unsigned int irq;
+
+ unsigned int show_frame;
+
+ /* maintain vp9 reference frame state */
+ struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME];
+
+ /*
+ * normal working buffers
+ * mv[0]/seg[0]/tile/prob/counts is used for LAT
+ * mv[1]/seg[1] is used for CORE
+ */
+ struct mtk_vcodec_mem mv[2];
+ struct mtk_vcodec_mem seg[2];
+ struct mtk_vcodec_mem tile;
+ struct mtk_vcodec_mem prob;
+ struct mtk_vcodec_mem counts;
+
+ /* 4 prob tables */
+ struct vdec_vp9_slice_frame_ctx frame_ctx[4];
+ /*4 helper tables */
+ struct v4l2_vp9_frame_context frame_ctx_helper;
+ unsigned char dirty[4];
+
+ /* MicroP vsi */
+ union {
+ struct vdec_vp9_slice_init_vsi *init_vsi;
+ struct vdec_vp9_slice_vsi *vsi;
+ };
+ struct vdec_vp9_slice_vsi *core_vsi;
+
+ struct vdec_vp9_slice_pfc sc_pfc;
+ struct vdec_vp9_slice_counts_map counts_map;
+ struct v4l2_vp9_frame_symbol_counts counts_helper;
+};
+
+/*
+ * all VP9 instances could share this default frame context.
+ */
+static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx;
+static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock);
+
+static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf);
+
+static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance)
+{
+ struct vdec_vp9_slice_frame_ctx *remote_frame_ctx;
+ struct vdec_vp9_slice_frame_ctx *frame_ctx;
+ struct mtk_vcodec_dec_ctx *ctx;
+ struct vdec_vp9_slice_init_vsi *vsi;
+ int ret = 0;
+
+ ctx = instance->ctx;
+ vsi = instance->vpu.vsi;
+ if (!ctx || !vsi)
+ return -EINVAL;
+
+ remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
+ (u32)vsi->default_frame_ctx);
+ if (!remote_frame_ctx) {
+ mtk_vdec_err(ctx, "failed to map default frame ctx\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&vdec_vp9_slice_frame_ctx_lock);
+ if (vdec_vp9_slice_default_frame_ctx)
+ goto out;
+
+ frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL);
+ if (!frame_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vdec_vp9_slice_default_frame_ctx = frame_ctx;
+
+out:
+ mutex_unlock(&vdec_vp9_slice_frame_ctx_lock);
+
+ return ret;
+}
+
+static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_vsi *vsi)
+{
+ struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
+ enum vdec_vp9_slice_resolution_level level;
+ /* super blocks */
+ unsigned int max_sb_w;
+ unsigned int max_sb_h;
+ unsigned int max_w;
+ unsigned int max_h;
+ unsigned int w;
+ unsigned int h;
+ size_t size;
+ int ret;
+ int i;
+
+ w = vsi->frame.uh.frame_width;
+ h = vsi->frame.uh.frame_height;
+
+ if (w > VCODEC_DEC_4K_CODED_WIDTH ||
+ h > VCODEC_DEC_4K_CODED_HEIGHT) {
+ return -EINVAL;
+ } else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
+ /* 4K */
+ level = VP9_RES_4K;
+ max_w = VCODEC_DEC_4K_CODED_WIDTH;
+ max_h = VCODEC_DEC_4K_CODED_HEIGHT;
+ } else {
+ /* FHD */
+ level = VP9_RES_FHD;
+ max_w = MTK_VDEC_MAX_W;
+ max_h = MTK_VDEC_MAX_H;
+ }
+
+ if (level == instance->level)
+ return 0;
+
+ mtk_vdec_debug(ctx, "resolution level changed, from %u to %u, %ux%u",
+ instance->level, level, w, h);
+
+ max_sb_w = DIV_ROUND_UP(max_w, 64);
+ max_sb_h = DIV_ROUND_UP(max_h, 64);
+ ret = -ENOMEM;
+
+ /*
+ * Lat-flush must wait core idle, otherwise core will
+ * use released buffers
+ */
+
+ size = (max_sb_w * max_sb_h + 2) * 576;
+ for (i = 0; i < 2; i++) {
+ if (instance->mv[i].va)
+ mtk_vcodec_mem_free(ctx, &instance->mv[i]);
+ instance->mv[i].size = size;
+ if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i]))
+ goto err;
+ }
+
+ size = (max_sb_w * max_sb_h * 32) + 256;
+ for (i = 0; i < 2; i++) {
+ if (instance->seg[i].va)
+ mtk_vcodec_mem_free(ctx, &instance->seg[i]);
+ instance->seg[i].size = size;
+ if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i]))
+ goto err;
+ }
+
+ if (!instance->tile.va) {
+ instance->tile.size = VP9_TILE_BUF_SIZE;
+ if (mtk_vcodec_mem_alloc(ctx, &instance->tile))
+ goto err;
+ }
+
+ if (!instance->prob.va) {
+ instance->prob.size = VP9_PROB_BUF_SIZE;
+ if (mtk_vcodec_mem_alloc(ctx, &instance->prob))
+ goto err;
+ }
+
+ if (!instance->counts.va) {
+ instance->counts.size = VP9_COUNTS_BUF_SIZE;
+ if (mtk_vcodec_mem_alloc(ctx, &instance->counts))
+ goto err;
+ }
+
+ instance->level = level;
+ return 0;
+
+err:
+ instance->level = VP9_RES_NONE;
+ return ret;
+}
+
+static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance)
+{
+ struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(instance->mv); i++) {
+ if (instance->mv[i].va)
+ mtk_vcodec_mem_free(ctx, &instance->mv[i]);
+ }
+ for (i = 0; i < ARRAY_SIZE(instance->seg); i++) {
+ if (instance->seg[i].va)
+ mtk_vcodec_mem_free(ctx, &instance->seg[i]);
+ }
+ if (instance->tile.va)
+ mtk_vcodec_mem_free(ctx, &instance->tile);
+ if (instance->prob.va)
+ mtk_vcodec_mem_free(ctx, &instance->prob);
+ if (instance->counts.va)
+ mtk_vcodec_mem_free(ctx, &instance->counts);
+
+ instance->level = VP9_RES_NONE;
+}
+
+static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi,
+ struct vdec_vp9_slice_vsi *remote_vsi,
+ int skip)
+{
+ struct vdec_vp9_slice_frame *rf;
+ struct vdec_vp9_slice_frame *f;
+
+ /*
+ * compressed header
+ * dequant
+ * buffer position
+ * decode state
+ */
+ if (!skip) {
+ rf = &remote_vsi->frame;
+ f = &vsi->frame;
+ memcpy(&f->ch, &rf->ch, sizeof(f->ch));
+ memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant));
+ memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
+ }
+
+ memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
+}
+
+static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi,
+ struct vdec_vp9_slice_vsi *remote_vsi)
+{
+ memcpy(remote_vsi, vsi, sizeof(*vsi));
+}
+
+static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2)
+{
+ int sbs = (mi_num + 7) >> 3;
+ int offset = ((idx * sbs) >> tile_log2) << 3;
+
+ return min(offset, mi_num);
+}
+
+static
+int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance)
+{
+ struct vb2_v4l2_buffer *src;
+ struct vb2_v4l2_buffer *dst;
+
+ src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
+ if (!src)
+ return -EINVAL;
+
+ dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
+ if (!dst)
+ return -EINVAL;
+
+ v4l2_m2m_buf_copy_metadata(src, dst, true);
+
+ return 0;
+}
+
+static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct vb2_v4l2_buffer *src;
+ struct vb2_v4l2_buffer *dst;
+
+ src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
+ if (!src)
+ return -EINVAL;
+
+ lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
+
+ dst = &lat_buf->ts_info;
+ v4l2_m2m_buf_copy_metadata(src, dst, true);
+ return 0;
+}
+
+static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_uncompressed_header *uh,
+ struct v4l2_ctrl_vp9_frame *hdr)
+{
+ int i;
+
+ uh->profile = hdr->profile;
+ uh->last_frame_type = instance->frame_type;
+ uh->frame_type = !HDR_FLAG(KEY_FRAME);
+ uh->last_show_frame = instance->show_frame;
+ uh->show_frame = HDR_FLAG(SHOW_FRAME);
+ uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
+ uh->bit_depth = hdr->bit_depth;
+ uh->last_frame_width = instance->width;
+ uh->last_frame_height = instance->height;
+ uh->frame_width = hdr->frame_width_minus_1 + 1;
+ uh->frame_height = hdr->frame_height_minus_1 + 1;
+ uh->intra_only = HDR_FLAG(INTRA_ONLY);
+ /* map v4l2 enum to values defined in VP9 spec for firmware */
+ switch (hdr->reset_frame_context) {
+ case V4L2_VP9_RESET_FRAME_CTX_NONE:
+ uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
+ break;
+ case V4L2_VP9_RESET_FRAME_CTX_SPEC:
+ uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC;
+ break;
+ case V4L2_VP9_RESET_FRAME_CTX_ALL:
+ uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL;
+ break;
+ default:
+ uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
+ break;
+ }
+ /*
+ * ref_frame_sign_bias specifies the intended direction
+ * of the motion vector in time for each reference frame.
+ * - INTRA_FRAME = 0,
+ * - LAST_FRAME = 1,
+ * - GOLDEN_FRAME = 2,
+ * - ALTREF_FRAME = 3,
+ * ref_frame_sign_bias[INTRA_FRAME] is always 0
+ * and VDA only passes another 3 directions
+ */
+ uh->ref_frame_sign_bias[0] = 0;
+ for (i = 0; i < 3; i++)
+ uh->ref_frame_sign_bias[i + 1] =
+ !!(hdr->ref_frame_sign_bias & (1 << i));
+ uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV);
+ uh->interpolation_filter = hdr->interpolation_filter;
+ uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX);
+ uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE);
+ uh->frame_context_idx = hdr->frame_context_idx;
+
+ /* tile info */
+ uh->tile_cols_log2 = hdr->tile_cols_log2;
+ uh->tile_rows_log2 = hdr->tile_rows_log2;
+
+ uh->uncompressed_header_size = hdr->uncompressed_header_size;
+ uh->header_size_in_bytes = hdr->compressed_header_size;
+}
+
+static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_uncompressed_header *uh,
+ struct v4l2_ctrl_vp9_frame *hdr)
+{
+ int error_resilient_mode;
+ int reset_frame_context;
+ int key_frame;
+ int intra_only;
+ int i;
+
+ key_frame = HDR_FLAG(KEY_FRAME);
+ intra_only = HDR_FLAG(INTRA_ONLY);
+ error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
+ reset_frame_context = uh->reset_frame_context;
+
+ /*
+ * according to "6.2 Uncompressed header syntax" in
+ * "VP9 Bitstream & Decoding Process Specification",
+ * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode)
+ */
+ if (key_frame || intra_only || error_resilient_mode) {
+ /*
+ * @reset_frame_context specifies
+ * whether the frame context should be
+ * reset to default values:
+ * 0 or 1 means do not reset any frame context
+ * 2 resets just the context specified in the frame header
+ * 3 resets all contexts
+ */
+ if (key_frame || error_resilient_mode ||
+ reset_frame_context == 3) {
+ /* use default table */
+ for (i = 0; i < 4; i++)
+ instance->dirty[i] = 0;
+ } else if (reset_frame_context == 2) {
+ instance->dirty[uh->frame_context_idx] = 0;
+ }
+ uh->frame_context_idx = 0;
+ }
+}
+
+static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh,
+ struct v4l2_vp9_loop_filter *lf)
+{
+ int i;
+
+ uh->loop_filter_level = lf->level;
+ uh->loop_filter_sharpness = lf->sharpness;
+ uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED);
+ for (i = 0; i < 4; i++)
+ uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i];
+ for (i = 0; i < 2; i++)
+ uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i];
+}
+
+static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh,
+ struct v4l2_vp9_quantization *quant)
+{
+ uh->base_q_idx = quant->base_q_idx;
+ uh->delta_q_y_dc = quant->delta_q_y_dc;
+ uh->delta_q_uv_dc = quant->delta_q_uv_dc;
+ uh->delta_q_uv_ac = quant->delta_q_uv_ac;
+}
+
+static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh,
+ struct v4l2_vp9_segmentation *seg)
+{
+ int i;
+ int j;
+
+ uh->segmentation_enabled = SEG_FLAG(ENABLED);
+ uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP);
+ for (i = 0; i < 7; i++)
+ uh->segmentation_tree_probs[i] = seg->tree_probs[i];
+ uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE);
+ for (i = 0; i < 3; i++)
+ uh->segmentation_pred_prob[i] = seg->pred_probs[i];
+ uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA);
+ uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE);
+ for (i = 0; i < 8; i++) {
+ uh->feature_enabled[i] = seg->feature_enabled[i];
+ for (j = 0; j < 4; j++)
+ uh->feature_value[i][j] = seg->feature_data[i][j];
+ }
+}
+
+static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi,
+ struct v4l2_ctrl_vp9_frame *hdr)
+{
+ unsigned int rows_log2;
+ unsigned int cols_log2;
+ unsigned int rows;
+ unsigned int cols;
+ unsigned int mi_rows;
+ unsigned int mi_cols;
+ struct vdec_vp9_slice_tiles *tiles;
+ int offset;
+ int start;
+ int end;
+ int i;
+
+ rows_log2 = hdr->tile_rows_log2;
+ cols_log2 = hdr->tile_cols_log2;
+ rows = 1 << rows_log2;
+ cols = 1 << cols_log2;
+ tiles = &vsi->frame.tiles;
+ tiles->actual_rows = 0;
+
+ if (rows > 4 || cols > 64)
+ return -EINVAL;
+
+ /* setup mi rows/cols information */
+ mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3;
+ mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3;
+
+ for (i = 0; i < rows; i++) {
+ start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2);
+ end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2);
+ offset = end - start;
+ tiles->mi_rows[i] = (offset + 7) >> 3;
+ if (tiles->mi_rows[i])
+ tiles->actual_rows++;
+ }
+
+ for (i = 0; i < cols; i++) {
+ start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2);
+ end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2);
+ offset = end - start;
+ tiles->mi_cols[i] = (offset + 7) >> 3;
+ }
+
+ return 0;
+}
+
+static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi)
+{
+ memset(&vsi->state, 0, sizeof(vsi->state));
+}
+
+static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc,
+ struct v4l2_ctrl_vp9_frame *hdr)
+{
+ pfc->ref_idx[0] = hdr->last_frame_ts;
+ pfc->ref_idx[1] = hdr->golden_frame_ts;
+ pfc->ref_idx[2] = hdr->alt_frame_ts;
+}
+
+static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_pfc *pfc)
+{
+ struct v4l2_ctrl_vp9_frame *hdr;
+ struct vdec_vp9_slice_uncompressed_header *uh;
+ struct v4l2_ctrl *hdr_ctrl;
+ struct vdec_vp9_slice_vsi *vsi;
+ int ret;
+
+ /* frame header */
+ hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME);
+ if (!hdr_ctrl || !hdr_ctrl->p_cur.p)
+ return -EINVAL;
+
+ hdr = hdr_ctrl->p_cur.p;
+ vsi = &pfc->vsi;
+ uh = &vsi->frame.uh;
+
+ /* setup vsi information */
+ vdec_vp9_slice_setup_hdr(instance, uh, hdr);
+ vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr);
+ vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf);
+ vdec_vp9_slice_setup_quantization(uh, &hdr->quant);
+ vdec_vp9_slice_setup_segmentation(uh, &hdr->seg);
+ ret = vdec_vp9_slice_setup_tile(vsi, hdr);
+ if (ret)
+ return ret;
+ vdec_vp9_slice_setup_state(vsi);
+
+ /* core stage needs buffer index to get ref y/c ... */
+ vdec_vp9_slice_setup_ref_idx(pfc, hdr);
+
+ pfc->seq = instance->seq;
+ instance->seq++;
+
+ return 0;
+}
+
+static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_vsi *vsi,
+ struct mtk_vcodec_mem *bs,
+ struct vdec_lat_buf *lat_buf)
+{
+ int i;
+
+ vsi->bs.buf.dma_addr = bs->dma_addr;
+ vsi->bs.buf.size = bs->size;
+ vsi->bs.frame.dma_addr = bs->dma_addr;
+ vsi->bs.frame.size = bs->size;
+
+ for (i = 0; i < 2; i++) {
+ vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
+ vsi->mv[i].size = instance->mv[i].size;
+ }
+ for (i = 0; i < 2; i++) {
+ vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
+ vsi->seg[i].size = instance->seg[i].size;
+ }
+ vsi->tile.dma_addr = instance->tile.dma_addr;
+ vsi->tile.size = instance->tile.size;
+ vsi->prob.dma_addr = instance->prob.dma_addr;
+ vsi->prob.size = instance->prob.size;
+ vsi->counts.dma_addr = instance->counts.dma_addr;
+ vsi->counts.size = instance->counts.size;
+
+ vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
+ vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
+ vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
+ /* used to store trans end */
+ vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
+ vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
+ vsi->err_map.size = lat_buf->wdma_err_addr.size;
+
+ vsi->row_info.buf = 0;
+ vsi->row_info.size = 0;
+
+ return 0;
+}
+
+static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_vsi *vsi)
+{
+ struct vdec_vp9_slice_frame_ctx *frame_ctx;
+ struct vdec_vp9_slice_uncompressed_header *uh;
+
+ uh = &vsi->frame.uh;
+
+ mtk_vdec_debug(instance->ctx, "ctx dirty %u idx %d\n",
+ instance->dirty[uh->frame_context_idx],
+ uh->frame_context_idx);
+
+ if (instance->dirty[uh->frame_context_idx])
+ frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
+ else
+ frame_ctx = vdec_vp9_slice_default_frame_ctx;
+ memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx));
+
+ return 0;
+}
+
+static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_vsi *vsi,
+ struct mtk_vcodec_mem *buf)
+{
+ struct vdec_vp9_slice_uncompressed_header *uh;
+
+ /* reset segment buffer */
+ uh = &vsi->frame.uh;
+ if (uh->frame_type == 0 ||
+ uh->intra_only ||
+ uh->error_resilient_mode ||
+ uh->frame_width != instance->width ||
+ uh->frame_height != instance->height) {
+ mtk_vdec_debug(instance->ctx, "reset seg\n");
+ memset(buf->va, 0, buf->size);
+ }
+}
+
+/*
+ * parse tiles according to `6.4 Decode tiles syntax`
+ * in "vp9-bitstream-specification"
+ *
+ * frame contains uncompress header, compressed header and several tiles.
+ * this function parses tiles' position and size, stores them to tile buffer
+ * for decoding.
+ */
+static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_vsi *vsi,
+ struct mtk_vcodec_mem *bs)
+{
+ struct vdec_vp9_slice_uncompressed_header *uh;
+ unsigned int rows_log2;
+ unsigned int cols_log2;
+ unsigned int rows;
+ unsigned int cols;
+ unsigned int mi_row;
+ unsigned int mi_col;
+ unsigned int offset;
+ unsigned int pa;
+ unsigned int size;
+ struct vdec_vp9_slice_tiles *tiles;
+ unsigned char *pos;
+ unsigned char *end;
+ unsigned char *va;
+ unsigned int *tb;
+ int i;
+ int j;
+
+ uh = &vsi->frame.uh;
+ rows_log2 = uh->tile_rows_log2;
+ cols_log2 = uh->tile_cols_log2;
+ rows = 1 << rows_log2;
+ cols = 1 << cols_log2;
+
+ if (rows > 4 || cols > 64) {
+ mtk_vdec_err(instance->ctx, "tile_rows %u tile_cols %u\n", rows, cols);
+ return -EINVAL;
+ }
+
+ offset = uh->uncompressed_header_size +
+ uh->header_size_in_bytes;
+ if (bs->size <= offset) {
+ mtk_vdec_err(instance->ctx, "bs size %zu tile offset %u\n", bs->size, offset);
+ return -EINVAL;
+ }
+
+ tiles = &vsi->frame.tiles;
+ /* setup tile buffer */
+
+ va = (unsigned char *)bs->va;
+ pos = va + offset;
+ end = va + bs->size;
+ /* truncated */
+ pa = (unsigned int)bs->dma_addr + offset;
+ tb = instance->tile.va;
+ for (i = 0; i < rows; i++) {
+ for (j = 0; j < cols; j++) {
+ if (i == rows - 1 &&
+ j == cols - 1) {
+ size = (unsigned int)(end - pos);
+ } else {
+ if (end - pos < 4)
+ return -EINVAL;
+
+ size = (pos[0] << 24) | (pos[1] << 16) |
+ (pos[2] << 8) | pos[3];
+ pos += 4;
+ pa += 4;
+ offset += 4;
+ if (end - pos < size)
+ return -EINVAL;
+ }
+ tiles->size[i][j] = size;
+ if (tiles->mi_rows[i]) {
+ *tb++ = (size << 3) + ((offset << 3) & 0x7f);
+ *tb++ = pa & ~0xf;
+ *tb++ = (pa << 3) & 0x7f;
+ mi_row = (tiles->mi_rows[i] - 1) & 0x1ff;
+ mi_col = (tiles->mi_cols[j] - 1) & 0x3f;
+ *tb++ = (mi_row << 6) + mi_col;
+ }
+ pos += size;
+ pa += size;
+ offset += size;
+ }
+ }
+
+ return 0;
+}
+
+static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance,
+ struct mtk_vcodec_mem *bs,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_vp9_slice_pfc *pfc)
+{
+ struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
+ int ret;
+
+ ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_setup_pfc(instance, pfc);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
+ if (ret)
+ goto err;
+
+ vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
+
+ /* setup prob/tile buffers for LAT */
+
+ ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ return ret;
+}
+
+static
+void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k,
+ struct vdec_vp9_slice_frame_counts *counts,
+ struct v4l2_vp9_frame_symbol_counts *counts_helper)
+{
+ u32 l = 0, m;
+
+ /*
+ * helper eo -> mtk eo
+ * helpre e1 -> mtk c3
+ * helper c0 -> c0
+ * helper c1 -> c1
+ * helper c2 -> c2
+ */
+ for (m = 0; m < 3; m++) {
+ counts_helper->coeff[i][j][k][l][m] =
+ (u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m];
+ counts_helper->eob[i][j][k][l][m][0] =
+ &counts->eob_branch[i][j][k].band_0[m];
+ counts_helper->eob[i][j][k][l][m][1] =
+ &counts->coef_probs[i][j][k].band_0[m][3];
+ }
+
+ for (l = 1; l < 6; l++) {
+ for (m = 0; m < 6; m++) {
+ counts_helper->coeff[i][j][k][l][m] =
+ (u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m];
+ counts_helper->eob[i][j][k][l][m][0] =
+ &counts->eob_branch[i][j][k].band_1_5[l - 1][m];
+ counts_helper->eob[i][j][k][l][m][1] =
+ &counts->coef_probs[i][j][k].band_1_5[l - 1][m][3];
+ }
+ }
+}
+
+static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map,
+ struct vdec_vp9_slice_frame_counts *counts,
+ struct v4l2_vp9_frame_symbol_counts *counts_helper)
+{
+ int i, j, k;
+
+ counts_helper->partition = &counts->partition;
+ counts_helper->intra_inter = &counts->intra_inter;
+ counts_helper->tx32p = &counts->tx_p32x32;
+ counts_helper->tx16p = &counts->tx_p16x16;
+ counts_helper->tx8p = &counts->tx_p8x8;
+ counts_helper->uv_mode = &counts->uv_mode;
+
+ counts_helper->comp = &counts->comp_inter;
+ counts_helper->comp_ref = &counts->comp_ref;
+ counts_helper->single_ref = &counts->single_ref;
+ counts_helper->mv_mode = &counts->inter_mode;
+ counts_helper->mv_joint = &counts->joint;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++)
+ memcpy(counts_map->skip[i], counts->skip[i],
+ sizeof(counts_map->skip[0]));
+ counts_helper->skip = &counts_map->skip;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++)
+ memcpy(counts_map->y_mode[i], counts->y_mode[i],
+ sizeof(counts_map->y_mode[0]));
+ counts_helper->y_mode = &counts_map->y_mode;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++)
+ memcpy(counts_map->filter[i], counts->switchable_interp[i],
+ sizeof(counts_map->filter[0]));
+ counts_helper->filter = &counts_map->filter;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++)
+ memcpy(counts_map->sign[i], counts->mvcomp[i].sign,
+ sizeof(counts_map->sign[0]));
+ counts_helper->sign = &counts_map->sign;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++)
+ memcpy(counts_map->classes[i], counts->mvcomp[i].classes,
+ sizeof(counts_map->classes[0]));
+ counts_helper->classes = &counts_map->classes;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++)
+ memcpy(counts_map->class0[i], counts->mvcomp[i].class0,
+ sizeof(counts_map->class0[0]));
+ counts_helper->class0 = &counts_map->class0;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++)
+ for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++)
+ memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j],
+ sizeof(counts_map->bits[0][0]));
+ counts_helper->bits = &counts_map->bits;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++)
+ for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++)
+ memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j],
+ sizeof(counts_map->class0_fp[0][0]));
+ counts_helper->class0_fp = &counts_map->class0_fp;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++)
+ memcpy(counts_map->fp[i], counts->mvcomp[i].fp,
+ sizeof(counts_map->fp[0]));
+ counts_helper->fp = &counts_map->fp;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++)
+ memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp,
+ sizeof(counts_map->class0_hp[0]));
+ counts_helper->class0_hp = &counts_map->class0_hp;
+
+ for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++)
+ memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0]));
+
+ counts_helper->hp = &counts_map->hp;
+
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper);
+}
+
+static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k,
+ struct vdec_vp9_slice_frame_ctx *frame_ctx,
+ struct v4l2_vp9_frame_context *frame_ctx_helper)
+{
+ u32 l, m;
+
+ for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
+ for (m = 0; m < VP9_BAND_6(l); m++) {
+ memcpy(frame_ctx_helper->coef[i][j][k][l][m],
+ frame_ctx->coef_probs[i][j][k][l].probs[m],
+ sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
+ }
+ }
+}
+
+static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k,
+ struct vdec_vp9_slice_frame_ctx *frame_ctx,
+ struct v4l2_vp9_frame_context *frame_ctx_helper)
+{
+ u32 l, m;
+
+ for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
+ for (m = 0; m < VP9_BAND_6(l); m++) {
+ memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m],
+ frame_ctx_helper->coef[i][j][k][l][m],
+ sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
+ }
+ }
+}
+
+static
+void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra,
+ struct vdec_vp9_slice_frame_ctx *pre_frame_ctx,
+ struct vdec_vp9_slice_frame_ctx *frame_ctx,
+ struct v4l2_vp9_frame_context *frame_ctx_helper)
+{
+ struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
+ u32 i, j, k;
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
+ for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
+ for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
+ vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx,
+ frame_ctx_helper);
+
+ /*
+ * use previous prob when frame is not intra or
+ * we should use the prob updated by the compressed header parse
+ */
+ if (!frame_is_intra)
+ frame_ctx = pre_frame_ctx;
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
+ memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i],
+ sizeof(frame_ctx_helper->tx8[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
+ memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i],
+ sizeof(frame_ctx_helper->tx16[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
+ memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i],
+ sizeof(frame_ctx_helper->tx32[0]));
+
+ memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
+ memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i],
+ sizeof(frame_ctx_helper->inter_mode[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
+ memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i],
+ sizeof(frame_ctx_helper->interp_filter[0]));
+
+ memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob,
+ sizeof(frame_ctx_helper->is_inter));
+
+ memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob,
+ sizeof(frame_ctx_helper->comp_mode));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
+ memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i],
+ sizeof(frame_ctx_helper->single_ref[0]));
+
+ memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob,
+ sizeof(frame_ctx_helper->comp_ref));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
+ memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i],
+ sizeof(frame_ctx_helper->y_mode[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
+ memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i],
+ sizeof(frame_ctx_helper->uv_mode[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
+ memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i],
+ sizeof(frame_ctx_helper->partition[0]));
+
+ memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint));
+
+ for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
+ mv->sign[i] = frame_ctx->sign_classes[i].sign;
+
+ for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
+ memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes,
+ sizeof(mv->classes[i]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
+ mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0];
+
+ for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
+ memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
+ for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
+ memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j],
+ sizeof(mv->class0_fr[0][0]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
+ memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
+ mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp;
+
+ for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
+ mv->hp[i] = frame_ctx->class0_fp_hp[i].hp;
+}
+
+static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper,
+ struct vdec_vp9_slice_frame_ctx *frame_ctx)
+{
+ struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
+ u32 i, j, k;
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
+ memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i],
+ sizeof(frame_ctx_helper->tx8[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
+ memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i],
+ sizeof(frame_ctx_helper->tx16[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
+ memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i],
+ sizeof(frame_ctx_helper->tx32[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
+ for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
+ for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
+ vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx,
+ frame_ctx_helper);
+
+ memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
+ memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i],
+ sizeof(frame_ctx_helper->inter_mode[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
+ memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i],
+ sizeof(frame_ctx_helper->interp_filter[0]));
+
+ memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter,
+ sizeof(frame_ctx_helper->is_inter));
+
+ memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode,
+ sizeof(frame_ctx_helper->comp_mode));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
+ memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i],
+ sizeof(frame_ctx_helper->single_ref[0]));
+
+ memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref,
+ sizeof(frame_ctx_helper->comp_ref));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
+ memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i],
+ sizeof(frame_ctx_helper->y_mode[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
+ memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i],
+ sizeof(frame_ctx_helper->uv_mode[0]));
+
+ for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
+ memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i],
+ sizeof(frame_ctx_helper->partition[0]));
+
+ memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint));
+
+ for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
+ frame_ctx->sign_classes[i].sign = mv->sign[i];
+
+ for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
+ memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i],
+ sizeof(mv->classes[i]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
+ frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i];
+
+ for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
+ memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
+ for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
+ memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j],
+ sizeof(mv->class0_fr[0][0]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
+ memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0]));
+
+ for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
+ frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i];
+
+ for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
+ frame_ctx->class0_fp_hp[i].hp = mv->hp[i];
+}
+
+static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_vsi *vsi)
+{
+ struct vdec_vp9_slice_frame_ctx *pre_frame_ctx;
+ struct v4l2_vp9_frame_context *pre_frame_ctx_helper;
+ struct vdec_vp9_slice_frame_ctx *frame_ctx;
+ struct vdec_vp9_slice_frame_counts *counts;
+ struct v4l2_vp9_frame_symbol_counts *counts_helper;
+ struct vdec_vp9_slice_uncompressed_header *uh;
+ bool frame_is_intra;
+ bool use_128;
+
+ uh = &vsi->frame.uh;
+ pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
+ pre_frame_ctx_helper = &instance->frame_ctx_helper;
+ frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va;
+ counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va;
+ counts_helper = &instance->counts_helper;
+
+ if (!uh->refresh_frame_context)
+ return 0;
+
+ if (!uh->frame_parallel_decoding_mode) {
+ vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper);
+
+ frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only;
+ /* check default prob */
+ if (!instance->dirty[uh->frame_context_idx])
+ vdec_vp9_slice_framectx_map_helper(frame_is_intra,
+ vdec_vp9_slice_default_frame_ctx,
+ frame_ctx,
+ pre_frame_ctx_helper);
+ else
+ vdec_vp9_slice_framectx_map_helper(frame_is_intra,
+ pre_frame_ctx,
+ frame_ctx,
+ pre_frame_ctx_helper);
+
+ use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type;
+ v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper,
+ counts_helper,
+ use_128,
+ frame_is_intra);
+ if (!frame_is_intra)
+ v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper,
+ counts_helper,
+ V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE,
+ vsi->frame.uh.interpolation_filter,
+ vsi->frame.ch.tx_mode,
+ vsi->frame.uh.allow_high_precision_mv ?
+ V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0);
+ vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx);
+ } else {
+ memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx));
+ }
+
+ instance->dirty[uh->frame_context_idx] = 1;
+
+ return 0;
+}
+
+static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_pfc *pfc)
+{
+ struct vdec_vp9_slice_vsi *vsi;
+
+ vsi = &pfc->vsi;
+ memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
+
+ mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n",
+ pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
+ vsi->state.crc[2], vsi->state.crc[3]);
+ mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n",
+ pfc->seq, vsi->state.crc[4], vsi->state.crc[5],
+ vsi->state.crc[6], vsi->state.crc[7]);
+
+ vdec_vp9_slice_update_prob(instance, vsi);
+
+ instance->width = vsi->frame.uh.frame_width;
+ instance->height = vsi->frame.uh.frame_height;
+ instance->frame_type = vsi->frame.uh.frame_type;
+ instance->show_frame = vsi->frame.uh.show_frame;
+
+ return 0;
+}
+
+static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_vp9_slice_pfc *pfc)
+{
+ struct vdec_vp9_slice_vsi *vsi;
+
+ vsi = &pfc->vsi;
+ memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
+
+ mtk_vdec_debug(instance->ctx, "Frame %u LAT CRC 0x%08x %lx %lx\n",
+ pfc->seq, vsi->state.crc[0],
+ (unsigned long)vsi->trans.dma_addr,
+ (unsigned long)vsi->trans.dma_addr_end);
+
+ /* buffer full, need to re-decode */
+ if (vsi->state.full) {
+ /* buffer not enough */
+ if (vsi->trans.dma_addr_end - vsi->trans.dma_addr ==
+ vsi->ube.size)
+ return -ENOMEM;
+ return -EAGAIN;
+ }
+
+ vdec_vp9_slice_update_prob(instance, vsi);
+
+ instance->width = vsi->frame.uh.frame_width;
+ instance->height = vsi->frame.uh.frame_height;
+ instance->frame_type = vsi->frame.uh.frame_type;
+ instance->show_frame = vsi->frame.uh.show_frame;
+
+ return 0;
+}
+
+static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct vb2_v4l2_buffer *dst;
+
+ dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
+ if (!dst)
+ return -EINVAL;
+
+ v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
+ return 0;
+}
+
+static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_pfc *pfc,
+ struct vdec_vp9_slice_vsi *vsi,
+ struct vdec_fb *fb,
+ struct vdec_lat_buf *lat_buf)
+{
+ struct vb2_buffer *vb;
+ struct vb2_queue *vq;
+ struct vdec_vp9_slice_reference *ref;
+ int plane;
+ int size;
+ int w;
+ int h;
+ int i;
+
+ plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
+ w = vsi->frame.uh.frame_width;
+ h = vsi->frame.uh.frame_height;
+ size = ALIGN(w, 64) * ALIGN(h, 64);
+
+ /* frame buffer */
+ vsi->fb.y.dma_addr = fb->base_y.dma_addr;
+ if (plane == 1)
+ vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
+ else
+ vsi->fb.c.dma_addr = fb->base_c.dma_addr;
+
+ /* reference buffers */
+ vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx,
+ V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ if (!vq)
+ return -EINVAL;
+
+ /* get current output buffer */
+ vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
+ if (!vb)
+ return -EINVAL;
+
+ /* update internal buffer's width/height */
+ for (i = 0; i < vq->num_buffers; i++) {
+ if (vb == vq->bufs[i]) {
+ instance->dpb[i].width = w;
+ instance->dpb[i].height = h;
+ break;
+ }
+ }
+
+ /*
+ * get buffer's width/height from instance
+ * get buffer address from vb2buf
+ */
+ for (i = 0; i < 3; i++) {
+ ref = &vsi->frame.ref[i];
+ vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
+ if (!vb) {
+ ref->frame_width = w;
+ ref->frame_height = h;
+ memset(&vsi->ref[i], 0, sizeof(vsi->ref[i]));
+ } else {
+ int idx = vb->index;
+
+ ref->frame_width = instance->dpb[idx].width;
+ ref->frame_height = instance->dpb[idx].height;
+ vsi->ref[i].y.dma_addr =
+ vb2_dma_contig_plane_dma_addr(vb, 0);
+ if (plane == 1)
+ vsi->ref[i].c.dma_addr =
+ vsi->ref[i].y.dma_addr + size;
+ else
+ vsi->ref[i].c.dma_addr =
+ vb2_dma_contig_plane_dma_addr(vb, 1);
+ }
+ }
+
+ return 0;
+}
+
+static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance,
+ struct vdec_vp9_slice_pfc *pfc,
+ struct vdec_vp9_slice_vsi *vsi,
+ struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb)
+{
+ int i;
+
+ vsi->bs.buf.dma_addr = bs->dma_addr;
+ vsi->bs.buf.size = bs->size;
+ vsi->bs.frame.dma_addr = bs->dma_addr;
+ vsi->bs.frame.size = bs->size;
+
+ for (i = 0; i < 2; i++) {
+ vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
+ vsi->mv[i].size = instance->mv[i].size;
+ }
+ for (i = 0; i < 2; i++) {
+ vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
+ vsi->seg[i].size = instance->seg[i].size;
+ }
+ vsi->tile.dma_addr = instance->tile.dma_addr;
+ vsi->tile.size = instance->tile.size;
+ vsi->prob.dma_addr = instance->prob.dma_addr;
+ vsi->prob.size = instance->prob.size;
+ vsi->counts.dma_addr = instance->counts.dma_addr;
+ vsi->counts.size = instance->counts.size;
+
+ vsi->row_info.buf = 0;
+ vsi->row_info.size = 0;
+
+ vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL);
+}
+
+static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance,
+ struct vdec_fb *fb,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_vp9_slice_pfc *pfc)
+{
+ struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
+ int ret;
+
+ vdec_vp9_slice_setup_state(vsi);
+
+ ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
+ if (ret)
+ goto err;
+
+ vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]);
+
+ return 0;
+
+err:
+ return ret;
+}
+
+static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance,
+ struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb,
+ struct vdec_vp9_slice_pfc *pfc)
+{
+ struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
+ int ret;
+
+ ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_setup_pfc(instance, pfc);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
+ if (ret)
+ goto err;
+
+ vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb);
+ vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
+
+ ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
+ if (ret)
+ goto err;
+
+ ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ return ret;
+}
+
+static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance,
+ struct vdec_lat_buf *lat_buf,
+ struct vdec_vp9_slice_pfc *pfc)
+{
+ struct vdec_vp9_slice_vsi *vsi;
+
+ vsi = &pfc->vsi;
+ memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state));
+
+ mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n",
+ pfc->seq, vsi->state.crc[0], vsi->state.crc[1],
+ vsi->state.crc[2], vsi->state.crc[3]);
+ mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n",
+ pfc->seq, vsi->state.crc[4], vsi->state.crc[5],
+ vsi->state.crc[6], vsi->state.crc[7]);
+
+ return 0;
+}
+
+static int vdec_vp9_slice_init(struct mtk_vcodec_dec_ctx *ctx)
+{
+ struct vdec_vp9_slice_instance *instance;
+ struct vdec_vp9_slice_init_vsi *vsi;
+ int ret;
+
+ instance = kzalloc(sizeof(*instance), GFP_KERNEL);
+ if (!instance)
+ return -ENOMEM;
+
+ instance->ctx = ctx;
+ instance->vpu.id = SCP_IPI_VDEC_LAT;
+ instance->vpu.core_id = SCP_IPI_VDEC_CORE;
+ instance->vpu.ctx = ctx;
+ instance->vpu.codec_type = ctx->current_codec;
+
+ ret = vpu_dec_init(&instance->vpu);
+ if (ret) {
+ mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret);
+ goto error_vpu_init;
+ }
+
+ /* init vsi and global flags */
+
+ vsi = instance->vpu.vsi;
+ if (!vsi) {
+ mtk_vdec_err(ctx, "failed to get VP9 vsi\n");
+ ret = -EINVAL;
+ goto error_vsi;
+ }
+ instance->init_vsi = vsi;
+ instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
+ (u32)vsi->core_vsi);
+ if (!instance->core_vsi) {
+ mtk_vdec_err(ctx, "failed to get VP9 core vsi\n");
+ ret = -EINVAL;
+ goto error_vsi;
+ }
+
+ instance->irq = 1;
+
+ ret = vdec_vp9_slice_init_default_frame_ctx(instance);
+ if (ret)
+ goto error_default_frame_ctx;
+
+ ctx->drv_handle = instance;
+
+ return 0;
+
+error_default_frame_ctx:
+error_vsi:
+ vpu_dec_deinit(&instance->vpu);
+error_vpu_init:
+ kfree(instance);
+ return ret;
+}
+
+static void vdec_vp9_slice_deinit(void *h_vdec)
+{
+ struct vdec_vp9_slice_instance *instance = h_vdec;
+
+ if (!instance)
+ return;
+
+ vpu_dec_deinit(&instance->vpu);
+ vdec_vp9_slice_free_working_buffer(instance);
+ vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
+ kfree(instance);
+}
+
+static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_vp9_slice_instance *instance = h_vdec;
+
+ mtk_vdec_debug(instance->ctx, "flush ...\n");
+ if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE)
+ vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);
+ return vpu_dec_reset(&instance->vpu);
+}
+
+static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance)
+{
+ struct mtk_vcodec_dec_ctx *ctx = instance->ctx;
+ unsigned int data[3];
+
+ mtk_vdec_debug(instance->ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h);
+
+ data[0] = ctx->picinfo.pic_w;
+ data[1] = ctx->picinfo.pic_h;
+ data[2] = ctx->capture_fourcc;
+ vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
+
+ ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64);
+ ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64);
+ ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
+ ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
+}
+
+static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance,
+ unsigned int *dpb_sz)
+{
+ /* refer VP9 specification */
+ *dpb_sz = 9;
+}
+
+static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
+{
+ struct vdec_vp9_slice_instance *instance = h_vdec;
+
+ switch (type) {
+ case GET_PARAM_PIC_INFO:
+ vdec_vp9_slice_get_pic_info(instance);
+ break;
+ case GET_PARAM_DPB_SIZE:
+ vdec_vp9_slice_get_dpb_size(instance, out);
+ break;
+ case GET_PARAM_CROP_INFO:
+ mtk_vdec_debug(instance->ctx, "No need to get vp9 crop information.");
+ break;
+ default:
+ mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_vp9_slice_instance *instance = h_vdec;
+ struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc;
+ struct vdec_vp9_slice_vsi *vsi;
+ struct mtk_vcodec_dec_ctx *ctx;
+ int ret;
+
+ if (!instance || !instance->ctx)
+ return -EINVAL;
+ ctx = instance->ctx;
+
+ /* bs NULL means flush decoder */
+ if (!bs)
+ return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
+
+ fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
+ if (!fb)
+ return -EBUSY;
+
+ vsi = &pfc->vsi;
+
+ ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "Failed to setup VP9 single ret %d\n", ret);
+ return ret;
+ }
+ vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
+
+ ret = vpu_dec_start(&instance->vpu, NULL, 0);
+ if (ret) {
+ mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret);
+ return ret;
+ }
+
+ ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
+ /* update remote vsi if decode timeout */
+ if (ret) {
+ mtk_vdec_err(ctx, "VP9 decode timeout %d\n", ret);
+ WRITE_ONCE(instance->vsi->state.timeout, 1);
+ }
+
+ vpu_dec_end(&instance->vpu);
+
+ vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
+ ret = vdec_vp9_slice_update_single(instance, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret);
+ return ret;
+ }
+
+ instance->ctx->decoded_frame_cnt++;
+ return 0;
+}
+
+static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_vp9_slice_instance *instance = h_vdec;
+ struct vdec_lat_buf *lat_buf;
+ struct vdec_vp9_slice_pfc *pfc;
+ struct vdec_vp9_slice_vsi *vsi;
+ struct mtk_vcodec_dec_ctx *ctx;
+ int ret;
+
+ if (!instance || !instance->ctx)
+ return -EINVAL;
+ ctx = instance->ctx;
+
+ /* init msgQ for the first time */
+ if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
+ vdec_vp9_slice_core_decode,
+ sizeof(*pfc)))
+ return -ENOMEM;
+
+ /* bs NULL means flush decoder */
+ if (!bs)
+ return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
+
+ lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx);
+ if (!lat_buf) {
+ mtk_vdec_debug(ctx, "Failed to get VP9 lat buf\n");
+ return -EAGAIN;
+ }
+ pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data;
+ if (!pfc) {
+ ret = -EINVAL;
+ goto err_free_fb_out;
+ }
+ vsi = &pfc->vsi;
+
+ ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "Failed to setup VP9 lat ret %d\n", ret);
+ goto err_free_fb_out;
+ }
+ vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
+
+ ret = vpu_dec_start(&instance->vpu, NULL, 0);
+ if (ret) {
+ mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret);
+ goto err_free_fb_out;
+ }
+
+ if (instance->irq) {
+ ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
+ /* update remote vsi if decode timeout */
+ if (ret) {
+ mtk_vdec_err(ctx, "VP9 decode timeout %d pic %d\n", ret, pfc->seq);
+ WRITE_ONCE(instance->vsi->state.timeout, 1);
+ }
+ vpu_dec_end(&instance->vpu);
+ }
+
+ vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
+ ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
+
+ /* LAT trans full, no more UBE or decode timeout */
+ if (ret) {
+ mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret);
+ goto err_free_fb_out;
+ }
+
+ mtk_vdec_debug(ctx, "lat dma addr: 0x%lx 0x%lx\n",
+ (unsigned long)pfc->vsi.trans.dma_addr,
+ (unsigned long)pfc->vsi.trans.dma_addr_end);
+
+ vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
+ vsi->trans.dma_addr_end +
+ ctx->msg_queue.wdma_addr.dma_addr);
+ vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf);
+
+ return 0;
+err_free_fb_out:
+ vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
+ return ret;
+}
+
+static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
+ struct vdec_fb *fb, bool *res_chg)
+{
+ struct vdec_vp9_slice_instance *instance = h_vdec;
+ int ret;
+
+ if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE)
+ ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg);
+ else
+ ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg);
+
+ return ret;
+}
+
+static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
+{
+ struct vdec_vp9_slice_instance *instance;
+ struct vdec_vp9_slice_pfc *pfc;
+ struct mtk_vcodec_dec_ctx *ctx = NULL;
+ struct vdec_fb *fb = NULL;
+ int ret = -EINVAL;
+
+ if (!lat_buf)
+ goto err;
+
+ pfc = lat_buf->private_data;
+ ctx = lat_buf->ctx;
+ if (!pfc || !ctx)
+ goto err;
+
+ instance = ctx->drv_handle;
+ if (!instance)
+ goto err;
+
+ fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
+ if (!fb) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "vdec_vp9_slice_setup_core\n");
+ goto err;
+ }
+ vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
+
+ ret = vpu_dec_core(&instance->vpu);
+ if (ret) {
+ mtk_vdec_err(ctx, "vpu_dec_core\n");
+ goto err;
+ }
+
+ if (instance->irq) {
+ ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
+ WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
+ /* update remote vsi if decode timeout */
+ if (ret) {
+ mtk_vdec_err(ctx, "VP9 core timeout pic %d\n", pfc->seq);
+ WRITE_ONCE(instance->core_vsi->state.timeout, 1);
+ }
+ vpu_dec_core_end(&instance->vpu);
+ }
+
+ vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1);
+ ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
+ if (ret) {
+ mtk_vdec_err(ctx, "vdec_vp9_slice_update_core\n");
+ goto err;
+ }
+
+ pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
+ mtk_vdec_debug(ctx, "core dma_addr_end 0x%lx\n",
+ (unsigned long)pfc->vsi.trans.dma_addr_end);
+ vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
+ ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
+
+ return 0;
+
+err:
+ if (ctx && pfc) {
+ /* always update read pointer */
+ vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
+
+ if (fb)
+ ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
+ }
+ return ret;
+}
+
+const struct vdec_common_if vdec_vp9_slice_lat_if = {
+ .init = vdec_vp9_slice_init,
+ .decode = vdec_vp9_slice_decode,
+ .get_param = vdec_vp9_slice_get_param,
+ .deinit = vdec_vp9_slice_deinit,
+};