diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/media/platform/mediatek/vcodec/vdec | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/media/platform/mediatek/vcodec/vdec')
9 files changed, 6712 insertions, 0 deletions
diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_if.c new file mode 100644 index 000000000..481655bb6 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_if.c @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> + +#include "../vdec_drv_if.h" +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_dec.h" +#include "../mtk_vcodec_intr.h" +#include "../vdec_vpu_if.h" +#include "../vdec_drv_base.h" + +#define NAL_NON_IDR_SLICE 0x01 +#define NAL_IDR_SLICE 0x05 +#define NAL_H264_PPS 0x08 +#define NAL_TYPE(value) ((value) & 0x1F) + +#define BUF_PREDICTION_SZ (32 * 1024) + +#define MB_UNIT_LEN 16 + +/* motion vector size (bytes) for every macro block */ +#define HW_MB_STORE_SZ 64 + +#define H264_MAX_FB_NUM 17 +#define HDR_PARSING_BUF_SZ 1024 + +#define DEC_ERR_RET(ret) ((ret) >> 16) +#define H264_ERR_NOT_VALID 3 + +/** + * struct h264_fb - h264 decode frame buffer information + * @vdec_fb_va : virtual address of struct vdec_fb + * @y_fb_dma : dma address of Y frame buffer (luma) + * @c_fb_dma : dma address of C frame buffer (chroma) + * @poc : picture order count of frame buffer + * @reserved : for 8 bytes alignment + */ +struct h264_fb { + uint64_t vdec_fb_va; + uint64_t y_fb_dma; + uint64_t c_fb_dma; + int32_t poc; + uint32_t reserved; +}; + +/** + * struct h264_ring_fb_list - ring frame buffer list + * @fb_list : frame buffer array + * @read_idx : read index + * @write_idx : write index + * @count : buffer count in list + * @reserved : for 8 bytes alignment + */ +struct h264_ring_fb_list { + struct h264_fb fb_list[H264_MAX_FB_NUM]; + unsigned int read_idx; + unsigned int write_idx; + unsigned int count; + unsigned int reserved; +}; + +/** + * struct vdec_h264_dec_info - decode information + * @dpb_sz : decoding picture buffer size + * @resolution_changed : resolution change happen + * @realloc_mv_buf : flag to notify driver to re-allocate mv buffer + * @reserved : for 8 bytes alignment + * @bs_dma : Input bit-stream buffer dma address + * @y_fb_dma : Y frame buffer dma address + * @c_fb_dma : C frame buffer dma address + * @vdec_fb_va : VDEC frame buffer struct virtual address + */ +struct vdec_h264_dec_info { + uint32_t dpb_sz; + uint32_t resolution_changed; + uint32_t realloc_mv_buf; + uint32_t reserved; + uint64_t bs_dma; + uint64_t y_fb_dma; + uint64_t c_fb_dma; + uint64_t vdec_fb_va; +}; + +/** + * struct vdec_h264_vsi - shared memory for decode information exchange + * between VPU and Host. + * The memory is allocated by VPU then mapping to Host + * in vpu_dec_init() and freed in vpu_dec_deinit() + * by VPU. + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item + * @hdr_buf : Header parsing buffer (AP-W, VPU-R) + * @pred_buf_dma : HW working predication buffer dma address (AP-W, VPU-R) + * @mv_buf_dma : HW working motion vector buffer dma address (AP-W, VPU-R) + * @list_free : free frame buffer ring list (AP-W/R, VPU-W) + * @list_disp : display frame buffer ring list (AP-R, VPU-W) + * @dec : decode information (AP-R, VPU-W) + * @pic : picture information (AP-R, VPU-W) + * @crop : crop information (AP-R, VPU-W) + */ +struct vdec_h264_vsi { + unsigned char hdr_buf[HDR_PARSING_BUF_SZ]; + uint64_t pred_buf_dma; + uint64_t mv_buf_dma[H264_MAX_FB_NUM]; + struct h264_ring_fb_list list_free; + struct h264_ring_fb_list list_disp; + struct vdec_h264_dec_info dec; + struct vdec_pic_info pic; + struct v4l2_rect crop; +}; + +/** + * struct vdec_h264_inst - h264 decoder instance + * @num_nalu : how many nalus be decoded + * @ctx : point to mtk_vcodec_ctx + * @pred_buf : HW working predication buffer + * @mv_buf : HW working motion vector buffer + * @vpu : VPU instance + * @vsi : VPU shared information + */ +struct vdec_h264_inst { + unsigned int num_nalu; + struct mtk_vcodec_ctx *ctx; + struct mtk_vcodec_mem pred_buf; + struct mtk_vcodec_mem mv_buf[H264_MAX_FB_NUM]; + struct vdec_vpu_inst vpu; + struct vdec_h264_vsi *vsi; +}; + +static unsigned int get_mv_buf_size(unsigned int width, unsigned int height) +{ + return HW_MB_STORE_SZ * (width/MB_UNIT_LEN) * (height/MB_UNIT_LEN); +} + +static int allocate_predication_buf(struct vdec_h264_inst *inst) +{ + int err = 0; + + inst->pred_buf.size = BUF_PREDICTION_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, &inst->pred_buf); + if (err) { + mtk_vcodec_err(inst, "failed to allocate ppl buf"); + return err; + } + + inst->vsi->pred_buf_dma = inst->pred_buf.dma_addr; + return 0; +} + +static void free_predication_buf(struct vdec_h264_inst *inst) +{ + struct mtk_vcodec_mem *mem = NULL; + + mtk_vcodec_debug_enter(inst); + + inst->vsi->pred_buf_dma = 0; + mem = &inst->pred_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); +} + +static int alloc_mv_buf(struct vdec_h264_inst *inst, struct vdec_pic_info *pic) +{ + int i; + int err; + struct mtk_vcodec_mem *mem = NULL; + unsigned int buf_sz = get_mv_buf_size(pic->buf_w, pic->buf_h); + + for (i = 0; i < H264_MAX_FB_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + mem->size = buf_sz; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "failed to allocate mv buf"); + return err; + } + inst->vsi->mv_buf_dma[i] = mem->dma_addr; + } + + return 0; +} + +static void free_mv_buf(struct vdec_h264_inst *inst) +{ + int i; + struct mtk_vcodec_mem *mem = NULL; + + for (i = 0; i < H264_MAX_FB_NUM; i++) { + inst->vsi->mv_buf_dma[i] = 0; + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + } +} + +static int check_list_validity(struct vdec_h264_inst *inst, bool disp_list) +{ + struct h264_ring_fb_list *list; + + list = disp_list ? &inst->vsi->list_disp : &inst->vsi->list_free; + + if (list->count > H264_MAX_FB_NUM || + list->read_idx >= H264_MAX_FB_NUM || + list->write_idx >= H264_MAX_FB_NUM) { + mtk_vcodec_err(inst, "%s list err: cnt=%d r_idx=%d w_idx=%d", + disp_list ? "disp" : "free", list->count, + list->read_idx, list->write_idx); + return -EINVAL; + } + + return 0; +} + +static void put_fb_to_free(struct vdec_h264_inst *inst, struct vdec_fb *fb) +{ + struct h264_ring_fb_list *list; + + if (fb) { + if (check_list_validity(inst, false)) + return; + + list = &inst->vsi->list_free; + if (list->count == H264_MAX_FB_NUM) { + mtk_vcodec_err(inst, "[FB] put fb free_list full"); + return; + } + + mtk_vcodec_debug(inst, "[FB] put fb into free_list @(%p, %llx)", + fb->base_y.va, (u64)fb->base_y.dma_addr); + + list->fb_list[list->write_idx].vdec_fb_va = (u64)(uintptr_t)fb; + list->write_idx = (list->write_idx == H264_MAX_FB_NUM - 1) ? + 0 : list->write_idx + 1; + list->count++; + } +} + +static void get_pic_info(struct vdec_h264_inst *inst, + struct vdec_pic_info *pic) +{ + *pic = inst->vsi->pic; + mtk_vcodec_debug(inst, "pic(%d, %d), buf(%d, %d)", + pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h); + mtk_vcodec_debug(inst, "fb size: Y(%d), C(%d)", + pic->fb_sz[0], pic->fb_sz[1]); +} + +static void get_crop_info(struct vdec_h264_inst *inst, struct v4l2_rect *cr) +{ + cr->left = inst->vsi->crop.left; + cr->top = inst->vsi->crop.top; + cr->width = inst->vsi->crop.width; + cr->height = inst->vsi->crop.height; + + mtk_vcodec_debug(inst, "l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static void get_dpb_size(struct vdec_h264_inst *inst, unsigned int *dpb_sz) +{ + *dpb_sz = inst->vsi->dec.dpb_sz; + mtk_vcodec_debug(inst, "sz=%d", *dpb_sz); +} + +static int vdec_h264_init(struct mtk_vcodec_ctx *ctx) +{ + struct vdec_h264_inst *inst = NULL; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = IPI_VDEC_H264; + inst->vpu.ctx = ctx; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vcodec_err(inst, "vdec_h264 init err=%d", err); + goto error_free_inst; + } + + inst->vsi = (struct vdec_h264_vsi *)inst->vpu.vsi; + err = allocate_predication_buf(inst); + if (err) + goto error_deinit; + + mtk_vcodec_debug(inst, "H264 Instance >> %p", inst); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); + +error_free_inst: + kfree(inst); + return err; +} + +static void vdec_h264_deinit(void *h_vdec) +{ + struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec; + + mtk_vcodec_debug_enter(inst); + + vpu_dec_deinit(&inst->vpu); + free_predication_buf(inst); + free_mv_buf(inst); + + kfree(inst); +} + +static int find_start_code(unsigned char *data, unsigned int data_sz) +{ + if (data_sz > 3 && data[0] == 0 && data[1] == 0 && data[2] == 1) + return 3; + + if (data_sz > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && + data[3] == 1) + return 4; + + return -1; +} + +static int vdec_h264_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + int nal_start_idx = 0; + int err = 0; + unsigned int nal_start; + unsigned int nal_type; + unsigned char *buf; + unsigned int buf_sz; + unsigned int data[2]; + uint64_t vdec_fb_va = (u64)(uintptr_t)fb; + uint64_t y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + uint64_t c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + mtk_vcodec_debug(inst, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p", + ++inst->num_nalu, y_fb_dma, c_fb_dma, fb); + + /* bs NULL means flush decoder */ + if (bs == NULL) + return vpu_dec_reset(vpu); + + buf = (unsigned char *)bs->va; + buf_sz = bs->size; + nal_start_idx = find_start_code(buf, buf_sz); + if (nal_start_idx < 0) { + mtk_vcodec_err(inst, "invalid nal start code"); + err = -EIO; + goto err_free_fb_out; + } + + nal_start = buf[nal_start_idx]; + nal_type = NAL_TYPE(buf[nal_start_idx]); + mtk_vcodec_debug(inst, "\n + NALU[%d] type %d +\n", inst->num_nalu, + nal_type); + + if (nal_type == NAL_H264_PPS) { + buf_sz -= nal_start_idx; + if (buf_sz > HDR_PARSING_BUF_SZ) { + err = -EILSEQ; + goto err_free_fb_out; + } + memcpy(inst->vsi->hdr_buf, buf + nal_start_idx, buf_sz); + } + + inst->vsi->dec.bs_dma = (uint64_t)bs->dma_addr; + inst->vsi->dec.y_fb_dma = y_fb_dma; + inst->vsi->dec.c_fb_dma = c_fb_dma; + inst->vsi->dec.vdec_fb_va = vdec_fb_va; + + data[0] = buf_sz; + data[1] = nal_start; + err = vpu_dec_start(vpu, data, 2); + if (err) { + if (err > 0 && (DEC_ERR_RET(err) == H264_ERR_NOT_VALID)) { + mtk_vcodec_err(inst, "- error bitstream - err = %d -", + err); + err = -EIO; + } + goto err_free_fb_out; + } + + *res_chg = inst->vsi->dec.resolution_changed; + if (*res_chg) { + struct vdec_pic_info pic; + + mtk_vcodec_debug(inst, "- resolution changed -"); + get_pic_info(inst, &pic); + + if (inst->vsi->dec.realloc_mv_buf) { + err = alloc_mv_buf(inst, &pic); + if (err) + goto err_free_fb_out; + } + } + + if (nal_type == NAL_NON_IDR_SLICE || nal_type == NAL_IDR_SLICE) { + /* wait decoder done interrupt */ + err = mtk_vcodec_wait_for_done_ctx(inst->ctx, + MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + if (err) + goto err_free_fb_out; + + vpu_dec_end(vpu); + } + + mtk_vcodec_debug(inst, "\n - NALU[%d] type=%d -\n", inst->num_nalu, + nal_type); + return 0; + +err_free_fb_out: + put_fb_to_free(inst, fb); + mtk_vcodec_err(inst, "\n - NALU[%d] err=%d -\n", inst->num_nalu, err); + return err; +} + +static void vdec_h264_get_fb(struct vdec_h264_inst *inst, + struct h264_ring_fb_list *list, + bool disp_list, struct vdec_fb **out_fb) +{ + struct vdec_fb *fb; + + if (check_list_validity(inst, disp_list)) + return; + + if (list->count == 0) { + mtk_vcodec_debug(inst, "[FB] there is no %s fb", + disp_list ? "disp" : "free"); + *out_fb = NULL; + return; + } + + fb = (struct vdec_fb *) + (uintptr_t)list->fb_list[list->read_idx].vdec_fb_va; + fb->status |= (disp_list ? FB_ST_DISPLAY : FB_ST_FREE); + + *out_fb = fb; + mtk_vcodec_debug(inst, "[FB] get %s fb st=%d poc=%d %llx", + disp_list ? "disp" : "free", + fb->status, list->fb_list[list->read_idx].poc, + list->fb_list[list->read_idx].vdec_fb_va); + + list->read_idx = (list->read_idx == H264_MAX_FB_NUM - 1) ? + 0 : list->read_idx + 1; + list->count--; +} + +static int vdec_h264_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec; + + switch (type) { + case GET_PARAM_DISP_FRAME_BUFFER: + vdec_h264_get_fb(inst, &inst->vsi->list_disp, true, out); + break; + + case GET_PARAM_FREE_FRAME_BUFFER: + vdec_h264_get_fb(inst, &inst->vsi->list_free, false, out); + break; + + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + + case GET_PARAM_DPB_SIZE: + get_dpb_size(inst, out); + break; + + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + + default: + mtk_vcodec_err(inst, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +const struct vdec_common_if vdec_h264_if = { + .init = vdec_h264_init, + .decode = vdec_h264_decode, + .get_param = vdec_h264_get_param, + .deinit = vdec_h264_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_common.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_common.c new file mode 100644 index 000000000..580ce979e --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_common.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include "vdec_h264_req_common.h" + +/* get used parameters for sps/pps */ +#define GET_MTK_VDEC_FLAG(cond, flag) \ + { dst_param->cond = ((src_param->flags & flag) ? (1) : (0)); } +#define GET_MTK_VDEC_PARAM(param) \ + { dst_param->param = src_param->param; } + +void mtk_vdec_h264_get_ref_list(u8 *ref_list, + const struct v4l2_h264_reference *v4l2_ref_list, + int num_valid) +{ + u32 i; + + /* + * TODO The firmware does not support field decoding. Future + * implementation must use v4l2_ref_list[i].fields to obtain + * the reference field parity. + */ + + for (i = 0; i < num_valid; i++) + ref_list[i] = v4l2_ref_list[i].index; + + /* + * The firmware expects unused reflist entries to have the value 0x20. + */ + memset(&ref_list[num_valid], 0x20, 32 - num_valid); +} + +void *mtk_vdec_h264_get_ctrl_ptr(struct mtk_vcodec_ctx *ctx, int id) +{ + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); + + if (!ctrl) + return ERR_PTR(-EINVAL); + + return ctrl->p_cur.p; +} + +void mtk_vdec_h264_fill_dpb_info(struct mtk_vcodec_ctx *ctx, + struct slice_api_h264_decode_param *decode_params, + struct mtk_h264_dpb_info *h264_dpb_info) +{ + const struct slice_h264_dpb_entry *dpb; + struct vb2_queue *vq; + struct vb2_buffer *vb; + struct vb2_v4l2_buffer *vb2_v4l2; + int index; + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + + for (index = 0; index < V4L2_H264_NUM_DPB_ENTRIES; index++) { + dpb = &decode_params->dpb[index]; + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) { + h264_dpb_info[index].reference_flag = 0; + continue; + } + + vb = vb2_find_buffer(vq, dpb->reference_ts); + if (!vb) { + dev_err(&ctx->dev->plat_dev->dev, + "Reference invalid: dpb_index(%d) reference_ts(%lld)", + index, dpb->reference_ts); + continue; + } + + /* 1 for short term reference, 2 for long term reference */ + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) + h264_dpb_info[index].reference_flag = 1; + else + h264_dpb_info[index].reference_flag = 2; + + vb2_v4l2 = container_of(vb, struct vb2_v4l2_buffer, vb2_buf); + h264_dpb_info[index].field = vb2_v4l2->field; + + h264_dpb_info[index].y_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 0); + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) + h264_dpb_info[index].c_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 1); + else + h264_dpb_info[index].c_dma_addr = + h264_dpb_info[index].y_dma_addr + + ctx->picinfo.fb_sz[0]; + } +} + +void mtk_vdec_h264_copy_sps_params(struct mtk_h264_sps_param *dst_param, + const struct v4l2_ctrl_h264_sps *src_param) +{ + GET_MTK_VDEC_PARAM(chroma_format_idc); + GET_MTK_VDEC_PARAM(bit_depth_luma_minus8); + GET_MTK_VDEC_PARAM(bit_depth_chroma_minus8); + GET_MTK_VDEC_PARAM(log2_max_frame_num_minus4); + GET_MTK_VDEC_PARAM(pic_order_cnt_type); + GET_MTK_VDEC_PARAM(log2_max_pic_order_cnt_lsb_minus4); + GET_MTK_VDEC_PARAM(max_num_ref_frames); + GET_MTK_VDEC_PARAM(pic_width_in_mbs_minus1); + GET_MTK_VDEC_PARAM(pic_height_in_map_units_minus1); + + GET_MTK_VDEC_FLAG(separate_colour_plane_flag, + V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE); + GET_MTK_VDEC_FLAG(qpprime_y_zero_transform_bypass_flag, + V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS); + GET_MTK_VDEC_FLAG(delta_pic_order_always_zero_flag, + V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO); + GET_MTK_VDEC_FLAG(frame_mbs_only_flag, + V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY); + GET_MTK_VDEC_FLAG(mb_adaptive_frame_field_flag, + V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD); + GET_MTK_VDEC_FLAG(direct_8x8_inference_flag, + V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE); +} + +void mtk_vdec_h264_copy_pps_params(struct mtk_h264_pps_param *dst_param, + const struct v4l2_ctrl_h264_pps *src_param) +{ + GET_MTK_VDEC_PARAM(num_ref_idx_l0_default_active_minus1); + GET_MTK_VDEC_PARAM(num_ref_idx_l1_default_active_minus1); + GET_MTK_VDEC_PARAM(weighted_bipred_idc); + GET_MTK_VDEC_PARAM(pic_init_qp_minus26); + GET_MTK_VDEC_PARAM(chroma_qp_index_offset); + GET_MTK_VDEC_PARAM(second_chroma_qp_index_offset); + + GET_MTK_VDEC_FLAG(entropy_coding_mode_flag, + V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE); + GET_MTK_VDEC_FLAG(pic_order_present_flag, + V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT); + GET_MTK_VDEC_FLAG(weighted_pred_flag, + V4L2_H264_PPS_FLAG_WEIGHTED_PRED); + GET_MTK_VDEC_FLAG(deblocking_filter_control_present_flag, + V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT); + GET_MTK_VDEC_FLAG(constrained_intra_pred_flag, + V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED); + GET_MTK_VDEC_FLAG(redundant_pic_cnt_present_flag, + V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT); + GET_MTK_VDEC_FLAG(transform_8x8_mode_flag, + V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE); + GET_MTK_VDEC_FLAG(scaling_matrix_present_flag, + V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT); +} + +void mtk_vdec_h264_copy_slice_hd_params(struct mtk_h264_slice_hd_param *dst_param, + const struct v4l2_ctrl_h264_slice_params *src_param, + const struct v4l2_ctrl_h264_decode_params *dec_param) +{ + int temp; + + GET_MTK_VDEC_PARAM(first_mb_in_slice); + GET_MTK_VDEC_PARAM(slice_type); + GET_MTK_VDEC_PARAM(cabac_init_idc); + GET_MTK_VDEC_PARAM(slice_qp_delta); + GET_MTK_VDEC_PARAM(disable_deblocking_filter_idc); + GET_MTK_VDEC_PARAM(slice_alpha_c0_offset_div2); + GET_MTK_VDEC_PARAM(slice_beta_offset_div2); + GET_MTK_VDEC_PARAM(num_ref_idx_l0_active_minus1); + GET_MTK_VDEC_PARAM(num_ref_idx_l1_active_minus1); + + dst_param->frame_num = dec_param->frame_num; + dst_param->pic_order_cnt_lsb = dec_param->pic_order_cnt_lsb; + + dst_param->delta_pic_order_cnt_bottom = + dec_param->delta_pic_order_cnt_bottom; + dst_param->delta_pic_order_cnt0 = + dec_param->delta_pic_order_cnt0; + dst_param->delta_pic_order_cnt1 = + dec_param->delta_pic_order_cnt1; + + temp = dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC; + dst_param->field_pic_flag = temp ? 1 : 0; + + temp = dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD; + dst_param->bottom_field_flag = temp ? 1 : 0; + + GET_MTK_VDEC_FLAG(direct_spatial_mv_pred_flag, + V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED); +} + +void mtk_vdec_h264_copy_scaling_matrix(struct slice_api_h264_scaling_matrix *dst_matrix, + const struct v4l2_ctrl_h264_scaling_matrix *src_matrix) +{ + memcpy(dst_matrix->scaling_list_4x4, src_matrix->scaling_list_4x4, + sizeof(dst_matrix->scaling_list_4x4)); + + memcpy(dst_matrix->scaling_list_8x8, src_matrix->scaling_list_8x8, + sizeof(dst_matrix->scaling_list_8x8)); +} + +void +mtk_vdec_h264_copy_decode_params(struct slice_api_h264_decode_param *dst_params, + const struct v4l2_ctrl_h264_decode_params *src_params, + const struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES]) +{ + struct slice_h264_dpb_entry *dst_entry; + const struct v4l2_h264_dpb_entry *src_entry; + int i; + + for (i = 0; i < ARRAY_SIZE(dst_params->dpb); i++) { + dst_entry = &dst_params->dpb[i]; + src_entry = &dpb[i]; + + dst_entry->reference_ts = src_entry->reference_ts; + dst_entry->frame_num = src_entry->frame_num; + dst_entry->pic_num = src_entry->pic_num; + dst_entry->top_field_order_cnt = src_entry->top_field_order_cnt; + dst_entry->bottom_field_order_cnt = + src_entry->bottom_field_order_cnt; + dst_entry->flags = src_entry->flags; + } + + /* num_slices is a leftover from the old H.264 support and is ignored + * by the firmware. + */ + dst_params->num_slices = 0; + dst_params->nal_ref_idc = src_params->nal_ref_idc; + dst_params->top_field_order_cnt = src_params->top_field_order_cnt; + dst_params->bottom_field_order_cnt = src_params->bottom_field_order_cnt; + dst_params->flags = src_params->flags; +} + +static bool mtk_vdec_h264_dpb_entry_match(const struct v4l2_h264_dpb_entry *a, + const struct v4l2_h264_dpb_entry *b) +{ + return a->top_field_order_cnt == b->top_field_order_cnt && + a->bottom_field_order_cnt == b->bottom_field_order_cnt; +} + +/* + * Move DPB entries of dec_param that refer to a frame already existing in dpb + * into the already existing slot in dpb, and move other entries into new slots. + * + * This function is an adaptation of the similarly-named function in + * hantro_h264.c. + */ +void mtk_vdec_h264_update_dpb(const struct v4l2_ctrl_h264_decode_params *dec_param, + struct v4l2_h264_dpb_entry *dpb) +{ + DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + DECLARE_BITMAP(in_use, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + unsigned int i, j; + + /* Disable all entries by default, and mark the ones in use. */ + for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) { + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) + set_bit(i, in_use); + dpb[i].flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE; + } + + /* Try to match new DPB entries with existing ones by their POCs. */ + for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) { + const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; + + if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + continue; + + /* + * To cut off some comparisons, iterate only on target DPB + * entries were already used. + */ + for_each_set_bit(j, in_use, ARRAY_SIZE(dec_param->dpb)) { + struct v4l2_h264_dpb_entry *cdpb; + + cdpb = &dpb[j]; + if (!mtk_vdec_h264_dpb_entry_match(cdpb, ndpb)) + continue; + + *cdpb = *ndpb; + set_bit(j, used); + /* Don't reiterate on this one. */ + clear_bit(j, in_use); + break; + } + + if (j == ARRAY_SIZE(dec_param->dpb)) + set_bit(i, new); + } + + /* For entries that could not be matched, use remaining free slots. */ + for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) { + const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; + struct v4l2_h264_dpb_entry *cdpb; + + /* + * Both arrays are of the same sizes, so there is no way + * we can end up with no space in target array, unless + * something is buggy. + */ + j = find_first_zero_bit(used, ARRAY_SIZE(dec_param->dpb)); + if (WARN_ON(j >= ARRAY_SIZE(dec_param->dpb))) + return; + + cdpb = &dpb[j]; + *cdpb = *ndpb; + set_bit(j, used); + } +} + +unsigned int mtk_vdec_h264_get_mv_buf_size(unsigned int width, unsigned int height) +{ + int unit_size = (width / MB_UNIT_LEN) * (height / MB_UNIT_LEN) + 8; + + return HW_MB_STORE_SZ * unit_size; +} + +int mtk_vdec_h264_find_start_code(unsigned char *data, unsigned int data_sz) +{ + if (data_sz > 3 && data[0] == 0 && data[1] == 0 && data[2] == 1) + return 3; + + if (data_sz > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && + data[3] == 1) + return 4; + + return -1; +} diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_common.h b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_common.h new file mode 100644 index 000000000..53d0a7c96 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_common.h @@ -0,0 +1,277 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#ifndef _VDEC_H264_REQ_COMMON_H_ +#define _VDEC_H264_REQ_COMMON_H_ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/v4l2-h264.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_drv.h" + +#define NAL_NON_IDR_SLICE 0x01 +#define NAL_IDR_SLICE 0x05 +#define NAL_TYPE(value) ((value) & 0x1F) + +#define BUF_PREDICTION_SZ (64 * 4096) +#define MB_UNIT_LEN 16 + +/* motion vector size (bytes) for every macro block */ +#define HW_MB_STORE_SZ 64 + +#define H264_MAX_MV_NUM 32 + +/** + * struct mtk_h264_dpb_info - h264 dpb information + * + * @y_dma_addr: Y bitstream physical address + * @c_dma_addr: CbCr bitstream physical address + * @reference_flag: reference picture flag (short/long term reference picture) + * @field: field picture flag + */ +struct mtk_h264_dpb_info { + dma_addr_t y_dma_addr; + dma_addr_t c_dma_addr; + int reference_flag; + int field; +}; + +/* + * struct mtk_h264_sps_param - parameters for sps + */ +struct mtk_h264_sps_param { + unsigned char chroma_format_idc; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char log2_max_frame_num_minus4; + unsigned char pic_order_cnt_type; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + unsigned char max_num_ref_frames; + unsigned char separate_colour_plane_flag; + unsigned short pic_width_in_mbs_minus1; + unsigned short pic_height_in_map_units_minus1; + unsigned int max_frame_nums; + unsigned char qpprime_y_zero_transform_bypass_flag; + unsigned char delta_pic_order_always_zero_flag; + unsigned char frame_mbs_only_flag; + unsigned char mb_adaptive_frame_field_flag; + unsigned char direct_8x8_inference_flag; + unsigned char reserved[3]; +}; + +/* + * struct mtk_h264_pps_param - parameters for pps + */ +struct mtk_h264_pps_param { + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + unsigned char weighted_bipred_idc; + char pic_init_qp_minus26; + char chroma_qp_index_offset; + char second_chroma_qp_index_offset; + unsigned char entropy_coding_mode_flag; + unsigned char pic_order_present_flag; + unsigned char deblocking_filter_control_present_flag; + unsigned char constrained_intra_pred_flag; + unsigned char weighted_pred_flag; + unsigned char redundant_pic_cnt_present_flag; + unsigned char transform_8x8_mode_flag; + unsigned char scaling_matrix_present_flag; + unsigned char reserved[2]; +}; + +/* + * struct mtk_h264_slice_hd_param - parameters for slice header + */ +struct mtk_h264_slice_hd_param { + unsigned int first_mb_in_slice; + unsigned int field_pic_flag; + unsigned int slice_type; + unsigned int frame_num; + int pic_order_cnt_lsb; + int delta_pic_order_cnt_bottom; + unsigned int bottom_field_flag; + unsigned int direct_spatial_mv_pred_flag; + int delta_pic_order_cnt0; + int delta_pic_order_cnt1; + unsigned int cabac_init_idc; + int slice_qp_delta; + unsigned int disable_deblocking_filter_idc; + int slice_alpha_c0_offset_div2; + int slice_beta_offset_div2; + unsigned int num_ref_idx_l0_active_minus1; + unsigned int num_ref_idx_l1_active_minus1; + unsigned int reserved; +}; + +/* + * struct slice_api_h264_scaling_matrix - parameters for scaling list + */ +struct slice_api_h264_scaling_matrix { + unsigned char scaling_list_4x4[6][16]; + unsigned char scaling_list_8x8[6][64]; +}; + +/* + * struct slice_h264_dpb_entry - each dpb information + */ +struct slice_h264_dpb_entry { + unsigned long long reference_ts; + unsigned short frame_num; + unsigned short pic_num; + /* Note that field is indicated by v4l2_buffer.field */ + int top_field_order_cnt; + int bottom_field_order_cnt; + unsigned int flags; +}; + +/* + * struct slice_api_h264_decode_param - parameters for decode. + */ +struct slice_api_h264_decode_param { + struct slice_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES]; + unsigned short num_slices; + unsigned short nal_ref_idc; + unsigned char ref_pic_list_p0[32]; + unsigned char ref_pic_list_b0[32]; + unsigned char ref_pic_list_b1[32]; + int top_field_order_cnt; + int bottom_field_order_cnt; + unsigned int flags; +}; + +/** + * struct h264_fb - h264 decode frame buffer information + * + * @vdec_fb_va: virtual address of struct vdec_fb + * @y_fb_dma: dma address of Y frame buffer (luma) + * @c_fb_dma: dma address of C frame buffer (chroma) + * @poc: picture order count of frame buffer + * @reserved: for 8 bytes alignment + */ +struct h264_fb { + u64 vdec_fb_va; + u64 y_fb_dma; + u64 c_fb_dma; + s32 poc; + u32 reserved; +}; + +/** + * mtk_vdec_h264_get_ref_list - translate V4L2 reference list + * + * @ref_list: Mediatek reference picture list + * @v4l2_ref_list: V4L2 reference picture list + * @num_valid: used reference number + */ +void mtk_vdec_h264_get_ref_list(u8 *ref_list, + const struct v4l2_h264_reference *v4l2_ref_list, + int num_valid); + +/** + * mtk_vdec_h264_get_ctrl_ptr - get each CID contrl address. + * + * @ctx: v4l2 ctx + * @id: CID control ID + * + * Return: returns CID ctrl address. + */ +void *mtk_vdec_h264_get_ctrl_ptr(struct mtk_vcodec_ctx *ctx, int id); + +/** + * mtk_vdec_h264_fill_dpb_info - get each CID contrl address. + * + * @ctx: v4l2 ctx + * @decode_params: slice decode params + * @h264_dpb_info: dpb buffer information + */ +void mtk_vdec_h264_fill_dpb_info(struct mtk_vcodec_ctx *ctx, + struct slice_api_h264_decode_param *decode_params, + struct mtk_h264_dpb_info *h264_dpb_info); + +/** + * mtk_vdec_h264_copy_sps_params - get sps params. + * + * @dst_param: sps params for hw decoder + * @src_param: sps params from user driver + */ +void mtk_vdec_h264_copy_sps_params(struct mtk_h264_sps_param *dst_param, + const struct v4l2_ctrl_h264_sps *src_param); + +/** + * mtk_vdec_h264_copy_pps_params - get pps params. + * + * @dst_param: pps params for hw decoder + * @src_param: pps params from user driver + */ +void mtk_vdec_h264_copy_pps_params(struct mtk_h264_pps_param *dst_param, + const struct v4l2_ctrl_h264_pps *src_param); + +/** + * mtk_vdec_h264_copy_slice_hd_params - get slice header params. + * + * @dst_param: slice params for hw decoder + * @src_param: slice params from user driver + * @dec_param: decode params from user driver + */ +void mtk_vdec_h264_copy_slice_hd_params(struct mtk_h264_slice_hd_param *dst_param, + const struct v4l2_ctrl_h264_slice_params *src_param, + const struct v4l2_ctrl_h264_decode_params *dec_param); + +/** + * mtk_vdec_h264_copy_scaling_matrix - get each CID contrl address. + * + * @dst_matrix: scaling list params for hw decoder + * @src_matrix: scaling list params from user driver + */ +void mtk_vdec_h264_copy_scaling_matrix(struct slice_api_h264_scaling_matrix *dst_matrix, + const struct v4l2_ctrl_h264_scaling_matrix *src_matrix); + +/** + * mtk_vdec_h264_copy_decode_params - get decode params. + * + * @dst_params: dst params for hw decoder + * @src_params: decode params from user driver + * @dpb: dpb information + */ +void +mtk_vdec_h264_copy_decode_params(struct slice_api_h264_decode_param *dst_params, + const struct v4l2_ctrl_h264_decode_params *src_params, + const struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES]); + +/** + * mtk_vdec_h264_update_dpb - updata dpb list. + * + * @dec_param: v4l2 control decode params + * @dpb: dpb entry informaton + */ +void mtk_vdec_h264_update_dpb(const struct v4l2_ctrl_h264_decode_params *dec_param, + struct v4l2_h264_dpb_entry *dpb); + +/** + * mtk_vdec_h264_find_start_code - find h264 start code using sofeware. + * + * @data: input buffer address + * @data_sz: input buffer size + * + * Return: returns start code position. + */ +int mtk_vdec_h264_find_start_code(unsigned char *data, unsigned int data_sz); + +/** + * mtk_vdec_h264_get_mv_buf_size - get mv buffer size. + * + * @width: picture width + * @height: picture height + * + * Return: returns mv buffer size. + */ +unsigned int mtk_vdec_h264_get_mv_buf_size(unsigned int width, unsigned int height); + +#endif diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_if.c new file mode 100644 index 000000000..4bc05ab5a --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_if.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/v4l2-mem2mem.h> +#include <media/v4l2-h264.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_dec.h" +#include "../mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" +#include "vdec_h264_req_common.h" + +/* + * struct mtk_h264_dec_slice_param - parameters for decode current frame + */ +struct mtk_h264_dec_slice_param { + struct mtk_h264_sps_param sps; + struct mtk_h264_pps_param pps; + struct slice_api_h264_scaling_matrix scaling_matrix; + struct slice_api_h264_decode_param decode_params; + struct mtk_h264_dpb_info h264_dpb_info[16]; +}; + +/** + * struct vdec_h264_dec_info - decode information + * @dpb_sz : decoding picture buffer size + * @resolution_changed : resoltion change happen + * @realloc_mv_buf : flag to notify driver to re-allocate mv buffer + * @cap_num_planes : number planes of capture buffer + * @bs_dma : Input bit-stream buffer dma address + * @y_fb_dma : Y frame buffer dma address + * @c_fb_dma : C frame buffer dma address + * @vdec_fb_va : VDEC frame buffer struct virtual address + */ +struct vdec_h264_dec_info { + u32 dpb_sz; + u32 resolution_changed; + u32 realloc_mv_buf; + u32 cap_num_planes; + u64 bs_dma; + u64 y_fb_dma; + u64 c_fb_dma; + u64 vdec_fb_va; +}; + +/** + * struct vdec_h264_vsi - shared memory for decode information exchange + * between VPU and Host. + * The memory is allocated by VPU then mapping to Host + * in vpu_dec_init() and freed in vpu_dec_deinit() + * by VPU. + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item + * @pred_buf_dma : HW working predication buffer dma address (AP-W, VPU-R) + * @mv_buf_dma : HW working motion vector buffer dma address (AP-W, VPU-R) + * @dec : decode information (AP-R, VPU-W) + * @pic : picture information (AP-R, VPU-W) + * @crop : crop information (AP-R, VPU-W) + * @h264_slice_params : the parameters that hardware use to decode + */ +struct vdec_h264_vsi { + u64 pred_buf_dma; + u64 mv_buf_dma[H264_MAX_MV_NUM]; + struct vdec_h264_dec_info dec; + struct vdec_pic_info pic; + struct v4l2_rect crop; + struct mtk_h264_dec_slice_param h264_slice_params; +}; + +/** + * struct vdec_h264_slice_inst - h264 decoder instance + * @num_nalu : how many nalus be decoded + * @ctx : point to mtk_vcodec_ctx + * @pred_buf : HW working predication buffer + * @mv_buf : HW working motion vector buffer + * @vpu : VPU instance + * @vsi_ctx : Local VSI data for this decoding context + * @h264_slice_param : the parameters that hardware use to decode + * @dpb : decoded picture buffer used to store reference buffer information + */ +struct vdec_h264_slice_inst { + unsigned int num_nalu; + struct mtk_vcodec_ctx *ctx; + struct mtk_vcodec_mem pred_buf; + struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM]; + struct vdec_vpu_inst vpu; + struct vdec_h264_vsi vsi_ctx; + struct mtk_h264_dec_slice_param h264_slice_param; + + struct v4l2_h264_dpb_entry dpb[16]; +}; + +static int get_vdec_decode_parameters(struct vdec_h264_slice_inst *inst) +{ + const struct v4l2_ctrl_h264_decode_params *dec_params; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix; + struct mtk_h264_dec_slice_param *slice_param = &inst->h264_slice_param; + struct v4l2_h264_reflist_builder reflist_builder; + struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN]; + u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0; + u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0; + u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1; + + dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + if (IS_ERR(dec_params)) + return PTR_ERR(dec_params); + + sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS); + if (IS_ERR(sps)) + return PTR_ERR(sps); + + pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS); + if (IS_ERR(pps)) + return PTR_ERR(pps); + + scaling_matrix = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); + if (IS_ERR(scaling_matrix)) + return PTR_ERR(scaling_matrix); + + mtk_vdec_h264_update_dpb(dec_params, inst->dpb); + + mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps); + mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps); + mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, scaling_matrix); + mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, + dec_params, inst->dpb); + mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params, + slice_param->h264_dpb_info); + + /* Build the reference lists */ + v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps, + inst->dpb); + v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist); + v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, + v4l2_b1_reflist); + + /* Adapt the built lists to the firmware's expectations */ + mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid); + + memcpy(&inst->vsi_ctx.h264_slice_params, slice_param, + sizeof(inst->vsi_ctx.h264_slice_params)); + + return 0; +} + +static int allocate_predication_buf(struct vdec_h264_slice_inst *inst) +{ + int err; + + inst->pred_buf.size = BUF_PREDICTION_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, &inst->pred_buf); + if (err) { + mtk_vcodec_err(inst, "failed to allocate ppl buf"); + return err; + } + + inst->vsi_ctx.pred_buf_dma = inst->pred_buf.dma_addr; + return 0; +} + +static void free_predication_buf(struct vdec_h264_slice_inst *inst) +{ + struct mtk_vcodec_mem *mem = &inst->pred_buf; + + mtk_vcodec_debug_enter(inst); + + inst->vsi_ctx.pred_buf_dma = 0; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); +} + +static int alloc_mv_buf(struct vdec_h264_slice_inst *inst, + struct vdec_pic_info *pic) +{ + int i; + int err; + struct mtk_vcodec_mem *mem = NULL; + unsigned int buf_sz = mtk_vdec_h264_get_mv_buf_size(pic->buf_w, pic->buf_h); + + mtk_v4l2_debug(3, "size = 0x%x", buf_sz); + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + mem->size = buf_sz; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "failed to allocate mv buf"); + return err; + } + inst->vsi_ctx.mv_buf_dma[i] = mem->dma_addr; + } + + return 0; +} + +static void free_mv_buf(struct vdec_h264_slice_inst *inst) +{ + int i; + struct mtk_vcodec_mem *mem; + + for (i = 0; i < H264_MAX_MV_NUM; i++) { + inst->vsi_ctx.mv_buf_dma[i] = 0; + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + } +} + +static void get_pic_info(struct vdec_h264_slice_inst *inst, + struct vdec_pic_info *pic) +{ + struct mtk_vcodec_ctx *ctx = inst->ctx; + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.fb_sz[0] = ctx->picinfo.buf_w * ctx->picinfo.buf_h; + ctx->picinfo.fb_sz[1] = ctx->picinfo.fb_sz[0] >> 1; + inst->vsi_ctx.dec.cap_num_planes = + ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + + *pic = ctx->picinfo; + mtk_vcodec_debug(inst, "pic(%d, %d), buf(%d, %d)", + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->picinfo.buf_w, ctx->picinfo.buf_h); + mtk_vcodec_debug(inst, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0], + ctx->picinfo.fb_sz[1]); + + if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w || + ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) { + inst->vsi_ctx.dec.resolution_changed = true; + if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w || + ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h) + inst->vsi_ctx.dec.realloc_mv_buf = true; + + mtk_v4l2_debug(1, "ResChg: (%d %d) : old(%d, %d) -> new(%d, %d)", + inst->vsi_ctx.dec.resolution_changed, + inst->vsi_ctx.dec.realloc_mv_buf, + ctx->last_decoded_picinfo.pic_w, + ctx->last_decoded_picinfo.pic_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h); + } +} + +static void get_crop_info(struct vdec_h264_slice_inst *inst, struct v4l2_rect *cr) +{ + cr->left = inst->vsi_ctx.crop.left; + cr->top = inst->vsi_ctx.crop.top; + cr->width = inst->vsi_ctx.crop.width; + cr->height = inst->vsi_ctx.crop.height; + + mtk_vcodec_debug(inst, "l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static void get_dpb_size(struct vdec_h264_slice_inst *inst, unsigned int *dpb_sz) +{ + *dpb_sz = inst->vsi_ctx.dec.dpb_sz; + mtk_vcodec_debug(inst, "sz=%d", *dpb_sz); +} + +static int vdec_h264_slice_init(struct mtk_vcodec_ctx *ctx) +{ + struct vdec_h264_slice_inst *inst; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = SCP_IPI_VDEC_H264; + inst->vpu.ctx = ctx; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vcodec_err(inst, "vdec_h264 init err=%d", err); + goto error_free_inst; + } + + memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); + inst->vsi_ctx.dec.resolution_changed = true; + inst->vsi_ctx.dec.realloc_mv_buf = true; + + err = allocate_predication_buf(inst); + if (err) + goto error_deinit; + + mtk_vcodec_debug(inst, "struct size = %zu,%zu,%zu,%zu\n", + sizeof(struct mtk_h264_sps_param), + sizeof(struct mtk_h264_pps_param), + sizeof(struct mtk_h264_dec_slice_param), + sizeof(struct mtk_h264_dpb_info)); + + mtk_vcodec_debug(inst, "H264 Instance >> %p", inst); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); + +error_free_inst: + kfree(inst); + return err; +} + +static void vdec_h264_slice_deinit(void *h_vdec) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + mtk_vcodec_debug_enter(inst); + + vpu_dec_deinit(&inst->vpu); + free_predication_buf(inst); + free_mv_buf(inst); + + kfree(inst); +} + +static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *unused, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + const struct v4l2_ctrl_h264_decode_params *dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info; + struct mtk_video_dec_buf *dst_buf_info; + struct vdec_fb *fb; + u32 data[2]; + u64 y_fb_dma; + u64 c_fb_dma; + int err; + + inst->num_nalu++; + /* bs NULL means flush decoder */ + if (!bs) + return vpu_dec_reset(vpu); + + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + mtk_vcodec_debug(inst, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p", + inst->num_nalu, y_fb_dma, c_fb_dma, fb); + + inst->vsi_ctx.dec.bs_dma = (uint64_t)bs->dma_addr; + inst->vsi_ctx.dec.y_fb_dma = y_fb_dma; + inst->vsi_ctx.dec.c_fb_dma = c_fb_dma; + inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb; + + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, + &dst_buf_info->m2m_buf.vb, true); + err = get_vdec_decode_parameters(inst); + if (err) + goto err_free_fb_out; + + data[0] = bs->size; + /* + * Reconstruct the first byte of the NAL unit, as the firmware requests + * that information to be passed even though it is present in the stream + * itself... + */ + data[1] = (dec_params->nal_ref_idc << 5) | + ((dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) + ? 0x5 : 0x1); + + *res_chg = inst->vsi_ctx.dec.resolution_changed; + if (*res_chg) { + mtk_vcodec_debug(inst, "- resolution changed -"); + if (inst->vsi_ctx.dec.realloc_mv_buf) { + err = alloc_mv_buf(inst, &inst->ctx->picinfo); + inst->vsi_ctx.dec.realloc_mv_buf = false; + if (err) + goto err_free_fb_out; + } + *res_chg = false; + } + + memcpy(inst->vpu.vsi, &inst->vsi_ctx, sizeof(inst->vsi_ctx)); + err = vpu_dec_start(vpu, data, 2); + if (err) + goto err_free_fb_out; + + /* wait decoder done interrupt */ + err = mtk_vcodec_wait_for_done_ctx(inst->ctx, + MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + if (err) + goto err_free_fb_out; + vpu_dec_end(vpu); + + memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); + mtk_vcodec_debug(inst, "\n - NALU[%d]", inst->num_nalu); + return 0; + +err_free_fb_out: + mtk_vcodec_err(inst, "\n - NALU[%d] err=%d -\n", inst->num_nalu, err); + return err; +} + +static int vdec_h264_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + + case GET_PARAM_DPB_SIZE: + get_dpb_size(inst, out); + break; + + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + + default: + mtk_vcodec_err(inst, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +const struct vdec_common_if vdec_h264_slice_if = { + .init = vdec_h264_slice_init, + .decode = vdec_h264_slice_decode, + .get_param = vdec_h264_slice_get_param, + .deinit = vdec_h264_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c new file mode 100644 index 000000000..999ce7ee5 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_h264_req_multi_if.c @@ -0,0 +1,855 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/v4l2-h264.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_dec.h" +#include "../mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" +#include "vdec_h264_req_common.h" + +/** + * enum vdec_h264_core_dec_err_type - core decode error type + * + * @TRANS_BUFFER_FULL: trans buffer is full + * @SLICE_HEADER_FULL: slice header buffer is full + */ +enum vdec_h264_core_dec_err_type { + TRANS_BUFFER_FULL = 1, + SLICE_HEADER_FULL, +}; + +/** + * struct vdec_h264_slice_lat_dec_param - parameters for decode current frame + * + * @sps: h264 sps syntax parameters + * @pps: h264 pps syntax parameters + * @slice_header: h264 slice header syntax parameters + * @scaling_matrix: h264 scaling list parameters + * @decode_params: decoder parameters of each frame used for hardware decode + * @h264_dpb_info: dpb reference list + */ +struct vdec_h264_slice_lat_dec_param { + struct mtk_h264_sps_param sps; + struct mtk_h264_pps_param pps; + struct mtk_h264_slice_hd_param slice_header; + struct slice_api_h264_scaling_matrix scaling_matrix; + struct slice_api_h264_decode_param decode_params; + struct mtk_h264_dpb_info h264_dpb_info[V4L2_H264_NUM_DPB_ENTRIES]; +}; + +/** + * struct vdec_h264_slice_info - decode information + * + * @nal_info: nal info of current picture + * @timeout: Decode timeout: 1 timeout, 0 no timeount + * @bs_buf_size: bitstream size + * @bs_buf_addr: bitstream buffer dma address + * @y_fb_dma: Y frame buffer dma address + * @c_fb_dma: C frame buffer dma address + * @vdec_fb_va: VDEC frame buffer struct virtual address + * @crc: Used to check whether hardware's status is right + */ +struct vdec_h264_slice_info { + u16 nal_info; + u16 timeout; + u32 bs_buf_size; + u64 bs_buf_addr; + u64 y_fb_dma; + u64 c_fb_dma; + u64 vdec_fb_va; + u32 crc[8]; +}; + +/** + * struct vdec_h264_slice_vsi - shared memory for decode information exchange + * between SCP and Host. + * + * @wdma_err_addr: wdma error dma address + * @wdma_start_addr: wdma start dma address + * @wdma_end_addr: wdma end dma address + * @slice_bc_start_addr: slice bc start dma address + * @slice_bc_end_addr: slice bc end dma address + * @row_info_start_addr: row info start dma address + * @row_info_end_addr: row info end dma address + * @trans_start: trans start dma address + * @trans_end: trans end dma address + * @wdma_end_addr_offset: wdma end address offset + * + * @mv_buf_dma: HW working motion vector buffer + * dma address (AP-W, VPU-R) + * @dec: decode information (AP-R, VPU-W) + * @h264_slice_params: decode parameters for hw used + */ +struct vdec_h264_slice_vsi { + /* LAT dec addr */ + u64 wdma_err_addr; + u64 wdma_start_addr; + u64 wdma_end_addr; + u64 slice_bc_start_addr; + u64 slice_bc_end_addr; + u64 row_info_start_addr; + u64 row_info_end_addr; + u64 trans_start; + u64 trans_end; + u64 wdma_end_addr_offset; + + u64 mv_buf_dma[H264_MAX_MV_NUM]; + struct vdec_h264_slice_info dec; + struct vdec_h264_slice_lat_dec_param h264_slice_params; +}; + +/** + * struct vdec_h264_slice_share_info - shared information used to exchange + * message between lat and core + * + * @sps: sequence header information from user space + * @dec_params: decoder params from user space + * @h264_slice_params: decoder params used for hardware + * @trans_start: trans start dma address + * @trans_end: trans end dma address + * @nal_info: nal info of current picture + */ +struct vdec_h264_slice_share_info { + struct v4l2_ctrl_h264_sps sps; + struct v4l2_ctrl_h264_decode_params dec_params; + struct vdec_h264_slice_lat_dec_param h264_slice_params; + u64 trans_start; + u64 trans_end; + u16 nal_info; +}; + +/** + * struct vdec_h264_slice_inst - h264 decoder instance + * + * @slice_dec_num: how many picture be decoded + * @ctx: point to mtk_vcodec_ctx + * @pred_buf: HW working predication buffer + * @mv_buf: HW working motion vector buffer + * @vpu: VPU instance + * @vsi: vsi used for lat + * @vsi_core: vsi used for core + * + * @vsi_ctx: Local VSI data for this decoding context + * @h264_slice_param: the parameters that hardware use to decode + * + * @resolution_changed:resolution changed + * @realloc_mv_buf: reallocate mv buffer + * @cap_num_planes: number of capture queue plane + * + * @dpb: decoded picture buffer used to store reference + * buffer information + *@is_field_bitstream: is field bitstream + */ +struct vdec_h264_slice_inst { + unsigned int slice_dec_num; + struct mtk_vcodec_ctx *ctx; + struct mtk_vcodec_mem pred_buf; + struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM]; + struct vdec_vpu_inst vpu; + struct vdec_h264_slice_vsi *vsi; + struct vdec_h264_slice_vsi *vsi_core; + + struct vdec_h264_slice_vsi vsi_ctx; + struct vdec_h264_slice_lat_dec_param h264_slice_param; + + unsigned int resolution_changed; + unsigned int realloc_mv_buf; + unsigned int cap_num_planes; + + struct v4l2_h264_dpb_entry dpb[16]; + bool is_field_bitstream; +}; + +static int vdec_h264_slice_fill_decode_parameters(struct vdec_h264_slice_inst *inst, + struct vdec_h264_slice_share_info *share_info) +{ + struct vdec_h264_slice_lat_dec_param *slice_param = &inst->vsi->h264_slice_params; + const struct v4l2_ctrl_h264_decode_params *dec_params; + const struct v4l2_ctrl_h264_scaling_matrix *src_matrix; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + + dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + if (IS_ERR(dec_params)) + return PTR_ERR(dec_params); + + src_matrix = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); + if (IS_ERR(src_matrix)) + return PTR_ERR(src_matrix); + + sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS); + if (IS_ERR(sps)) + return PTR_ERR(sps); + + pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS); + if (IS_ERR(pps)) + return PTR_ERR(pps); + + if (dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) { + mtk_vcodec_err(inst, "No support for H.264 field decoding."); + inst->is_field_bitstream = true; + return -EINVAL; + } + + mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps); + mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps); + mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, src_matrix); + + memcpy(&share_info->sps, sps, sizeof(*sps)); + memcpy(&share_info->dec_params, dec_params, sizeof(*dec_params)); + + return 0; +} + +static int get_vdec_sig_decode_parameters(struct vdec_h264_slice_inst *inst) +{ + const struct v4l2_ctrl_h264_decode_params *dec_params; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix; + struct vdec_h264_slice_lat_dec_param *slice_param = &inst->h264_slice_param; + struct v4l2_h264_reflist_builder reflist_builder; + struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN]; + u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0; + u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0; + u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1; + + dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + if (IS_ERR(dec_params)) + return PTR_ERR(dec_params); + + sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS); + if (IS_ERR(sps)) + return PTR_ERR(sps); + + pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS); + if (IS_ERR(pps)) + return PTR_ERR(pps); + + scaling_matrix = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); + if (IS_ERR(scaling_matrix)) + return PTR_ERR(scaling_matrix); + + mtk_vdec_h264_update_dpb(dec_params, inst->dpb); + + mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps); + mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps); + mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, scaling_matrix); + + mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, dec_params, inst->dpb); + mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params, + slice_param->h264_dpb_info); + + /* Build the reference lists */ + v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps, inst->dpb); + v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist); + v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, v4l2_b1_reflist); + + /* Adapt the built lists to the firmware's expectations */ + mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid); + + memcpy(&inst->vsi_ctx.h264_slice_params, slice_param, + sizeof(inst->vsi_ctx.h264_slice_params)); + + return 0; +} + +static void vdec_h264_slice_fill_decode_reflist(struct vdec_h264_slice_inst *inst, + struct vdec_h264_slice_lat_dec_param *slice_param, + struct vdec_h264_slice_share_info *share_info) +{ + struct v4l2_ctrl_h264_decode_params *dec_params = &share_info->dec_params; + struct v4l2_ctrl_h264_sps *sps = &share_info->sps; + struct v4l2_h264_reflist_builder reflist_builder; + struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN]; + u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0; + u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0; + u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1; + + mtk_vdec_h264_update_dpb(dec_params, inst->dpb); + + mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, dec_params, + inst->dpb); + mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params, + slice_param->h264_dpb_info); + + mtk_v4l2_debug(3, "cur poc = %d\n", dec_params->bottom_field_order_cnt); + /* Build the reference lists */ + v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps, + inst->dpb); + v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist); + v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, v4l2_b1_reflist); + + /* Adapt the built lists to the firmware's expectations */ + mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid); +} + +static int vdec_h264_slice_alloc_mv_buf(struct vdec_h264_slice_inst *inst, + struct vdec_pic_info *pic) +{ + unsigned int buf_sz = mtk_vdec_h264_get_mv_buf_size(pic->buf_w, pic->buf_h); + struct mtk_vcodec_mem *mem; + int i, err; + + mtk_v4l2_debug(3, "size = 0x%x", buf_sz); + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + mem->size = buf_sz; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "failed to allocate mv buf"); + return err; + } + } + + return 0; +} + +static void vdec_h264_slice_free_mv_buf(struct vdec_h264_slice_inst *inst) +{ + int i; + struct mtk_vcodec_mem *mem; + + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + } +} + +static void vdec_h264_slice_get_pic_info(struct vdec_h264_slice_inst *inst) +{ + struct mtk_vcodec_ctx *ctx = inst->ctx; + u32 data[3]; + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1]; + inst->cap_num_planes = + ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + + mtk_vcodec_debug(inst, "pic(%d, %d), buf(%d, %d)", + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->picinfo.buf_w, ctx->picinfo.buf_h); + mtk_vcodec_debug(inst, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0], + ctx->picinfo.fb_sz[1]); + + if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w || + ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) { + inst->resolution_changed = true; + if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w || + ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h) + inst->realloc_mv_buf = true; + + mtk_v4l2_debug(1, "resChg: (%d %d) : old(%d, %d) -> new(%d, %d)", + inst->resolution_changed, + inst->realloc_mv_buf, + ctx->last_decoded_picinfo.pic_w, + ctx->last_decoded_picinfo.pic_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h); + } +} + +static void vdec_h264_slice_get_crop_info(struct vdec_h264_slice_inst *inst, + struct v4l2_rect *cr) +{ + cr->left = 0; + cr->top = 0; + cr->width = inst->ctx->picinfo.pic_w; + cr->height = inst->ctx->picinfo.pic_h; + + mtk_vcodec_debug(inst, "l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_h264_slice_init(struct mtk_vcodec_ctx *ctx) +{ + struct vdec_h264_slice_inst *inst; + int err, vsi_size; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = SCP_IPI_VDEC_LAT; + inst->vpu.core_id = SCP_IPI_VDEC_CORE; + inst->vpu.ctx = ctx; + inst->vpu.codec_type = ctx->current_codec; + inst->vpu.capture_type = ctx->capture_fourcc; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vcodec_err(inst, "vdec_h264 init err=%d", err); + goto error_free_inst; + } + + vsi_size = round_up(sizeof(struct vdec_h264_slice_vsi), VCODEC_DEC_ALIGNED_64); + inst->vsi = inst->vpu.vsi; + inst->vsi_core = + (struct vdec_h264_slice_vsi *)(((char *)inst->vpu.vsi) + vsi_size); + inst->resolution_changed = true; + inst->realloc_mv_buf = true; + + mtk_vcodec_debug(inst, "lat struct size = %d,%d,%d,%d vsi: %d\n", + (int)sizeof(struct mtk_h264_sps_param), + (int)sizeof(struct mtk_h264_pps_param), + (int)sizeof(struct vdec_h264_slice_lat_dec_param), + (int)sizeof(struct mtk_h264_dpb_info), + vsi_size); + mtk_vcodec_debug(inst, "lat H264 instance >> %p, codec_type = 0x%x", + inst, inst->vpu.codec_type); + + ctx->drv_handle = inst; + return 0; + +error_free_inst: + kfree(inst); + return err; +} + +static void vdec_h264_slice_deinit(void *h_vdec) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + mtk_vcodec_debug_enter(inst); + + vpu_dec_deinit(&inst->vpu); + vdec_h264_slice_free_mv_buf(inst); + vdec_msg_queue_deinit(&inst->ctx->msg_queue, inst->ctx); + + kfree(inst); +} + +static int vdec_h264_slice_core_decode(struct vdec_lat_buf *lat_buf) +{ + struct vdec_fb *fb; + u64 vdec_fb_va; + u64 y_fb_dma, c_fb_dma; + int err, timeout, i; + struct mtk_vcodec_ctx *ctx = lat_buf->ctx; + struct vdec_h264_slice_inst *inst = ctx->drv_handle; + struct vb2_v4l2_buffer *vb2_v4l2; + struct vdec_h264_slice_share_info *share_info = lat_buf->private_data; + struct mtk_vcodec_mem *mem; + struct vdec_vpu_inst *vpu = &inst->vpu; + + mtk_vcodec_debug(inst, "[h264-core] vdec_h264 core decode"); + memcpy(&inst->vsi_core->h264_slice_params, &share_info->h264_slice_params, + sizeof(share_info->h264_slice_params)); + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) { + err = -EBUSY; + mtk_vcodec_err(inst, "fb buffer is NULL"); + goto vdec_dec_end; + } + + vdec_fb_va = (unsigned long)fb; + y_fb_dma = (u64)fb->base_y.dma_addr; + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1) + c_fb_dma = + y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h; + else + c_fb_dma = (u64)fb->base_c.dma_addr; + + mtk_vcodec_debug(inst, "[h264-core] y/c addr = 0x%llx 0x%llx", y_fb_dma, + c_fb_dma); + + inst->vsi_core->dec.y_fb_dma = y_fb_dma; + inst->vsi_core->dec.c_fb_dma = c_fb_dma; + inst->vsi_core->dec.vdec_fb_va = vdec_fb_va; + inst->vsi_core->dec.nal_info = share_info->nal_info; + inst->vsi_core->wdma_start_addr = + lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + inst->vsi_core->wdma_end_addr = + lat_buf->ctx->msg_queue.wdma_addr.dma_addr + + lat_buf->ctx->msg_queue.wdma_addr.size; + inst->vsi_core->wdma_err_addr = lat_buf->wdma_err_addr.dma_addr; + inst->vsi_core->slice_bc_start_addr = lat_buf->slice_bc_addr.dma_addr; + inst->vsi_core->slice_bc_end_addr = lat_buf->slice_bc_addr.dma_addr + + lat_buf->slice_bc_addr.size; + inst->vsi_core->trans_start = share_info->trans_start; + inst->vsi_core->trans_end = share_info->trans_end; + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi_core->mv_buf_dma[i] = mem->dma_addr; + } + + vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); + v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, vb2_v4l2, true); + + vdec_h264_slice_fill_decode_reflist(inst, &inst->vsi_core->h264_slice_params, + share_info); + + err = vpu_dec_core(vpu); + if (err) { + mtk_vcodec_err(inst, "core decode err=%d", err); + goto vdec_dec_end; + } + + /* wait decoder done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + if (timeout) + mtk_vcodec_err(inst, "core decode timeout: pic_%d", + ctx->decoded_frame_cnt); + inst->vsi_core->dec.timeout = !!timeout; + + vpu_dec_core_end(vpu); + mtk_vcodec_debug(inst, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x", + ctx->decoded_frame_cnt, + inst->vsi_core->dec.crc[0], inst->vsi_core->dec.crc[1], + inst->vsi_core->dec.crc[2], inst->vsi_core->dec.crc[3], + inst->vsi_core->dec.crc[4], inst->vsi_core->dec.crc[5], + inst->vsi_core->dec.crc[6], inst->vsi_core->dec.crc[7]); + +vdec_dec_end: + vdec_msg_queue_update_ube_rptr(&lat_buf->ctx->msg_queue, share_info->trans_end); + ctx->dev->vdec_pdata->cap_to_disp(ctx, !!err, lat_buf->src_buf_req); + mtk_vcodec_debug(inst, "core decode done err=%d", err); + ctx->decoded_frame_cnt++; + return 0; +} + +static void vdec_h264_insert_startcode(struct mtk_vcodec_dev *vcodec_dev, unsigned char *buf, + size_t *bs_size, struct mtk_h264_pps_param *pps) +{ + struct device *dev = &vcodec_dev->plat_dev->dev; + + /* Need to add pending data at the end of bitstream when bs_sz is small than + * 20 bytes for cavlc bitstream, or lat will decode fail. This pending data is + * useful for mt8192 and mt8195 platform. + * + * cavlc bitstream when entropy_coding_mode_flag is false. + */ + if (pps->entropy_coding_mode_flag || *bs_size > 20 || + !(of_device_is_compatible(dev->of_node, "mediatek,mt8192-vcodec-dec") || + of_device_is_compatible(dev->of_node, "mediatek,mt8195-vcodec-dec"))) + return; + + buf[*bs_size] = 0; + buf[*bs_size + 1] = 0; + buf[*bs_size + 2] = 1; + buf[*bs_size + 3] = 0xff; + (*bs_size) += 4; +} + +static int vdec_h264_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info; + int nal_start_idx, err, timeout = 0, i; + unsigned int data[2]; + struct vdec_lat_buf *lat_buf; + struct vdec_h264_slice_share_info *share_info; + unsigned char *buf; + struct mtk_vcodec_mem *mem; + + if (vdec_msg_queue_init(&inst->ctx->msg_queue, inst->ctx, + vdec_h264_slice_core_decode, + sizeof(*share_info))) + return -ENOMEM; + + /* bs NULL means flush decoder */ + if (!bs) { + vdec_msg_queue_wait_lat_buf_full(&inst->ctx->msg_queue); + return vpu_dec_reset(vpu); + } + + if (inst->is_field_bitstream) + return -EINVAL; + + lat_buf = vdec_msg_queue_dqbuf(&inst->ctx->msg_queue.lat_ctx); + if (!lat_buf) { + mtk_vcodec_err(inst, "failed to get lat buffer"); + return -EAGAIN; + } + share_info = lat_buf->private_data; + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + + buf = (unsigned char *)bs->va; + nal_start_idx = mtk_vdec_h264_find_start_code(buf, bs->size); + if (nal_start_idx < 0) { + err = -EINVAL; + goto err_free_fb_out; + } + + inst->vsi->dec.nal_info = buf[nal_start_idx]; + lat_buf->src_buf_req = src_buf_info->m2m_buf.vb.vb2_buf.req_obj.req; + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, &lat_buf->ts_info, true); + + err = vdec_h264_slice_fill_decode_parameters(inst, share_info); + if (err) + goto err_free_fb_out; + + vdec_h264_insert_startcode(inst->ctx->dev, buf, &bs->size, + &share_info->h264_slice_params.pps); + + inst->vsi->dec.bs_buf_addr = (uint64_t)bs->dma_addr; + inst->vsi->dec.bs_buf_size = bs->size; + + *res_chg = inst->resolution_changed; + if (inst->resolution_changed) { + mtk_vcodec_debug(inst, "- resolution changed -"); + if (inst->realloc_mv_buf) { + err = vdec_h264_slice_alloc_mv_buf(inst, &inst->ctx->picinfo); + inst->realloc_mv_buf = false; + if (err) + goto err_free_fb_out; + } + inst->resolution_changed = false; + } + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi->mv_buf_dma[i] = mem->dma_addr; + } + inst->vsi->wdma_start_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + inst->vsi->wdma_end_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr + + lat_buf->ctx->msg_queue.wdma_addr.size; + inst->vsi->wdma_err_addr = lat_buf->wdma_err_addr.dma_addr; + inst->vsi->slice_bc_start_addr = lat_buf->slice_bc_addr.dma_addr; + inst->vsi->slice_bc_end_addr = lat_buf->slice_bc_addr.dma_addr + + lat_buf->slice_bc_addr.size; + + inst->vsi->trans_end = inst->ctx->msg_queue.wdma_rptr_addr; + inst->vsi->trans_start = inst->ctx->msg_queue.wdma_wptr_addr; + mtk_vcodec_debug(inst, "lat:trans(0x%llx 0x%llx) err:0x%llx", + inst->vsi->wdma_start_addr, + inst->vsi->wdma_end_addr, + inst->vsi->wdma_err_addr); + + mtk_vcodec_debug(inst, "slice(0x%llx 0x%llx) rprt((0x%llx 0x%llx))", + inst->vsi->slice_bc_start_addr, + inst->vsi->slice_bc_end_addr, + inst->vsi->trans_start, + inst->vsi->trans_end); + err = vpu_dec_start(vpu, data, 2); + if (err) { + mtk_vcodec_debug(inst, "lat decode err: %d", err); + goto err_free_fb_out; + } + + share_info->trans_end = inst->ctx->msg_queue.wdma_addr.dma_addr + + inst->vsi->wdma_end_addr_offset; + share_info->trans_start = inst->ctx->msg_queue.wdma_wptr_addr; + share_info->nal_info = inst->vsi->dec.nal_info; + + if (IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) { + memcpy(&share_info->h264_slice_params, &inst->vsi->h264_slice_params, + sizeof(share_info->h264_slice_params)); + vdec_msg_queue_qbuf(&inst->ctx->dev->msg_queue_core_ctx, lat_buf); + } + + /* wait decoder done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0); + if (timeout) + mtk_vcodec_err(inst, "lat decode timeout: pic_%d", inst->slice_dec_num); + inst->vsi->dec.timeout = !!timeout; + + err = vpu_dec_end(vpu); + if (err == SLICE_HEADER_FULL || err == TRANS_BUFFER_FULL) { + if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf); + inst->slice_dec_num++; + mtk_vcodec_err(inst, "lat dec fail: pic_%d err:%d", inst->slice_dec_num, err); + return -EINVAL; + } + + share_info->trans_end = inst->ctx->msg_queue.wdma_addr.dma_addr + + inst->vsi->wdma_end_addr_offset; + vdec_msg_queue_update_ube_wptr(&lat_buf->ctx->msg_queue, share_info->trans_end); + + if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) { + memcpy(&share_info->h264_slice_params, &inst->vsi->h264_slice_params, + sizeof(share_info->h264_slice_params)); + vdec_msg_queue_qbuf(&inst->ctx->dev->msg_queue_core_ctx, lat_buf); + } + mtk_vcodec_debug(inst, "dec num: %d lat crc: 0x%x 0x%x 0x%x", inst->slice_dec_num, + inst->vsi->dec.crc[0], inst->vsi->dec.crc[1], inst->vsi->dec.crc[2]); + + inst->slice_dec_num++; + return 0; +err_free_fb_out: + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf); + mtk_vcodec_err(inst, "slice dec number: %d err: %d", inst->slice_dec_num, err); + return err; +} + +static int vdec_h264_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *unused, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info, *dst_buf_info; + struct vdec_fb *fb; + unsigned char *buf; + unsigned int data[2], i; + u64 y_fb_dma, c_fb_dma; + struct mtk_vcodec_mem *mem; + int err, nal_start_idx; + + /* bs NULL means flush decoder */ + if (!bs) + return vpu_dec_reset(vpu); + + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + mtk_vcodec_debug(inst, "[h264-dec] [%d] y_dma=%llx c_dma=%llx", + inst->ctx->decoded_frame_cnt, y_fb_dma, c_fb_dma); + + inst->vsi_ctx.dec.bs_buf_addr = (u64)bs->dma_addr; + inst->vsi_ctx.dec.bs_buf_size = bs->size; + inst->vsi_ctx.dec.y_fb_dma = y_fb_dma; + inst->vsi_ctx.dec.c_fb_dma = c_fb_dma; + inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb; + + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, + &dst_buf_info->m2m_buf.vb, true); + err = get_vdec_sig_decode_parameters(inst); + if (err) + goto err_free_fb_out; + + buf = (unsigned char *)bs->va; + nal_start_idx = mtk_vdec_h264_find_start_code(buf, bs->size); + if (nal_start_idx < 0) { + err = -EINVAL; + goto err_free_fb_out; + } + inst->vsi_ctx.dec.nal_info = buf[nal_start_idx]; + + *res_chg = inst->resolution_changed; + if (inst->resolution_changed) { + mtk_vcodec_debug(inst, "- resolution changed -"); + if (inst->realloc_mv_buf) { + err = vdec_h264_slice_alloc_mv_buf(inst, &inst->ctx->picinfo); + inst->realloc_mv_buf = false; + if (err) + goto err_free_fb_out; + } + inst->resolution_changed = false; + + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi_ctx.mv_buf_dma[i] = mem->dma_addr; + } + } + + memcpy(inst->vpu.vsi, &inst->vsi_ctx, sizeof(inst->vsi_ctx)); + err = vpu_dec_start(vpu, data, 2); + if (err) + goto err_free_fb_out; + + /* wait decoder done interrupt */ + err = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + if (err) + mtk_vcodec_err(inst, "decode timeout: pic_%d", + inst->ctx->decoded_frame_cnt); + + inst->vsi->dec.timeout = !!err; + err = vpu_dec_end(vpu); + if (err) + goto err_free_fb_out; + + memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); + mtk_vcodec_debug(inst, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x", + inst->ctx->decoded_frame_cnt, + inst->vsi_ctx.dec.crc[0], inst->vsi_ctx.dec.crc[1], + inst->vsi_ctx.dec.crc[2], inst->vsi_ctx.dec.crc[3], + inst->vsi_ctx.dec.crc[4], inst->vsi_ctx.dec.crc[5], + inst->vsi_ctx.dec.crc[6], inst->vsi_ctx.dec.crc[7]); + + inst->ctx->decoded_frame_cnt++; + return 0; + +err_free_fb_out: + mtk_vcodec_err(inst, "dec frame number: %d err: %d", + inst->ctx->decoded_frame_cnt, err); + return err; +} + +static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *unused, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + int ret; + + if (!h_vdec) + return -EINVAL; + + if (inst->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE) + ret = vdec_h264_slice_single_decode(h_vdec, bs, unused, res_chg); + else + ret = vdec_h264_slice_lat_decode(h_vdec, bs, unused, res_chg); + + return ret; +} + +static int vdec_h264_slice_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_h264_slice_get_pic_info(inst); + break; + case GET_PARAM_DPB_SIZE: + *(unsigned int *)out = 6; + break; + case GET_PARAM_CROP_INFO: + vdec_h264_slice_get_crop_info(inst, out); + break; + default: + mtk_vcodec_err(inst, "invalid get parameter type=%d", type); + return -EINVAL; + } + return 0; +} + +const struct vdec_common_if vdec_h264_slice_multi_if = { + .init = vdec_h264_slice_init, + .decode = vdec_h264_slice_decode, + .get_param = vdec_h264_slice_get_param, + .deinit = vdec_h264_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp8_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp8_if.c new file mode 100644 index 000000000..88c046731 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp8_if.c @@ -0,0 +1,616 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Jungchang Tsao <jungchang.tsao@mediatek.com> + * PC Chen <pc.chen@mediatek.com> + */ + +#include <linux/slab.h> +#include "../vdec_drv_if.h" +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_dec.h" +#include "../mtk_vcodec_intr.h" +#include "../vdec_vpu_if.h" +#include "../vdec_drv_base.h" + +/* Decoding picture buffer size (3 reference frames plus current frame) */ +#define VP8_DPB_SIZE 4 + +/* HW working buffer size (bytes) */ +#define VP8_WORKING_BUF_SZ (45 * 4096) + +/* HW control register address */ +#define VP8_SEGID_DRAM_ADDR 0x3c +#define VP8_HW_VLD_ADDR 0x93C +#define VP8_HW_VLD_VALUE 0x940 +#define VP8_BSASET 0x100 +#define VP8_BSDSET 0x104 +#define VP8_RW_CKEN_SET 0x0 +#define VP8_RW_DCM_CON 0x18 +#define VP8_WO_VLD_SRST 0x108 +#define VP8_RW_MISC_SYS_SEL 0x84 +#define VP8_RW_MISC_SPEC_CON 0xC8 +#define VP8_WO_VLD_SRST 0x108 +#define VP8_RW_VP8_CTRL 0xA4 +#define VP8_RW_MISC_DCM_CON 0xEC +#define VP8_RW_MISC_SRST 0xF4 +#define VP8_RW_MISC_FUNC_CON 0xCC + +#define VP8_MAX_FRM_BUF_NUM 5 +#define VP8_MAX_FRM_BUF_NODE_NUM (VP8_MAX_FRM_BUF_NUM * 2) + +/* required buffer size (bytes) to store decode information */ +#define VP8_HW_SEGMENT_DATA_SZ 272 +#define VP8_HW_SEGMENT_UINT 4 + +#define VP8_DEC_TABLE_PROC_LOOP 96 +#define VP8_DEC_TABLE_UNIT 3 +#define VP8_DEC_TABLE_SZ 300 +#define VP8_DEC_TABLE_OFFSET 2 +#define VP8_DEC_TABLE_RW_UNIT 4 + +/** + * struct vdec_vp8_dec_info - decode misc information + * @working_buf_dma : working buffer dma address + * @prev_y_dma : previous decoded frame buffer Y plane address + * @cur_y_fb_dma : current plane Y frame buffer dma address + * @cur_c_fb_dma : current plane C frame buffer dma address + * @bs_dma : bitstream dma address + * @bs_sz : bitstream size + * @resolution_changed: resolution change flag 1 - changed, 0 - not change + * @show_frame : display this frame or not + * @wait_key_frame : wait key frame coming + */ +struct vdec_vp8_dec_info { + uint64_t working_buf_dma; + uint64_t prev_y_dma; + uint64_t cur_y_fb_dma; + uint64_t cur_c_fb_dma; + uint64_t bs_dma; + uint32_t bs_sz; + uint32_t resolution_changed; + uint32_t show_frame; + uint32_t wait_key_frame; +}; + +/** + * struct vdec_vp8_vsi - VPU shared information + * @dec : decoding information + * @pic : picture information + * @dec_table : decoder coefficient table + * @segment_buf : segmentation buffer + * @load_data : flag to indicate reload decode data + */ +struct vdec_vp8_vsi { + struct vdec_vp8_dec_info dec; + struct vdec_pic_info pic; + uint32_t dec_table[VP8_DEC_TABLE_SZ]; + uint32_t segment_buf[VP8_HW_SEGMENT_DATA_SZ][VP8_HW_SEGMENT_UINT]; + uint32_t load_data; +}; + +/** + * struct vdec_vp8_hw_reg_base - HW register base + * @sys : base address for sys + * @misc : base address for misc + * @ld : base address for ld + * @top : base address for top + * @cm : base address for cm + * @hwd : base address for hwd + * @hwb : base address for hwb + */ +struct vdec_vp8_hw_reg_base { + void __iomem *sys; + void __iomem *misc; + void __iomem *ld; + void __iomem *top; + void __iomem *cm; + void __iomem *hwd; + void __iomem *hwb; +}; + +/** + * struct vdec_vp8_vpu_inst - VPU instance for VP8 decode + * @wq_hd : Wait queue to wait VPU message ack + * @signaled : 1 - Host has received ack message from VPU, 0 - not receive + * @failure : VPU execution result status 0 - success, others - fail + * @inst_addr : VPU decoder instance address + */ +struct vdec_vp8_vpu_inst { + wait_queue_head_t wq_hd; + int signaled; + int failure; + uint32_t inst_addr; +}; + +/* frame buffer (fb) list + * [available_fb_node_list] - decode fb are initialized to 0 and populated in + * [fb_use_list] - fb is set after decode and is moved to this list + * [fb_free_list] - fb is not needed for reference will be moved from + * [fb_use_list] to [fb_free_list] and + * once user remove fb from [fb_free_list], + * it is circulated back to [available_fb_node_list] + * [fb_disp_list] - fb is set after decode and is moved to this list + * once user remove fb from [fb_disp_list] it is + * circulated back to [available_fb_node_list] + */ + +/** + * struct vdec_vp8_inst - VP8 decoder instance + * @cur_fb : current frame buffer + * @dec_fb : decode frame buffer node + * @available_fb_node_list : list to store available frame buffer node + * @fb_use_list : list to store frame buffer in use + * @fb_free_list : list to store free frame buffer + * @fb_disp_list : list to store display ready frame buffer + * @working_buf : HW decoder working buffer + * @reg_base : HW register base address + * @frm_cnt : decode frame count + * @ctx : V4L2 context + * @vpu : VPU instance for decoder + * @vsi : VPU share information + */ +struct vdec_vp8_inst { + struct vdec_fb *cur_fb; + struct vdec_fb_node dec_fb[VP8_MAX_FRM_BUF_NODE_NUM]; + struct list_head available_fb_node_list; + struct list_head fb_use_list; + struct list_head fb_free_list; + struct list_head fb_disp_list; + struct mtk_vcodec_mem working_buf; + struct vdec_vp8_hw_reg_base reg_base; + unsigned int frm_cnt; + struct mtk_vcodec_ctx *ctx; + struct vdec_vpu_inst vpu; + struct vdec_vp8_vsi *vsi; +}; + +static void get_hw_reg_base(struct vdec_vp8_inst *inst) +{ + inst->reg_base.top = mtk_vcodec_get_reg_addr(inst->ctx, VDEC_TOP); + inst->reg_base.cm = mtk_vcodec_get_reg_addr(inst->ctx, VDEC_CM); + inst->reg_base.hwd = mtk_vcodec_get_reg_addr(inst->ctx, VDEC_HWD); + inst->reg_base.sys = mtk_vcodec_get_reg_addr(inst->ctx, VDEC_SYS); + inst->reg_base.misc = mtk_vcodec_get_reg_addr(inst->ctx, VDEC_MISC); + inst->reg_base.ld = mtk_vcodec_get_reg_addr(inst->ctx, VDEC_LD); + inst->reg_base.hwb = mtk_vcodec_get_reg_addr(inst->ctx, VDEC_HWB); +} + +static void write_hw_segmentation_data(struct vdec_vp8_inst *inst) +{ + int i, j; + u32 seg_id_addr; + u32 val; + void __iomem *cm = inst->reg_base.cm; + struct vdec_vp8_vsi *vsi = inst->vsi; + + seg_id_addr = readl(inst->reg_base.top + VP8_SEGID_DRAM_ADDR) >> 4; + + for (i = 0; i < ARRAY_SIZE(vsi->segment_buf); i++) { + for (j = ARRAY_SIZE(vsi->segment_buf[i]) - 1; j >= 0; j--) { + val = (1 << 16) + ((seg_id_addr + i) << 2) + j; + writel(val, cm + VP8_HW_VLD_ADDR); + + val = vsi->segment_buf[i][j]; + writel(val, cm + VP8_HW_VLD_VALUE); + } + } +} + +static void read_hw_segmentation_data(struct vdec_vp8_inst *inst) +{ + int i, j; + u32 seg_id_addr; + u32 val; + void __iomem *cm = inst->reg_base.cm; + struct vdec_vp8_vsi *vsi = inst->vsi; + + seg_id_addr = readl(inst->reg_base.top + VP8_SEGID_DRAM_ADDR) >> 4; + + for (i = 0; i < ARRAY_SIZE(vsi->segment_buf); i++) { + for (j = ARRAY_SIZE(vsi->segment_buf[i]) - 1; j >= 0; j--) { + val = ((seg_id_addr + i) << 2) + j; + writel(val, cm + VP8_HW_VLD_ADDR); + + val = readl(cm + VP8_HW_VLD_VALUE); + vsi->segment_buf[i][j] = val; + } + } +} + +/* reset HW and enable HW read/write data function */ +static void enable_hw_rw_function(struct vdec_vp8_inst *inst) +{ + u32 val = 0; + void __iomem *sys = inst->reg_base.sys; + void __iomem *misc = inst->reg_base.misc; + void __iomem *ld = inst->reg_base.ld; + void __iomem *hwb = inst->reg_base.hwb; + void __iomem *hwd = inst->reg_base.hwd; + + writel(0x1, sys + VP8_RW_CKEN_SET); + writel(0x101, ld + VP8_WO_VLD_SRST); + writel(0x101, hwb + VP8_WO_VLD_SRST); + + writel(1, sys); + val = readl(misc + VP8_RW_MISC_SRST); + writel((val & 0xFFFFFFFE), misc + VP8_RW_MISC_SRST); + + writel(0x1, misc + VP8_RW_MISC_SYS_SEL); + writel(0x17F, misc + VP8_RW_MISC_SPEC_CON); + writel(0x71201100, misc + VP8_RW_MISC_FUNC_CON); + writel(0x0, ld + VP8_WO_VLD_SRST); + writel(0x0, hwb + VP8_WO_VLD_SRST); + writel(0x1, sys + VP8_RW_DCM_CON); + writel(0x1, misc + VP8_RW_MISC_DCM_CON); + writel(0x1, hwd + VP8_RW_VP8_CTRL); +} + +static void store_dec_table(struct vdec_vp8_inst *inst) +{ + int i, j; + u32 addr = 0, val = 0; + void __iomem *hwd = inst->reg_base.hwd; + u32 *p = &inst->vsi->dec_table[VP8_DEC_TABLE_OFFSET]; + + for (i = 0; i < VP8_DEC_TABLE_PROC_LOOP; i++) { + writel(addr, hwd + VP8_BSASET); + for (j = 0; j < VP8_DEC_TABLE_UNIT ; j++) { + val = *p++; + writel(val, hwd + VP8_BSDSET); + } + addr += VP8_DEC_TABLE_RW_UNIT; + } +} + +static void load_dec_table(struct vdec_vp8_inst *inst) +{ + int i; + u32 addr = 0; + u32 *p = &inst->vsi->dec_table[VP8_DEC_TABLE_OFFSET]; + void __iomem *hwd = inst->reg_base.hwd; + + for (i = 0; i < VP8_DEC_TABLE_PROC_LOOP; i++) { + writel(addr, hwd + VP8_BSASET); + /* read total 11 bytes */ + *p++ = readl(hwd + VP8_BSDSET); + *p++ = readl(hwd + VP8_BSDSET); + *p++ = readl(hwd + VP8_BSDSET) & 0xFFFFFF; + addr += VP8_DEC_TABLE_RW_UNIT; + } +} + +static void get_pic_info(struct vdec_vp8_inst *inst, struct vdec_pic_info *pic) +{ + *pic = inst->vsi->pic; + + mtk_vcodec_debug(inst, "pic(%d, %d), buf(%d, %d)", + pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h); + mtk_vcodec_debug(inst, "fb size: Y(%d), C(%d)", + pic->fb_sz[0], pic->fb_sz[1]); +} + +static void vp8_dec_finish(struct vdec_vp8_inst *inst) +{ + struct vdec_fb_node *node; + uint64_t prev_y_dma = inst->vsi->dec.prev_y_dma; + + mtk_vcodec_debug(inst, "prev fb base dma=%llx", prev_y_dma); + + /* put last decode ok frame to fb_free_list */ + if (prev_y_dma != 0) { + list_for_each_entry(node, &inst->fb_use_list, list) { + struct vdec_fb *fb = (struct vdec_fb *)node->fb; + + if (prev_y_dma == (uint64_t)fb->base_y.dma_addr) { + list_move_tail(&node->list, + &inst->fb_free_list); + break; + } + } + } + + /* available_fb_node_list -> fb_use_list */ + node = list_first_entry(&inst->available_fb_node_list, + struct vdec_fb_node, list); + node->fb = inst->cur_fb; + list_move_tail(&node->list, &inst->fb_use_list); + + /* available_fb_node_list -> fb_disp_list */ + if (inst->vsi->dec.show_frame) { + node = list_first_entry(&inst->available_fb_node_list, + struct vdec_fb_node, list); + node->fb = inst->cur_fb; + list_move_tail(&node->list, &inst->fb_disp_list); + } +} + +static void move_fb_list_use_to_free(struct vdec_vp8_inst *inst) +{ + struct vdec_fb_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &inst->fb_use_list, list) + list_move_tail(&node->list, &inst->fb_free_list); +} + +static void init_list(struct vdec_vp8_inst *inst) +{ + int i; + + INIT_LIST_HEAD(&inst->available_fb_node_list); + INIT_LIST_HEAD(&inst->fb_use_list); + INIT_LIST_HEAD(&inst->fb_free_list); + INIT_LIST_HEAD(&inst->fb_disp_list); + + for (i = 0; i < ARRAY_SIZE(inst->dec_fb); i++) { + INIT_LIST_HEAD(&inst->dec_fb[i].list); + inst->dec_fb[i].fb = NULL; + list_add_tail(&inst->dec_fb[i].list, + &inst->available_fb_node_list); + } +} + +static void add_fb_to_free_list(struct vdec_vp8_inst *inst, void *fb) +{ + struct vdec_fb_node *node; + + if (fb) { + node = list_first_entry(&inst->available_fb_node_list, + struct vdec_fb_node, list); + node->fb = fb; + list_move_tail(&node->list, &inst->fb_free_list); + } +} + +static int alloc_working_buf(struct vdec_vp8_inst *inst) +{ + int err; + struct mtk_vcodec_mem *mem = &inst->working_buf; + + mem->size = VP8_WORKING_BUF_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "Cannot allocate working buffer"); + return err; + } + + inst->vsi->dec.working_buf_dma = (uint64_t)mem->dma_addr; + return 0; +} + +static void free_working_buf(struct vdec_vp8_inst *inst) +{ + struct mtk_vcodec_mem *mem = &inst->working_buf; + + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + inst->vsi->dec.working_buf_dma = 0; +} + +static int vdec_vp8_init(struct mtk_vcodec_ctx *ctx) +{ + struct vdec_vp8_inst *inst; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = IPI_VDEC_VP8; + inst->vpu.ctx = ctx; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vcodec_err(inst, "vdec_vp8 init err=%d", err); + goto error_free_inst; + } + + inst->vsi = (struct vdec_vp8_vsi *)inst->vpu.vsi; + init_list(inst); + err = alloc_working_buf(inst); + if (err) + goto error_deinit; + + get_hw_reg_base(inst); + mtk_vcodec_debug(inst, "VP8 Instance >> %p", inst); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); +error_free_inst: + kfree(inst); + return err; +} + +static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec; + struct vdec_vp8_dec_info *dec = &inst->vsi->dec; + struct vdec_vpu_inst *vpu = &inst->vpu; + unsigned char *bs_va; + unsigned int data; + int err = 0; + uint64_t y_fb_dma; + uint64_t c_fb_dma; + + /* bs NULL means flush decoder */ + if (bs == NULL) { + move_fb_list_use_to_free(inst); + return vpu_dec_reset(vpu); + } + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + mtk_vcodec_debug(inst, "+ [%d] FB y_dma=%llx c_dma=%llx fb=%p", + inst->frm_cnt, y_fb_dma, c_fb_dma, fb); + + inst->cur_fb = fb; + dec->bs_dma = (unsigned long)bs->dma_addr; + dec->bs_sz = bs->size; + dec->cur_y_fb_dma = y_fb_dma; + dec->cur_c_fb_dma = c_fb_dma; + + mtk_vcodec_debug(inst, "\n + FRAME[%d] +\n", inst->frm_cnt); + + write_hw_segmentation_data(inst); + enable_hw_rw_function(inst); + store_dec_table(inst); + + bs_va = (unsigned char *)bs->va; + + /* retrieve width/hight and scale info from header */ + data = (*(bs_va + 9) << 24) | (*(bs_va + 8) << 16) | + (*(bs_va + 7) << 8) | *(bs_va + 6); + err = vpu_dec_start(vpu, &data, 1); + if (err) { + add_fb_to_free_list(inst, fb); + if (dec->wait_key_frame) { + mtk_vcodec_debug(inst, "wait key frame !"); + return 0; + } + + goto error; + } + + if (dec->resolution_changed) { + mtk_vcodec_debug(inst, "- resolution_changed -"); + *res_chg = true; + add_fb_to_free_list(inst, fb); + return 0; + } + + /* wait decoder done interrupt */ + mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + + if (inst->vsi->load_data) + load_dec_table(inst); + + vp8_dec_finish(inst); + read_hw_segmentation_data(inst); + + err = vpu_dec_end(vpu); + if (err) + goto error; + + mtk_vcodec_debug(inst, "\n - FRAME[%d] - show=%d\n", inst->frm_cnt, + dec->show_frame); + inst->frm_cnt++; + *res_chg = false; + return 0; + +error: + mtk_vcodec_err(inst, "\n - FRAME[%d] - err=%d\n", inst->frm_cnt, err); + return err; +} + +static void get_disp_fb(struct vdec_vp8_inst *inst, struct vdec_fb **out_fb) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb; + + node = list_first_entry_or_null(&inst->fb_disp_list, + struct vdec_fb_node, list); + if (node) { + list_move_tail(&node->list, &inst->available_fb_node_list); + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_DISPLAY; + mtk_vcodec_debug(inst, "[FB] get disp fb %p st=%d", + node->fb, fb->status); + } else { + fb = NULL; + mtk_vcodec_debug(inst, "[FB] there is no disp fb"); + } + + *out_fb = fb; +} + +static void get_free_fb(struct vdec_vp8_inst *inst, struct vdec_fb **out_fb) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb; + + node = list_first_entry_or_null(&inst->fb_free_list, + struct vdec_fb_node, list); + if (node) { + list_move_tail(&node->list, &inst->available_fb_node_list); + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_FREE; + mtk_vcodec_debug(inst, "[FB] get free fb %p st=%d", + node->fb, fb->status); + } else { + fb = NULL; + mtk_vcodec_debug(inst, "[FB] there is no free fb"); + } + + *out_fb = fb; +} + +static void get_crop_info(struct vdec_vp8_inst *inst, struct v4l2_rect *cr) +{ + cr->left = 0; + cr->top = 0; + cr->width = inst->vsi->pic.pic_w; + cr->height = inst->vsi->pic.pic_h; + mtk_vcodec_debug(inst, "get crop info l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_vp8_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec; + + switch (type) { + case GET_PARAM_DISP_FRAME_BUFFER: + get_disp_fb(inst, out); + break; + + case GET_PARAM_FREE_FRAME_BUFFER: + get_free_fb(inst, out); + break; + + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + + case GET_PARAM_DPB_SIZE: + *((unsigned int *)out) = VP8_DPB_SIZE; + break; + + default: + mtk_vcodec_err(inst, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +static void vdec_vp8_deinit(void *h_vdec) +{ + struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec; + + mtk_vcodec_debug_enter(inst); + + vpu_dec_deinit(&inst->vpu); + free_working_buf(inst); + kfree(inst); +} + +const struct vdec_common_if vdec_vp8_if = { + .init = vdec_vp8_init, + .decode = vdec_vp8_decode, + .get_param = vdec_vp8_get_param, + .deinit = vdec_vp8_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp8_req_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp8_req_if.c new file mode 100644 index 000000000..e1fe2603e --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp8_req_if.c @@ -0,0 +1,436 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include <linux/slab.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> +#include <uapi/linux/v4l2-controls.h> + +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_dec.h" +#include "../mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" + +/* Decoding picture buffer size (3 reference frames plus current frame) */ +#define VP8_DPB_SIZE 4 + +/* HW working buffer size (bytes) */ +#define VP8_SEG_ID_SZ SZ_256K +#define VP8_PP_WRAPY_SZ SZ_64K +#define VP8_PP_WRAPC_SZ SZ_64K +#define VP8_VLD_PRED_SZ SZ_64K + +/** + * struct vdec_vp8_slice_info - decode misc information + * + * @vld_wrapper_dma: vld wrapper dma address + * @seg_id_buf_dma: seg id dma address + * @wrap_y_dma: wrap y dma address + * @wrap_c_dma: wrap y dma address + * @cur_y_fb_dma: current plane Y frame buffer dma address + * @cur_c_fb_dma: current plane C frame buffer dma address + * @bs_dma: bitstream dma address + * @bs_sz: bitstream size + * @resolution_changed:resolution change flag 1 - changed, 0 - not change + * @frame_header_type: current frame header type + * @wait_key_frame: wait key frame coming + * @crc: used to check whether hardware's status is right + * @reserved: reserved, currently unused + */ +struct vdec_vp8_slice_info { + u64 vld_wrapper_dma; + u64 seg_id_buf_dma; + u64 wrap_y_dma; + u64 wrap_c_dma; + u64 cur_y_fb_dma; + u64 cur_c_fb_dma; + u64 bs_dma; + u32 bs_sz; + u32 resolution_changed; + u32 frame_header_type; + u32 crc[8]; + u32 reserved; +}; + +/** + * struct vdec_vp8_slice_dpb_info - vp8 reference information + * + * @y_dma_addr: Y bitstream physical address + * @c_dma_addr: CbCr bitstream physical address + * @reference_flag: reference picture flag + * @reserved: 64bit align + */ +struct vdec_vp8_slice_dpb_info { + dma_addr_t y_dma_addr; + dma_addr_t c_dma_addr; + int reference_flag; + int reserved; +}; + +/** + * struct vdec_vp8_slice_vsi - VPU shared information + * + * @dec: decoding information + * @pic: picture information + * @vp8_dpb_info: reference buffer information + */ +struct vdec_vp8_slice_vsi { + struct vdec_vp8_slice_info dec; + struct vdec_pic_info pic; + struct vdec_vp8_slice_dpb_info vp8_dpb_info[3]; +}; + +/** + * struct vdec_vp8_slice_inst - VP8 decoder instance + * + * @seg_id_buf: seg buffer + * @wrap_y_buf: wrapper y buffer + * @wrap_c_buf: wrapper c buffer + * @vld_wrapper_buf: vld wrapper buffer + * @ctx: V4L2 context + * @vpu: VPU instance for decoder + * @vsi: VPU share information + */ +struct vdec_vp8_slice_inst { + struct mtk_vcodec_mem seg_id_buf; + struct mtk_vcodec_mem wrap_y_buf; + struct mtk_vcodec_mem wrap_c_buf; + struct mtk_vcodec_mem vld_wrapper_buf; + struct mtk_vcodec_ctx *ctx; + struct vdec_vpu_inst vpu; + struct vdec_vp8_slice_vsi *vsi; +}; + +static void *vdec_vp8_slice_get_ctrl_ptr(struct mtk_vcodec_ctx *ctx, int id) +{ + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); + + if (!ctrl) + return ERR_PTR(-EINVAL); + + return ctrl->p_cur.p; +} + +static void vdec_vp8_slice_get_pic_info(struct vdec_vp8_slice_inst *inst) +{ + struct mtk_vcodec_ctx *ctx = inst->ctx; + unsigned int data[3]; + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64); + ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1]; + + inst->vsi->pic.pic_w = ctx->picinfo.pic_w; + inst->vsi->pic.pic_h = ctx->picinfo.pic_h; + inst->vsi->pic.buf_w = ctx->picinfo.buf_w; + inst->vsi->pic.buf_h = ctx->picinfo.buf_h; + inst->vsi->pic.fb_sz[0] = ctx->picinfo.fb_sz[0]; + inst->vsi->pic.fb_sz[1] = ctx->picinfo.fb_sz[1]; + mtk_vcodec_debug(inst, "pic(%d, %d), buf(%d, %d)", + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->picinfo.buf_w, ctx->picinfo.buf_h); + mtk_vcodec_debug(inst, "fb size: Y(%d), C(%d)", + ctx->picinfo.fb_sz[0], ctx->picinfo.fb_sz[1]); +} + +static int vdec_vp8_slice_alloc_working_buf(struct vdec_vp8_slice_inst *inst) +{ + int err; + struct mtk_vcodec_mem *mem; + + mem = &inst->seg_id_buf; + mem->size = VP8_SEG_ID_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "Cannot allocate working buffer"); + return err; + } + inst->vsi->dec.seg_id_buf_dma = (u64)mem->dma_addr; + + mem = &inst->wrap_y_buf; + mem->size = VP8_PP_WRAPY_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "cannot allocate WRAP Y buffer"); + return err; + } + inst->vsi->dec.wrap_y_dma = (u64)mem->dma_addr; + + mem = &inst->wrap_c_buf; + mem->size = VP8_PP_WRAPC_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "cannot allocate WRAP C buffer"); + return err; + } + inst->vsi->dec.wrap_c_dma = (u64)mem->dma_addr; + + mem = &inst->vld_wrapper_buf; + mem->size = VP8_VLD_PRED_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vcodec_err(inst, "cannot allocate vld wrapper buffer"); + return err; + } + inst->vsi->dec.vld_wrapper_dma = (u64)mem->dma_addr; + + return 0; +} + +static void vdec_vp8_slice_free_working_buf(struct vdec_vp8_slice_inst *inst) +{ + struct mtk_vcodec_mem *mem; + + mem = &inst->seg_id_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.seg_id_buf_dma = 0; + + mem = &inst->wrap_y_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.wrap_y_dma = 0; + + mem = &inst->wrap_c_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.wrap_c_dma = 0; + + mem = &inst->vld_wrapper_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.vld_wrapper_dma = 0; +} + +static u64 vdec_vp8_slice_get_ref_by_ts(const struct v4l2_ctrl_vp8_frame *frame_header, + int index) +{ + switch (index) { + case 0: + return frame_header->last_frame_ts; + case 1: + return frame_header->golden_frame_ts; + case 2: + return frame_header->alt_frame_ts; + default: + break; + } + + return -1; +} + +static int vdec_vp8_slice_get_decode_parameters(struct vdec_vp8_slice_inst *inst) +{ + const struct v4l2_ctrl_vp8_frame *frame_header; + struct mtk_vcodec_ctx *ctx = inst->ctx; + struct vb2_queue *vq; + struct vb2_buffer *vb; + u64 referenct_ts; + int index; + + frame_header = vdec_vp8_slice_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_VP8_FRAME); + if (IS_ERR(frame_header)) + return PTR_ERR(frame_header); + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + for (index = 0; index < 3; index++) { + referenct_ts = vdec_vp8_slice_get_ref_by_ts(frame_header, index); + vb = vb2_find_buffer(vq, referenct_ts); + if (!vb) { + if (!V4L2_VP8_FRAME_IS_KEY_FRAME(frame_header)) + mtk_vcodec_err(inst, "reference invalid: index(%d) ts(%lld)", + index, referenct_ts); + inst->vsi->vp8_dpb_info[index].reference_flag = 0; + continue; + } + inst->vsi->vp8_dpb_info[index].reference_flag = 1; + + inst->vsi->vp8_dpb_info[index].y_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 0); + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) + inst->vsi->vp8_dpb_info[index].c_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 1); + else + inst->vsi->vp8_dpb_info[index].c_dma_addr = + inst->vsi->vp8_dpb_info[index].y_dma_addr + + ctx->picinfo.fb_sz[0]; + } + + inst->vsi->dec.frame_header_type = frame_header->flags >> 1; + + return 0; +} + +static int vdec_vp8_slice_init(struct mtk_vcodec_ctx *ctx) +{ + struct vdec_vp8_slice_inst *inst; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = SCP_IPI_VDEC_LAT; + inst->vpu.core_id = SCP_IPI_VDEC_CORE; + inst->vpu.ctx = ctx; + inst->vpu.codec_type = ctx->current_codec; + inst->vpu.capture_type = ctx->capture_fourcc; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vcodec_err(inst, "vdec_vp8 init err=%d", err); + goto error_free_inst; + } + + inst->vsi = inst->vpu.vsi; + err = vdec_vp8_slice_alloc_working_buf(inst); + if (err) + goto error_deinit; + + mtk_vcodec_debug(inst, "vp8 struct size = %d vsi: %d\n", + (int)sizeof(struct v4l2_ctrl_vp8_frame), + (int)sizeof(struct vdec_vp8_slice_vsi)); + mtk_vcodec_debug(inst, "vp8:%p, codec_type = 0x%x vsi: 0x%p", + inst, inst->vpu.codec_type, inst->vpu.vsi); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); +error_free_inst: + kfree(inst); + return err; +} + +static int vdec_vp8_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp8_slice_inst *inst = h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info, *dst_buf_info; + unsigned int data; + u64 y_fb_dma, c_fb_dma; + int err, timeout; + + /* Resolution changes are never initiated by us */ + *res_chg = false; + + /* bs NULL means flush decoder */ + if (!bs) + return vpu_dec_reset(vpu); + + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + if (inst->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1) + c_fb_dma = y_fb_dma + + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h; + else + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + inst->vsi->dec.bs_dma = (u64)bs->dma_addr; + inst->vsi->dec.bs_sz = bs->size; + inst->vsi->dec.cur_y_fb_dma = y_fb_dma; + inst->vsi->dec.cur_c_fb_dma = c_fb_dma; + + mtk_vcodec_debug(inst, "frame[%d] bs(%zu 0x%llx) y/c(0x%llx 0x%llx)", + inst->ctx->decoded_frame_cnt, + bs->size, (u64)bs->dma_addr, + y_fb_dma, c_fb_dma); + + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, + &dst_buf_info->m2m_buf.vb, true); + + err = vdec_vp8_slice_get_decode_parameters(inst); + if (err) + goto error; + + err = vpu_dec_start(vpu, &data, 1); + if (err) { + mtk_vcodec_debug(inst, "vp8 dec start err!"); + goto error; + } + + if (inst->vsi->dec.resolution_changed) { + mtk_vcodec_debug(inst, "- resolution_changed -"); + *res_chg = true; + return 0; + } + + /* wait decode done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + 50, MTK_VDEC_CORE); + + err = vpu_dec_end(vpu); + if (err || timeout) + mtk_vcodec_debug(inst, "vp8 dec error timeout:%d err: %d pic_%d", + timeout, err, inst->ctx->decoded_frame_cnt); + + mtk_vcodec_debug(inst, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x", + inst->ctx->decoded_frame_cnt, + inst->vsi->dec.crc[0], inst->vsi->dec.crc[1], + inst->vsi->dec.crc[2], inst->vsi->dec.crc[3], + inst->vsi->dec.crc[4], inst->vsi->dec.crc[5], + inst->vsi->dec.crc[6], inst->vsi->dec.crc[7]); + + inst->ctx->decoded_frame_cnt++; +error: + return err; +} + +static int vdec_vp8_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) +{ + struct vdec_vp8_slice_inst *inst = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_vp8_slice_get_pic_info(inst); + break; + case GET_PARAM_CROP_INFO: + mtk_vcodec_debug(inst, "No need to get vp8 crop information."); + break; + case GET_PARAM_DPB_SIZE: + *((unsigned int *)out) = VP8_DPB_SIZE; + break; + default: + mtk_vcodec_err(inst, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +static void vdec_vp8_slice_deinit(void *h_vdec) +{ + struct vdec_vp8_slice_inst *inst = h_vdec; + + mtk_vcodec_debug_enter(inst); + + vpu_dec_deinit(&inst->vpu); + vdec_vp8_slice_free_working_buf(inst); + kfree(inst); +} + +const struct vdec_common_if vdec_vp8_slice_if = { + .init = vdec_vp8_slice_init, + .decode = vdec_vp8_slice_decode, + .get_param = vdec_vp8_slice_get_param, + .deinit = vdec_vp8_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_if.c new file mode 100644 index 000000000..a27a109d8 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_if.c @@ -0,0 +1,1029 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Daniel Hsiao <daniel.hsiao@mediatek.com> + * Kai-Sean Yang <kai-sean.yang@mediatek.com> + * Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/delay.h> +#include <linux/time.h> + +#include "../mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_vpu_if.h" + +#define VP9_SUPER_FRAME_BS_SZ 64 +#define MAX_VP9_DPB_SIZE 9 + +#define REFS_PER_FRAME 3 +#define MAX_NUM_REF_FRAMES 8 +#define VP9_MAX_FRM_BUF_NUM 9 +#define VP9_MAX_FRM_BUF_NODE_NUM (VP9_MAX_FRM_BUF_NUM * 2) +#define VP9_SEG_ID_SZ 0x12000 + +/** + * struct vp9_dram_buf - contains buffer info for vpu + * @va : cpu address + * @pa : iova address + * @sz : buffer size + * @padding : for 64 bytes alignment + */ +struct vp9_dram_buf { + unsigned long va; + unsigned long pa; + unsigned int sz; + unsigned int padding; +}; + +/** + * struct vp9_fb_info - contains frame buffer info + * @fb : frmae buffer + * @reserved : reserved field used by vpu + */ +struct vp9_fb_info { + struct vdec_fb *fb; + unsigned int reserved[32]; +}; + +/** + * struct vp9_ref_cnt_buf - contains reference buffer information + * @buf : referenced frame buffer + * @ref_cnt : referenced frame buffer's reference count. + * When reference count=0, remove it from reference list + */ +struct vp9_ref_cnt_buf { + struct vp9_fb_info buf; + unsigned int ref_cnt; +}; + +/** + * struct vp9_ref_buf - contains current frame's reference buffer information + * @buf : reference buffer + * @idx : reference buffer index to frm_bufs + * @reserved : reserved field used by vpu + */ +struct vp9_ref_buf { + struct vp9_fb_info *buf; + unsigned int idx; + unsigned int reserved[6]; +}; + +/** + * struct vp9_sf_ref_fb - contains frame buffer info + * @fb : super frame reference frame buffer + * @used : this reference frame info entry is used + * @padding : for 64 bytes size align + */ +struct vp9_sf_ref_fb { + struct vdec_fb fb; + int used; + int padding; +}; + +/* + * struct vdec_vp9_vsi - shared buffer between host and VPU firmware + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item + * @sf_bs_buf : super frame backup buffer (AP-W, VPU-R) + * @sf_ref_fb : record supoer frame reference buffer information + * (AP-R/W, VPU-R/W) + * @sf_next_ref_fb_idx : next available super frame (AP-W, VPU-R) + * @sf_frm_cnt : super frame count, filled by vpu (AP-R, VPU-W) + * @sf_frm_offset : super frame offset, filled by vpu (AP-R, VPU-W) + * @sf_frm_sz : super frame size, filled by vpu (AP-R, VPU-W) + * @sf_frm_idx : current super frame (AP-R, VPU-W) + * @sf_init : inform super frame info already parsed by vpu (AP-R, VPU-W) + * @fb : capture buffer (AP-W, VPU-R) + * @bs : bs buffer (AP-W, VPU-R) + * @cur_fb : current show capture buffer (AP-R/W, VPU-R/W) + * @pic_w : picture width (AP-R, VPU-W) + * @pic_h : picture height (AP-R, VPU-W) + * @buf_w : codec width (AP-R, VPU-W) + * @buf_h : coded height (AP-R, VPU-W) + * @buf_sz_y_bs : ufo compressed y plane size (AP-R, VPU-W) + * @buf_sz_c_bs : ufo compressed cbcr plane size (AP-R, VPU-W) + * @buf_len_sz_y : size used to store y plane ufo info (AP-R, VPU-W) + * @buf_len_sz_c : size used to store cbcr plane ufo info (AP-R, VPU-W) + + * @profile : profile sparsed from vpu (AP-R, VPU-W) + * @show_frame : [BIT(0)] display this frame or not (AP-R, VPU-W) + * [BIT(1)] reset segment data or not (AP-R, VPU-W) + * [BIT(2)] trig decoder hardware or not (AP-R, VPU-W) + * [BIT(3)] ask VPU to set bits(0~4) accordingly (AP-W, VPU-R) + * [BIT(4)] do not reset segment data before every frame (AP-R, VPU-W) + * @show_existing_frame : inform this frame is show existing frame + * (AP-R, VPU-W) + * @frm_to_show_idx : index to show frame (AP-R, VPU-W) + + * @refresh_frm_flags : indicate when frame need to refine reference count + * (AP-R, VPU-W) + * @resolution_changed : resolution change in this frame (AP-R, VPU-W) + + * @frm_bufs : maintain reference buffer info (AP-R/W, VPU-R/W) + * @ref_frm_map : maintain reference buffer map info (AP-R/W, VPU-R/W) + * @new_fb_idx : index to frm_bufs array (AP-R, VPU-W) + * @frm_num : decoded frame number, include sub-frame count (AP-R, VPU-W) + * @mv_buf : motion vector working buffer (AP-W, VPU-R) + * @frm_refs : maintain three reference buffer info (AP-R/W, VPU-R/W) + * @seg_id_buf : segmentation map working buffer (AP-W, VPU-R) + */ +struct vdec_vp9_vsi { + unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ]; + struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1]; + int sf_next_ref_fb_idx; + unsigned int sf_frm_cnt; + unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1]; + unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1]; + unsigned int sf_frm_idx; + unsigned int sf_init; + struct vdec_fb fb; + struct mtk_vcodec_mem bs; + struct vdec_fb cur_fb; + unsigned int pic_w; + unsigned int pic_h; + unsigned int buf_w; + unsigned int buf_h; + unsigned int buf_sz_y_bs; + unsigned int buf_sz_c_bs; + unsigned int buf_len_sz_y; + unsigned int buf_len_sz_c; + unsigned int profile; + unsigned int show_frame; + unsigned int show_existing_frame; + unsigned int frm_to_show_idx; + unsigned int refresh_frm_flags; + unsigned int resolution_changed; + + struct vp9_ref_cnt_buf frm_bufs[VP9_MAX_FRM_BUF_NUM]; + int ref_frm_map[MAX_NUM_REF_FRAMES]; + unsigned int new_fb_idx; + unsigned int frm_num; + struct vp9_dram_buf mv_buf; + + struct vp9_ref_buf frm_refs[REFS_PER_FRAME]; + struct vp9_dram_buf seg_id_buf; + +}; + +/* + * struct vdec_vp9_inst - vp9 decode instance + * @mv_buf : working buffer for mv + * @seg_id_buf : working buffer for segmentation map + * @dec_fb : vdec_fb node to link fb to different fb_xxx_list + * @available_fb_node_list : current available vdec_fb node + * @fb_use_list : current used or referenced vdec_fb + * @fb_free_list : current available to free vdec_fb + * @fb_disp_list : current available to display vdec_fb + * @cur_fb : current frame buffer + * @ctx : current decode context + * @vpu : vpu instance information + * @vsi : shared buffer between host and VPU firmware + * @total_frm_cnt : total frame count, it do not include sub-frames in super + * frame + * @mem : instance memory information + */ +struct vdec_vp9_inst { + struct mtk_vcodec_mem mv_buf; + struct mtk_vcodec_mem seg_id_buf; + + struct vdec_fb_node dec_fb[VP9_MAX_FRM_BUF_NODE_NUM]; + struct list_head available_fb_node_list; + struct list_head fb_use_list; + struct list_head fb_free_list; + struct list_head fb_disp_list; + struct vdec_fb *cur_fb; + struct mtk_vcodec_ctx *ctx; + struct vdec_vpu_inst vpu; + struct vdec_vp9_vsi *vsi; + unsigned int total_frm_cnt; + struct mtk_vcodec_mem mem; +}; + +static bool vp9_is_sf_ref_fb(struct vdec_vp9_inst *inst, struct vdec_fb *fb) +{ + int i; + struct vdec_vp9_vsi *vsi = inst->vsi; + + for (i = 0; i < ARRAY_SIZE(vsi->sf_ref_fb); i++) { + if (fb == &vsi->sf_ref_fb[i].fb) + return true; + } + return false; +} + +static struct vdec_fb *vp9_rm_from_fb_use_list(struct vdec_vp9_inst + *inst, void *addr) +{ + struct vdec_fb *fb = NULL; + struct vdec_fb_node *node; + + list_for_each_entry(node, &inst->fb_use_list, list) { + fb = (struct vdec_fb *)node->fb; + if (fb->base_y.va == addr) { + list_move_tail(&node->list, + &inst->available_fb_node_list); + return fb; + } + } + + return NULL; +} + +static void vp9_add_to_fb_free_list(struct vdec_vp9_inst *inst, + struct vdec_fb *fb) +{ + struct vdec_fb_node *node; + + if (fb) { + node = list_first_entry_or_null(&inst->available_fb_node_list, + struct vdec_fb_node, list); + + if (node) { + node->fb = fb; + list_move_tail(&node->list, &inst->fb_free_list); + } + } else { + mtk_vcodec_debug(inst, "No free fb node"); + } +} + +static void vp9_free_sf_ref_fb(struct vdec_fb *fb) +{ + struct vp9_sf_ref_fb *sf_ref_fb = + container_of(fb, struct vp9_sf_ref_fb, fb); + + sf_ref_fb->used = 0; +} + +static void vp9_ref_cnt_fb(struct vdec_vp9_inst *inst, int *idx, + int new_idx) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + int ref_idx = *idx; + + if (ref_idx >= 0 && vsi->frm_bufs[ref_idx].ref_cnt > 0) { + vsi->frm_bufs[ref_idx].ref_cnt--; + + if (vsi->frm_bufs[ref_idx].ref_cnt == 0) { + if (!vp9_is_sf_ref_fb(inst, + vsi->frm_bufs[ref_idx].buf.fb)) { + struct vdec_fb *fb; + + fb = vp9_rm_from_fb_use_list(inst, + vsi->frm_bufs[ref_idx].buf.fb->base_y.va); + vp9_add_to_fb_free_list(inst, fb); + } else + vp9_free_sf_ref_fb( + vsi->frm_bufs[ref_idx].buf.fb); + } + } + + *idx = new_idx; + vsi->frm_bufs[new_idx].ref_cnt++; +} + +static void vp9_free_all_sf_ref_fb(struct vdec_vp9_inst *inst) +{ + int i; + struct vdec_vp9_vsi *vsi = inst->vsi; + + for (i = 0; i < ARRAY_SIZE(vsi->sf_ref_fb); i++) { + if (vsi->sf_ref_fb[i].fb.base_y.va) { + mtk_vcodec_mem_free(inst->ctx, + &vsi->sf_ref_fb[i].fb.base_y); + mtk_vcodec_mem_free(inst->ctx, + &vsi->sf_ref_fb[i].fb.base_c); + vsi->sf_ref_fb[i].used = 0; + } + } +} + +/* For each sub-frame except the last one, the driver will dynamically + * allocate reference buffer by calling vp9_get_sf_ref_fb() + * The last sub-frame will use the original fb provided by the + * vp9_dec_decode() interface + */ +static int vp9_get_sf_ref_fb(struct vdec_vp9_inst *inst) +{ + int idx; + struct mtk_vcodec_mem *mem_basy_y; + struct mtk_vcodec_mem *mem_basy_c; + struct vdec_vp9_vsi *vsi = inst->vsi; + + for (idx = 0; + idx < ARRAY_SIZE(vsi->sf_ref_fb); + idx++) { + if (vsi->sf_ref_fb[idx].fb.base_y.va && + vsi->sf_ref_fb[idx].used == 0) { + return idx; + } + } + + for (idx = 0; + idx < ARRAY_SIZE(vsi->sf_ref_fb); + idx++) { + if (vsi->sf_ref_fb[idx].fb.base_y.va == NULL) + break; + } + + if (idx == ARRAY_SIZE(vsi->sf_ref_fb)) { + mtk_vcodec_err(inst, "List Full"); + return -1; + } + + mem_basy_y = &vsi->sf_ref_fb[idx].fb.base_y; + mem_basy_y->size = vsi->buf_sz_y_bs + + vsi->buf_len_sz_y; + + if (mtk_vcodec_mem_alloc(inst->ctx, mem_basy_y)) { + mtk_vcodec_err(inst, "Cannot allocate sf_ref_buf y_buf"); + return -1; + } + + mem_basy_c = &vsi->sf_ref_fb[idx].fb.base_c; + mem_basy_c->size = vsi->buf_sz_c_bs + + vsi->buf_len_sz_c; + + if (mtk_vcodec_mem_alloc(inst->ctx, mem_basy_c)) { + mtk_vcodec_err(inst, "Cannot allocate sf_ref_fb c_buf"); + return -1; + } + vsi->sf_ref_fb[idx].used = 0; + + return idx; +} + +static bool vp9_alloc_work_buf(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + int result; + struct mtk_vcodec_mem *mem; + + unsigned int max_pic_w; + unsigned int max_pic_h; + + + if (!(inst->ctx->dev->dec_capability & + VCODEC_CAPABILITY_4K_DISABLED)) { + max_pic_w = VCODEC_DEC_4K_CODED_WIDTH; + max_pic_h = VCODEC_DEC_4K_CODED_HEIGHT; + } else { + max_pic_w = MTK_VDEC_MAX_W; + max_pic_h = MTK_VDEC_MAX_H; + } + + if ((vsi->pic_w > max_pic_w) || + (vsi->pic_h > max_pic_h)) { + mtk_vcodec_err(inst, "Invalid w/h %d/%d", + vsi->pic_w, vsi->pic_h); + return false; + } + + mtk_vcodec_debug(inst, "BUF CHG(%d): w/h/sb_w/sb_h=%d/%d/%d/%d", + vsi->resolution_changed, + vsi->pic_w, + vsi->pic_h, + vsi->buf_w, + vsi->buf_h); + + mem = &inst->mv_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + mem->size = ((vsi->buf_w / 64) * + (vsi->buf_h / 64) + 2) * 36 * 16; + result = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (result) { + mem->size = 0; + mtk_vcodec_err(inst, "Cannot allocate mv_buf"); + return false; + } + /* Set the va again */ + vsi->mv_buf.va = (unsigned long)mem->va; + vsi->mv_buf.pa = (unsigned long)mem->dma_addr; + vsi->mv_buf.sz = (unsigned int)mem->size; + + + mem = &inst->seg_id_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + mem->size = VP9_SEG_ID_SZ; + result = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (result) { + mem->size = 0; + mtk_vcodec_err(inst, "Cannot allocate seg_id_buf"); + return false; + } + /* Set the va again */ + vsi->seg_id_buf.va = (unsigned long)mem->va; + vsi->seg_id_buf.pa = (unsigned long)mem->dma_addr; + vsi->seg_id_buf.sz = (unsigned int)mem->size; + + + vp9_free_all_sf_ref_fb(inst); + vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst); + + return true; +} + +static bool vp9_add_to_fb_disp_list(struct vdec_vp9_inst *inst, + struct vdec_fb *fb) +{ + struct vdec_fb_node *node; + + if (!fb) { + mtk_vcodec_err(inst, "fb == NULL"); + return false; + } + + node = list_first_entry_or_null(&inst->available_fb_node_list, + struct vdec_fb_node, list); + if (node) { + node->fb = fb; + list_move_tail(&node->list, &inst->fb_disp_list); + } else { + mtk_vcodec_err(inst, "No available fb node"); + return false; + } + + return true; +} + +/* If any buffer updating is signaled it should be done here. */ +static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + struct vp9_fb_info *frm_to_show; + int ref_index = 0, mask; + + for (mask = vsi->refresh_frm_flags; mask; mask >>= 1) { + if (mask & 1) + vp9_ref_cnt_fb(inst, &vsi->ref_frm_map[ref_index], + vsi->new_fb_idx); + ++ref_index; + } + + frm_to_show = &vsi->frm_bufs[vsi->new_fb_idx].buf; + vsi->frm_bufs[vsi->new_fb_idx].ref_cnt--; + + if (frm_to_show->fb != inst->cur_fb) { + /* This frame is show exist frame and no decode output + * copy frame data from frm_to_show to current CAPTURE + * buffer + */ + if ((frm_to_show->fb != NULL) && + (inst->cur_fb->base_y.size >= + frm_to_show->fb->base_y.size) && + (inst->cur_fb->base_c.size >= + frm_to_show->fb->base_c.size)) { + memcpy((void *)inst->cur_fb->base_y.va, + (void *)frm_to_show->fb->base_y.va, + frm_to_show->fb->base_y.size); + memcpy((void *)inst->cur_fb->base_c.va, + (void *)frm_to_show->fb->base_c.va, + frm_to_show->fb->base_c.size); + } else { + /* After resolution change case, current CAPTURE buffer + * may have less buffer size than frm_to_show buffer + * size + */ + if (frm_to_show->fb != NULL) + mtk_vcodec_err(inst, + "inst->cur_fb->base_y.size=%zu, frm_to_show->fb.base_y.size=%zu", + inst->cur_fb->base_y.size, + frm_to_show->fb->base_y.size); + } + if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) { + if (vsi->show_frame & BIT(0)) + vp9_add_to_fb_disp_list(inst, inst->cur_fb); + } + } else { + if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) { + if (vsi->show_frame & BIT(0)) + vp9_add_to_fb_disp_list(inst, frm_to_show->fb); + } + } + + /* when ref_cnt ==0, move this fb to fb_free_list. v4l2 driver will + * clean fb_free_list + */ + if (vsi->frm_bufs[vsi->new_fb_idx].ref_cnt == 0) { + if (!vp9_is_sf_ref_fb( + inst, vsi->frm_bufs[vsi->new_fb_idx].buf.fb)) { + struct vdec_fb *fb; + + fb = vp9_rm_from_fb_use_list(inst, + vsi->frm_bufs[vsi->new_fb_idx].buf.fb->base_y.va); + + vp9_add_to_fb_free_list(inst, fb); + } else { + vp9_free_sf_ref_fb( + vsi->frm_bufs[vsi->new_fb_idx].buf.fb); + } + } + + /* if this super frame and it is not last sub-frame, get next fb for + * sub-frame decode + */ + if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1) + vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst); +} + +static bool vp9_wait_dec_end(struct vdec_vp9_inst *inst) +{ + struct mtk_vcodec_ctx *ctx = inst->ctx; + + mtk_vcodec_wait_for_done_ctx(inst->ctx, + MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + + if (ctx->irq_status & MTK_VDEC_IRQ_STATUS_DEC_SUCCESS) + return true; + else + return false; +} + +static struct vdec_vp9_inst *vp9_alloc_inst(struct mtk_vcodec_ctx *ctx) +{ + int result; + struct mtk_vcodec_mem mem; + struct vdec_vp9_inst *inst; + + memset(&mem, 0, sizeof(mem)); + mem.size = sizeof(struct vdec_vp9_inst); + result = mtk_vcodec_mem_alloc(ctx, &mem); + if (result) + return NULL; + + inst = mem.va; + inst->mem = mem; + + return inst; +} + +static void vp9_free_inst(struct vdec_vp9_inst *inst) +{ + struct mtk_vcodec_mem mem; + + mem = inst->mem; + if (mem.va) + mtk_vcodec_mem_free(inst->ctx, &mem); +} + +static bool vp9_decode_end_proc(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + bool ret = false; + + if (!vsi->show_existing_frame) { + ret = vp9_wait_dec_end(inst); + if (!ret) { + mtk_vcodec_err(inst, "Decode failed, Decode Timeout @[%d]", + vsi->frm_num); + return false; + } + + if (vpu_dec_end(&inst->vpu)) { + mtk_vcodec_err(inst, "vp9_dec_vpu_end failed"); + return false; + } + mtk_vcodec_debug(inst, "Decode Ok @%d (%d/%d)", vsi->frm_num, + vsi->pic_w, vsi->pic_h); + } else { + mtk_vcodec_debug(inst, "Decode Ok @%d (show_existing_frame)", + vsi->frm_num); + } + + vp9_swap_frm_bufs(inst); + vsi->frm_num++; + return true; +} + +static bool vp9_is_last_sub_frm(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + + if (vsi->sf_frm_cnt <= 0 || vsi->sf_frm_idx == vsi->sf_frm_cnt) + return true; + + return false; +} + +static struct vdec_fb *vp9_rm_from_fb_disp_list(struct vdec_vp9_inst *inst) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb = NULL; + + node = list_first_entry_or_null(&inst->fb_disp_list, + struct vdec_fb_node, list); + if (node) { + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_DISPLAY; + list_move_tail(&node->list, &inst->available_fb_node_list); + mtk_vcodec_debug(inst, "[FB] get disp fb %p st=%d", + node->fb, fb->status); + } else + mtk_vcodec_debug(inst, "[FB] there is no disp fb"); + + return fb; +} + +static bool vp9_add_to_fb_use_list(struct vdec_vp9_inst *inst, + struct vdec_fb *fb) +{ + struct vdec_fb_node *node; + + if (!fb) { + mtk_vcodec_debug(inst, "fb == NULL"); + return false; + } + + node = list_first_entry_or_null(&inst->available_fb_node_list, + struct vdec_fb_node, list); + if (node) { + node->fb = fb; + list_move_tail(&node->list, &inst->fb_use_list); + } else { + mtk_vcodec_err(inst, "No free fb node"); + return false; + } + return true; +} + +static void vp9_reset(struct vdec_vp9_inst *inst) +{ + struct vdec_fb_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &inst->fb_use_list, list) + list_move_tail(&node->list, &inst->fb_free_list); + + vp9_free_all_sf_ref_fb(inst); + inst->vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst); + + if (vpu_dec_reset(&inst->vpu)) + mtk_vcodec_err(inst, "vp9_dec_vpu_reset failed"); + + /* Set the va again, since vpu_dec_reset will clear mv_buf in vpu */ + inst->vsi->mv_buf.va = (unsigned long)inst->mv_buf.va; + inst->vsi->mv_buf.pa = (unsigned long)inst->mv_buf.dma_addr; + inst->vsi->mv_buf.sz = (unsigned long)inst->mv_buf.size; + + /* Set the va again, since vpu_dec_reset will clear seg_id_buf in vpu */ + inst->vsi->seg_id_buf.va = (unsigned long)inst->seg_id_buf.va; + inst->vsi->seg_id_buf.pa = (unsigned long)inst->seg_id_buf.dma_addr; + inst->vsi->seg_id_buf.sz = (unsigned long)inst->seg_id_buf.size; + +} + +static void init_all_fb_lists(struct vdec_vp9_inst *inst) +{ + int i; + + INIT_LIST_HEAD(&inst->available_fb_node_list); + INIT_LIST_HEAD(&inst->fb_use_list); + INIT_LIST_HEAD(&inst->fb_free_list); + INIT_LIST_HEAD(&inst->fb_disp_list); + + for (i = 0; i < ARRAY_SIZE(inst->dec_fb); i++) { + INIT_LIST_HEAD(&inst->dec_fb[i].list); + inst->dec_fb[i].fb = NULL; + list_add_tail(&inst->dec_fb[i].list, + &inst->available_fb_node_list); + } +} + +static void get_pic_info(struct vdec_vp9_inst *inst, struct vdec_pic_info *pic) +{ + pic->fb_sz[0] = inst->vsi->buf_sz_y_bs + inst->vsi->buf_len_sz_y; + pic->fb_sz[1] = inst->vsi->buf_sz_c_bs + inst->vsi->buf_len_sz_c; + + pic->pic_w = inst->vsi->pic_w; + pic->pic_h = inst->vsi->pic_h; + pic->buf_w = inst->vsi->buf_w; + pic->buf_h = inst->vsi->buf_h; + + mtk_vcodec_debug(inst, "pic(%d, %d), buf(%d, %d)", + pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h); + mtk_vcodec_debug(inst, "fb size: Y(%d), C(%d)", + pic->fb_sz[0], + pic->fb_sz[1]); +} + +static void get_disp_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb) +{ + + *out_fb = vp9_rm_from_fb_disp_list(inst); + if (*out_fb) + (*out_fb)->status |= FB_ST_DISPLAY; +} + +static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb = NULL; + + node = list_first_entry_or_null(&inst->fb_free_list, + struct vdec_fb_node, list); + if (node) { + list_move_tail(&node->list, &inst->available_fb_node_list); + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_FREE; + mtk_vcodec_debug(inst, "[FB] get free fb %p st=%d", + node->fb, fb->status); + } else { + mtk_vcodec_debug(inst, "[FB] there is no free fb"); + } + + *out_fb = fb; +} + +static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst, + struct vdec_vp9_vsi *vsi) { + if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) { + mtk_vcodec_err(inst, "Invalid vsi->sf_frm_idx=%u.", + vsi->sf_frm_idx); + return -EIO; + } + if (vsi->frm_to_show_idx >= VP9_MAX_FRM_BUF_NUM) { + mtk_vcodec_err(inst, "Invalid vsi->frm_to_show_idx=%u.", + vsi->frm_to_show_idx); + return -EIO; + } + if (vsi->new_fb_idx >= VP9_MAX_FRM_BUF_NUM) { + mtk_vcodec_err(inst, "Invalid vsi->new_fb_idx=%u.", + vsi->new_fb_idx); + return -EIO; + } + return 0; +} + +static void vdec_vp9_deinit(void *h_vdec) +{ + struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec; + struct mtk_vcodec_mem *mem; + int ret = 0; + + ret = vpu_dec_deinit(&inst->vpu); + if (ret) + mtk_vcodec_err(inst, "vpu_dec_deinit failed"); + + mem = &inst->mv_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + mem = &inst->seg_id_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + vp9_free_all_sf_ref_fb(inst); + vp9_free_inst(inst); +} + +static int vdec_vp9_init(struct mtk_vcodec_ctx *ctx) +{ + struct vdec_vp9_inst *inst; + + inst = vp9_alloc_inst(ctx); + if (!inst) + return -ENOMEM; + + inst->total_frm_cnt = 0; + inst->ctx = ctx; + + inst->vpu.id = IPI_VDEC_VP9; + inst->vpu.ctx = ctx; + + if (vpu_dec_init(&inst->vpu)) { + mtk_vcodec_err(inst, "vp9_dec_vpu_init failed"); + goto err_deinit_inst; + } + + inst->vsi = (struct vdec_vp9_vsi *)inst->vpu.vsi; + + inst->vsi->show_frame |= BIT(3); + + init_all_fb_lists(inst); + + ctx->drv_handle = inst; + return 0; + +err_deinit_inst: + vp9_free_inst(inst); + + return -EINVAL; +} + +static int vdec_vp9_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + int ret = 0; + struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec; + struct vdec_vp9_vsi *vsi = inst->vsi; + u32 data[3]; + int i; + + *res_chg = false; + + if ((bs == NULL) && (fb == NULL)) { + mtk_vcodec_debug(inst, "[EOS]"); + vp9_reset(inst); + return ret; + } + + if (bs == NULL) { + mtk_vcodec_err(inst, "bs == NULL"); + return -EINVAL; + } + + mtk_vcodec_debug(inst, "Input BS Size = %zu", bs->size); + + while (1) { + struct vdec_fb *cur_fb = NULL; + + data[0] = *((unsigned int *)bs->va); + data[1] = *((unsigned int *)(bs->va + 4)); + data[2] = *((unsigned int *)(bs->va + 8)); + + vsi->bs = *bs; + + if (fb) + vsi->fb = *fb; + + if (!vsi->sf_init) { + unsigned int sf_bs_sz; + unsigned int sf_bs_off; + unsigned char *sf_bs_src; + unsigned char *sf_bs_dst; + + sf_bs_sz = bs->size > VP9_SUPER_FRAME_BS_SZ ? + VP9_SUPER_FRAME_BS_SZ : bs->size; + sf_bs_off = VP9_SUPER_FRAME_BS_SZ - sf_bs_sz; + sf_bs_src = bs->va + bs->size - sf_bs_sz; + sf_bs_dst = vsi->sf_bs_buf + sf_bs_off; + memcpy(sf_bs_dst, sf_bs_src, sf_bs_sz); + } else { + if ((vsi->sf_frm_cnt > 0) && + (vsi->sf_frm_idx < vsi->sf_frm_cnt)) { + unsigned int idx = vsi->sf_frm_idx; + + memcpy((void *)bs->va, + (void *)(bs->va + + vsi->sf_frm_offset[idx]), + vsi->sf_frm_sz[idx]); + } + } + + if (!(vsi->show_frame & BIT(4))) + memset(inst->seg_id_buf.va, 0, inst->seg_id_buf.size); + + ret = vpu_dec_start(&inst->vpu, data, 3); + if (ret) { + mtk_vcodec_err(inst, "vpu_dec_start failed"); + goto DECODE_ERROR; + } + + if (vsi->show_frame & BIT(1)) { + memset(inst->seg_id_buf.va, 0, inst->seg_id_buf.size); + + if (vsi->show_frame & BIT(2)) { + ret = vpu_dec_start(&inst->vpu, NULL, 0); + if (ret) { + mtk_vcodec_err(inst, "vpu trig decoder failed"); + goto DECODE_ERROR; + } + } + } + + ret = validate_vsi_array_indexes(inst, vsi); + if (ret) { + mtk_vcodec_err(inst, "Invalid values from VPU."); + goto DECODE_ERROR; + } + + if (vsi->resolution_changed) { + if (!vp9_alloc_work_buf(inst)) { + ret = -EIO; + goto DECODE_ERROR; + } + } + + if (vsi->sf_frm_cnt > 0) { + cur_fb = &vsi->sf_ref_fb[vsi->sf_next_ref_fb_idx].fb; + + if (vsi->sf_frm_idx < vsi->sf_frm_cnt) + inst->cur_fb = cur_fb; + else + inst->cur_fb = fb; + } else { + inst->cur_fb = fb; + } + + vsi->frm_bufs[vsi->new_fb_idx].buf.fb = inst->cur_fb; + if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) + vp9_add_to_fb_use_list(inst, inst->cur_fb); + + mtk_vcodec_debug(inst, "[#pic %d]", vsi->frm_num); + + if (vsi->show_existing_frame) + mtk_vcodec_debug(inst, + "drv->new_fb_idx=%d, drv->frm_to_show_idx=%d", + vsi->new_fb_idx, vsi->frm_to_show_idx); + + if (vsi->show_existing_frame && (vsi->frm_to_show_idx < + VP9_MAX_FRM_BUF_NUM)) { + mtk_vcodec_debug(inst, + "Skip Decode drv->new_fb_idx=%d, drv->frm_to_show_idx=%d", + vsi->new_fb_idx, vsi->frm_to_show_idx); + + vp9_ref_cnt_fb(inst, &vsi->new_fb_idx, + vsi->frm_to_show_idx); + } + + /* VPU assign the buffer pointer in its address space, + * reassign here + */ + for (i = 0; i < ARRAY_SIZE(vsi->frm_refs); i++) { + unsigned int idx = vsi->frm_refs[i].idx; + + vsi->frm_refs[i].buf = &vsi->frm_bufs[idx].buf; + } + + if (vsi->resolution_changed) { + *res_chg = true; + mtk_vcodec_debug(inst, "VDEC_ST_RESOLUTION_CHANGED"); + + ret = 0; + goto DECODE_ERROR; + } + + if (!vp9_decode_end_proc(inst)) { + mtk_vcodec_err(inst, "vp9_decode_end_proc"); + ret = -EINVAL; + goto DECODE_ERROR; + } + + if (vp9_is_last_sub_frm(inst)) + break; + + } + inst->total_frm_cnt++; + +DECODE_ERROR: + if (ret < 0) + vp9_add_to_fb_free_list(inst, fb); + + return ret; +} + +static void get_crop_info(struct vdec_vp9_inst *inst, struct v4l2_rect *cr) +{ + cr->left = 0; + cr->top = 0; + cr->width = inst->vsi->pic_w; + cr->height = inst->vsi->pic_h; + mtk_vcodec_debug(inst, "get crop info l=%d, t=%d, w=%d, h=%d\n", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_vp9_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec; + int ret = 0; + + switch (type) { + case GET_PARAM_DISP_FRAME_BUFFER: + get_disp_fb(inst, out); + break; + case GET_PARAM_FREE_FRAME_BUFFER: + get_free_fb(inst, out); + break; + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + case GET_PARAM_DPB_SIZE: + *((unsigned int *)out) = MAX_VP9_DPB_SIZE; + break; + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + default: + mtk_vcodec_err(inst, "not supported param type %d", type); + ret = -EINVAL; + break; + } + + return ret; +} + +const struct vdec_common_if vdec_vp9_if = { + .init = vdec_vp9_init, + .decode = vdec_vp9_decode, + .get_param = vdec_vp9_get_param, + .deinit = vdec_vp9_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c new file mode 100644 index 000000000..cf16cf280 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c @@ -0,0 +1,2225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: George Sun <george.sun@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/videobuf2-dma-contig.h> +#include <media/v4l2-vp9.h> + +#include "../mtk_vcodec_util.h" +#include "../mtk_vcodec_dec.h" +#include "../mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" + +/* reset_frame_context defined in VP9 spec */ +#define VP9_RESET_FRAME_CONTEXT_NONE0 0 +#define VP9_RESET_FRAME_CONTEXT_NONE1 1 +#define VP9_RESET_FRAME_CONTEXT_SPEC 2 +#define VP9_RESET_FRAME_CONTEXT_ALL 3 + +#define VP9_TILE_BUF_SIZE 4096 +#define VP9_PROB_BUF_SIZE 2560 +#define VP9_COUNTS_BUF_SIZE 16384 + +#define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x)) +#define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x)) +#define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x)) +#define VP9_BAND_6(band) ((band) == 0 ? 3 : 6) + +/* + * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint + */ +struct vdec_vp9_slice_frame_ctx { + struct { + u8 probs[6][3]; + u8 padding[2]; + } coef_probs[4][2][2][6]; + + u8 y_mode_prob[4][16]; + u8 switch_interp_prob[4][16]; + u8 seg[32]; /* ignore */ + u8 comp_inter_prob[16]; + u8 comp_ref_prob[16]; + u8 single_ref_prob[5][2]; + u8 single_ref_prob_padding[6]; + + u8 joint[3]; + u8 joint_padding[13]; + struct { + u8 sign; + u8 classes[10]; + u8 padding[5]; + } sign_classes[2]; + struct { + u8 class0[1]; + u8 bits[10]; + u8 padding[5]; + } class0_bits[2]; + struct { + u8 class0_fp[2][3]; + u8 fp[3]; + u8 class0_hp; + u8 hp; + u8 padding[5]; + } class0_fp_hp[2]; + + u8 uv_mode_prob[10][16]; + u8 uv_mode_prob_padding[2][16]; + + u8 partition_prob[16][4]; + + u8 inter_mode_probs[7][4]; + u8 skip_probs[4]; + + u8 tx_p8x8[2][4]; + u8 tx_p16x16[2][4]; + u8 tx_p32x32[2][4]; + u8 intra_inter_prob[8]; +}; + +/* + * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint + */ +struct vdec_vp9_slice_frame_counts { + union { + struct { + u32 band_0[3]; + u32 padding0[1]; + u32 band_1_5[5][6]; + u32 padding1[2]; + } eob_branch[4][2][2]; + u32 eob_branch_space[256 * 4]; + }; + + struct { + u32 band_0[3][4]; + u32 band_1_5[5][6][4]; + } coef_probs[4][2][2]; + + u32 intra_inter[4][2]; + u32 comp_inter[5][2]; + u32 comp_inter_padding[2]; + u32 comp_ref[5][2]; + u32 comp_ref_padding[2]; + u32 single_ref[5][2][2]; + u32 inter_mode[7][4]; + u32 y_mode[4][12]; + u32 uv_mode[10][10]; + u32 partition[16][4]; + u32 switchable_interp[4][4]; + + u32 tx_p8x8[2][2]; + u32 tx_p16x16[2][4]; + u32 tx_p32x32[2][4]; + + u32 skip[3][4]; + + u32 joint[4]; + + struct { + u32 sign[2]; + u32 class0[2]; + u32 classes[12]; + u32 bits[10][2]; + u32 padding[4]; + u32 class0_fp[2][4]; + u32 fp[4]; + u32 class0_hp[2]; + u32 hp[2]; + } mvcomp[2]; + + u32 reserved[126][4]; +}; + +/** + * struct vdec_vp9_slice_counts_map - vp9 counts tables to map + * v4l2_vp9_frame_symbol_counts + * @skip: skip counts. + * @y_mode: Y prediction mode counts. + * @filter: interpolation filter counts. + * @mv_joint: motion vector joint counts. + * @sign: motion vector sign counts. + * @classes: motion vector class counts. + * @class0: motion vector class0 bit counts. + * @bits: motion vector bits counts. + * @class0_fp: motion vector class0 fractional bit counts. + * @fp: motion vector fractional bit counts. + * @class0_hp: motion vector class0 high precision fractional bit counts. + * @hp: motion vector high precision fractional bit counts. + */ +struct vdec_vp9_slice_counts_map { + u32 skip[3][2]; + u32 y_mode[4][10]; + u32 filter[4][3]; + u32 sign[2][2]; + u32 classes[2][11]; + u32 class0[2][2]; + u32 bits[2][10][2]; + u32 class0_fp[2][2][4]; + u32 fp[2][4]; + u32 class0_hp[2][2]; + u32 hp[2][2]; +}; + +/* + * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax + * used for decoding + */ +struct vdec_vp9_slice_uncompressed_header { + u8 profile; + u8 last_frame_type; + u8 frame_type; + + u8 last_show_frame; + u8 show_frame; + u8 error_resilient_mode; + + u8 bit_depth; + u8 padding0[1]; + u16 last_frame_width; + u16 last_frame_height; + u16 frame_width; + u16 frame_height; + + u8 intra_only; + u8 reset_frame_context; + u8 ref_frame_sign_bias[4]; + u8 allow_high_precision_mv; + u8 interpolation_filter; + + u8 refresh_frame_context; + u8 frame_parallel_decoding_mode; + u8 frame_context_idx; + + /* loop_filter_params */ + u8 loop_filter_level; + u8 loop_filter_sharpness; + u8 loop_filter_delta_enabled; + s8 loop_filter_ref_deltas[4]; + s8 loop_filter_mode_deltas[2]; + + /* quantization_params */ + u8 base_q_idx; + s8 delta_q_y_dc; + s8 delta_q_uv_dc; + s8 delta_q_uv_ac; + + /* segmentation_params */ + u8 segmentation_enabled; + u8 segmentation_update_map; + u8 segmentation_tree_probs[7]; + u8 padding1[1]; + u8 segmentation_temporal_udpate; + u8 segmentation_pred_prob[3]; + u8 segmentation_update_data; + u8 segmentation_abs_or_delta_update; + u8 feature_enabled[8]; + s16 feature_value[8][4]; + + /* tile_info */ + u8 tile_cols_log2; + u8 tile_rows_log2; + u8 padding2[2]; + + u16 uncompressed_header_size; + u16 header_size_in_bytes; + + /* LAT OUT, CORE IN */ + u32 dequant[8][4]; +}; + +/* + * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax + * used for decoding. + */ +struct vdec_vp9_slice_compressed_header { + u8 tx_mode; + u8 ref_mode; + u8 comp_fixed_ref; + u8 comp_var_ref[2]; + u8 padding[3]; +}; + +/* + * struct vdec_vp9_slice_tiles - vp9 tile syntax + */ +struct vdec_vp9_slice_tiles { + u32 size[4][64]; + u32 mi_rows[4]; + u32 mi_cols[64]; + u8 actual_rows; + u8 padding[7]; +}; + +/* + * struct vdec_vp9_slice_reference - vp9 reference frame information + */ +struct vdec_vp9_slice_reference { + u16 frame_width; + u16 frame_height; + u8 bit_depth; + u8 subsampling_x; + u8 subsampling_y; + u8 padding; +}; + +/* + * struct vdec_vp9_slice_frame - vp9 syntax used for decoding + */ +struct vdec_vp9_slice_frame { + struct vdec_vp9_slice_uncompressed_header uh; + struct vdec_vp9_slice_compressed_header ch; + struct vdec_vp9_slice_tiles tiles; + struct vdec_vp9_slice_reference ref[3]; +}; + +/* + * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance + */ +struct vdec_vp9_slice_init_vsi { + unsigned int architecture; + unsigned int reserved; + u64 core_vsi; + /* default frame context's position in MicroP */ + u64 default_frame_ctx; +}; + +/* + * struct vdec_vp9_slice_mem - memory address and size + */ +struct vdec_vp9_slice_mem { + union { + u64 buf; + dma_addr_t dma_addr; + }; + union { + size_t size; + dma_addr_t dma_addr_end; + u64 padding; + }; +}; + +/* + * struct vdec_vp9_slice_bs - input buffer for decoding + */ +struct vdec_vp9_slice_bs { + struct vdec_vp9_slice_mem buf; + struct vdec_vp9_slice_mem frame; +}; + +/* + * struct vdec_vp9_slice_fb - frame buffer for decoding + */ +struct vdec_vp9_slice_fb { + struct vdec_vp9_slice_mem y; + struct vdec_vp9_slice_mem c; +}; + +/* + * struct vdec_vp9_slice_state - decoding state + */ +struct vdec_vp9_slice_state { + int err; + unsigned int full; + unsigned int timeout; + unsigned int perf; + + unsigned int crc[12]; +}; + +/** + * struct vdec_vp9_slice_vsi - exchange decoding information + * between Main CPU and MicroP + * + * @bs: input buffer + * @fb: output buffer + * @ref: 3 reference buffers + * @mv: mv working buffer + * @seg: segmentation working buffer + * @tile: tile buffer + * @prob: prob table buffer, used to set/update prob table + * @counts: counts table buffer, used to update prob table + * @ube: general buffer + * @trans: trans buffer position in general buffer + * @err_map: error buffer + * @row_info: row info buffer + * @frame: decoding syntax + * @state: decoding state + */ +struct vdec_vp9_slice_vsi { + /* used in LAT stage */ + struct vdec_vp9_slice_bs bs; + /* used in Core stage */ + struct vdec_vp9_slice_fb fb; + struct vdec_vp9_slice_fb ref[3]; + + struct vdec_vp9_slice_mem mv[2]; + struct vdec_vp9_slice_mem seg[2]; + struct vdec_vp9_slice_mem tile; + struct vdec_vp9_slice_mem prob; + struct vdec_vp9_slice_mem counts; + + /* LAT stage's output, Core stage's input */ + struct vdec_vp9_slice_mem ube; + struct vdec_vp9_slice_mem trans; + struct vdec_vp9_slice_mem err_map; + struct vdec_vp9_slice_mem row_info; + + /* decoding parameters */ + struct vdec_vp9_slice_frame frame; + + struct vdec_vp9_slice_state state; +}; + +/** + * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi. + * pass it from lat to core + * + * @vsi: local vsi. copy to/from remote vsi before/after decoding + * @ref_idx: reference buffer index + * @seq: picture sequence + * @state: decoding state + */ +struct vdec_vp9_slice_pfc { + struct vdec_vp9_slice_vsi vsi; + + u64 ref_idx[3]; + + int seq; + + /* LAT/Core CRC */ + struct vdec_vp9_slice_state state[2]; +}; + +/* + * enum vdec_vp9_slice_resolution_level + */ +enum vdec_vp9_slice_resolution_level { + VP9_RES_NONE, + VP9_RES_FHD, + VP9_RES_4K, + VP9_RES_8K, +}; + +/* + * struct vdec_vp9_slice_ref - picture's width & height should kept + * for later decoding as reference picture + */ +struct vdec_vp9_slice_ref { + unsigned int width; + unsigned int height; +}; + +/** + * struct vdec_vp9_slice_instance - represent one vp9 instance + * + * @ctx: pointer to codec's context + * @vpu: VPU instance + * @seq: global picture sequence + * @level: level of current resolution + * @width: width of last picture + * @height: height of last picture + * @frame_type: frame_type of last picture + * @irq: irq to Main CPU or MicroP + * @show_frame: show_frame of last picture + * @dpb: picture information (width/height) for reference + * @mv: mv working buffer + * @seg: segmentation working buffer + * @tile: tile buffer + * @prob: prob table buffer, used to set/update prob table + * @counts: counts table buffer, used to update prob table + * @frame_ctx: 4 frame context according to VP9 Spec + * @frame_ctx_helper: 4 frame context according to newest kernel spec + * @dirty: state of each frame context + * @init_vsi: vsi used for initialized VP9 instance + * @vsi: vsi used for decoding/flush ... + * @core_vsi: vsi used for Core stage + * + * @sc_pfc: per frame context single core + * @counts_map: used map to counts_helper + * @counts_helper: counts table according to newest kernel spec + */ +struct vdec_vp9_slice_instance { + struct mtk_vcodec_ctx *ctx; + struct vdec_vpu_inst vpu; + + int seq; + + enum vdec_vp9_slice_resolution_level level; + + /* for resolution change and get_pic_info */ + unsigned int width; + unsigned int height; + + /* for last_frame_type */ + unsigned int frame_type; + unsigned int irq; + + unsigned int show_frame; + + /* maintain vp9 reference frame state */ + struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME]; + + /* + * normal working buffers + * mv[0]/seg[0]/tile/prob/counts is used for LAT + * mv[1]/seg[1] is used for CORE + */ + struct mtk_vcodec_mem mv[2]; + struct mtk_vcodec_mem seg[2]; + struct mtk_vcodec_mem tile; + struct mtk_vcodec_mem prob; + struct mtk_vcodec_mem counts; + + /* 4 prob tables */ + struct vdec_vp9_slice_frame_ctx frame_ctx[4]; + /*4 helper tables */ + struct v4l2_vp9_frame_context frame_ctx_helper; + unsigned char dirty[4]; + + /* MicroP vsi */ + union { + struct vdec_vp9_slice_init_vsi *init_vsi; + struct vdec_vp9_slice_vsi *vsi; + }; + struct vdec_vp9_slice_vsi *core_vsi; + + struct vdec_vp9_slice_pfc sc_pfc; + struct vdec_vp9_slice_counts_map counts_map; + struct v4l2_vp9_frame_symbol_counts counts_helper; +}; + +/* + * all VP9 instances could share this default frame context. + */ +static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx; +static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock); + +static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf); + +static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance) +{ + struct vdec_vp9_slice_frame_ctx *remote_frame_ctx; + struct vdec_vp9_slice_frame_ctx *frame_ctx; + struct mtk_vcodec_ctx *ctx; + struct vdec_vp9_slice_init_vsi *vsi; + int ret = 0; + + ctx = instance->ctx; + vsi = instance->vpu.vsi; + if (!ctx || !vsi) + return -EINVAL; + + remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, + (u32)vsi->default_frame_ctx); + if (!remote_frame_ctx) { + mtk_vcodec_err(instance, "failed to map default frame ctx\n"); + return -EINVAL; + } + + mutex_lock(&vdec_vp9_slice_frame_ctx_lock); + if (vdec_vp9_slice_default_frame_ctx) + goto out; + + frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL); + if (!frame_ctx) { + ret = -ENOMEM; + goto out; + } + + vdec_vp9_slice_default_frame_ctx = frame_ctx; + +out: + mutex_unlock(&vdec_vp9_slice_frame_ctx_lock); + + return ret; +} + +static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi) +{ + struct mtk_vcodec_ctx *ctx = instance->ctx; + enum vdec_vp9_slice_resolution_level level; + /* super blocks */ + unsigned int max_sb_w; + unsigned int max_sb_h; + unsigned int max_w; + unsigned int max_h; + unsigned int w; + unsigned int h; + size_t size; + int ret; + int i; + + w = vsi->frame.uh.frame_width; + h = vsi->frame.uh.frame_height; + + if (w > VCODEC_DEC_4K_CODED_WIDTH || + h > VCODEC_DEC_4K_CODED_HEIGHT) { + return -EINVAL; + } else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { + /* 4K */ + level = VP9_RES_4K; + max_w = VCODEC_DEC_4K_CODED_WIDTH; + max_h = VCODEC_DEC_4K_CODED_HEIGHT; + } else { + /* FHD */ + level = VP9_RES_FHD; + max_w = MTK_VDEC_MAX_W; + max_h = MTK_VDEC_MAX_H; + } + + if (level == instance->level) + return 0; + + mtk_vcodec_debug(instance, "resolution level changed, from %u to %u, %ux%u", + instance->level, level, w, h); + + max_sb_w = DIV_ROUND_UP(max_w, 64); + max_sb_h = DIV_ROUND_UP(max_h, 64); + ret = -ENOMEM; + + /* + * Lat-flush must wait core idle, otherwise core will + * use released buffers + */ + + size = (max_sb_w * max_sb_h + 2) * 576; + for (i = 0; i < 2; i++) { + if (instance->mv[i].va) + mtk_vcodec_mem_free(ctx, &instance->mv[i]); + instance->mv[i].size = size; + if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i])) + goto err; + } + + size = (max_sb_w * max_sb_h * 32) + 256; + for (i = 0; i < 2; i++) { + if (instance->seg[i].va) + mtk_vcodec_mem_free(ctx, &instance->seg[i]); + instance->seg[i].size = size; + if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i])) + goto err; + } + + if (!instance->tile.va) { + instance->tile.size = VP9_TILE_BUF_SIZE; + if (mtk_vcodec_mem_alloc(ctx, &instance->tile)) + goto err; + } + + if (!instance->prob.va) { + instance->prob.size = VP9_PROB_BUF_SIZE; + if (mtk_vcodec_mem_alloc(ctx, &instance->prob)) + goto err; + } + + if (!instance->counts.va) { + instance->counts.size = VP9_COUNTS_BUF_SIZE; + if (mtk_vcodec_mem_alloc(ctx, &instance->counts)) + goto err; + } + + instance->level = level; + return 0; + +err: + instance->level = VP9_RES_NONE; + return ret; +} + +static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance) +{ + struct mtk_vcodec_ctx *ctx = instance->ctx; + int i; + + for (i = 0; i < ARRAY_SIZE(instance->mv); i++) { + if (instance->mv[i].va) + mtk_vcodec_mem_free(ctx, &instance->mv[i]); + } + for (i = 0; i < ARRAY_SIZE(instance->seg); i++) { + if (instance->seg[i].va) + mtk_vcodec_mem_free(ctx, &instance->seg[i]); + } + if (instance->tile.va) + mtk_vcodec_mem_free(ctx, &instance->tile); + if (instance->prob.va) + mtk_vcodec_mem_free(ctx, &instance->prob); + if (instance->counts.va) + mtk_vcodec_mem_free(ctx, &instance->counts); + + instance->level = VP9_RES_NONE; +} + +static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi, + struct vdec_vp9_slice_vsi *remote_vsi, + int skip) +{ + struct vdec_vp9_slice_frame *rf; + struct vdec_vp9_slice_frame *f; + + /* + * compressed header + * dequant + * buffer position + * decode state + */ + if (!skip) { + rf = &remote_vsi->frame; + f = &vsi->frame; + memcpy(&f->ch, &rf->ch, sizeof(f->ch)); + memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant)); + memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); + } + + memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); +} + +static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi, + struct vdec_vp9_slice_vsi *remote_vsi) +{ + memcpy(remote_vsi, vsi, sizeof(*vsi)); +} + +static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2) +{ + int sbs = (mi_num + 7) >> 3; + int offset = ((idx * sbs) >> tile_log2) << 3; + + return min(offset, mi_num); +} + +static +int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance) +{ + struct vb2_v4l2_buffer *src; + struct vb2_v4l2_buffer *dst; + + src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); + if (!src) + return -EINVAL; + + dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); + if (!dst) + return -EINVAL; + + v4l2_m2m_buf_copy_metadata(src, dst, true); + + return 0; +} + +static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_v4l2_buffer *src; + struct vb2_v4l2_buffer *dst; + + src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); + if (!src) + return -EINVAL; + + lat_buf->src_buf_req = src->vb2_buf.req_obj.req; + + dst = &lat_buf->ts_info; + v4l2_m2m_buf_copy_metadata(src, dst, true); + return 0; +} + +static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_ctrl_vp9_frame *hdr) +{ + int i; + + uh->profile = hdr->profile; + uh->last_frame_type = instance->frame_type; + uh->frame_type = !HDR_FLAG(KEY_FRAME); + uh->last_show_frame = instance->show_frame; + uh->show_frame = HDR_FLAG(SHOW_FRAME); + uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); + uh->bit_depth = hdr->bit_depth; + uh->last_frame_width = instance->width; + uh->last_frame_height = instance->height; + uh->frame_width = hdr->frame_width_minus_1 + 1; + uh->frame_height = hdr->frame_height_minus_1 + 1; + uh->intra_only = HDR_FLAG(INTRA_ONLY); + /* map v4l2 enum to values defined in VP9 spec for firmware */ + switch (hdr->reset_frame_context) { + case V4L2_VP9_RESET_FRAME_CTX_NONE: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; + break; + case V4L2_VP9_RESET_FRAME_CTX_SPEC: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC; + break; + case V4L2_VP9_RESET_FRAME_CTX_ALL: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL; + break; + default: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; + break; + } + /* + * ref_frame_sign_bias specifies the intended direction + * of the motion vector in time for each reference frame. + * - INTRA_FRAME = 0, + * - LAST_FRAME = 1, + * - GOLDEN_FRAME = 2, + * - ALTREF_FRAME = 3, + * ref_frame_sign_bias[INTRA_FRAME] is always 0 + * and VDA only passes another 3 directions + */ + uh->ref_frame_sign_bias[0] = 0; + for (i = 0; i < 3; i++) + uh->ref_frame_sign_bias[i + 1] = + !!(hdr->ref_frame_sign_bias & (1 << i)); + uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV); + uh->interpolation_filter = hdr->interpolation_filter; + uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX); + uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE); + uh->frame_context_idx = hdr->frame_context_idx; + + /* tile info */ + uh->tile_cols_log2 = hdr->tile_cols_log2; + uh->tile_rows_log2 = hdr->tile_rows_log2; + + uh->uncompressed_header_size = hdr->uncompressed_header_size; + uh->header_size_in_bytes = hdr->compressed_header_size; +} + +static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_ctrl_vp9_frame *hdr) +{ + int error_resilient_mode; + int reset_frame_context; + int key_frame; + int intra_only; + int i; + + key_frame = HDR_FLAG(KEY_FRAME); + intra_only = HDR_FLAG(INTRA_ONLY); + error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); + reset_frame_context = uh->reset_frame_context; + + /* + * according to "6.2 Uncompressed header syntax" in + * "VP9 Bitstream & Decoding Process Specification", + * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode) + */ + if (key_frame || intra_only || error_resilient_mode) { + /* + * @reset_frame_context specifies + * whether the frame context should be + * reset to default values: + * 0 or 1 means do not reset any frame context + * 2 resets just the context specified in the frame header + * 3 resets all contexts + */ + if (key_frame || error_resilient_mode || + reset_frame_context == 3) { + /* use default table */ + for (i = 0; i < 4; i++) + instance->dirty[i] = 0; + } else if (reset_frame_context == 2) { + instance->dirty[uh->frame_context_idx] = 0; + } + uh->frame_context_idx = 0; + } +} + +static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_vp9_loop_filter *lf) +{ + int i; + + uh->loop_filter_level = lf->level; + uh->loop_filter_sharpness = lf->sharpness; + uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED); + for (i = 0; i < 4; i++) + uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i]; + for (i = 0; i < 2; i++) + uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i]; +} + +static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_vp9_quantization *quant) +{ + uh->base_q_idx = quant->base_q_idx; + uh->delta_q_y_dc = quant->delta_q_y_dc; + uh->delta_q_uv_dc = quant->delta_q_uv_dc; + uh->delta_q_uv_ac = quant->delta_q_uv_ac; +} + +static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_vp9_segmentation *seg) +{ + int i; + int j; + + uh->segmentation_enabled = SEG_FLAG(ENABLED); + uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP); + for (i = 0; i < 7; i++) + uh->segmentation_tree_probs[i] = seg->tree_probs[i]; + uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE); + for (i = 0; i < 3; i++) + uh->segmentation_pred_prob[i] = seg->pred_probs[i]; + uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA); + uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE); + for (i = 0; i < 8; i++) { + uh->feature_enabled[i] = seg->feature_enabled[i]; + for (j = 0; j < 4; j++) + uh->feature_value[i][j] = seg->feature_data[i][j]; + } +} + +static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi, + struct v4l2_ctrl_vp9_frame *hdr) +{ + unsigned int rows_log2; + unsigned int cols_log2; + unsigned int rows; + unsigned int cols; + unsigned int mi_rows; + unsigned int mi_cols; + struct vdec_vp9_slice_tiles *tiles; + int offset; + int start; + int end; + int i; + + rows_log2 = hdr->tile_rows_log2; + cols_log2 = hdr->tile_cols_log2; + rows = 1 << rows_log2; + cols = 1 << cols_log2; + tiles = &vsi->frame.tiles; + tiles->actual_rows = 0; + + if (rows > 4 || cols > 64) + return -EINVAL; + + /* setup mi rows/cols information */ + mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3; + mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3; + + for (i = 0; i < rows; i++) { + start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2); + end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2); + offset = end - start; + tiles->mi_rows[i] = (offset + 7) >> 3; + if (tiles->mi_rows[i]) + tiles->actual_rows++; + } + + for (i = 0; i < cols; i++) { + start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2); + end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2); + offset = end - start; + tiles->mi_cols[i] = (offset + 7) >> 3; + } + + return 0; +} + +static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi) +{ + memset(&vsi->state, 0, sizeof(vsi->state)); +} + +static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc, + struct v4l2_ctrl_vp9_frame *hdr) +{ + pfc->ref_idx[0] = hdr->last_frame_ts; + pfc->ref_idx[1] = hdr->golden_frame_ts; + pfc->ref_idx[2] = hdr->alt_frame_ts; +} + +static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc) +{ + struct v4l2_ctrl_vp9_frame *hdr; + struct vdec_vp9_slice_uncompressed_header *uh; + struct v4l2_ctrl *hdr_ctrl; + struct vdec_vp9_slice_vsi *vsi; + int ret; + + /* frame header */ + hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME); + if (!hdr_ctrl || !hdr_ctrl->p_cur.p) + return -EINVAL; + + hdr = hdr_ctrl->p_cur.p; + vsi = &pfc->vsi; + uh = &vsi->frame.uh; + + /* setup vsi information */ + vdec_vp9_slice_setup_hdr(instance, uh, hdr); + vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr); + vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf); + vdec_vp9_slice_setup_quantization(uh, &hdr->quant); + vdec_vp9_slice_setup_segmentation(uh, &hdr->seg); + ret = vdec_vp9_slice_setup_tile(vsi, hdr); + if (ret) + return ret; + vdec_vp9_slice_setup_state(vsi); + + /* core stage needs buffer index to get ref y/c ... */ + vdec_vp9_slice_setup_ref_idx(pfc, hdr); + + pfc->seq = instance->seq; + instance->seq++; + + return 0; +} + +static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *bs, + struct vdec_lat_buf *lat_buf) +{ + int i; + + vsi->bs.buf.dma_addr = bs->dma_addr; + vsi->bs.buf.size = bs->size; + vsi->bs.frame.dma_addr = bs->dma_addr; + vsi->bs.frame.size = bs->size; + + for (i = 0; i < 2; i++) { + vsi->mv[i].dma_addr = instance->mv[i].dma_addr; + vsi->mv[i].size = instance->mv[i].size; + } + for (i = 0; i < 2; i++) { + vsi->seg[i].dma_addr = instance->seg[i].dma_addr; + vsi->seg[i].size = instance->seg[i].size; + } + vsi->tile.dma_addr = instance->tile.dma_addr; + vsi->tile.size = instance->tile.size; + vsi->prob.dma_addr = instance->prob.dma_addr; + vsi->prob.size = instance->prob.size; + vsi->counts.dma_addr = instance->counts.dma_addr; + vsi->counts.size = instance->counts.size; + + vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; + vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr; + /* used to store trans end */ + vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr; + vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; + vsi->err_map.size = lat_buf->wdma_err_addr.size; + + vsi->row_info.buf = 0; + vsi->row_info.size = 0; + + return 0; +} + +static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi) +{ + struct vdec_vp9_slice_frame_ctx *frame_ctx; + struct vdec_vp9_slice_uncompressed_header *uh; + + uh = &vsi->frame.uh; + + mtk_vcodec_debug(instance, "ctx dirty %u idx %d\n", + instance->dirty[uh->frame_context_idx], + uh->frame_context_idx); + + if (instance->dirty[uh->frame_context_idx]) + frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; + else + frame_ctx = vdec_vp9_slice_default_frame_ctx; + memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx)); + + return 0; +} + +static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *buf) +{ + struct vdec_vp9_slice_uncompressed_header *uh; + + /* reset segment buffer */ + uh = &vsi->frame.uh; + if (uh->frame_type == 0 || + uh->intra_only || + uh->error_resilient_mode || + uh->frame_width != instance->width || + uh->frame_height != instance->height) { + mtk_vcodec_debug(instance, "reset seg\n"); + memset(buf->va, 0, buf->size); + } +} + +/* + * parse tiles according to `6.4 Decode tiles syntax` + * in "vp9-bitstream-specification" + * + * frame contains uncompress header, compressed header and several tiles. + * this function parses tiles' position and size, stores them to tile buffer + * for decoding. + */ +static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *bs) +{ + struct vdec_vp9_slice_uncompressed_header *uh; + unsigned int rows_log2; + unsigned int cols_log2; + unsigned int rows; + unsigned int cols; + unsigned int mi_row; + unsigned int mi_col; + unsigned int offset; + unsigned int pa; + unsigned int size; + struct vdec_vp9_slice_tiles *tiles; + unsigned char *pos; + unsigned char *end; + unsigned char *va; + unsigned int *tb; + int i; + int j; + + uh = &vsi->frame.uh; + rows_log2 = uh->tile_rows_log2; + cols_log2 = uh->tile_cols_log2; + rows = 1 << rows_log2; + cols = 1 << cols_log2; + + if (rows > 4 || cols > 64) { + mtk_vcodec_err(instance, "tile_rows %u tile_cols %u\n", + rows, cols); + return -EINVAL; + } + + offset = uh->uncompressed_header_size + + uh->header_size_in_bytes; + if (bs->size <= offset) { + mtk_vcodec_err(instance, "bs size %zu tile offset %u\n", + bs->size, offset); + return -EINVAL; + } + + tiles = &vsi->frame.tiles; + /* setup tile buffer */ + + va = (unsigned char *)bs->va; + pos = va + offset; + end = va + bs->size; + /* truncated */ + pa = (unsigned int)bs->dma_addr + offset; + tb = instance->tile.va; + for (i = 0; i < rows; i++) { + for (j = 0; j < cols; j++) { + if (i == rows - 1 && + j == cols - 1) { + size = (unsigned int)(end - pos); + } else { + if (end - pos < 4) + return -EINVAL; + + size = (pos[0] << 24) | (pos[1] << 16) | + (pos[2] << 8) | pos[3]; + pos += 4; + pa += 4; + offset += 4; + if (end - pos < size) + return -EINVAL; + } + tiles->size[i][j] = size; + if (tiles->mi_rows[i]) { + *tb++ = (size << 3) + ((offset << 3) & 0x7f); + *tb++ = pa & ~0xf; + *tb++ = (pa << 3) & 0x7f; + mi_row = (tiles->mi_rows[i] - 1) & 0x1ff; + mi_col = (tiles->mi_cols[j] - 1) & 0x3f; + *tb++ = (mi_row << 6) + mi_col; + } + pos += size; + pa += size; + offset += size; + } + } + + return 0; +} + +static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance, + struct mtk_vcodec_mem *bs, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; + int ret; + + ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_pfc(instance, pfc); + if (ret) + goto err; + + ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); + if (ret) + goto err; + + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]); + + /* setup prob/tile buffers for LAT */ + + ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); + if (ret) + goto err; + + return 0; + +err: + return ret; +} + +static +void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k, + struct vdec_vp9_slice_frame_counts *counts, + struct v4l2_vp9_frame_symbol_counts *counts_helper) +{ + u32 l = 0, m; + + /* + * helper eo -> mtk eo + * helpre e1 -> mtk c3 + * helper c0 -> c0 + * helper c1 -> c1 + * helper c2 -> c2 + */ + for (m = 0; m < 3; m++) { + counts_helper->coeff[i][j][k][l][m] = + (u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m]; + counts_helper->eob[i][j][k][l][m][0] = + &counts->eob_branch[i][j][k].band_0[m]; + counts_helper->eob[i][j][k][l][m][1] = + &counts->coef_probs[i][j][k].band_0[m][3]; + } + + for (l = 1; l < 6; l++) { + for (m = 0; m < 6; m++) { + counts_helper->coeff[i][j][k][l][m] = + (u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m]; + counts_helper->eob[i][j][k][l][m][0] = + &counts->eob_branch[i][j][k].band_1_5[l - 1][m]; + counts_helper->eob[i][j][k][l][m][1] = + &counts->coef_probs[i][j][k].band_1_5[l - 1][m][3]; + } + } +} + +static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map, + struct vdec_vp9_slice_frame_counts *counts, + struct v4l2_vp9_frame_symbol_counts *counts_helper) +{ + int i, j, k; + + counts_helper->partition = &counts->partition; + counts_helper->intra_inter = &counts->intra_inter; + counts_helper->tx32p = &counts->tx_p32x32; + counts_helper->tx16p = &counts->tx_p16x16; + counts_helper->tx8p = &counts->tx_p8x8; + counts_helper->uv_mode = &counts->uv_mode; + + counts_helper->comp = &counts->comp_inter; + counts_helper->comp_ref = &counts->comp_ref; + counts_helper->single_ref = &counts->single_ref; + counts_helper->mv_mode = &counts->inter_mode; + counts_helper->mv_joint = &counts->joint; + + for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++) + memcpy(counts_map->skip[i], counts->skip[i], + sizeof(counts_map->skip[0])); + counts_helper->skip = &counts_map->skip; + + for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++) + memcpy(counts_map->y_mode[i], counts->y_mode[i], + sizeof(counts_map->y_mode[0])); + counts_helper->y_mode = &counts_map->y_mode; + + for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++) + memcpy(counts_map->filter[i], counts->switchable_interp[i], + sizeof(counts_map->filter[0])); + counts_helper->filter = &counts_map->filter; + + for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++) + memcpy(counts_map->sign[i], counts->mvcomp[i].sign, + sizeof(counts_map->sign[0])); + counts_helper->sign = &counts_map->sign; + + for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++) + memcpy(counts_map->classes[i], counts->mvcomp[i].classes, + sizeof(counts_map->classes[0])); + counts_helper->classes = &counts_map->classes; + + for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++) + memcpy(counts_map->class0[i], counts->mvcomp[i].class0, + sizeof(counts_map->class0[0])); + counts_helper->class0 = &counts_map->class0; + + for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++) + for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++) + memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j], + sizeof(counts_map->bits[0][0])); + counts_helper->bits = &counts_map->bits; + + for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++) + for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++) + memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j], + sizeof(counts_map->class0_fp[0][0])); + counts_helper->class0_fp = &counts_map->class0_fp; + + for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++) + memcpy(counts_map->fp[i], counts->mvcomp[i].fp, + sizeof(counts_map->fp[0])); + counts_helper->fp = &counts_map->fp; + + for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++) + memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp, + sizeof(counts_map->class0_hp[0])); + counts_helper->class0_hp = &counts_map->class0_hp; + + for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++) + memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0])); + + counts_helper->hp = &counts_map->hp; + + for (i = 0; i < 4; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper); +} + +static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k, + struct vdec_vp9_slice_frame_ctx *frame_ctx, + struct v4l2_vp9_frame_context *frame_ctx_helper) +{ + u32 l, m; + + for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { + for (m = 0; m < VP9_BAND_6(l); m++) { + memcpy(frame_ctx_helper->coef[i][j][k][l][m], + frame_ctx->coef_probs[i][j][k][l].probs[m], + sizeof(frame_ctx_helper->coef[i][j][k][l][0])); + } + } +} + +static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k, + struct vdec_vp9_slice_frame_ctx *frame_ctx, + struct v4l2_vp9_frame_context *frame_ctx_helper) +{ + u32 l, m; + + for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { + for (m = 0; m < VP9_BAND_6(l); m++) { + memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m], + frame_ctx_helper->coef[i][j][k][l][m], + sizeof(frame_ctx_helper->coef[i][j][k][l][0])); + } + } +} + +static +void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra, + struct vdec_vp9_slice_frame_ctx *pre_frame_ctx, + struct vdec_vp9_slice_frame_ctx *frame_ctx, + struct v4l2_vp9_frame_context *frame_ctx_helper) +{ + struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; + u32 i, j, k; + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) + for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) + for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) + vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx, + frame_ctx_helper); + + /* + * use previous prob when frame is not intra or + * we should use the prob updated by the compressed header parse + */ + if (!frame_is_intra) + frame_ctx = pre_frame_ctx; + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++) + memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i], + sizeof(frame_ctx_helper->tx8[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++) + memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i], + sizeof(frame_ctx_helper->tx16[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++) + memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i], + sizeof(frame_ctx_helper->tx32[0])); + + memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++) + memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i], + sizeof(frame_ctx_helper->inter_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++) + memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i], + sizeof(frame_ctx_helper->interp_filter[0])); + + memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob, + sizeof(frame_ctx_helper->is_inter)); + + memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob, + sizeof(frame_ctx_helper->comp_mode)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++) + memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i], + sizeof(frame_ctx_helper->single_ref[0])); + + memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob, + sizeof(frame_ctx_helper->comp_ref)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++) + memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i], + sizeof(frame_ctx_helper->y_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++) + memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i], + sizeof(frame_ctx_helper->uv_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++) + memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i], + sizeof(frame_ctx_helper->partition[0])); + + memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint)); + + for (i = 0; i < ARRAY_SIZE(mv->sign); i++) + mv->sign[i] = frame_ctx->sign_classes[i].sign; + + for (i = 0; i < ARRAY_SIZE(mv->classes); i++) + memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes, + sizeof(mv->classes[i])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++) + mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0]; + + for (i = 0; i < ARRAY_SIZE(mv->bits); i++) + memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++) + for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++) + memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j], + sizeof(mv->class0_fr[0][0])); + + for (i = 0; i < ARRAY_SIZE(mv->fr); i++) + memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++) + mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp; + + for (i = 0; i < ARRAY_SIZE(mv->hp); i++) + mv->hp[i] = frame_ctx->class0_fp_hp[i].hp; +} + +static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper, + struct vdec_vp9_slice_frame_ctx *frame_ctx) +{ + struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; + u32 i, j, k; + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++) + memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i], + sizeof(frame_ctx_helper->tx8[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++) + memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i], + sizeof(frame_ctx_helper->tx16[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++) + memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i], + sizeof(frame_ctx_helper->tx32[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) + for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) + for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) + vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx, + frame_ctx_helper); + + memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++) + memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i], + sizeof(frame_ctx_helper->inter_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++) + memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i], + sizeof(frame_ctx_helper->interp_filter[0])); + + memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter, + sizeof(frame_ctx_helper->is_inter)); + + memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode, + sizeof(frame_ctx_helper->comp_mode)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++) + memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i], + sizeof(frame_ctx_helper->single_ref[0])); + + memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref, + sizeof(frame_ctx_helper->comp_ref)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++) + memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i], + sizeof(frame_ctx_helper->y_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++) + memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i], + sizeof(frame_ctx_helper->uv_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++) + memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i], + sizeof(frame_ctx_helper->partition[0])); + + memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint)); + + for (i = 0; i < ARRAY_SIZE(mv->sign); i++) + frame_ctx->sign_classes[i].sign = mv->sign[i]; + + for (i = 0; i < ARRAY_SIZE(mv->classes); i++) + memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i], + sizeof(mv->classes[i])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++) + frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i]; + + for (i = 0; i < ARRAY_SIZE(mv->bits); i++) + memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++) + for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++) + memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j], + sizeof(mv->class0_fr[0][0])); + + for (i = 0; i < ARRAY_SIZE(mv->fr); i++) + memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++) + frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i]; + + for (i = 0; i < ARRAY_SIZE(mv->hp); i++) + frame_ctx->class0_fp_hp[i].hp = mv->hp[i]; +} + +static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi) +{ + struct vdec_vp9_slice_frame_ctx *pre_frame_ctx; + struct v4l2_vp9_frame_context *pre_frame_ctx_helper; + struct vdec_vp9_slice_frame_ctx *frame_ctx; + struct vdec_vp9_slice_frame_counts *counts; + struct v4l2_vp9_frame_symbol_counts *counts_helper; + struct vdec_vp9_slice_uncompressed_header *uh; + bool frame_is_intra; + bool use_128; + + uh = &vsi->frame.uh; + pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; + pre_frame_ctx_helper = &instance->frame_ctx_helper; + frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va; + counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va; + counts_helper = &instance->counts_helper; + + if (!uh->refresh_frame_context) + return 0; + + if (!uh->frame_parallel_decoding_mode) { + vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper); + + frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only; + /* check default prob */ + if (!instance->dirty[uh->frame_context_idx]) + vdec_vp9_slice_framectx_map_helper(frame_is_intra, + vdec_vp9_slice_default_frame_ctx, + frame_ctx, + pre_frame_ctx_helper); + else + vdec_vp9_slice_framectx_map_helper(frame_is_intra, + pre_frame_ctx, + frame_ctx, + pre_frame_ctx_helper); + + use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type; + v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper, + counts_helper, + use_128, + frame_is_intra); + if (!frame_is_intra) + v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper, + counts_helper, + V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE, + vsi->frame.uh.interpolation_filter, + vsi->frame.ch.tx_mode, + vsi->frame.uh.allow_high_precision_mv ? + V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0); + vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx); + } else { + memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx)); + } + + instance->dirty[uh->frame_context_idx] = 1; + + return 0; +} + +static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi; + + vsi = &pfc->vsi; + memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); + + mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n", + pfc->seq, + vsi->state.crc[0], vsi->state.crc[1], + vsi->state.crc[2], vsi->state.crc[3]); + mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n", + pfc->seq, + vsi->state.crc[4], vsi->state.crc[5], + vsi->state.crc[6], vsi->state.crc[7]); + + vdec_vp9_slice_update_prob(instance, vsi); + + instance->width = vsi->frame.uh.frame_width; + instance->height = vsi->frame.uh.frame_height; + instance->frame_type = vsi->frame.uh.frame_type; + instance->show_frame = vsi->frame.uh.show_frame; + + return 0; +} + +static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi; + + vsi = &pfc->vsi; + memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); + + mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n", + pfc->seq, vsi->state.crc[0], + (unsigned long)vsi->trans.dma_addr, + (unsigned long)vsi->trans.dma_addr_end); + + /* buffer full, need to re-decode */ + if (vsi->state.full) { + /* buffer not enough */ + if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == + vsi->ube.size) + return -ENOMEM; + return -EAGAIN; + } + + vdec_vp9_slice_update_prob(instance, vsi); + + instance->width = vsi->frame.uh.frame_width; + instance->height = vsi->frame.uh.frame_height; + instance->frame_type = vsi->frame.uh.frame_type; + instance->show_frame = vsi->frame.uh.show_frame; + + return 0; +} + +static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_v4l2_buffer *dst; + + dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); + if (!dst) + return -EINVAL; + + v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true); + return 0; +} + +static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc, + struct vdec_vp9_slice_vsi *vsi, + struct vdec_fb *fb, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_buffer *vb; + struct vb2_queue *vq; + struct vdec_vp9_slice_reference *ref; + int plane; + int size; + int w; + int h; + int i; + + plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + w = vsi->frame.uh.frame_width; + h = vsi->frame.uh.frame_height; + size = ALIGN(w, 64) * ALIGN(h, 64); + + /* frame buffer */ + vsi->fb.y.dma_addr = fb->base_y.dma_addr; + if (plane == 1) + vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; + else + vsi->fb.c.dma_addr = fb->base_c.dma_addr; + + /* reference buffers */ + vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + if (!vq) + return -EINVAL; + + /* get current output buffer */ + vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf; + if (!vb) + return -EINVAL; + + /* update internal buffer's width/height */ + for (i = 0; i < vq->num_buffers; i++) { + if (vb == vq->bufs[i]) { + instance->dpb[i].width = w; + instance->dpb[i].height = h; + break; + } + } + + /* + * get buffer's width/height from instance + * get buffer address from vb2buf + */ + for (i = 0; i < 3; i++) { + ref = &vsi->frame.ref[i]; + vb = vb2_find_buffer(vq, pfc->ref_idx[i]); + if (!vb) { + ref->frame_width = w; + ref->frame_height = h; + memset(&vsi->ref[i], 0, sizeof(vsi->ref[i])); + } else { + int idx = vb->index; + + ref->frame_width = instance->dpb[idx].width; + ref->frame_height = instance->dpb[idx].height; + vsi->ref[i].y.dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 0); + if (plane == 1) + vsi->ref[i].c.dma_addr = + vsi->ref[i].y.dma_addr + size; + else + vsi->ref[i].c.dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 1); + } + } + + return 0; +} + +static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *bs, + struct vdec_fb *fb) +{ + int i; + + vsi->bs.buf.dma_addr = bs->dma_addr; + vsi->bs.buf.size = bs->size; + vsi->bs.frame.dma_addr = bs->dma_addr; + vsi->bs.frame.size = bs->size; + + for (i = 0; i < 2; i++) { + vsi->mv[i].dma_addr = instance->mv[i].dma_addr; + vsi->mv[i].size = instance->mv[i].size; + } + for (i = 0; i < 2; i++) { + vsi->seg[i].dma_addr = instance->seg[i].dma_addr; + vsi->seg[i].size = instance->seg[i].size; + } + vsi->tile.dma_addr = instance->tile.dma_addr; + vsi->tile.size = instance->tile.size; + vsi->prob.dma_addr = instance->prob.dma_addr; + vsi->prob.size = instance->prob.size; + vsi->counts.dma_addr = instance->counts.dma_addr; + vsi->counts.size = instance->counts.size; + + vsi->row_info.buf = 0; + vsi->row_info.size = 0; + + vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL); +} + +static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance, + struct vdec_fb *fb, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; + int ret; + + vdec_vp9_slice_setup_state(vsi); + + ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); + if (ret) + goto err; + + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]); + + return 0; + +err: + return ret; +} + +static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance, + struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; + int ret; + + ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_pfc(instance, pfc); + if (ret) + goto err; + + ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); + if (ret) + goto err; + + vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb); + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]); + + ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); + if (ret) + goto err; + + return 0; + +err: + return ret; +} + +static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi; + + vsi = &pfc->vsi; + memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state)); + + mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n", + pfc->seq, + vsi->state.crc[0], vsi->state.crc[1], + vsi->state.crc[2], vsi->state.crc[3]); + mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n", + pfc->seq, + vsi->state.crc[4], vsi->state.crc[5], + vsi->state.crc[6], vsi->state.crc[7]); + + return 0; +} + +static int vdec_vp9_slice_init(struct mtk_vcodec_ctx *ctx) +{ + struct vdec_vp9_slice_instance *instance; + struct vdec_vp9_slice_init_vsi *vsi; + int ret; + + instance = kzalloc(sizeof(*instance), GFP_KERNEL); + if (!instance) + return -ENOMEM; + + instance->ctx = ctx; + instance->vpu.id = SCP_IPI_VDEC_LAT; + instance->vpu.core_id = SCP_IPI_VDEC_CORE; + instance->vpu.ctx = ctx; + instance->vpu.codec_type = ctx->current_codec; + + ret = vpu_dec_init(&instance->vpu); + if (ret) { + mtk_vcodec_err(instance, "failed to init vpu dec, ret %d\n", ret); + goto error_vpu_init; + } + + /* init vsi and global flags */ + + vsi = instance->vpu.vsi; + if (!vsi) { + mtk_vcodec_err(instance, "failed to get VP9 vsi\n"); + ret = -EINVAL; + goto error_vsi; + } + instance->init_vsi = vsi; + instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, + (u32)vsi->core_vsi); + if (!instance->core_vsi) { + mtk_vcodec_err(instance, "failed to get VP9 core vsi\n"); + ret = -EINVAL; + goto error_vsi; + } + + instance->irq = 1; + + ret = vdec_vp9_slice_init_default_frame_ctx(instance); + if (ret) + goto error_default_frame_ctx; + + ctx->drv_handle = instance; + + return 0; + +error_default_frame_ctx: +error_vsi: + vpu_dec_deinit(&instance->vpu); +error_vpu_init: + kfree(instance); + return ret; +} + +static void vdec_vp9_slice_deinit(void *h_vdec) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + + if (!instance) + return; + + vpu_dec_deinit(&instance->vpu); + vdec_vp9_slice_free_working_buffer(instance); + vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx); + kfree(instance); +} + +static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + + mtk_vcodec_debug(instance, "flush ...\n"); + if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE) + vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue); + return vpu_dec_reset(&instance->vpu); +} + +static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance) +{ + struct mtk_vcodec_ctx *ctx = instance->ctx; + unsigned int data[3]; + + mtk_vcodec_debug(instance, "w %u h %u\n", + ctx->picinfo.pic_w, ctx->picinfo.pic_h); + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64); + ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; +} + +static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance, + unsigned int *dpb_sz) +{ + /* refer VP9 specification */ + *dpb_sz = 9; +} + +static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_vp9_slice_get_pic_info(instance); + break; + case GET_PARAM_DPB_SIZE: + vdec_vp9_slice_get_dpb_size(instance, out); + break; + case GET_PARAM_CROP_INFO: + mtk_vcodec_debug(instance, "No need to get vp9 crop information."); + break; + default: + mtk_vcodec_err(instance, "invalid get parameter type=%d\n", + type); + return -EINVAL; + } + + return 0; +} + +static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc; + struct vdec_vp9_slice_vsi *vsi; + struct mtk_vcodec_ctx *ctx; + int ret; + + if (!instance || !instance->ctx) + return -EINVAL; + ctx = instance->ctx; + + /* bs NULL means flush decoder */ + if (!bs) + return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) + return -EBUSY; + + vsi = &pfc->vsi; + + ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc); + if (ret) { + mtk_vcodec_err(instance, "Failed to setup VP9 single ret %d\n", ret); + return ret; + } + vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi); + + ret = vpu_dec_start(&instance->vpu, NULL, 0); + if (ret) { + mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret); + return ret; + } + + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vcodec_err(instance, "VP9 decode timeout %d\n", ret); + WRITE_ONCE(instance->vsi->state.timeout, 1); + } + + vpu_dec_end(&instance->vpu); + + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); + ret = vdec_vp9_slice_update_single(instance, pfc); + if (ret) { + mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret); + return ret; + } + + instance->ctx->decoded_frame_cnt++; + return 0; +} + +static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + struct vdec_lat_buf *lat_buf; + struct vdec_vp9_slice_pfc *pfc; + struct vdec_vp9_slice_vsi *vsi; + struct mtk_vcodec_ctx *ctx; + int ret; + + if (!instance || !instance->ctx) + return -EINVAL; + ctx = instance->ctx; + + /* init msgQ for the first time */ + if (vdec_msg_queue_init(&ctx->msg_queue, ctx, + vdec_vp9_slice_core_decode, + sizeof(*pfc))) + return -ENOMEM; + + /* bs NULL means flush decoder */ + if (!bs) + return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); + + lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx); + if (!lat_buf) { + mtk_vcodec_err(instance, "Failed to get VP9 lat buf\n"); + return -EAGAIN; + } + pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data; + if (!pfc) { + ret = -EINVAL; + goto err_free_fb_out; + } + vsi = &pfc->vsi; + + ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc); + if (ret) { + mtk_vcodec_err(instance, "Failed to setup VP9 lat ret %d\n", ret); + goto err_free_fb_out; + } + vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi); + + ret = vpu_dec_start(&instance->vpu, NULL, 0); + if (ret) { + mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret); + goto err_free_fb_out; + } + + if (instance->irq) { + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vcodec_err(instance, "VP9 decode timeout %d pic %d\n", ret, pfc->seq); + WRITE_ONCE(instance->vsi->state.timeout, 1); + } + vpu_dec_end(&instance->vpu); + } + + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); + ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc); + + /* LAT trans full, no more UBE or decode timeout */ + if (ret) { + mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret); + goto err_free_fb_out; + } + + mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n", + (unsigned long)pfc->vsi.trans.dma_addr, + (unsigned long)pfc->vsi.trans.dma_addr_end); + + vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, + vsi->trans.dma_addr_end + + ctx->msg_queue.wdma_addr.dma_addr); + vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf); + + return 0; +err_free_fb_out: + vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); + return ret; +} + +static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + int ret; + + if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE) + ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg); + else + ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg); + + return ret; +} + +static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf) +{ + struct vdec_vp9_slice_instance *instance; + struct vdec_vp9_slice_pfc *pfc; + struct mtk_vcodec_ctx *ctx = NULL; + struct vdec_fb *fb = NULL; + int ret = -EINVAL; + + if (!lat_buf) + goto err; + + pfc = lat_buf->private_data; + ctx = lat_buf->ctx; + if (!pfc || !ctx) + goto err; + + instance = ctx->drv_handle; + if (!instance) + goto err; + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) { + ret = -EBUSY; + goto err; + } + + ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc); + if (ret) { + mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n"); + goto err; + } + vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi); + + ret = vpu_dec_core(&instance->vpu); + if (ret) { + mtk_vcodec_err(instance, "vpu_dec_core\n"); + goto err; + } + + if (instance->irq) { + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vcodec_err(instance, "VP9 core timeout pic %d\n", pfc->seq); + WRITE_ONCE(instance->core_vsi->state.timeout, 1); + } + vpu_dec_core_end(&instance->vpu); + } + + vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1); + ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc); + if (ret) { + mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n"); + goto err; + } + + pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; + mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n", + (unsigned long)pfc->vsi.trans.dma_addr_end); + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); + ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); + + return 0; + +err: + if (ctx && pfc) { + /* always update read pointer */ + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); + + if (fb) + ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); + } + return ret; +} + +const struct vdec_common_if vdec_vp9_slice_lat_if = { + .init = vdec_vp9_slice_init, + .decode = vdec_vp9_slice_decode, + .get_param = vdec_vp9_slice_get_param, + .deinit = vdec_vp9_slice_deinit, +}; |