diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/media/platform/mediatek/vcodec/decoder | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/media/platform/mediatek/vcodec/decoder')
30 files changed, 15347 insertions, 0 deletions
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/Makefile b/drivers/media/platform/mediatek/vcodec/decoder/Makefile new file mode 100644 index 0000000000..904cd22def --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_VIDEO_MEDIATEK_VCODEC) += mtk-vcodec-dec.o \ + mtk-vcodec-dec-hw.o + +mtk-vcodec-dec-y := vdec/vdec_h264_if.o \ + vdec/vdec_vp8_if.o \ + vdec/vdec_vp8_req_if.o \ + vdec/vdec_vp9_if.o \ + vdec/vdec_vp9_req_lat_if.o \ + vdec/vdec_av1_req_lat_if.o \ + vdec/vdec_h264_req_if.o \ + vdec/vdec_h264_req_common.o \ + vdec/vdec_h264_req_multi_if.o \ + vdec/vdec_hevc_req_multi_if.o \ + mtk_vcodec_dec_drv.o \ + vdec_drv_if.o \ + vdec_vpu_if.o \ + vdec_msg_queue.o \ + mtk_vcodec_dec.o \ + mtk_vcodec_dec_stateful.o \ + mtk_vcodec_dec_stateless.o \ + mtk_vcodec_dec_pm.o \ + +mtk-vcodec-dec-hw-y := mtk_vcodec_dec_hw.o diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec.c new file mode 100644 index 0000000000..91ed576d68 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec.c @@ -0,0 +1,1020 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + * Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> + +#include "mtk_vcodec_dec_drv.h" +#include "mtk_vcodec_dec.h" +#include "vdec_drv_if.h" +#include "mtk_vcodec_dec_pm.h" + +#define DFT_CFG_WIDTH MTK_VDEC_MIN_W +#define DFT_CFG_HEIGHT MTK_VDEC_MIN_H + +static const struct mtk_video_fmt * +mtk_vdec_find_format(struct v4l2_format *f, + const struct mtk_vcodec_dec_pdata *dec_pdata) +{ + const struct mtk_video_fmt *fmt; + unsigned int k; + + for (k = 0; k < *dec_pdata->num_formats; k++) { + fmt = &dec_pdata->vdec_formats[k]; + if (fmt->fourcc == f->fmt.pix_mp.pixelformat) + return fmt; + } + + return NULL; +} + +static bool mtk_vdec_get_cap_fmt(struct mtk_vcodec_dec_ctx *ctx, int format_index) +{ + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + const struct mtk_video_fmt *fmt; + struct mtk_q_data *q_data; + int num_frame_count = 0, i; + bool ret = false; + + fmt = &dec_pdata->vdec_formats[format_index]; + for (i = 0; i < *dec_pdata->num_formats; i++) { + if (dec_pdata->vdec_formats[i].type != MTK_FMT_FRAME) + continue; + + num_frame_count++; + } + + if (num_frame_count == 1 || (!ctx->is_10bit_bitstream && fmt->fourcc == V4L2_PIX_FMT_MM21)) + return true; + + q_data = &ctx->q_data[MTK_Q_DATA_SRC]; + switch (q_data->fmt->fourcc) { + case V4L2_PIX_FMT_H264_SLICE: + if (ctx->is_10bit_bitstream && fmt->fourcc == V4L2_PIX_FMT_MT2110R) + ret = true; + break; + case V4L2_PIX_FMT_VP9_FRAME: + case V4L2_PIX_FMT_AV1_FRAME: + case V4L2_PIX_FMT_HEVC_SLICE: + if (ctx->is_10bit_bitstream && fmt->fourcc == V4L2_PIX_FMT_MT2110T) + ret = true; + break; + default: + break; + } + + return ret; +} + +static struct mtk_q_data *mtk_vdec_get_q_data(struct mtk_vcodec_dec_ctx *ctx, + enum v4l2_buf_type type) +{ + if (V4L2_TYPE_IS_OUTPUT(type)) + return &ctx->q_data[MTK_Q_DATA_SRC]; + + return &ctx->q_data[MTK_Q_DATA_DST]; +} + +static int vidioc_try_decoder_cmd(struct file *file, void *priv, + struct v4l2_decoder_cmd *cmd) +{ + return v4l2_m2m_ioctl_try_decoder_cmd(file, priv, cmd); +} + + +static int vidioc_decoder_cmd(struct file *file, void *priv, + struct v4l2_decoder_cmd *cmd) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + struct vb2_queue *src_vq, *dst_vq; + int ret; + + ret = vidioc_try_decoder_cmd(file, priv, cmd); + if (ret) + return ret; + + mtk_v4l2_vdec_dbg(1, ctx, "decoder cmd=%u", cmd->cmd); + dst_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + switch (cmd->cmd) { + case V4L2_DEC_CMD_STOP: + src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + if (!vb2_is_streaming(src_vq)) { + mtk_v4l2_vdec_dbg(1, ctx, "Output stream is off. No need to flush."); + return 0; + } + if (!vb2_is_streaming(dst_vq)) { + mtk_v4l2_vdec_dbg(1, ctx, "Capture stream is off. No need to flush."); + return 0; + } + v4l2_m2m_buf_queue(ctx->m2m_ctx, &ctx->empty_flush_buf.vb); + v4l2_m2m_try_schedule(ctx->m2m_ctx); + break; + + case V4L2_DEC_CMD_START: + vb2_clear_last_buffer_dequeued(dst_vq); + break; + + default: + return -EINVAL; + } + + return 0; +} + +void mtk_vdec_unlock(struct mtk_vcodec_dec_ctx *ctx) +{ + mutex_unlock(&ctx->dev->dec_mutex[ctx->hw_id]); +} + +void mtk_vdec_lock(struct mtk_vcodec_dec_ctx *ctx) +{ + mutex_lock(&ctx->dev->dec_mutex[ctx->hw_id]); +} + +void mtk_vcodec_dec_release(struct mtk_vcodec_dec_ctx *ctx) +{ + vdec_if_deinit(ctx); + ctx->state = MTK_STATE_FREE; +} + +void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_dec_ctx *ctx) +{ + struct mtk_q_data *q_data; + + ctx->m2m_ctx->q_lock = &ctx->dev->dev_mutex; + ctx->fh.m2m_ctx = ctx->m2m_ctx; + ctx->fh.ctrl_handler = &ctx->ctrl_hdl; + INIT_WORK(&ctx->decode_work, ctx->dev->vdec_pdata->worker); + ctx->colorspace = V4L2_COLORSPACE_REC709; + ctx->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; + ctx->quantization = V4L2_QUANTIZATION_DEFAULT; + ctx->xfer_func = V4L2_XFER_FUNC_DEFAULT; + + q_data = &ctx->q_data[MTK_Q_DATA_SRC]; + memset(q_data, 0, sizeof(struct mtk_q_data)); + q_data->visible_width = DFT_CFG_WIDTH; + q_data->visible_height = DFT_CFG_HEIGHT; + q_data->fmt = ctx->dev->vdec_pdata->default_out_fmt; + q_data->field = V4L2_FIELD_NONE; + + q_data->sizeimage[0] = DFT_CFG_WIDTH * DFT_CFG_HEIGHT; + q_data->bytesperline[0] = 0; + + q_data = &ctx->q_data[MTK_Q_DATA_DST]; + memset(q_data, 0, sizeof(struct mtk_q_data)); + q_data->visible_width = DFT_CFG_WIDTH; + q_data->visible_height = DFT_CFG_HEIGHT; + q_data->coded_width = DFT_CFG_WIDTH; + q_data->coded_height = DFT_CFG_HEIGHT; + q_data->fmt = ctx->dev->vdec_pdata->default_cap_fmt; + q_data->field = V4L2_FIELD_NONE; + + q_data->sizeimage[0] = q_data->coded_width * q_data->coded_height; + q_data->bytesperline[0] = q_data->coded_width; + q_data->sizeimage[1] = q_data->sizeimage[0] / 2; + q_data->bytesperline[1] = q_data->coded_width; +} + +static int vidioc_vdec_qbuf(struct file *file, void *priv, + struct v4l2_buffer *buf) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + + if (ctx->state == MTK_STATE_ABORT) { + mtk_v4l2_vdec_err(ctx, "[%d] Call on QBUF after unrecoverable error", ctx->id); + return -EIO; + } + + return v4l2_m2m_qbuf(file, ctx->m2m_ctx, buf); +} + +static int vidioc_vdec_dqbuf(struct file *file, void *priv, + struct v4l2_buffer *buf) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + + if (ctx->state == MTK_STATE_ABORT) { + mtk_v4l2_vdec_err(ctx, "[%d] Call on DQBUF after unrecoverable error", ctx->id); + return -EIO; + } + + return v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf); +} + +static int mtk_vcodec_dec_get_chip_name(void *priv) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + struct device *dev = &ctx->dev->plat_dev->dev; + + if (of_device_is_compatible(dev->of_node, "mediatek,mt8173-vcodec-dec")) + return 8173; + else if (of_device_is_compatible(dev->of_node, "mediatek,mt8183-vcodec-dec")) + return 8183; + else if (of_device_is_compatible(dev->of_node, "mediatek,mt8192-vcodec-dec")) + return 8192; + else if (of_device_is_compatible(dev->of_node, "mediatek,mt8195-vcodec-dec")) + return 8195; + else if (of_device_is_compatible(dev->of_node, "mediatek,mt8186-vcodec-dec")) + return 8186; + else if (of_device_is_compatible(dev->of_node, "mediatek,mt8188-vcodec-dec")) + return 8188; + else + return 8173; +} + +static int vidioc_vdec_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + struct device *dev = &ctx->dev->plat_dev->dev; + int platform_name = mtk_vcodec_dec_get_chip_name(priv); + + strscpy(cap->driver, dev->driver->name, sizeof(cap->driver)); + snprintf(cap->card, sizeof(cap->card), "MT%d video decoder", platform_name); + + return 0; +} + +static int vidioc_vdec_subscribe_evt(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(fh); + + if (ctx->dev->vdec_pdata->uses_stateless_api) + return v4l2_ctrl_subscribe_event(fh, sub); + + switch (sub->type) { + case V4L2_EVENT_EOS: + return v4l2_event_subscribe(fh, sub, 2, NULL); + case V4L2_EVENT_SOURCE_CHANGE: + return v4l2_src_change_event_subscribe(fh, sub); + default: + return v4l2_ctrl_subscribe_event(fh, sub); + } +} + +static int vidioc_try_fmt(struct mtk_vcodec_dec_ctx *ctx, struct v4l2_format *f, + const struct mtk_video_fmt *fmt) +{ + struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; + const struct v4l2_frmsize_stepwise *frmsize; + + pix_fmt_mp->field = V4L2_FIELD_NONE; + + /* Always apply frame size constraints from the coded side */ + if (V4L2_TYPE_IS_OUTPUT(f->type)) + frmsize = &fmt->frmsize; + else + frmsize = &ctx->q_data[MTK_Q_DATA_SRC].fmt->frmsize; + + pix_fmt_mp->width = clamp(pix_fmt_mp->width, MTK_VDEC_MIN_W, frmsize->max_width); + pix_fmt_mp->height = clamp(pix_fmt_mp->height, MTK_VDEC_MIN_H, frmsize->max_height); + + if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + pix_fmt_mp->num_planes = 1; + pix_fmt_mp->plane_fmt[0].bytesperline = 0; + } else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + int tmp_w, tmp_h; + + /* + * Find next closer width align 64, heign align 64, size align + * 64 rectangle + * Note: This only get default value, the real HW needed value + * only available when ctx in MTK_STATE_HEADER state + */ + tmp_w = pix_fmt_mp->width; + tmp_h = pix_fmt_mp->height; + v4l_bound_align_image(&pix_fmt_mp->width, MTK_VDEC_MIN_W, frmsize->max_width, 6, + &pix_fmt_mp->height, MTK_VDEC_MIN_H, frmsize->max_height, 6, + 9); + + if (pix_fmt_mp->width < tmp_w && + (pix_fmt_mp->width + 64) <= frmsize->max_width) + pix_fmt_mp->width += 64; + if (pix_fmt_mp->height < tmp_h && + (pix_fmt_mp->height + 64) <= frmsize->max_height) + pix_fmt_mp->height += 64; + + mtk_v4l2_vdec_dbg(0, ctx, + "before resize wxh=%dx%d, after resize wxh=%dx%d, sizeimage=%d", + tmp_w, tmp_h, pix_fmt_mp->width, pix_fmt_mp->height, + pix_fmt_mp->width * pix_fmt_mp->height); + + pix_fmt_mp->num_planes = fmt->num_planes; + pix_fmt_mp->plane_fmt[0].sizeimage = + pix_fmt_mp->width * pix_fmt_mp->height; + pix_fmt_mp->plane_fmt[0].bytesperline = pix_fmt_mp->width; + + if (pix_fmt_mp->num_planes == 2) { + pix_fmt_mp->plane_fmt[1].sizeimage = + (pix_fmt_mp->width * pix_fmt_mp->height) / 2; + pix_fmt_mp->plane_fmt[1].bytesperline = + pix_fmt_mp->width; + } + } + + pix_fmt_mp->flags = 0; + return 0; +} + +static int vidioc_try_fmt_vid_cap_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + const struct mtk_video_fmt *fmt; + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + + fmt = mtk_vdec_find_format(f, dec_pdata); + if (!fmt) { + f->fmt.pix.pixelformat = + ctx->q_data[MTK_Q_DATA_DST].fmt->fourcc; + fmt = mtk_vdec_find_format(f, dec_pdata); + } + + return vidioc_try_fmt(ctx, f, fmt); +} + +static int vidioc_try_fmt_vid_out_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; + const struct mtk_video_fmt *fmt; + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + + fmt = mtk_vdec_find_format(f, dec_pdata); + if (!fmt) { + f->fmt.pix.pixelformat = + ctx->q_data[MTK_Q_DATA_SRC].fmt->fourcc; + fmt = mtk_vdec_find_format(f, dec_pdata); + } + + if (pix_fmt_mp->plane_fmt[0].sizeimage == 0) { + mtk_v4l2_vdec_err(ctx, "sizeimage of output format must be given"); + return -EINVAL; + } + + return vidioc_try_fmt(ctx, f, fmt); +} + +static int vidioc_vdec_g_selection(struct file *file, void *priv, + struct v4l2_selection *s) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + struct mtk_q_data *q_data; + + if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + q_data = &ctx->q_data[MTK_Q_DATA_DST]; + + switch (s->target) { + case V4L2_SEL_TGT_COMPOSE_DEFAULT: + s->r.left = 0; + s->r.top = 0; + s->r.width = ctx->picinfo.pic_w; + s->r.height = ctx->picinfo.pic_h; + break; + case V4L2_SEL_TGT_COMPOSE_BOUNDS: + s->r.left = 0; + s->r.top = 0; + s->r.width = ctx->picinfo.buf_w; + s->r.height = ctx->picinfo.buf_h; + break; + case V4L2_SEL_TGT_COMPOSE: + if (vdec_if_get_param(ctx, GET_PARAM_CROP_INFO, &(s->r))) { + /* set to default value if header info not ready yet*/ + s->r.left = 0; + s->r.top = 0; + s->r.width = q_data->visible_width; + s->r.height = q_data->visible_height; + } + break; + default: + return -EINVAL; + } + + if (ctx->state < MTK_STATE_HEADER) { + /* set to default value if header info not ready yet*/ + s->r.left = 0; + s->r.top = 0; + s->r.width = q_data->visible_width; + s->r.height = q_data->visible_height; + return 0; + } + + return 0; +} + +static int vidioc_vdec_s_selection(struct file *file, void *priv, + struct v4l2_selection *s) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + + if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + switch (s->target) { + case V4L2_SEL_TGT_COMPOSE: + s->r.left = 0; + s->r.top = 0; + s->r.width = ctx->picinfo.pic_w; + s->r.height = ctx->picinfo.pic_h; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int vidioc_vdec_s_fmt(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + struct v4l2_pix_format_mplane *pix_mp; + struct mtk_q_data *q_data; + int ret = 0; + const struct mtk_video_fmt *fmt; + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d]", ctx->id); + + q_data = mtk_vdec_get_q_data(ctx, f->type); + if (!q_data) + return -EINVAL; + + pix_mp = &f->fmt.pix_mp; + /* + * Setting OUTPUT format after OUTPUT buffers are allocated is invalid + * if using the stateful API. + */ + if (!dec_pdata->uses_stateless_api && + f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && + vb2_is_busy(&ctx->m2m_ctx->out_q_ctx.q)) { + mtk_v4l2_vdec_err(ctx, "out_q_ctx buffers already requested"); + ret = -EBUSY; + } + + /* + * Setting CAPTURE format after CAPTURE buffers are allocated is + * invalid. + */ + if ((f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) && + vb2_is_busy(&ctx->m2m_ctx->cap_q_ctx.q)) { + mtk_v4l2_vdec_err(ctx, "cap_q_ctx buffers already requested"); + ret = -EBUSY; + } + + fmt = mtk_vdec_find_format(f, dec_pdata); + if (fmt == NULL) { + if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + f->fmt.pix.pixelformat = + dec_pdata->default_out_fmt->fourcc; + fmt = mtk_vdec_find_format(f, dec_pdata); + } else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + f->fmt.pix.pixelformat = + dec_pdata->default_cap_fmt->fourcc; + fmt = mtk_vdec_find_format(f, dec_pdata); + } + } + if (fmt == NULL) + return -EINVAL; + + q_data->fmt = fmt; + vidioc_try_fmt(ctx, f, q_data->fmt); + if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + q_data->sizeimage[0] = pix_mp->plane_fmt[0].sizeimage; + q_data->coded_width = pix_mp->width; + q_data->coded_height = pix_mp->height; + + ctx->colorspace = pix_mp->colorspace; + ctx->ycbcr_enc = pix_mp->ycbcr_enc; + ctx->quantization = pix_mp->quantization; + ctx->xfer_func = pix_mp->xfer_func; + + ctx->current_codec = fmt->fourcc; + if (ctx->state == MTK_STATE_FREE) { + ret = vdec_if_init(ctx, q_data->fmt->fourcc); + if (ret) { + mtk_v4l2_vdec_err(ctx, "[%d]: vdec_if_init() fail ret=%d", + ctx->id, ret); + return -EINVAL; + } + ctx->state = MTK_STATE_INIT; + } + } else { + ctx->capture_fourcc = fmt->fourcc; + } + + /* + * If using the stateless API, S_FMT should have the effect of setting + * the CAPTURE queue resolution no matter which queue it was called on. + */ + if (dec_pdata->uses_stateless_api) { + ctx->picinfo.pic_w = pix_mp->width; + ctx->picinfo.pic_h = pix_mp->height; + + /* + * If get pic info fail, need to use the default pic info params, or + * v4l2-compliance will fail + */ + ret = vdec_if_get_param(ctx, GET_PARAM_PIC_INFO, &ctx->picinfo); + if (ret) { + mtk_v4l2_vdec_err(ctx, "[%d]Error!! Get GET_PARAM_PICTURE_INFO Fail", + ctx->id); + } + + ctx->last_decoded_picinfo = ctx->picinfo; + + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1) { + ctx->q_data[MTK_Q_DATA_DST].sizeimage[0] = + ctx->picinfo.fb_sz[0] + + ctx->picinfo.fb_sz[1]; + ctx->q_data[MTK_Q_DATA_DST].bytesperline[0] = + ctx->picinfo.buf_w; + } else { + ctx->q_data[MTK_Q_DATA_DST].sizeimage[0] = + ctx->picinfo.fb_sz[0]; + ctx->q_data[MTK_Q_DATA_DST].bytesperline[0] = + ctx->picinfo.buf_w; + ctx->q_data[MTK_Q_DATA_DST].sizeimage[1] = + ctx->picinfo.fb_sz[1]; + ctx->q_data[MTK_Q_DATA_DST].bytesperline[1] = + ctx->picinfo.buf_w; + } + + ctx->q_data[MTK_Q_DATA_DST].coded_width = ctx->picinfo.buf_w; + ctx->q_data[MTK_Q_DATA_DST].coded_height = ctx->picinfo.buf_h; + mtk_v4l2_vdec_dbg(2, ctx, + "[%d] init() plane:%d wxh=%dx%d pic wxh=%dx%d sz=0x%x_0x%x", + ctx->id, pix_mp->num_planes, + ctx->picinfo.buf_w, ctx->picinfo.buf_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->q_data[MTK_Q_DATA_DST].sizeimage[0], + ctx->q_data[MTK_Q_DATA_DST].sizeimage[1]); + } + return 0; +} + +static int vidioc_enum_framesizes(struct file *file, void *priv, + struct v4l2_frmsizeenum *fsize) +{ + int i = 0; + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + + if (fsize->index != 0) + return -EINVAL; + + for (i = 0; i < *dec_pdata->num_formats; i++) { + if (fsize->pixel_format != dec_pdata->vdec_formats[i].fourcc) + continue; + + /* Only coded formats have frame sizes set */ + if (!dec_pdata->vdec_formats[i].frmsize.max_width) + return -ENOTTY; + + fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; + fsize->stepwise = dec_pdata->vdec_formats[i].frmsize; + + mtk_v4l2_vdec_dbg(1, ctx, "%x, %d %d %d %d %d %d", + ctx->dev->dec_capability, fsize->stepwise.min_width, + fsize->stepwise.max_width, fsize->stepwise.step_width, + fsize->stepwise.min_height, fsize->stepwise.max_height, + fsize->stepwise.step_height); + + return 0; + } + + return -EINVAL; +} + +static int vidioc_enum_fmt(struct v4l2_fmtdesc *f, void *priv, + bool output_queue) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + const struct mtk_video_fmt *fmt; + int i, j = 0; + + for (i = 0; i < *dec_pdata->num_formats; i++) { + if (output_queue && + dec_pdata->vdec_formats[i].type != MTK_FMT_DEC) + continue; + if (!output_queue && + dec_pdata->vdec_formats[i].type != MTK_FMT_FRAME) + continue; + + if (!output_queue && !mtk_vdec_get_cap_fmt(ctx, i)) + continue; + + if (j == f->index) + break; + ++j; + } + + if (i == *dec_pdata->num_formats) + return -EINVAL; + + fmt = &dec_pdata->vdec_formats[i]; + f->pixelformat = fmt->fourcc; + f->flags = fmt->flags; + + return 0; +} + +static int vidioc_vdec_enum_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt(f, priv, false); +} + +static int vidioc_vdec_enum_fmt_vid_out(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt(f, priv, true); +} + +static int vidioc_vdec_g_fmt(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(priv); + struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp; + struct vb2_queue *vq; + struct mtk_q_data *q_data; + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); + if (!vq) { + mtk_v4l2_vdec_err(ctx, "no vb2 queue for type=%d", f->type); + return -EINVAL; + } + + q_data = mtk_vdec_get_q_data(ctx, f->type); + + pix_mp->field = V4L2_FIELD_NONE; + pix_mp->colorspace = ctx->colorspace; + pix_mp->ycbcr_enc = ctx->ycbcr_enc; + pix_mp->quantization = ctx->quantization; + pix_mp->xfer_func = ctx->xfer_func; + + if ((f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) && + (ctx->state >= MTK_STATE_HEADER)) { + /* Until STREAMOFF is called on the CAPTURE queue + * (acknowledging the event), the driver operates as if + * the resolution hasn't changed yet. + * So we just return picinfo yet, and update picinfo in + * stop_streaming hook function + */ + q_data->sizeimage[0] = ctx->picinfo.fb_sz[0]; + q_data->sizeimage[1] = ctx->picinfo.fb_sz[1]; + q_data->bytesperline[0] = ctx->last_decoded_picinfo.buf_w; + q_data->bytesperline[1] = ctx->last_decoded_picinfo.buf_w; + q_data->coded_width = ctx->picinfo.buf_w; + q_data->coded_height = ctx->picinfo.buf_h; + ctx->last_decoded_picinfo.cap_fourcc = q_data->fmt->fourcc; + + /* + * Width and height are set to the dimensions + * of the movie, the buffer is bigger and + * further processing stages should crop to this + * rectangle. + */ + pix_mp->width = q_data->coded_width; + pix_mp->height = q_data->coded_height; + + /* + * Set pixelformat to the format in which mt vcodec + * outputs the decoded frame + */ + pix_mp->num_planes = q_data->fmt->num_planes; + pix_mp->pixelformat = q_data->fmt->fourcc; + pix_mp->plane_fmt[0].bytesperline = q_data->bytesperline[0]; + pix_mp->plane_fmt[0].sizeimage = q_data->sizeimage[0]; + pix_mp->plane_fmt[1].bytesperline = q_data->bytesperline[1]; + pix_mp->plane_fmt[1].sizeimage = q_data->sizeimage[1]; + + } else if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + /* + * This is run on OUTPUT + * The buffer contains compressed image + * so width and height have no meaning. + * Assign value here to pass v4l2-compliance test + */ + pix_mp->width = q_data->visible_width; + pix_mp->height = q_data->visible_height; + pix_mp->plane_fmt[0].bytesperline = q_data->bytesperline[0]; + pix_mp->plane_fmt[0].sizeimage = q_data->sizeimage[0]; + pix_mp->pixelformat = q_data->fmt->fourcc; + pix_mp->num_planes = q_data->fmt->num_planes; + } else { + pix_mp->width = q_data->coded_width; + pix_mp->height = q_data->coded_height; + pix_mp->num_planes = q_data->fmt->num_planes; + pix_mp->pixelformat = q_data->fmt->fourcc; + pix_mp->plane_fmt[0].bytesperline = q_data->bytesperline[0]; + pix_mp->plane_fmt[0].sizeimage = q_data->sizeimage[0]; + pix_mp->plane_fmt[1].bytesperline = q_data->bytesperline[1]; + pix_mp->plane_fmt[1].sizeimage = q_data->sizeimage[1]; + + mtk_v4l2_vdec_dbg(1, ctx, "[%d] type=%d state=%d Format information not ready!", + ctx->id, f->type, ctx->state); + } + + return 0; +} + +int vb2ops_vdec_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, + unsigned int *nplanes, unsigned int sizes[], + struct device *alloc_devs[]) +{ + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(vq); + struct mtk_q_data *q_data; + unsigned int i; + + q_data = mtk_vdec_get_q_data(ctx, vq->type); + + if (q_data == NULL) { + mtk_v4l2_vdec_err(ctx, "vq->type=%d err\n", vq->type); + return -EINVAL; + } + + if (*nplanes) { + if (vq->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + if (*nplanes != q_data->fmt->num_planes) + return -EINVAL; + } else { + if (*nplanes != 1) + return -EINVAL; + } + for (i = 0; i < *nplanes; i++) { + if (sizes[i] < q_data->sizeimage[i]) + return -EINVAL; + } + } else { + if (vq->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) + *nplanes = q_data->fmt->num_planes; + else + *nplanes = 1; + + for (i = 0; i < *nplanes; i++) + sizes[i] = q_data->sizeimage[i]; + } + + mtk_v4l2_vdec_dbg(1, ctx, + "[%d]\t type = %d, get %d plane(s), %d buffer(s) of size 0x%x 0x%x ", + ctx->id, vq->type, *nplanes, *nbuffers, sizes[0], sizes[1]); + + return 0; +} + +int vb2ops_vdec_buf_prepare(struct vb2_buffer *vb) +{ + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct mtk_q_data *q_data; + int i; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d] (%d) id=%d", + ctx->id, vb->vb2_queue->type, vb->index); + + q_data = mtk_vdec_get_q_data(ctx, vb->vb2_queue->type); + + for (i = 0; i < q_data->fmt->num_planes; i++) { + if (vb2_plane_size(vb, i) < q_data->sizeimage[i]) { + mtk_v4l2_vdec_err(ctx, "data will not fit into plane %d (%lu < %d)", + i, vb2_plane_size(vb, i), q_data->sizeimage[i]); + return -EINVAL; + } + if (!V4L2_TYPE_IS_OUTPUT(vb->type)) + vb2_set_plane_payload(vb, i, q_data->sizeimage[i]); + } + + return 0; +} + +void vb2ops_vdec_buf_finish(struct vb2_buffer *vb) +{ + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct vb2_v4l2_buffer *vb2_v4l2; + struct mtk_video_dec_buf *buf; + bool buf_error; + + vb2_v4l2 = container_of(vb, struct vb2_v4l2_buffer, vb2_buf); + buf = container_of(vb2_v4l2, struct mtk_video_dec_buf, m2m_buf.vb); + mutex_lock(&ctx->lock); + if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + buf->queued_in_v4l2 = false; + buf->queued_in_vb2 = false; + } + buf_error = buf->error; + mutex_unlock(&ctx->lock); + + if (buf_error) { + mtk_v4l2_vdec_err(ctx, "Unrecoverable error on buffer."); + ctx->state = MTK_STATE_ABORT; + } +} + +int vb2ops_vdec_buf_init(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vb2_v4l2 = container_of(vb, + struct vb2_v4l2_buffer, vb2_buf); + struct mtk_video_dec_buf *buf = container_of(vb2_v4l2, + struct mtk_video_dec_buf, m2m_buf.vb); + + if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + buf->used = false; + buf->queued_in_v4l2 = false; + } + + return 0; +} + +int vb2ops_vdec_start_streaming(struct vb2_queue *q, unsigned int count) +{ + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(q); + + if (ctx->state == MTK_STATE_FLUSH) + ctx->state = MTK_STATE_HEADER; + + return 0; +} + +void vb2ops_vdec_stop_streaming(struct vb2_queue *q) +{ + struct vb2_v4l2_buffer *src_buf = NULL, *dst_buf = NULL; + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(q); + int ret; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d] (%d) state=(%x) ctx->decoded_frame_cnt=%d", + ctx->id, q->type, ctx->state, ctx->decoded_frame_cnt); + + if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + while ((src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx))) { + if (src_buf != &ctx->empty_flush_buf.vb) { + struct media_request *req = + src_buf->vb2_buf.req_obj.req; + v4l2_m2m_buf_done(src_buf, + VB2_BUF_STATE_ERROR); + if (req) + v4l2_ctrl_request_complete(req, &ctx->ctrl_hdl); + } + } + return; + } + + if (ctx->state >= MTK_STATE_HEADER) { + + /* Until STREAMOFF is called on the CAPTURE queue + * (acknowledging the event), the driver operates + * as if the resolution hasn't changed yet, i.e. + * VIDIOC_G_FMT< etc. return previous resolution. + * So we update picinfo here + */ + ctx->picinfo = ctx->last_decoded_picinfo; + + mtk_v4l2_vdec_dbg(2, ctx, + "[%d]-> new(%d,%d), old(%d,%d), real(%d,%d)", + ctx->id, ctx->last_decoded_picinfo.pic_w, + ctx->last_decoded_picinfo.pic_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->last_decoded_picinfo.buf_w, + ctx->last_decoded_picinfo.buf_h); + + ret = ctx->dev->vdec_pdata->flush_decoder(ctx); + if (ret) + mtk_v4l2_vdec_err(ctx, "DecodeFinal failed, ret=%d", ret); + } + ctx->state = MTK_STATE_FLUSH; + + while ((dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx))) { + vb2_set_plane_payload(&dst_buf->vb2_buf, 0, 0); + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) + vb2_set_plane_payload(&dst_buf->vb2_buf, 1, 0); + v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); + } + +} + +static void m2mops_vdec_device_run(void *priv) +{ + struct mtk_vcodec_dec_ctx *ctx = priv; + struct mtk_vcodec_dec_dev *dev = ctx->dev; + + queue_work(dev->decode_workqueue, &ctx->decode_work); +} + +static int m2mops_vdec_job_ready(void *m2m_priv) +{ + struct mtk_vcodec_dec_ctx *ctx = m2m_priv; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d]", ctx->id); + + if (ctx->state == MTK_STATE_ABORT) + return 0; + + if ((ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w) || + (ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h)) + return 0; + + if (ctx->state != MTK_STATE_HEADER) + return 0; + + return 1; +} + +static void m2mops_vdec_job_abort(void *priv) +{ + struct mtk_vcodec_dec_ctx *ctx = priv; + + ctx->state = MTK_STATE_ABORT; +} + +const struct v4l2_m2m_ops mtk_vdec_m2m_ops = { + .device_run = m2mops_vdec_device_run, + .job_ready = m2mops_vdec_job_ready, + .job_abort = m2mops_vdec_job_abort, +}; + +const struct v4l2_ioctl_ops mtk_vdec_ioctl_ops = { + .vidioc_streamon = v4l2_m2m_ioctl_streamon, + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, + .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, + + .vidioc_qbuf = vidioc_vdec_qbuf, + .vidioc_dqbuf = vidioc_vdec_dqbuf, + + .vidioc_try_fmt_vid_cap_mplane = vidioc_try_fmt_vid_cap_mplane, + .vidioc_try_fmt_vid_out_mplane = vidioc_try_fmt_vid_out_mplane, + + .vidioc_s_fmt_vid_cap_mplane = vidioc_vdec_s_fmt, + .vidioc_s_fmt_vid_out_mplane = vidioc_vdec_s_fmt, + .vidioc_g_fmt_vid_cap_mplane = vidioc_vdec_g_fmt, + .vidioc_g_fmt_vid_out_mplane = vidioc_vdec_g_fmt, + + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, + + .vidioc_enum_fmt_vid_cap = vidioc_vdec_enum_fmt_vid_cap, + .vidioc_enum_fmt_vid_out = vidioc_vdec_enum_fmt_vid_out, + .vidioc_enum_framesizes = vidioc_enum_framesizes, + + .vidioc_querycap = vidioc_vdec_querycap, + .vidioc_subscribe_event = vidioc_vdec_subscribe_evt, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, + .vidioc_g_selection = vidioc_vdec_g_selection, + .vidioc_s_selection = vidioc_vdec_s_selection, + + .vidioc_decoder_cmd = vidioc_decoder_cmd, + .vidioc_try_decoder_cmd = vidioc_try_decoder_cmd, +}; + +int mtk_vcodec_dec_queue_init(void *priv, struct vb2_queue *src_vq, + struct vb2_queue *dst_vq) +{ + struct mtk_vcodec_dec_ctx *ctx = priv; + int ret = 0; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d]", ctx->id); + + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + src_vq->io_modes = VB2_DMABUF | VB2_MMAP; + src_vq->drv_priv = ctx; + src_vq->buf_struct_size = sizeof(struct mtk_video_dec_buf); + src_vq->ops = ctx->dev->vdec_pdata->vdec_vb2_ops; + src_vq->mem_ops = &vb2_dma_contig_memops; + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->lock = &ctx->dev->dev_mutex; + src_vq->dev = &ctx->dev->plat_dev->dev; + src_vq->allow_cache_hints = 1; + + ret = vb2_queue_init(src_vq); + if (ret) { + mtk_v4l2_vdec_err(ctx, "Failed to initialize videobuf2 queue(output)"); + return ret; + } + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + dst_vq->io_modes = VB2_DMABUF | VB2_MMAP; + dst_vq->drv_priv = ctx; + dst_vq->buf_struct_size = sizeof(struct mtk_video_dec_buf); + dst_vq->ops = ctx->dev->vdec_pdata->vdec_vb2_ops; + dst_vq->mem_ops = &vb2_dma_contig_memops; + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->lock = &ctx->dev->dev_mutex; + dst_vq->dev = &ctx->dev->plat_dev->dev; + dst_vq->allow_cache_hints = 1; + + ret = vb2_queue_init(dst_vq); + if (ret) + mtk_v4l2_vdec_err(ctx, "Failed to initialize videobuf2 queue(capture)"); + + return ret; +} diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec.h new file mode 100644 index 0000000000..ece27c880e --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + * Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#ifndef _MTK_VCODEC_DEC_H_ +#define _MTK_VCODEC_DEC_H_ + +#include <media/videobuf2-core.h> +#include <media/v4l2-mem2mem.h> + +#include "mtk_vcodec_dec_drv.h" + +#define VCODEC_DEC_ALIGNED_64 64 +#define VCODEC_CAPABILITY_4K_DISABLED 0x10 +#define VCODEC_DEC_4K_CODED_WIDTH 4096U +#define VCODEC_DEC_4K_CODED_HEIGHT 2304U +#define MTK_VDEC_MAX_W 2048U +#define MTK_VDEC_MAX_H 1088U +#define MTK_VDEC_MIN_W 64U +#define MTK_VDEC_MIN_H 64U + +#define MTK_VDEC_IRQ_STATUS_DEC_SUCCESS 0x10000 + +/** + * struct vdec_fb - decoder frame buffer + * @base_y : Y plane memory info + * @base_c : C plane memory info + * @status : frame buffer status (vdec_fb_status) + */ +struct vdec_fb { + struct mtk_vcodec_mem base_y; + struct mtk_vcodec_mem base_c; + unsigned int status; +}; + +/** + * struct mtk_video_dec_buf - Private data related to each VB2 buffer. + * @m2m_buf: M2M buffer + * @list: link list + * @used: Capture buffer contain decoded frame data and keep in + * codec data structure + * @queued_in_vb2: Capture buffer is queue in vb2 + * @queued_in_v4l2: Capture buffer is in v4l2 driver, but not in vb2 + * queue yet + * @error: An unrecoverable error occurs on this buffer. + * @frame_buffer: Decode status, and buffer information of Capture buffer + * @bs_buffer: Output buffer info + * + * Note : These status information help us track and debug buffer state + */ +struct mtk_video_dec_buf { + struct v4l2_m2m_buffer m2m_buf; + + bool used; + bool queued_in_vb2; + bool queued_in_v4l2; + bool error; + + union { + struct vdec_fb frame_buffer; + struct mtk_vcodec_mem bs_buffer; + }; +}; + +extern const struct v4l2_ioctl_ops mtk_vdec_ioctl_ops; +extern const struct v4l2_m2m_ops mtk_vdec_m2m_ops; +extern const struct media_device_ops mtk_vcodec_media_ops; +extern const struct mtk_vcodec_dec_pdata mtk_vdec_8173_pdata; +extern const struct mtk_vcodec_dec_pdata mtk_vdec_8183_pdata; +extern const struct mtk_vcodec_dec_pdata mtk_lat_sig_core_pdata; +extern const struct mtk_vcodec_dec_pdata mtk_vdec_single_core_pdata; + + +/* + * mtk_vdec_lock/mtk_vdec_unlock are for ctx instance to + * get/release lock before/after access decoder hw. + * mtk_vdec_lock get decoder hw lock and set curr_ctx + * to ctx instance that get lock + */ +void mtk_vdec_unlock(struct mtk_vcodec_dec_ctx *ctx); +void mtk_vdec_lock(struct mtk_vcodec_dec_ctx *ctx); +int mtk_vcodec_dec_queue_init(void *priv, struct vb2_queue *src_vq, + struct vb2_queue *dst_vq); +void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_dec_ctx *ctx); +void mtk_vcodec_dec_release(struct mtk_vcodec_dec_ctx *ctx); + +/* + * VB2 ops + */ +int vb2ops_vdec_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, + unsigned int *nplanes, unsigned int sizes[], + struct device *alloc_devs[]); +int vb2ops_vdec_buf_prepare(struct vb2_buffer *vb); +void vb2ops_vdec_buf_finish(struct vb2_buffer *vb); +int vb2ops_vdec_buf_init(struct vb2_buffer *vb); +int vb2ops_vdec_start_streaming(struct vb2_queue *q, unsigned int count); +void vb2ops_vdec_stop_streaming(struct vb2_queue *q); + + +#endif /* _MTK_VCODEC_DEC_H_ */ diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c new file mode 100644 index 0000000000..0a89ce452a --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c @@ -0,0 +1,573 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + * Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#include <linux/bitfield.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/regmap.h> +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> +#include <media/v4l2-device.h> + +#include "mtk_vcodec_dec.h" +#include "mtk_vcodec_dec_hw.h" +#include "mtk_vcodec_dec_pm.h" +#include "../common/mtk_vcodec_intr.h" + +static int mtk_vcodec_get_hw_count(struct mtk_vcodec_dec_ctx *ctx, struct mtk_vcodec_dec_dev *dev) +{ + switch (dev->vdec_pdata->hw_arch) { + case MTK_VDEC_PURE_SINGLE_CORE: + return MTK_VDEC_ONE_CORE; + case MTK_VDEC_LAT_SINGLE_CORE: + return MTK_VDEC_ONE_LAT_ONE_CORE; + default: + mtk_v4l2_vdec_err(ctx, "hw arch %d not supported", dev->vdec_pdata->hw_arch); + return MTK_VDEC_NO_HW; + } +} + +static bool mtk_vcodec_is_hw_active(struct mtk_vcodec_dec_dev *dev) +{ + u32 cg_status; + + if (dev->vdecsys_regmap) + return !regmap_test_bits(dev->vdecsys_regmap, VDEC_HW_ACTIVE_ADDR, + VDEC_HW_ACTIVE_MASK); + + cg_status = readl(dev->reg_base[VDEC_SYS] + VDEC_HW_ACTIVE_ADDR); + return !FIELD_GET(VDEC_HW_ACTIVE_MASK, cg_status); +} + +static irqreturn_t mtk_vcodec_dec_irq_handler(int irq, void *priv) +{ + struct mtk_vcodec_dec_dev *dev = priv; + struct mtk_vcodec_dec_ctx *ctx; + unsigned int dec_done_status = 0; + void __iomem *vdec_misc_addr = dev->reg_base[VDEC_MISC] + + VDEC_IRQ_CFG_REG; + + ctx = mtk_vcodec_get_curr_ctx(dev, MTK_VDEC_CORE); + + if (!mtk_vcodec_is_hw_active(dev)) { + mtk_v4l2_vdec_err(ctx, "DEC ISR, VDEC active is not 0x0"); + return IRQ_HANDLED; + } + + dec_done_status = readl(vdec_misc_addr); + ctx->irq_status = dec_done_status; + if ((dec_done_status & MTK_VDEC_IRQ_STATUS_DEC_SUCCESS) != + MTK_VDEC_IRQ_STATUS_DEC_SUCCESS) + return IRQ_HANDLED; + + /* clear interrupt */ + writel((readl(vdec_misc_addr) | VDEC_IRQ_CFG), + dev->reg_base[VDEC_MISC] + VDEC_IRQ_CFG_REG); + writel((readl(vdec_misc_addr) & ~VDEC_IRQ_CLR), + dev->reg_base[VDEC_MISC] + VDEC_IRQ_CFG_REG); + + wake_up_dec_ctx(ctx, MTK_INST_IRQ_RECEIVED, 0); + + mtk_v4l2_vdec_dbg(3, ctx, "wake up ctx %d, dec_done_status=%x", ctx->id, dec_done_status); + + return IRQ_HANDLED; +} + +static int mtk_vcodec_get_reg_bases(struct mtk_vcodec_dec_dev *dev) +{ + struct platform_device *pdev = dev->plat_dev; + int reg_num, i; + struct resource *res; + bool has_vdecsys_reg; + int num_max_vdec_regs; + static const char * const mtk_dec_reg_names[] = { + "misc", + "ld", + "top", + "cm", + "ad", + "av", + "pp", + "hwd", + "hwq", + "hwb", + "hwg" + }; + + /* + * If we have reg-names in devicetree, this means that we're on a new + * register organization, which implies that the VDEC_SYS iospace gets + * R/W through a syscon (regmap). + * Here we try to get the "misc" iostart only to check if we have reg-names + */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "misc"); + if (res) + has_vdecsys_reg = false; + else + has_vdecsys_reg = true; + + num_max_vdec_regs = has_vdecsys_reg ? NUM_MAX_VDEC_REG_BASE : + ARRAY_SIZE(mtk_dec_reg_names); + + /* Sizeof(u32) * 4 bytes for each register base. */ + reg_num = of_property_count_elems_of_size(pdev->dev.of_node, "reg", + sizeof(u32) * 4); + if (reg_num <= 0 || reg_num > num_max_vdec_regs) { + dev_err(&pdev->dev, "Invalid register property size: %d\n", reg_num); + return -EINVAL; + } + + if (has_vdecsys_reg) { + for (i = 0; i < reg_num; i++) { + dev->reg_base[i] = devm_platform_ioremap_resource(pdev, i); + if (IS_ERR(dev->reg_base[i])) + return PTR_ERR(dev->reg_base[i]); + + dev_dbg(&pdev->dev, "reg[%d] base=%p", i, dev->reg_base[i]); + } + } else { + for (i = 0; i < reg_num; i++) { + dev->reg_base[i+1] = devm_platform_ioremap_resource_byname(pdev, mtk_dec_reg_names[i]); + if (IS_ERR(dev->reg_base[i+1])) + return PTR_ERR(dev->reg_base[i+1]); + + dev_dbg(&pdev->dev, "reg[%d] base=%p", i + 1, dev->reg_base[i + 1]); + } + + dev->vdecsys_regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "mediatek,vdecsys"); + if (IS_ERR(dev->vdecsys_regmap)) { + dev_err(&pdev->dev, "Missing mediatek,vdecsys property"); + return PTR_ERR(dev->vdecsys_regmap); + } + } + + return 0; +} + +static int mtk_vcodec_init_dec_resources(struct mtk_vcodec_dec_dev *dev) +{ + struct platform_device *pdev = dev->plat_dev; + int ret; + + ret = mtk_vcodec_get_reg_bases(dev); + if (ret) + return ret; + + if (dev->vdec_pdata->is_subdev_supported) + return 0; + + dev->dec_irq = platform_get_irq(pdev, 0); + if (dev->dec_irq < 0) + return dev->dec_irq; + + irq_set_status_flags(dev->dec_irq, IRQ_NOAUTOEN); + ret = devm_request_irq(&pdev->dev, dev->dec_irq, + mtk_vcodec_dec_irq_handler, 0, pdev->name, dev); + if (ret) { + dev_err(&pdev->dev, "failed to install dev->dec_irq %d (%d)", + dev->dec_irq, ret); + return ret; + } + + ret = mtk_vcodec_init_dec_clk(pdev, &dev->pm); + if (ret < 0) { + dev_err(&pdev->dev, "failed to get mt vcodec clock source"); + return ret; + } + + pm_runtime_enable(&pdev->dev); + return 0; +} + +static int fops_vcodec_open(struct file *file) +{ + struct mtk_vcodec_dec_dev *dev = video_drvdata(file); + struct mtk_vcodec_dec_ctx *ctx = NULL; + int ret = 0, i, hw_count; + struct vb2_queue *src_vq; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + mutex_lock(&dev->dev_mutex); + ctx->id = dev->id_counter++; + v4l2_fh_init(&ctx->fh, video_devdata(file)); + file->private_data = &ctx->fh; + v4l2_fh_add(&ctx->fh); + INIT_LIST_HEAD(&ctx->list); + ctx->dev = dev; + if (ctx->dev->vdec_pdata->is_subdev_supported) { + hw_count = mtk_vcodec_get_hw_count(ctx, dev); + if (!hw_count || !dev->subdev_prob_done) { + ret = -EINVAL; + goto err_ctrls_setup; + } + + ret = dev->subdev_prob_done(dev); + if (ret) + goto err_ctrls_setup; + + for (i = 0; i < hw_count; i++) + init_waitqueue_head(&ctx->queue[i]); + } else { + init_waitqueue_head(&ctx->queue[0]); + } + mutex_init(&ctx->lock); + + ctx->type = MTK_INST_DECODER; + ret = dev->vdec_pdata->ctrls_setup(ctx); + if (ret) { + mtk_v4l2_vdec_err(ctx, "Failed to setup mt vcodec controls"); + goto err_ctrls_setup; + } + ctx->m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev_dec, ctx, + &mtk_vcodec_dec_queue_init); + if (IS_ERR((__force void *)ctx->m2m_ctx)) { + ret = PTR_ERR((__force void *)ctx->m2m_ctx); + mtk_v4l2_vdec_err(ctx, "Failed to v4l2_m2m_ctx_init() (%d)", ret); + goto err_m2m_ctx_init; + } + src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + ctx->empty_flush_buf.vb.vb2_buf.vb2_queue = src_vq; + mtk_vcodec_dec_set_default_params(ctx); + + if (v4l2_fh_is_singular(&ctx->fh)) { + /* + * Does nothing if firmware was already loaded. + */ + ret = mtk_vcodec_fw_load_firmware(dev->fw_handler); + if (ret < 0) { + /* + * Return 0 if downloading firmware successfully, + * otherwise it is failed + */ + mtk_v4l2_vdec_err(ctx, "failed to load firmware!"); + goto err_load_fw; + } + + dev->dec_capability = + mtk_vcodec_fw_get_vdec_capa(dev->fw_handler); + + mtk_v4l2_vdec_dbg(0, ctx, "decoder capability %x", dev->dec_capability); + } + + ctx->dev->vdec_pdata->init_vdec_params(ctx); + + list_add(&ctx->list, &dev->ctx_list); + mtk_vcodec_dbgfs_create(ctx); + + mutex_unlock(&dev->dev_mutex); + mtk_v4l2_vdec_dbg(0, ctx, "%s decoder [%d]", dev_name(&dev->plat_dev->dev), ctx->id); + return ret; + + /* Deinit when failure occurred */ +err_load_fw: + v4l2_m2m_ctx_release(ctx->m2m_ctx); +err_m2m_ctx_init: + v4l2_ctrl_handler_free(&ctx->ctrl_hdl); +err_ctrls_setup: + v4l2_fh_del(&ctx->fh); + v4l2_fh_exit(&ctx->fh); + kfree(ctx); + mutex_unlock(&dev->dev_mutex); + + return ret; +} + +static int fops_vcodec_release(struct file *file) +{ + struct mtk_vcodec_dec_dev *dev = video_drvdata(file); + struct mtk_vcodec_dec_ctx *ctx = fh_to_dec_ctx(file->private_data); + + mtk_v4l2_vdec_dbg(0, ctx, "[%d] decoder", ctx->id); + mutex_lock(&dev->dev_mutex); + + /* + * Call v4l2_m2m_ctx_release before mtk_vcodec_dec_release. First, it + * makes sure the worker thread is not running after vdec_if_deinit. + * Second, the decoder will be flushed and all the buffers will be + * returned in stop_streaming. + */ + v4l2_m2m_ctx_release(ctx->m2m_ctx); + mtk_vcodec_dec_release(ctx); + + v4l2_fh_del(&ctx->fh); + v4l2_fh_exit(&ctx->fh); + v4l2_ctrl_handler_free(&ctx->ctrl_hdl); + + mtk_vcodec_dbgfs_remove(dev, ctx->id); + list_del_init(&ctx->list); + kfree(ctx); + mutex_unlock(&dev->dev_mutex); + return 0; +} + +static const struct v4l2_file_operations mtk_vcodec_fops = { + .owner = THIS_MODULE, + .open = fops_vcodec_open, + .release = fops_vcodec_release, + .poll = v4l2_m2m_fop_poll, + .unlocked_ioctl = video_ioctl2, + .mmap = v4l2_m2m_fop_mmap, +}; + +static int mtk_vcodec_probe(struct platform_device *pdev) +{ + struct mtk_vcodec_dec_dev *dev; + struct video_device *vfd_dec; + phandle rproc_phandle; + enum mtk_vcodec_fw_type fw_type; + int i, ret; + + dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + INIT_LIST_HEAD(&dev->ctx_list); + dev->plat_dev = pdev; + + dev->vdec_pdata = of_device_get_match_data(&pdev->dev); + if (!of_property_read_u32(pdev->dev.of_node, "mediatek,vpu", + &rproc_phandle)) { + fw_type = VPU; + } else if (!of_property_read_u32(pdev->dev.of_node, "mediatek,scp", + &rproc_phandle)) { + fw_type = SCP; + } else { + dev_dbg(&pdev->dev, "Could not get vdec IPI device"); + return -ENODEV; + } + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + + dev->fw_handler = mtk_vcodec_fw_select(dev, fw_type, DECODER); + if (IS_ERR(dev->fw_handler)) + return PTR_ERR(dev->fw_handler); + + ret = mtk_vcodec_init_dec_resources(dev); + if (ret) { + dev_err(&pdev->dev, "Failed to init dec resources"); + goto err_dec_pm; + } + + if (IS_VDEC_LAT_ARCH(dev->vdec_pdata->hw_arch)) { + dev->core_workqueue = + alloc_ordered_workqueue("core-decoder", + WQ_MEM_RECLAIM | WQ_FREEZABLE); + if (!dev->core_workqueue) { + dev_dbg(&pdev->dev, "Failed to create core workqueue"); + ret = -EINVAL; + goto err_res; + } + } + + for (i = 0; i < MTK_VDEC_HW_MAX; i++) + mutex_init(&dev->dec_mutex[i]); + mutex_init(&dev->dev_mutex); + spin_lock_init(&dev->irqlock); + + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s", + "[/MTK_V4L2_VDEC]"); + + ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); + if (ret) { + dev_err(&pdev->dev, "v4l2_device_register err=%d", ret); + goto err_core_workq; + } + + vfd_dec = video_device_alloc(); + if (!vfd_dec) { + dev_err(&pdev->dev, "Failed to allocate video device"); + ret = -ENOMEM; + goto err_dec_alloc; + } + vfd_dec->fops = &mtk_vcodec_fops; + vfd_dec->ioctl_ops = &mtk_vdec_ioctl_ops; + vfd_dec->release = video_device_release; + vfd_dec->lock = &dev->dev_mutex; + vfd_dec->v4l2_dev = &dev->v4l2_dev; + vfd_dec->vfl_dir = VFL_DIR_M2M; + vfd_dec->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | + V4L2_CAP_STREAMING; + + snprintf(vfd_dec->name, sizeof(vfd_dec->name), "%s", + MTK_VCODEC_DEC_NAME); + video_set_drvdata(vfd_dec, dev); + dev->vfd_dec = vfd_dec; + platform_set_drvdata(pdev, dev); + + dev->m2m_dev_dec = v4l2_m2m_init(&mtk_vdec_m2m_ops); + if (IS_ERR((__force void *)dev->m2m_dev_dec)) { + dev_err(&pdev->dev, "Failed to init mem2mem dec device"); + ret = PTR_ERR((__force void *)dev->m2m_dev_dec); + goto err_dec_alloc; + } + + dev->decode_workqueue = + alloc_ordered_workqueue(MTK_VCODEC_DEC_NAME, + WQ_MEM_RECLAIM | WQ_FREEZABLE); + if (!dev->decode_workqueue) { + dev_err(&pdev->dev, "Failed to create decode workqueue"); + ret = -EINVAL; + goto err_event_workq; + } + + if (dev->vdec_pdata->is_subdev_supported) { + ret = of_platform_populate(pdev->dev.of_node, NULL, NULL, + &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "Main device of_platform_populate failed."); + goto err_reg_cont; + } + } else { + set_bit(MTK_VDEC_CORE, dev->subdev_bitmap); + } + + atomic_set(&dev->dec_active_cnt, 0); + memset(dev->vdec_racing_info, 0, sizeof(dev->vdec_racing_info)); + mutex_init(&dev->dec_racing_info_mutex); + + ret = video_register_device(vfd_dec, VFL_TYPE_VIDEO, -1); + if (ret) { + dev_err(&pdev->dev, "Failed to register video device"); + goto err_reg_cont; + } + + if (dev->vdec_pdata->uses_stateless_api) { + v4l2_disable_ioctl(vfd_dec, VIDIOC_DECODER_CMD); + v4l2_disable_ioctl(vfd_dec, VIDIOC_TRY_DECODER_CMD); + + dev->mdev_dec.dev = &pdev->dev; + strscpy(dev->mdev_dec.model, MTK_VCODEC_DEC_NAME, + sizeof(dev->mdev_dec.model)); + + media_device_init(&dev->mdev_dec); + dev->mdev_dec.ops = &mtk_vcodec_media_ops; + dev->v4l2_dev.mdev = &dev->mdev_dec; + + ret = v4l2_m2m_register_media_controller(dev->m2m_dev_dec, dev->vfd_dec, + MEDIA_ENT_F_PROC_VIDEO_DECODER); + if (ret) { + dev_err(&pdev->dev, "Failed to register media controller"); + goto err_dec_mem_init; + } + + ret = media_device_register(&dev->mdev_dec); + if (ret) { + dev_err(&pdev->dev, "Failed to register media device"); + goto err_media_reg; + } + + dev_dbg(&pdev->dev, "media registered as /dev/media%d", vfd_dec->minor); + } + + mtk_vcodec_dbgfs_init(dev, false); + dev_dbg(&pdev->dev, "decoder registered as /dev/video%d", vfd_dec->minor); + + return 0; + +err_media_reg: + v4l2_m2m_unregister_media_controller(dev->m2m_dev_dec); +err_dec_mem_init: + video_unregister_device(vfd_dec); +err_reg_cont: + if (dev->vdec_pdata->uses_stateless_api) + media_device_cleanup(&dev->mdev_dec); + destroy_workqueue(dev->decode_workqueue); +err_event_workq: + v4l2_m2m_release(dev->m2m_dev_dec); +err_dec_alloc: + v4l2_device_unregister(&dev->v4l2_dev); +err_core_workq: + if (IS_VDEC_LAT_ARCH(dev->vdec_pdata->hw_arch)) + destroy_workqueue(dev->core_workqueue); +err_res: + if (!dev->vdec_pdata->is_subdev_supported) + pm_runtime_disable(dev->pm.dev); +err_dec_pm: + mtk_vcodec_fw_release(dev->fw_handler); + return ret; +} + +static const struct of_device_id mtk_vcodec_match[] = { + { + .compatible = "mediatek,mt8173-vcodec-dec", + .data = &mtk_vdec_8173_pdata, + }, + { + .compatible = "mediatek,mt8183-vcodec-dec", + .data = &mtk_vdec_8183_pdata, + }, + { + .compatible = "mediatek,mt8192-vcodec-dec", + .data = &mtk_lat_sig_core_pdata, + }, + { + .compatible = "mediatek,mt8186-vcodec-dec", + .data = &mtk_vdec_single_core_pdata, + }, + { + .compatible = "mediatek,mt8195-vcodec-dec", + .data = &mtk_lat_sig_core_pdata, + }, + { + .compatible = "mediatek,mt8188-vcodec-dec", + .data = &mtk_lat_sig_core_pdata, + }, + {}, +}; + +MODULE_DEVICE_TABLE(of, mtk_vcodec_match); + +static void mtk_vcodec_dec_remove(struct platform_device *pdev) +{ + struct mtk_vcodec_dec_dev *dev = platform_get_drvdata(pdev); + + destroy_workqueue(dev->decode_workqueue); + + if (media_devnode_is_registered(dev->mdev_dec.devnode)) { + media_device_unregister(&dev->mdev_dec); + v4l2_m2m_unregister_media_controller(dev->m2m_dev_dec); + media_device_cleanup(&dev->mdev_dec); + } + + if (dev->m2m_dev_dec) + v4l2_m2m_release(dev->m2m_dev_dec); + + if (dev->vfd_dec) + video_unregister_device(dev->vfd_dec); + + mtk_vcodec_dbgfs_deinit(&dev->dbgfs); + v4l2_device_unregister(&dev->v4l2_dev); + if (!dev->vdec_pdata->is_subdev_supported) + pm_runtime_disable(dev->pm.dev); + mtk_vcodec_fw_release(dev->fw_handler); +} + +static struct platform_driver mtk_vcodec_dec_driver = { + .probe = mtk_vcodec_probe, + .remove_new = mtk_vcodec_dec_remove, + .driver = { + .name = MTK_VCODEC_DEC_NAME, + .of_match_table = mtk_vcodec_match, + }, +}; + +module_platform_driver(mtk_vcodec_dec_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Mediatek video codec V4L2 decoder driver"); diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h new file mode 100644 index 0000000000..7e36b2c69b --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#ifndef _MTK_VCODEC_DEC_DRV_H_ +#define _MTK_VCODEC_DEC_DRV_H_ + +#include "../common/mtk_vcodec_cmn_drv.h" +#include "../common/mtk_vcodec_dbgfs.h" +#include "../common/mtk_vcodec_fw_priv.h" +#include "../common/mtk_vcodec_util.h" +#include "vdec_msg_queue.h" + +#define MTK_VCODEC_DEC_NAME "mtk-vcodec-dec" + +#define IS_VDEC_LAT_ARCH(hw_arch) ((hw_arch) >= MTK_VDEC_LAT_SINGLE_CORE) +#define IS_VDEC_INNER_RACING(capability) ((capability) & MTK_VCODEC_INNER_RACING) + +/* + * enum mtk_vdec_format_types - Structure used to get supported + * format types according to decoder capability + */ +enum mtk_vdec_format_types { + MTK_VDEC_FORMAT_MM21 = 0x20, + MTK_VDEC_FORMAT_MT21C = 0x40, + MTK_VDEC_FORMAT_H264_SLICE = 0x100, + MTK_VDEC_FORMAT_VP8_FRAME = 0x200, + MTK_VDEC_FORMAT_VP9_FRAME = 0x400, + MTK_VDEC_FORMAT_AV1_FRAME = 0x800, + MTK_VDEC_FORMAT_HEVC_FRAME = 0x1000, + MTK_VCODEC_INNER_RACING = 0x20000, + MTK_VDEC_IS_SUPPORT_10BIT = 0x40000, +}; + +/* + * enum mtk_vdec_hw_count - Supported hardware count + */ +enum mtk_vdec_hw_count { + MTK_VDEC_NO_HW = 0, + MTK_VDEC_ONE_CORE, + MTK_VDEC_ONE_LAT_ONE_CORE, + MTK_VDEC_MAX_HW_COUNT, +}; + +/* + * enum mtk_vdec_hw_arch - Used to separate different hardware architecture + */ +enum mtk_vdec_hw_arch { + MTK_VDEC_PURE_SINGLE_CORE, + MTK_VDEC_LAT_SINGLE_CORE, +}; + +/** + * struct vdec_pic_info - picture size information + * @pic_w: picture width + * @pic_h: picture height + * @buf_w: picture buffer width (64 aligned up from pic_w) + * @buf_h: picture buffer heiht (64 aligned up from pic_h) + * @fb_sz: bitstream size of each plane + * E.g. suppose picture size is 176x144, + * buffer size will be aligned to 176x160. + * @cap_fourcc: fourcc number(may changed when resolution change) + * @reserved: align struct to 64-bit in order to adjust 32-bit and 64-bit os. + */ +struct vdec_pic_info { + unsigned int pic_w; + unsigned int pic_h; + unsigned int buf_w; + unsigned int buf_h; + unsigned int fb_sz[VIDEO_MAX_PLANES]; + unsigned int cap_fourcc; + unsigned int reserved; +}; + +/** + * struct mtk_vcodec_dec_pdata - compatible data for each IC + * @init_vdec_params: init vdec params + * @ctrls_setup: init vcodec dec ctrls + * @worker: worker to start a decode job + * @flush_decoder: function that flushes the decoder + * @get_cap_buffer: get capture buffer from capture queue + * @cap_to_disp: put capture buffer to disp list for lat and core arch + * @vdec_vb2_ops: struct vb2_ops + * + * @vdec_formats: supported video decoder formats + * @num_formats: count of video decoder formats + * @default_out_fmt: default output buffer format + * @default_cap_fmt: default capture buffer format + * + * @hw_arch: hardware arch is used to separate pure_sin_core and lat_sin_core + * + * @is_subdev_supported: whether support parent-node architecture(subdev) + * @uses_stateless_api: whether the decoder uses the stateless API with requests + */ +struct mtk_vcodec_dec_pdata { + void (*init_vdec_params)(struct mtk_vcodec_dec_ctx *ctx); + int (*ctrls_setup)(struct mtk_vcodec_dec_ctx *ctx); + void (*worker)(struct work_struct *work); + int (*flush_decoder)(struct mtk_vcodec_dec_ctx *ctx); + struct vdec_fb *(*get_cap_buffer)(struct mtk_vcodec_dec_ctx *ctx); + void (*cap_to_disp)(struct mtk_vcodec_dec_ctx *ctx, int error, + struct media_request *src_buf_req); + + const struct vb2_ops *vdec_vb2_ops; + + const struct mtk_video_fmt *vdec_formats; + const int *num_formats; + const struct mtk_video_fmt *default_out_fmt; + const struct mtk_video_fmt *default_cap_fmt; + + enum mtk_vdec_hw_arch hw_arch; + + bool is_subdev_supported; + bool uses_stateless_api; +}; + +/** + * struct mtk_vcodec_dec_ctx - Context (instance) private data. + * + * @type: type of decoder instance + * @dev: pointer to the mtk_vcodec_dec_dev of the device + * @list: link to ctx_list of mtk_vcodec_dec_dev + * + * @fh: struct v4l2_fh + * @m2m_ctx: pointer to the v4l2_m2m_ctx of the context + * @q_data: store information of input and output queue of the context + * @id: index of the context that this structure describes + * @state: state of the context + * + * @dec_if: hooked decoder driver interface + * @drv_handle: driver handle for specific decode/encode instance + * + * @picinfo: store picture info after header parsing + * @dpb_size: store dpb count after header parsing + * + * @int_cond: variable used by the waitqueue + * @int_type: type of the last interrupt + * @queue: waitqueue that can be used to wait for this context to finish + * @irq_status: irq status + * + * @ctrl_hdl: handler for v4l2 framework + * @decode_work: worker for the decoding + * @last_decoded_picinfo: pic information get from latest decode + * @empty_flush_buf: a fake size-0 capture buffer that indicates flush. Used + * for stateful decoder. + * @is_flushing: set to true if flushing is in progress. + * + * @current_codec: current set input codec, in V4L2 pixel format + * @capture_fourcc: capture queue type in V4L2 pixel format + * + * @colorspace: enum v4l2_colorspace; supplemental to pixelformat + * @ycbcr_enc: enum v4l2_ycbcr_encoding, Y'CbCr encoding + * @quantization: enum v4l2_quantization, colorspace quantization + * @xfer_func: enum v4l2_xfer_func, colorspace transfer function + * + * @decoded_frame_cnt: number of decoded frames + * @lock: protect variables accessed by V4L2 threads and worker thread such as + * mtk_video_dec_buf. + * @hw_id: hardware index used to identify different hardware. + * + * @msg_queue: msg queue used to store lat buffer information. + * @vpu_inst: vpu instance pointer. + * + * @is_10bit_bitstream: set to true if it's 10bit bitstream + */ +struct mtk_vcodec_dec_ctx { + enum mtk_instance_type type; + struct mtk_vcodec_dec_dev *dev; + struct list_head list; + + struct v4l2_fh fh; + struct v4l2_m2m_ctx *m2m_ctx; + struct mtk_q_data q_data[2]; + int id; + enum mtk_instance_state state; + + const struct vdec_common_if *dec_if; + void *drv_handle; + + struct vdec_pic_info picinfo; + int dpb_size; + + int int_cond[MTK_VDEC_HW_MAX]; + int int_type[MTK_VDEC_HW_MAX]; + wait_queue_head_t queue[MTK_VDEC_HW_MAX]; + unsigned int irq_status; + + struct v4l2_ctrl_handler ctrl_hdl; + struct work_struct decode_work; + struct vdec_pic_info last_decoded_picinfo; + struct v4l2_m2m_buffer empty_flush_buf; + bool is_flushing; + + u32 current_codec; + u32 capture_fourcc; + + enum v4l2_colorspace colorspace; + enum v4l2_ycbcr_encoding ycbcr_enc; + enum v4l2_quantization quantization; + enum v4l2_xfer_func xfer_func; + + int decoded_frame_cnt; + struct mutex lock; + int hw_id; + + struct vdec_msg_queue msg_queue; + void *vpu_inst; + + bool is_10bit_bitstream; +}; + +/** + * struct mtk_vcodec_dec_dev - driver data + * @v4l2_dev: V4L2 device to register video devices for. + * @vfd_dec: Video device for decoder + * @mdev_dec: Media device for decoder + * + * @m2m_dev_dec: m2m device for decoder + * @plat_dev: platform device + * @ctx_list: list of struct mtk_vcodec_ctx + * @curr_ctx: The context that is waiting for codec hardware + * + * @reg_base: Mapped address of MTK Vcodec registers. + * @vdec_pdata: decoder IC-specific data + * @vdecsys_regmap: VDEC_SYS register space passed through syscon + * + * @fw_handler: used to communicate with the firmware. + * @id_counter: used to identify current opened instance + * + * @dec_mutex: decoder hardware lock + * @dev_mutex: video_device lock + * @decode_workqueue: decode work queue + * + * @irqlock: protect data access by irq handler and work thread + * @dec_irq: decoder irq resource + * + * @pm: power management control + * @dec_capability: used to identify decode capability, ex: 4k + * + * @core_workqueue: queue used for core hardware decode + * + * @subdev_dev: subdev hardware device + * @subdev_prob_done: check whether all used hw device is prob done + * @subdev_bitmap: used to record hardware is ready or not + * + * @dec_active_cnt: used to mark whether need to record register value + * @vdec_racing_info: record register value + * @dec_racing_info_mutex: mutex lock used for inner racing mode + * @dbgfs: debug log related information + */ +struct mtk_vcodec_dec_dev { + struct v4l2_device v4l2_dev; + struct video_device *vfd_dec; + struct media_device mdev_dec; + + struct v4l2_m2m_dev *m2m_dev_dec; + struct platform_device *plat_dev; + struct list_head ctx_list; + struct mtk_vcodec_dec_ctx *curr_ctx; + + void __iomem *reg_base[NUM_MAX_VCODEC_REG_BASE]; + const struct mtk_vcodec_dec_pdata *vdec_pdata; + struct regmap *vdecsys_regmap; + + struct mtk_vcodec_fw *fw_handler; + u64 id_counter; + + /* decoder hardware mutex lock */ + struct mutex dec_mutex[MTK_VDEC_HW_MAX]; + struct mutex dev_mutex; + struct workqueue_struct *decode_workqueue; + + spinlock_t irqlock; + int dec_irq; + + struct mtk_vcodec_pm pm; + unsigned int dec_capability; + + struct workqueue_struct *core_workqueue; + + void *subdev_dev[MTK_VDEC_HW_MAX]; + int (*subdev_prob_done)(struct mtk_vcodec_dec_dev *vdec_dev); + DECLARE_BITMAP(subdev_bitmap, MTK_VDEC_HW_MAX); + + atomic_t dec_active_cnt; + u32 vdec_racing_info[132]; + /* Protects access to vdec_racing_info data */ + struct mutex dec_racing_info_mutex; + struct mtk_vcodec_dbgfs dbgfs; +}; + +static inline struct mtk_vcodec_dec_ctx *fh_to_dec_ctx(struct v4l2_fh *fh) +{ + return container_of(fh, struct mtk_vcodec_dec_ctx, fh); +} + +static inline struct mtk_vcodec_dec_ctx *ctrl_to_dec_ctx(struct v4l2_ctrl *ctrl) +{ + return container_of(ctrl->handler, struct mtk_vcodec_dec_ctx, ctrl_hdl); +} + +/* Wake up context wait_queue */ +static inline void +wake_up_dec_ctx(struct mtk_vcodec_dec_ctx *ctx, unsigned int reason, unsigned int hw_id) +{ + ctx->int_cond[hw_id] = 1; + ctx->int_type[hw_id] = reason; + wake_up_interruptible(&ctx->queue[hw_id]); +} + +#define mtk_vdec_err(ctx, fmt, args...) \ + mtk_vcodec_err((ctx)->id, (ctx)->dev->plat_dev, fmt, ##args) + +#define mtk_vdec_debug(ctx, fmt, args...) \ + mtk_vcodec_debug((ctx)->id, (ctx)->dev->plat_dev, fmt, ##args) + +#define mtk_v4l2_vdec_err(ctx, fmt, args...) mtk_v4l2_err((ctx)->dev->plat_dev, fmt, ##args) + +#define mtk_v4l2_vdec_dbg(level, ctx, fmt, args...) \ + mtk_v4l2_debug((ctx)->dev->plat_dev, level, fmt, ##args) + +#endif /* _MTK_VCODEC_DEC_DRV_H_ */ diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_hw.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_hw.c new file mode 100644 index 0000000000..881d5de41e --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_hw.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> + +#include "mtk_vcodec_dec.h" +#include "mtk_vcodec_dec_hw.h" +#include "mtk_vcodec_dec_pm.h" +#include "../common/mtk_vcodec_intr.h" + +static const struct of_device_id mtk_vdec_hw_match[] = { + { + .compatible = "mediatek,mtk-vcodec-lat", + .data = (void *)MTK_VDEC_LAT0, + }, + { + .compatible = "mediatek,mtk-vcodec-core", + .data = (void *)MTK_VDEC_CORE, + }, + { + .compatible = "mediatek,mtk-vcodec-lat-soc", + .data = (void *)MTK_VDEC_LAT_SOC, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, mtk_vdec_hw_match); + +static int mtk_vdec_hw_prob_done(struct mtk_vcodec_dec_dev *vdec_dev) +{ + struct platform_device *pdev = vdec_dev->plat_dev; + struct device_node *subdev_node; + enum mtk_vdec_hw_id hw_idx; + const struct of_device_id *of_id; + int i; + + for (i = 0; i < ARRAY_SIZE(mtk_vdec_hw_match); i++) { + of_id = &mtk_vdec_hw_match[i]; + subdev_node = of_find_compatible_node(NULL, NULL, + of_id->compatible); + if (!subdev_node) + continue; + + of_node_put(subdev_node); + + hw_idx = (enum mtk_vdec_hw_id)(uintptr_t)of_id->data; + if (!test_bit(hw_idx, vdec_dev->subdev_bitmap)) { + dev_err(&pdev->dev, "vdec %d is not ready", hw_idx); + return -EAGAIN; + } + } + + return 0; +} + +static irqreturn_t mtk_vdec_hw_irq_handler(int irq, void *priv) +{ + struct mtk_vdec_hw_dev *dev = priv; + struct mtk_vcodec_dec_ctx *ctx; + u32 cg_status; + unsigned int dec_done_status; + void __iomem *vdec_misc_addr = dev->reg_base[VDEC_HW_MISC] + + VDEC_IRQ_CFG_REG; + + ctx = mtk_vcodec_get_curr_ctx(dev->main_dev, dev->hw_idx); + + /* check if HW active or not */ + cg_status = readl(dev->reg_base[VDEC_HW_SYS] + VDEC_HW_ACTIVE_ADDR); + if (cg_status & VDEC_HW_ACTIVE_MASK) { + mtk_v4l2_vdec_err(ctx, "vdec active is not 0x0 (0x%08x)", cg_status); + return IRQ_HANDLED; + } + + dec_done_status = readl(vdec_misc_addr); + if ((dec_done_status & MTK_VDEC_IRQ_STATUS_DEC_SUCCESS) != + MTK_VDEC_IRQ_STATUS_DEC_SUCCESS) + return IRQ_HANDLED; + + /* clear interrupt */ + writel(dec_done_status | VDEC_IRQ_CFG, vdec_misc_addr); + writel(dec_done_status & ~VDEC_IRQ_CLR, vdec_misc_addr); + + wake_up_dec_ctx(ctx, MTK_INST_IRQ_RECEIVED, dev->hw_idx); + + mtk_v4l2_vdec_dbg(3, ctx, "wake up ctx %d, dec_done_status=%x", + ctx->id, dec_done_status); + + return IRQ_HANDLED; +} + +static int mtk_vdec_hw_init_irq(struct mtk_vdec_hw_dev *dev) +{ + struct platform_device *pdev = dev->plat_dev; + int ret; + + dev->dec_irq = platform_get_irq(pdev, 0); + if (dev->dec_irq < 0) + return dev->dec_irq; + + irq_set_status_flags(dev->dec_irq, IRQ_NOAUTOEN); + ret = devm_request_irq(&pdev->dev, dev->dec_irq, + mtk_vdec_hw_irq_handler, 0, pdev->name, dev); + if (ret) { + dev_err(&pdev->dev, "Failed to install dev->dec_irq %d (%d)", + dev->dec_irq, ret); + return ret; + } + + return 0; +} + +static int mtk_vdec_hw_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mtk_vdec_hw_dev *subdev_dev; + struct mtk_vcodec_dec_dev *main_dev; + const struct of_device_id *of_id; + int hw_idx; + int ret; + + if (!dev->parent) { + dev_err(dev, "no parent for hardware devices.\n"); + return -ENODEV; + } + + main_dev = dev_get_drvdata(dev->parent); + if (!main_dev) { + dev_err(dev, "failed to get parent driver data"); + return -EINVAL; + } + + subdev_dev = devm_kzalloc(dev, sizeof(*subdev_dev), GFP_KERNEL); + if (!subdev_dev) + return -ENOMEM; + + subdev_dev->plat_dev = pdev; + ret = mtk_vcodec_init_dec_clk(pdev, &subdev_dev->pm); + if (ret) + return ret; + + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; + + of_id = of_match_device(mtk_vdec_hw_match, dev); + if (!of_id) { + dev_err(dev, "Can't get vdec subdev id.\n"); + return -EINVAL; + } + + hw_idx = (enum mtk_vdec_hw_id)(uintptr_t)of_id->data; + if (hw_idx >= MTK_VDEC_HW_MAX) { + dev_err(dev, "Hardware index %d not correct.\n", hw_idx); + return -EINVAL; + } + + main_dev->subdev_dev[hw_idx] = subdev_dev; + subdev_dev->hw_idx = hw_idx; + subdev_dev->main_dev = main_dev; + subdev_dev->reg_base[VDEC_HW_SYS] = main_dev->reg_base[VDEC_HW_SYS]; + set_bit(subdev_dev->hw_idx, main_dev->subdev_bitmap); + + if (IS_SUPPORT_VDEC_HW_IRQ(hw_idx)) { + ret = mtk_vdec_hw_init_irq(subdev_dev); + if (ret) + return ret; + } + + subdev_dev->reg_base[VDEC_HW_MISC] = + devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR((__force void *)subdev_dev->reg_base[VDEC_HW_MISC])) { + ret = PTR_ERR((__force void *)subdev_dev->reg_base[VDEC_HW_MISC]); + return ret; + } + + if (!main_dev->subdev_prob_done) + main_dev->subdev_prob_done = mtk_vdec_hw_prob_done; + + platform_set_drvdata(pdev, subdev_dev); + return 0; +} + +static struct platform_driver mtk_vdec_driver = { + .probe = mtk_vdec_hw_probe, + .driver = { + .name = "mtk-vdec-comp", + .of_match_table = mtk_vdec_hw_match, + }, +}; +module_platform_driver(mtk_vdec_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Mediatek video decoder hardware driver"); diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_hw.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_hw.h new file mode 100644 index 0000000000..83fe8b9428 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_hw.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#ifndef _MTK_VCODEC_DEC_HW_H_ +#define _MTK_VCODEC_DEC_HW_H_ + +#include <linux/io.h> +#include <linux/platform_device.h> + +#include "mtk_vcodec_dec_drv.h" + +#define VDEC_HW_ACTIVE_ADDR 0x0 +#define VDEC_HW_ACTIVE_MASK BIT(4) +#define VDEC_IRQ_CFG 0x11 +#define VDEC_IRQ_CLR 0x10 +#define VDEC_IRQ_CFG_REG 0xa4 + +#define IS_SUPPORT_VDEC_HW_IRQ(hw_idx) ((hw_idx) != MTK_VDEC_LAT_SOC) + +/** + * enum mtk_vdec_hw_reg_idx - subdev hardware register base index + * @VDEC_HW_SYS : vdec soc register index + * @VDEC_HW_MISC: vdec misc register index + * @VDEC_HW_MAX : vdec supported max register index + */ +enum mtk_vdec_hw_reg_idx { + VDEC_HW_SYS, + VDEC_HW_MISC, + VDEC_HW_MAX +}; + +/** + * struct mtk_vdec_hw_dev - vdec hardware driver data + * @plat_dev: platform device + * @main_dev: main device + * @reg_base: mapped address of MTK Vcodec registers. + * + * @curr_ctx: the context that is waiting for codec hardware + * + * @dec_irq : decoder irq resource + * @pm : power management control + * @hw_idx : each hardware index + */ +struct mtk_vdec_hw_dev { + struct platform_device *plat_dev; + struct mtk_vcodec_dec_dev *main_dev; + void __iomem *reg_base[VDEC_HW_MAX]; + + struct mtk_vcodec_dec_ctx *curr_ctx; + + int dec_irq; + struct mtk_vcodec_pm pm; + int hw_idx; +}; + +#endif /* _MTK_VCODEC_DEC_HW_H_ */ diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_pm.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_pm.c new file mode 100644 index 0000000000..aefd3e9e30 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_pm.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#include <linux/clk.h> +#include <linux/interrupt.h> +#include <linux/of.h> +#include <linux/pm_runtime.h> + +#include "mtk_vcodec_dec_hw.h" +#include "mtk_vcodec_dec_pm.h" + +int mtk_vcodec_init_dec_clk(struct platform_device *pdev, struct mtk_vcodec_pm *pm) +{ + struct mtk_vcodec_clk *dec_clk; + struct mtk_vcodec_clk_info *clk_info; + int i = 0, ret; + + dec_clk = &pm->vdec_clk; + pm->dev = &pdev->dev; + + dec_clk->clk_num = + of_property_count_strings(pdev->dev.of_node, "clock-names"); + if (dec_clk->clk_num > 0) { + dec_clk->clk_info = devm_kcalloc(&pdev->dev, + dec_clk->clk_num, sizeof(*clk_info), + GFP_KERNEL); + if (!dec_clk->clk_info) + return -ENOMEM; + } else { + dev_err(&pdev->dev, "Failed to get vdec clock count"); + return -EINVAL; + } + + for (i = 0; i < dec_clk->clk_num; i++) { + clk_info = &dec_clk->clk_info[i]; + ret = of_property_read_string_index(pdev->dev.of_node, + "clock-names", i, &clk_info->clk_name); + if (ret) { + dev_err(&pdev->dev, "Failed to get clock name id = %d", i); + return ret; + } + clk_info->vcodec_clk = devm_clk_get(&pdev->dev, + clk_info->clk_name); + if (IS_ERR(clk_info->vcodec_clk)) { + dev_err(&pdev->dev, "devm_clk_get (%d)%s fail", i, clk_info->clk_name); + return PTR_ERR(clk_info->vcodec_clk); + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(mtk_vcodec_init_dec_clk); + +static int mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm) +{ + int ret; + + ret = pm_runtime_resume_and_get(pm->dev); + if (ret) + dev_err(pm->dev, "pm_runtime_resume_and_get fail %d", ret); + + return ret; +} + +static void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm) +{ + int ret; + + ret = pm_runtime_put(pm->dev); + if (ret && ret != -EAGAIN) + dev_err(pm->dev, "pm_runtime_put fail %d", ret); +} + +static void mtk_vcodec_dec_clock_on(struct mtk_vcodec_pm *pm) +{ + struct mtk_vcodec_clk *dec_clk; + int ret, i; + + dec_clk = &pm->vdec_clk; + for (i = 0; i < dec_clk->clk_num; i++) { + ret = clk_prepare_enable(dec_clk->clk_info[i].vcodec_clk); + if (ret) { + dev_err(pm->dev, "clk_prepare_enable %d %s fail %d", i, + dec_clk->clk_info[i].clk_name, ret); + goto error; + } + } + + return; +error: + for (i -= 1; i >= 0; i--) + clk_disable_unprepare(dec_clk->clk_info[i].vcodec_clk); +} + +static void mtk_vcodec_dec_clock_off(struct mtk_vcodec_pm *pm) +{ + struct mtk_vcodec_clk *dec_clk; + int i; + + dec_clk = &pm->vdec_clk; + for (i = dec_clk->clk_num - 1; i >= 0; i--) + clk_disable_unprepare(dec_clk->clk_info[i].vcodec_clk); +} + +static void mtk_vcodec_dec_enable_irq(struct mtk_vcodec_dec_dev *vdec_dev, int hw_idx) +{ + struct mtk_vdec_hw_dev *subdev_dev; + + if (!test_bit(hw_idx, vdec_dev->subdev_bitmap)) + return; + + if (vdec_dev->vdec_pdata->is_subdev_supported) { + subdev_dev = mtk_vcodec_get_hw_dev(vdec_dev, hw_idx); + if (subdev_dev) + enable_irq(subdev_dev->dec_irq); + else + dev_err(&vdec_dev->plat_dev->dev, "Failed to get hw dev\n"); + } else { + enable_irq(vdec_dev->dec_irq); + } +} + +static void mtk_vcodec_dec_disable_irq(struct mtk_vcodec_dec_dev *vdec_dev, int hw_idx) +{ + struct mtk_vdec_hw_dev *subdev_dev; + + if (!test_bit(hw_idx, vdec_dev->subdev_bitmap)) + return; + + if (vdec_dev->vdec_pdata->is_subdev_supported) { + subdev_dev = mtk_vcodec_get_hw_dev(vdec_dev, hw_idx); + if (subdev_dev) + disable_irq(subdev_dev->dec_irq); + else + dev_err(&vdec_dev->plat_dev->dev, "Failed to get hw dev\n"); + } else { + disable_irq(vdec_dev->dec_irq); + } +} + +static void mtk_vcodec_load_racing_info(struct mtk_vcodec_dec_ctx *ctx) +{ + void __iomem *vdec_racing_addr; + int j; + + mutex_lock(&ctx->dev->dec_racing_info_mutex); + if (atomic_inc_return(&ctx->dev->dec_active_cnt) == 1) { + vdec_racing_addr = ctx->dev->reg_base[VDEC_MISC] + 0x100; + for (j = 0; j < 132; j++) + writel(ctx->dev->vdec_racing_info[j], vdec_racing_addr + j * 4); + } + mutex_unlock(&ctx->dev->dec_racing_info_mutex); +} + +static void mtk_vcodec_record_racing_info(struct mtk_vcodec_dec_ctx *ctx) +{ + void __iomem *vdec_racing_addr; + int j; + + mutex_lock(&ctx->dev->dec_racing_info_mutex); + if (atomic_dec_and_test(&ctx->dev->dec_active_cnt)) { + vdec_racing_addr = ctx->dev->reg_base[VDEC_MISC] + 0x100; + for (j = 0; j < 132; j++) + ctx->dev->vdec_racing_info[j] = readl(vdec_racing_addr + j * 4); + } + mutex_unlock(&ctx->dev->dec_racing_info_mutex); +} + +static struct mtk_vcodec_pm *mtk_vcodec_dec_get_pm(struct mtk_vcodec_dec_dev *vdec_dev, + int hw_idx) +{ + struct mtk_vdec_hw_dev *subdev_dev; + + if (!test_bit(hw_idx, vdec_dev->subdev_bitmap)) + return NULL; + + if (vdec_dev->vdec_pdata->is_subdev_supported) { + subdev_dev = mtk_vcodec_get_hw_dev(vdec_dev, hw_idx); + if (subdev_dev) + return &subdev_dev->pm; + + dev_err(&vdec_dev->plat_dev->dev, "Failed to get hw dev\n"); + return NULL; + } + + return &vdec_dev->pm; +} + +static void mtk_vcodec_dec_child_dev_on(struct mtk_vcodec_dec_dev *vdec_dev, + int hw_idx) +{ + struct mtk_vcodec_pm *pm; + + pm = mtk_vcodec_dec_get_pm(vdec_dev, hw_idx); + if (pm) { + mtk_vcodec_dec_pw_on(pm); + mtk_vcodec_dec_clock_on(pm); + } + + if (hw_idx == MTK_VDEC_LAT0) { + pm = mtk_vcodec_dec_get_pm(vdec_dev, MTK_VDEC_LAT_SOC); + if (pm) { + mtk_vcodec_dec_pw_on(pm); + mtk_vcodec_dec_clock_on(pm); + } + } +} + +static void mtk_vcodec_dec_child_dev_off(struct mtk_vcodec_dec_dev *vdec_dev, + int hw_idx) +{ + struct mtk_vcodec_pm *pm; + + pm = mtk_vcodec_dec_get_pm(vdec_dev, hw_idx); + if (pm) { + mtk_vcodec_dec_clock_off(pm); + mtk_vcodec_dec_pw_off(pm); + } + + if (hw_idx == MTK_VDEC_LAT0) { + pm = mtk_vcodec_dec_get_pm(vdec_dev, MTK_VDEC_LAT_SOC); + if (pm) { + mtk_vcodec_dec_clock_off(pm); + mtk_vcodec_dec_pw_off(pm); + } + } +} + +void mtk_vcodec_dec_enable_hardware(struct mtk_vcodec_dec_ctx *ctx, int hw_idx) +{ + mutex_lock(&ctx->dev->dec_mutex[hw_idx]); + + if (IS_VDEC_LAT_ARCH(ctx->dev->vdec_pdata->hw_arch) && + hw_idx == MTK_VDEC_CORE) + mtk_vcodec_dec_child_dev_on(ctx->dev, MTK_VDEC_LAT0); + mtk_vcodec_dec_child_dev_on(ctx->dev, hw_idx); + + mtk_vcodec_dec_enable_irq(ctx->dev, hw_idx); + + if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability)) + mtk_vcodec_load_racing_info(ctx); +} +EXPORT_SYMBOL_GPL(mtk_vcodec_dec_enable_hardware); + +void mtk_vcodec_dec_disable_hardware(struct mtk_vcodec_dec_ctx *ctx, int hw_idx) +{ + if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability)) + mtk_vcodec_record_racing_info(ctx); + + mtk_vcodec_dec_disable_irq(ctx->dev, hw_idx); + + mtk_vcodec_dec_child_dev_off(ctx->dev, hw_idx); + if (IS_VDEC_LAT_ARCH(ctx->dev->vdec_pdata->hw_arch) && + hw_idx == MTK_VDEC_CORE) + mtk_vcodec_dec_child_dev_off(ctx->dev, MTK_VDEC_LAT0); + + mutex_unlock(&ctx->dev->dec_mutex[hw_idx]); +} +EXPORT_SYMBOL_GPL(mtk_vcodec_dec_disable_hardware); diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_pm.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_pm.h new file mode 100644 index 0000000000..87a50d589d --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_pm.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#ifndef _MTK_VCODEC_DEC_PM_H_ +#define _MTK_VCODEC_DEC_PM_H_ + +#include "mtk_vcodec_dec_drv.h" + +int mtk_vcodec_init_dec_clk(struct platform_device *pdev, struct mtk_vcodec_pm *pm); + +void mtk_vcodec_dec_enable_hardware(struct mtk_vcodec_dec_ctx *ctx, int hw_idx); +void mtk_vcodec_dec_disable_hardware(struct mtk_vcodec_dec_ctx *ctx, int hw_idx); + +#endif /* _MTK_VCODEC_DEC_PM_H_ */ diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_stateful.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_stateful.c new file mode 100644 index 0000000000..11ca2c2fba --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_stateful.c @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> + +#include "mtk_vcodec_dec.h" +#include "mtk_vcodec_dec_pm.h" +#include "vdec_drv_if.h" + +static struct mtk_video_fmt mtk_video_formats[] = { + { + .fourcc = V4L2_PIX_FMT_H264, + .type = MTK_FMT_DEC, + .num_planes = 1, + .flags = V4L2_FMT_FLAG_DYN_RESOLUTION, + .frmsize = { MTK_VDEC_MIN_W, MTK_VDEC_MAX_W, 16, + MTK_VDEC_MIN_H, MTK_VDEC_MAX_H, 16 }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8, + .type = MTK_FMT_DEC, + .num_planes = 1, + .flags = V4L2_FMT_FLAG_DYN_RESOLUTION, + .frmsize = { MTK_VDEC_MIN_W, MTK_VDEC_MAX_W, 16, + MTK_VDEC_MIN_H, MTK_VDEC_MAX_H, 16 }, + }, + { + .fourcc = V4L2_PIX_FMT_VP9, + .type = MTK_FMT_DEC, + .num_planes = 1, + .flags = V4L2_FMT_FLAG_DYN_RESOLUTION, + .frmsize = { MTK_VDEC_MIN_W, MTK_VDEC_MAX_W, 16, + MTK_VDEC_MIN_H, MTK_VDEC_MAX_H, 16 }, + }, + { + .fourcc = V4L2_PIX_FMT_MT21C, + .type = MTK_FMT_FRAME, + .num_planes = 2, + }, +}; + +static const unsigned int num_supported_formats = + ARRAY_SIZE(mtk_video_formats); + +#define DEFAULT_OUT_FMT_IDX 0 +#define DEFAULT_CAP_FMT_IDX 3 + +/* + * This function tries to clean all display buffers, the buffers will return + * in display order. + * Note the buffers returned from codec driver may still be in driver's + * reference list. + */ +static struct vb2_buffer *get_display_buffer(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_fb *disp_frame_buffer = NULL; + struct mtk_video_dec_buf *dstbuf; + struct vb2_v4l2_buffer *vb; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d]", ctx->id); + if (vdec_if_get_param(ctx, GET_PARAM_DISP_FRAME_BUFFER, + &disp_frame_buffer)) { + mtk_v4l2_vdec_err(ctx, "[%d]Cannot get param : GET_PARAM_DISP_FRAME_BUFFER", + ctx->id); + return NULL; + } + + if (!disp_frame_buffer) { + mtk_v4l2_vdec_dbg(3, ctx, "No display frame buffer"); + return NULL; + } + + dstbuf = container_of(disp_frame_buffer, struct mtk_video_dec_buf, + frame_buffer); + vb = &dstbuf->m2m_buf.vb; + mutex_lock(&ctx->lock); + if (dstbuf->used) { + mtk_v4l2_vdec_dbg(2, ctx, "[%d]status=%x queue id=%d to done_list %d", + ctx->id, disp_frame_buffer->status, + vb->vb2_buf.index, dstbuf->queued_in_vb2); + + v4l2_m2m_buf_done(vb, VB2_BUF_STATE_DONE); + ctx->decoded_frame_cnt++; + } + mutex_unlock(&ctx->lock); + return &vb->vb2_buf; +} + +/* + * This function tries to clean all capture buffers that are not used as + * reference buffers by codec driver any more + * In this case, we need re-queue buffer to vb2 buffer if user space + * already returns this buffer to v4l2 or this buffer is just the output of + * previous sps/pps/resolution change decode, or do nothing if user + * space still owns this buffer + */ +static struct vb2_buffer *get_free_buffer(struct mtk_vcodec_dec_ctx *ctx) +{ + struct mtk_video_dec_buf *dstbuf; + struct vdec_fb *free_frame_buffer = NULL; + struct vb2_v4l2_buffer *vb; + + if (vdec_if_get_param(ctx, GET_PARAM_FREE_FRAME_BUFFER, + &free_frame_buffer)) { + mtk_v4l2_vdec_err(ctx, "[%d] Error!! Cannot get param", ctx->id); + return NULL; + } + if (!free_frame_buffer) { + mtk_v4l2_vdec_dbg(3, ctx, " No free frame buffer"); + return NULL; + } + + mtk_v4l2_vdec_dbg(3, ctx, "[%d] tmp_frame_addr = 0x%p", ctx->id, + free_frame_buffer); + + dstbuf = container_of(free_frame_buffer, struct mtk_video_dec_buf, + frame_buffer); + vb = &dstbuf->m2m_buf.vb; + + mutex_lock(&ctx->lock); + if (dstbuf->used) { + if (dstbuf->queued_in_vb2 && dstbuf->queued_in_v4l2 && + free_frame_buffer->status == FB_ST_FREE) { + /* + * After decode sps/pps or non-display buffer, we don't + * need to return capture buffer to user space, but + * just re-queue this capture buffer to vb2 queue. + * This reduce overheads that dq/q unused capture + * buffer. In this case, queued_in_vb2 = true. + */ + mtk_v4l2_vdec_dbg(2, ctx, "[%d]status=%x queue id=%d to rdy_queue %d", + ctx->id, free_frame_buffer->status, + vb->vb2_buf.index, dstbuf->queued_in_vb2); + v4l2_m2m_buf_queue(ctx->m2m_ctx, vb); + } else if (!dstbuf->queued_in_vb2 && dstbuf->queued_in_v4l2) { + /* + * If buffer in v4l2 driver but not in vb2 queue yet, + * and we get this buffer from free_list, it means + * that codec driver do not use this buffer as + * reference buffer anymore. We should q buffer to vb2 + * queue, so later work thread could get this buffer + * for decode. In this case, queued_in_vb2 = false + * means this buffer is not from previous decode + * output. + */ + mtk_v4l2_vdec_dbg(2, ctx, + "[%d]status=%x queue id=%d to rdy_queue", + ctx->id, free_frame_buffer->status, + vb->vb2_buf.index); + v4l2_m2m_buf_queue(ctx->m2m_ctx, vb); + dstbuf->queued_in_vb2 = true; + } else { + /* + * Codec driver do not need to reference this capture + * buffer and this buffer is not in v4l2 driver. + * Then we don't need to do any thing, just add log when + * we need to debug buffer flow. + * When this buffer q from user space, it could + * directly q to vb2 buffer + */ + mtk_v4l2_vdec_dbg(3, ctx, "[%d]status=%x err queue id=%d %d %d", + ctx->id, free_frame_buffer->status, + vb->vb2_buf.index, dstbuf->queued_in_vb2, + dstbuf->queued_in_v4l2); + } + dstbuf->used = false; + } + mutex_unlock(&ctx->lock); + return &vb->vb2_buf; +} + +static void clean_display_buffer(struct mtk_vcodec_dec_ctx *ctx) +{ + while (get_display_buffer(ctx)) + ; +} + +static void clean_free_buffer(struct mtk_vcodec_dec_ctx *ctx) +{ + while (get_free_buffer(ctx)) + ; +} + +static void mtk_vdec_queue_res_chg_event(struct mtk_vcodec_dec_ctx *ctx) +{ + static const struct v4l2_event ev_src_ch = { + .type = V4L2_EVENT_SOURCE_CHANGE, + .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION, + }; + + mtk_v4l2_vdec_dbg(1, ctx, "[%d]", ctx->id); + v4l2_event_queue_fh(&ctx->fh, &ev_src_ch); +} + +static int mtk_vdec_flush_decoder(struct mtk_vcodec_dec_ctx *ctx) +{ + bool res_chg; + int ret; + + ret = vdec_if_decode(ctx, NULL, NULL, &res_chg); + if (ret) + mtk_v4l2_vdec_err(ctx, "DecodeFinal failed, ret=%d", ret); + + clean_display_buffer(ctx); + clean_free_buffer(ctx); + + return 0; +} + +static void mtk_vdec_update_fmt(struct mtk_vcodec_dec_ctx *ctx, + unsigned int pixelformat) +{ + const struct mtk_video_fmt *fmt; + struct mtk_q_data *dst_q_data; + unsigned int k; + + dst_q_data = &ctx->q_data[MTK_Q_DATA_DST]; + for (k = 0; k < num_supported_formats; k++) { + fmt = &mtk_video_formats[k]; + if (fmt->fourcc == pixelformat) { + mtk_v4l2_vdec_dbg(1, ctx, "Update cap fourcc(%d -> %d)", + dst_q_data->fmt->fourcc, pixelformat); + dst_q_data->fmt = fmt; + return; + } + } + + mtk_v4l2_vdec_err(ctx, "Cannot get fourcc(%d), using init value", pixelformat); +} + +static int mtk_vdec_pic_info_update(struct mtk_vcodec_dec_ctx *ctx) +{ + unsigned int dpbsize = 0; + int ret; + + if (vdec_if_get_param(ctx, GET_PARAM_PIC_INFO, + &ctx->last_decoded_picinfo)) { + mtk_v4l2_vdec_err(ctx, "[%d]Error!! Cannot get param : GET_PARAM_PICTURE_INFO ERR", + ctx->id); + return -EINVAL; + } + + if (ctx->last_decoded_picinfo.pic_w == 0 || + ctx->last_decoded_picinfo.pic_h == 0 || + ctx->last_decoded_picinfo.buf_w == 0 || + ctx->last_decoded_picinfo.buf_h == 0) { + mtk_v4l2_vdec_err(ctx, "Cannot get correct pic info"); + return -EINVAL; + } + + if (ctx->last_decoded_picinfo.cap_fourcc != ctx->picinfo.cap_fourcc && + ctx->picinfo.cap_fourcc != 0) + mtk_vdec_update_fmt(ctx, ctx->picinfo.cap_fourcc); + + if (ctx->last_decoded_picinfo.pic_w == ctx->picinfo.pic_w || + ctx->last_decoded_picinfo.pic_h == ctx->picinfo.pic_h) + return 0; + + mtk_v4l2_vdec_dbg(1, ctx, "[%d]-> new(%d,%d), old(%d,%d), real(%d,%d)", ctx->id, + ctx->last_decoded_picinfo.pic_w, + ctx->last_decoded_picinfo.pic_h, ctx->picinfo.pic_w, + ctx->picinfo.pic_h, ctx->last_decoded_picinfo.buf_w, + ctx->last_decoded_picinfo.buf_h); + + ret = vdec_if_get_param(ctx, GET_PARAM_DPB_SIZE, &dpbsize); + if (dpbsize == 0) + mtk_v4l2_vdec_err(ctx, "Incorrect dpb size, ret=%d", ret); + + ctx->dpb_size = dpbsize; + + return ret; +} + +static void mtk_vdec_worker(struct work_struct *work) +{ + struct mtk_vcodec_dec_ctx *ctx = + container_of(work, struct mtk_vcodec_dec_ctx, decode_work); + struct mtk_vcodec_dec_dev *dev = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + struct mtk_vcodec_mem buf; + struct vdec_fb *pfb; + bool res_chg = false; + int ret; + struct mtk_video_dec_buf *dst_buf_info, *src_buf_info; + + src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + if (!src_buf) { + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); + mtk_v4l2_vdec_dbg(1, ctx, "[%d] src_buf empty!!", ctx->id); + return; + } + + dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); + if (!dst_buf) { + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); + mtk_v4l2_vdec_dbg(1, ctx, "[%d] dst_buf empty!!", ctx->id); + return; + } + + dst_buf_info = + container_of(dst_buf, struct mtk_video_dec_buf, m2m_buf.vb); + + pfb = &dst_buf_info->frame_buffer; + pfb->base_y.va = vb2_plane_vaddr(&dst_buf->vb2_buf, 0); + pfb->base_y.dma_addr = + vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); + pfb->base_y.size = ctx->picinfo.fb_sz[0]; + + pfb->base_c.va = vb2_plane_vaddr(&dst_buf->vb2_buf, 1); + pfb->base_c.dma_addr = + vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 1); + pfb->base_c.size = ctx->picinfo.fb_sz[1]; + pfb->status = 0; + mtk_v4l2_vdec_dbg(3, ctx, "===>[%d] vdec_if_decode() ===>", ctx->id); + + mtk_v4l2_vdec_dbg(3, ctx, + "id=%d Framebuf pfb=%p VA=%p Y_DMA=%pad C_DMA=%pad Size=%zx", + dst_buf->vb2_buf.index, pfb, pfb->base_y.va, + &pfb->base_y.dma_addr, &pfb->base_c.dma_addr, pfb->base_y.size); + + if (src_buf == &ctx->empty_flush_buf.vb) { + mtk_v4l2_vdec_dbg(1, ctx, "Got empty flush input buffer."); + src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + + /* update dst buf status */ + dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); + mutex_lock(&ctx->lock); + dst_buf_info->used = false; + mutex_unlock(&ctx->lock); + + vdec_if_decode(ctx, NULL, NULL, &res_chg); + clean_display_buffer(ctx); + vb2_set_plane_payload(&dst_buf->vb2_buf, 0, 0); + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) + vb2_set_plane_payload(&dst_buf->vb2_buf, 1, 0); + dst_buf->flags |= V4L2_BUF_FLAG_LAST; + v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE); + clean_free_buffer(ctx); + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); + return; + } + + src_buf_info = + container_of(src_buf, struct mtk_video_dec_buf, m2m_buf.vb); + + buf.va = vb2_plane_vaddr(&src_buf->vb2_buf, 0); + buf.dma_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + buf.size = (size_t)src_buf->vb2_buf.planes[0].bytesused; + if (!buf.va) { + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); + mtk_v4l2_vdec_err(ctx, "[%d] id=%d src_addr is NULL!!", ctx->id, + src_buf->vb2_buf.index); + return; + } + mtk_v4l2_vdec_dbg(3, ctx, "[%d] Bitstream VA=%p DMA=%pad Size=%zx vb=%p", + ctx->id, buf.va, &buf.dma_addr, buf.size, src_buf); + dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp; + dst_buf->timecode = src_buf->timecode; + mutex_lock(&ctx->lock); + dst_buf_info->used = true; + mutex_unlock(&ctx->lock); + src_buf_info->used = true; + + ret = vdec_if_decode(ctx, &buf, pfb, &res_chg); + + if (ret) { + mtk_v4l2_vdec_err(ctx, + "[%d] decode src[%d] sz=0x%zx pts=%llu dst[%d] ret=%d res_chg=%d", + ctx->id, src_buf->vb2_buf.index, buf.size, + src_buf->vb2_buf.timestamp, dst_buf->vb2_buf.index, ret, res_chg); + src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + if (ret == -EIO) { + mutex_lock(&ctx->lock); + src_buf_info->error = true; + mutex_unlock(&ctx->lock); + } + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); + } else if (!res_chg) { + /* + * we only return src buffer with VB2_BUF_STATE_DONE + * when decode success without resolution change + */ + src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); + } + + dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); + clean_display_buffer(ctx); + clean_free_buffer(ctx); + + if (!ret && res_chg) { + mtk_vdec_pic_info_update(ctx); + /* + * On encountering a resolution change in the stream. + * The driver must first process and decode all + * remaining buffers from before the resolution change + * point, so call flush decode here + */ + mtk_vdec_flush_decoder(ctx); + /* + * After all buffers containing decoded frames from + * before the resolution change point ready to be + * dequeued on the CAPTURE queue, the driver sends a + * V4L2_EVENT_SOURCE_CHANGE event for source change + * type V4L2_EVENT_SRC_CH_RESOLUTION + */ + mtk_vdec_queue_res_chg_event(ctx); + } + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); +} + +static void vb2ops_vdec_stateful_buf_queue(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *src_buf; + struct mtk_vcodec_mem src_mem; + bool res_chg = false; + int ret; + unsigned int dpbsize = 1, i; + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct vb2_v4l2_buffer *vb2_v4l2; + struct mtk_q_data *dst_q_data; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d] (%d) id=%d, vb=%p", ctx->id, + vb->vb2_queue->type, vb->index, vb); + /* + * check if this buffer is ready to be used after decode + */ + if (vb->vb2_queue->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + struct mtk_video_dec_buf *buf; + + vb2_v4l2 = to_vb2_v4l2_buffer(vb); + buf = container_of(vb2_v4l2, struct mtk_video_dec_buf, + m2m_buf.vb); + mutex_lock(&ctx->lock); + if (!buf->used) { + v4l2_m2m_buf_queue(ctx->m2m_ctx, vb2_v4l2); + buf->queued_in_vb2 = true; + buf->queued_in_v4l2 = true; + } else { + buf->queued_in_vb2 = false; + buf->queued_in_v4l2 = true; + } + mutex_unlock(&ctx->lock); + return; + } + + v4l2_m2m_buf_queue(ctx->m2m_ctx, to_vb2_v4l2_buffer(vb)); + + if (ctx->state != MTK_STATE_INIT) { + mtk_v4l2_vdec_dbg(3, ctx, "[%d] already init driver %d", ctx->id, ctx->state); + return; + } + + src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + if (!src_buf) { + mtk_v4l2_vdec_err(ctx, "No src buffer"); + return; + } + + if (src_buf == &ctx->empty_flush_buf.vb) { + /* This shouldn't happen. Just in case. */ + mtk_v4l2_vdec_err(ctx, "Invalid flush buffer."); + v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + return; + } + + src_mem.va = vb2_plane_vaddr(&src_buf->vb2_buf, 0); + src_mem.dma_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + src_mem.size = (size_t)src_buf->vb2_buf.planes[0].bytesused; + mtk_v4l2_vdec_dbg(2, ctx, "[%d] buf id=%d va=%p dma=%pad size=%zx", ctx->id, + src_buf->vb2_buf.index, src_mem.va, &src_mem.dma_addr, src_mem.size); + + ret = vdec_if_decode(ctx, &src_mem, NULL, &res_chg); + if (ret || !res_chg) { + /* + * fb == NULL means to parse SPS/PPS header or + * resolution info in src_mem. Decode can fail + * if there is no SPS header or picture info + * in bs + */ + + src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + if (ret == -EIO) { + mtk_v4l2_vdec_err(ctx, "[%d] Unrecoverable error in vdec_if_decode.", + ctx->id); + ctx->state = MTK_STATE_ABORT; + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); + } else { + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); + } + mtk_v4l2_vdec_dbg(ret ? 0 : 1, ctx, + "[%d] decode() src_buf=%d, size=%zu, fail=%d, res_chg=%d", + ctx->id, src_buf->vb2_buf.index, src_mem.size, ret, res_chg); + return; + } + + if (vdec_if_get_param(ctx, GET_PARAM_PIC_INFO, &ctx->picinfo)) { + mtk_v4l2_vdec_err(ctx, "[%d]Error!! Cannot get param : GET_PARAM_PICTURE_INFO ERR", + ctx->id); + return; + } + + ctx->last_decoded_picinfo = ctx->picinfo; + dst_q_data = &ctx->q_data[MTK_Q_DATA_DST]; + for (i = 0; i < dst_q_data->fmt->num_planes; i++) { + dst_q_data->sizeimage[i] = ctx->picinfo.fb_sz[i]; + dst_q_data->bytesperline[i] = ctx->picinfo.buf_w; + } + + mtk_v4l2_vdec_dbg(2, ctx, "[%d] init OK wxh=%dx%d pic wxh=%dx%d sz[0]=0x%x sz[1]=0x%x", + ctx->id, ctx->picinfo.buf_w, ctx->picinfo.buf_h, ctx->picinfo.pic_w, + ctx->picinfo.pic_h, dst_q_data->sizeimage[0], dst_q_data->sizeimage[1]); + + ret = vdec_if_get_param(ctx, GET_PARAM_DPB_SIZE, &dpbsize); + if (dpbsize == 0) + mtk_v4l2_vdec_err(ctx, "[%d] GET_PARAM_DPB_SIZE fail=%d", ctx->id, ret); + + ctx->dpb_size = dpbsize; + ctx->state = MTK_STATE_HEADER; + mtk_v4l2_vdec_dbg(1, ctx, "[%d] dpbsize=%d", ctx->id, ctx->dpb_size); + + mtk_vdec_queue_res_chg_event(ctx); +} + +static int mtk_vdec_g_v_ctrl(struct v4l2_ctrl *ctrl) +{ + struct mtk_vcodec_dec_ctx *ctx = ctrl_to_dec_ctx(ctrl); + int ret = 0; + + switch (ctrl->id) { + case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE: + if (ctx->state >= MTK_STATE_HEADER) { + ctrl->val = ctx->dpb_size; + } else { + mtk_v4l2_vdec_dbg(0, ctx, "Seqinfo not ready"); + ctrl->val = 0; + } + break; + default: + ret = -EINVAL; + } + return ret; +} + +static const struct v4l2_ctrl_ops mtk_vcodec_dec_ctrl_ops = { + .g_volatile_ctrl = mtk_vdec_g_v_ctrl, +}; + +static int mtk_vcodec_dec_ctrls_setup(struct mtk_vcodec_dec_ctx *ctx) +{ + struct v4l2_ctrl *ctrl; + + v4l2_ctrl_handler_init(&ctx->ctrl_hdl, 1); + + ctrl = v4l2_ctrl_new_std(&ctx->ctrl_hdl, &mtk_vcodec_dec_ctrl_ops, + V4L2_CID_MIN_BUFFERS_FOR_CAPTURE, 0, 32, 1, 1); + ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE; + v4l2_ctrl_new_std_menu(&ctx->ctrl_hdl, &mtk_vcodec_dec_ctrl_ops, + V4L2_CID_MPEG_VIDEO_VP9_PROFILE, + V4L2_MPEG_VIDEO_VP9_PROFILE_0, 0, + V4L2_MPEG_VIDEO_VP9_PROFILE_0); + /* + * H264. Baseline / Extended decoding is not supported. + */ + v4l2_ctrl_new_std_menu(&ctx->ctrl_hdl, &mtk_vcodec_dec_ctrl_ops, + V4L2_CID_MPEG_VIDEO_H264_PROFILE, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, + BIT(V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE) | + BIT(V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED), + V4L2_MPEG_VIDEO_H264_PROFILE_MAIN); + + if (ctx->ctrl_hdl.error) { + mtk_v4l2_vdec_err(ctx, "Adding control failed %d", ctx->ctrl_hdl.error); + return ctx->ctrl_hdl.error; + } + + v4l2_ctrl_handler_setup(&ctx->ctrl_hdl); + return 0; +} + +static void mtk_init_vdec_params(struct mtk_vcodec_dec_ctx *ctx) +{ + unsigned int i; + + if (!(ctx->dev->dec_capability & VCODEC_CAPABILITY_4K_DISABLED)) { + for (i = 0; i < num_supported_formats; i++) { + if (mtk_video_formats[i].type != MTK_FMT_DEC) + continue; + + mtk_video_formats[i].frmsize.max_width = + VCODEC_DEC_4K_CODED_WIDTH; + mtk_video_formats[i].frmsize.max_height = + VCODEC_DEC_4K_CODED_HEIGHT; + } + } +} + +static struct vb2_ops mtk_vdec_frame_vb2_ops = { + .queue_setup = vb2ops_vdec_queue_setup, + .buf_prepare = vb2ops_vdec_buf_prepare, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, + .start_streaming = vb2ops_vdec_start_streaming, + + .buf_queue = vb2ops_vdec_stateful_buf_queue, + .buf_init = vb2ops_vdec_buf_init, + .buf_finish = vb2ops_vdec_buf_finish, + .stop_streaming = vb2ops_vdec_stop_streaming, +}; + +const struct mtk_vcodec_dec_pdata mtk_vdec_8173_pdata = { + .init_vdec_params = mtk_init_vdec_params, + .ctrls_setup = mtk_vcodec_dec_ctrls_setup, + .vdec_vb2_ops = &mtk_vdec_frame_vb2_ops, + .vdec_formats = mtk_video_formats, + .num_formats = &num_supported_formats, + .default_out_fmt = &mtk_video_formats[DEFAULT_OUT_FMT_IDX], + .default_cap_fmt = &mtk_video_formats[DEFAULT_CAP_FMT_IDX], + .worker = mtk_vdec_worker, + .flush_decoder = mtk_vdec_flush_decoder, + .is_subdev_supported = false, + .hw_arch = MTK_VDEC_PURE_SINGLE_CORE, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_stateless.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_stateless.c new file mode 100644 index 0000000000..e29c9c58f3 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_stateless.c @@ -0,0 +1,756 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <media/videobuf2-v4l2.h> +#include <media/videobuf2-dma-contig.h> +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> +#include <linux/module.h> + +#include "mtk_vcodec_dec.h" +#include "mtk_vcodec_dec_pm.h" +#include "vdec_drv_if.h" + +/** + * struct mtk_stateless_control - CID control type + * @cfg: control configuration + * @codec_type: codec type (V4L2 pixel format) for CID control type + */ +struct mtk_stateless_control { + struct v4l2_ctrl_config cfg; + int codec_type; +}; + +static const struct mtk_stateless_control mtk_stateless_controls[] = { + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_SPS, + }, + .codec_type = V4L2_PIX_FMT_H264_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_PPS, + }, + .codec_type = V4L2_PIX_FMT_H264_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX, + }, + .codec_type = V4L2_PIX_FMT_H264_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS, + }, + .codec_type = V4L2_PIX_FMT_H264_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_H264_PROFILE, + .def = V4L2_MPEG_VIDEO_H264_PROFILE_MAIN, + .max = V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, + .menu_skip_mask = + BIT(V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE) | + BIT(V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED), + }, + .codec_type = V4L2_PIX_FMT_H264_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_DECODE_MODE, + .min = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, + .def = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, + .max = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, + }, + .codec_type = V4L2_PIX_FMT_H264_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_START_CODE, + .min = V4L2_STATELESS_H264_START_CODE_ANNEX_B, + .def = V4L2_STATELESS_H264_START_CODE_ANNEX_B, + .max = V4L2_STATELESS_H264_START_CODE_ANNEX_B, + }, + .codec_type = V4L2_PIX_FMT_H264_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_VP8_FRAME, + }, + .codec_type = V4L2_PIX_FMT_VP8_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE, + .min = V4L2_MPEG_VIDEO_VP8_PROFILE_0, + .def = V4L2_MPEG_VIDEO_VP8_PROFILE_0, + .max = V4L2_MPEG_VIDEO_VP8_PROFILE_3, + }, + .codec_type = V4L2_PIX_FMT_VP8_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_VP9_FRAME, + }, + .codec_type = V4L2_PIX_FMT_VP9_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE, + .min = V4L2_MPEG_VIDEO_VP9_PROFILE_0, + .def = V4L2_MPEG_VIDEO_VP9_PROFILE_0, + .max = V4L2_MPEG_VIDEO_VP9_PROFILE_3, + }, + .codec_type = V4L2_PIX_FMT_VP9_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_SPS, + }, + .codec_type = V4L2_PIX_FMT_HEVC_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_PPS, + }, + .codec_type = V4L2_PIX_FMT_HEVC_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, + }, + .codec_type = V4L2_PIX_FMT_HEVC_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, + }, + .codec_type = V4L2_PIX_FMT_HEVC_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_HEVC_PROFILE, + .def = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN, + .max = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10, + .menu_skip_mask = + BIT(V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_STILL_PICTURE), + }, + .codec_type = V4L2_PIX_FMT_HEVC_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, + .min = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + .def = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + .max = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + }, + .codec_type = V4L2_PIX_FMT_HEVC_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_START_CODE, + .min = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, + .def = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, + .max = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, + }, + .codec_type = V4L2_PIX_FMT_HEVC_SLICE, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_AV1_SEQUENCE, + + }, + .codec_type = V4L2_PIX_FMT_AV1_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_AV1_FRAME, + + }, + .codec_type = V4L2_PIX_FMT_AV1_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY, + .dims = { V4L2_AV1_MAX_TILE_COUNT }, + + }, + .codec_type = V4L2_PIX_FMT_AV1_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE, + .min = V4L2_MPEG_VIDEO_AV1_PROFILE_MAIN, + .def = V4L2_MPEG_VIDEO_AV1_PROFILE_MAIN, + .max = V4L2_MPEG_VIDEO_AV1_PROFILE_MAIN, + }, + .codec_type = V4L2_PIX_FMT_AV1_FRAME, + }, + { + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_AV1_LEVEL, + .min = V4L2_MPEG_VIDEO_AV1_LEVEL_2_0, + .def = V4L2_MPEG_VIDEO_AV1_LEVEL_4_0, + .max = V4L2_MPEG_VIDEO_AV1_LEVEL_5_1, + }, + .codec_type = V4L2_PIX_FMT_AV1_FRAME, + }, +}; + +#define NUM_CTRLS ARRAY_SIZE(mtk_stateless_controls) + +static struct mtk_video_fmt mtk_video_formats[9]; + +static struct mtk_video_fmt default_out_format; +static struct mtk_video_fmt default_cap_format; +static unsigned int num_formats; + +static const struct v4l2_frmsize_stepwise stepwise_fhd = { + .min_width = MTK_VDEC_MIN_W, + .max_width = MTK_VDEC_MAX_W, + .step_width = 16, + .min_height = MTK_VDEC_MIN_H, + .max_height = MTK_VDEC_MAX_H, + .step_height = 16 +}; + +static void mtk_vdec_stateless_cap_to_disp(struct mtk_vcodec_dec_ctx *ctx, int error, + struct media_request *src_buf_req) +{ + struct vb2_v4l2_buffer *vb2_dst; + enum vb2_buffer_state state; + + if (error) + state = VB2_BUF_STATE_ERROR; + else + state = VB2_BUF_STATE_DONE; + + vb2_dst = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); + if (vb2_dst) { + v4l2_m2m_buf_done(vb2_dst, state); + mtk_v4l2_vdec_dbg(2, ctx, "free frame buffer id:%d to done list", + vb2_dst->vb2_buf.index); + } else { + mtk_v4l2_vdec_err(ctx, "dst buffer is NULL"); + } + + if (src_buf_req) + v4l2_ctrl_request_complete(src_buf_req, &ctx->ctrl_hdl); +} + +static struct vdec_fb *vdec_get_cap_buffer(struct mtk_vcodec_dec_ctx *ctx) +{ + struct mtk_video_dec_buf *framebuf; + struct vb2_v4l2_buffer *vb2_v4l2; + struct vb2_buffer *dst_buf; + struct vdec_fb *pfb; + + vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); + if (!vb2_v4l2) { + mtk_v4l2_vdec_dbg(1, ctx, "[%d] dst_buf empty!!", ctx->id); + return NULL; + } + + dst_buf = &vb2_v4l2->vb2_buf; + framebuf = container_of(vb2_v4l2, struct mtk_video_dec_buf, m2m_buf.vb); + + pfb = &framebuf->frame_buffer; + pfb->base_y.va = vb2_plane_vaddr(dst_buf, 0); + pfb->base_y.dma_addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0); + pfb->base_y.size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[0]; + + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) { + pfb->base_c.va = vb2_plane_vaddr(dst_buf, 1); + pfb->base_c.dma_addr = + vb2_dma_contig_plane_dma_addr(dst_buf, 1); + pfb->base_c.size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[1]; + } + mtk_v4l2_vdec_dbg(1, ctx, + "id=%d Framebuf pfb=%p VA=%p Y/C_DMA=%pad_%pad Sz=%zx frame_count = %d", + dst_buf->index, pfb, pfb->base_y.va, &pfb->base_y.dma_addr, + &pfb->base_c.dma_addr, pfb->base_y.size, ctx->decoded_frame_cnt); + + return pfb; +} + +static void vb2ops_vdec_buf_request_complete(struct vb2_buffer *vb) +{ + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + + v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->ctrl_hdl); +} + +static void mtk_vdec_worker(struct work_struct *work) +{ + struct mtk_vcodec_dec_ctx *ctx = + container_of(work, struct mtk_vcodec_dec_ctx, decode_work); + struct mtk_vcodec_dec_dev *dev = ctx->dev; + struct vb2_v4l2_buffer *vb2_v4l2_src; + struct vb2_buffer *vb2_src; + struct mtk_vcodec_mem *bs_src; + struct mtk_video_dec_buf *dec_buf_src; + struct media_request *src_buf_req; + enum vb2_buffer_state state; + bool res_chg = false; + int ret; + + vb2_v4l2_src = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + if (!vb2_v4l2_src) { + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); + mtk_v4l2_vdec_dbg(1, ctx, "[%d] no available source buffer", ctx->id); + return; + } + + vb2_src = &vb2_v4l2_src->vb2_buf; + dec_buf_src = container_of(vb2_v4l2_src, struct mtk_video_dec_buf, + m2m_buf.vb); + bs_src = &dec_buf_src->bs_buffer; + + mtk_v4l2_vdec_dbg(3, ctx, "[%d] (%d) id=%d, vb=%p", ctx->id, + vb2_src->vb2_queue->type, vb2_src->index, vb2_src); + + bs_src->va = vb2_plane_vaddr(vb2_src, 0); + bs_src->dma_addr = vb2_dma_contig_plane_dma_addr(vb2_src, 0); + bs_src->size = (size_t)vb2_src->planes[0].bytesused; + if (!bs_src->va) { + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); + mtk_v4l2_vdec_err(ctx, "[%d] id=%d source buffer is NULL", ctx->id, + vb2_src->index); + return; + } + + mtk_v4l2_vdec_dbg(3, ctx, "[%d] Bitstream VA=%p DMA=%pad Size=%zx vb=%p", + ctx->id, bs_src->va, &bs_src->dma_addr, bs_src->size, vb2_src); + /* Apply request controls. */ + src_buf_req = vb2_src->req_obj.req; + if (src_buf_req) + v4l2_ctrl_request_setup(src_buf_req, &ctx->ctrl_hdl); + else + mtk_v4l2_vdec_err(ctx, "vb2 buffer media request is NULL"); + + ret = vdec_if_decode(ctx, bs_src, NULL, &res_chg); + if (ret && ret != -EAGAIN) { + mtk_v4l2_vdec_err(ctx, + "[%d] decode src_buf[%d] sz=0x%zx pts=%llu ret=%d res_chg=%d", + ctx->id, vb2_src->index, bs_src->size, + vb2_src->timestamp, ret, res_chg); + if (ret == -EIO) { + mutex_lock(&ctx->lock); + dec_buf_src->error = true; + mutex_unlock(&ctx->lock); + } + } + + state = ret ? VB2_BUF_STATE_ERROR : VB2_BUF_STATE_DONE; + if (!IS_VDEC_LAT_ARCH(dev->vdec_pdata->hw_arch) || + ctx->current_codec == V4L2_PIX_FMT_VP8_FRAME) { + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx, state); + if (src_buf_req) + v4l2_ctrl_request_complete(src_buf_req, &ctx->ctrl_hdl); + } else { + if (ret != -EAGAIN) { + v4l2_m2m_src_buf_remove(ctx->m2m_ctx); + v4l2_m2m_buf_done(vb2_v4l2_src, state); + } + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); + } +} + +static void vb2ops_vdec_stateless_buf_queue(struct vb2_buffer *vb) +{ + struct mtk_vcodec_dec_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct vb2_v4l2_buffer *vb2_v4l2 = to_vb2_v4l2_buffer(vb); + + mtk_v4l2_vdec_dbg(3, ctx, "[%d] (%d) id=%d, vb=%p", ctx->id, vb->vb2_queue->type, + vb->index, vb); + + mutex_lock(&ctx->lock); + v4l2_m2m_buf_queue(ctx->m2m_ctx, vb2_v4l2); + mutex_unlock(&ctx->lock); + if (vb->vb2_queue->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) + return; + + /* If an OUTPUT buffer, we may need to update the state */ + if (ctx->state == MTK_STATE_INIT) { + ctx->state = MTK_STATE_HEADER; + mtk_v4l2_vdec_dbg(1, ctx, "Init driver from init to header."); + } else { + mtk_v4l2_vdec_dbg(3, ctx, "[%d] already init driver %d", ctx->id, ctx->state); + } +} + +static int mtk_vdec_flush_decoder(struct mtk_vcodec_dec_ctx *ctx) +{ + bool res_chg; + + return vdec_if_decode(ctx, NULL, NULL, &res_chg); +} + +static int mtk_vcodec_get_pic_info(struct mtk_vcodec_dec_ctx *ctx) +{ + struct mtk_q_data *q_data; + int ret = 0; + + q_data = &ctx->q_data[MTK_Q_DATA_DST]; + if (q_data->fmt->num_planes == 1) { + mtk_v4l2_vdec_err(ctx, "[%d]Error!! 10bit mode not support one plane", ctx->id); + return -EINVAL; + } + + ctx->capture_fourcc = q_data->fmt->fourcc; + ret = vdec_if_get_param(ctx, GET_PARAM_PIC_INFO, &ctx->picinfo); + if (ret) { + mtk_v4l2_vdec_err(ctx, "[%d]Error!! Get GET_PARAM_PICTURE_INFO Fail", ctx->id); + return ret; + } + + ctx->last_decoded_picinfo = ctx->picinfo; + + q_data->sizeimage[0] = ctx->picinfo.fb_sz[0]; + q_data->bytesperline[0] = ctx->picinfo.buf_w * 5 / 4; + + q_data->sizeimage[1] = ctx->picinfo.fb_sz[1]; + q_data->bytesperline[1] = ctx->picinfo.buf_w * 5 / 4; + + q_data->coded_width = ctx->picinfo.buf_w; + q_data->coded_height = ctx->picinfo.buf_h; + mtk_v4l2_vdec_dbg(1, ctx, "[%d] wxh=%dx%d pic wxh=%dx%d sz[0]=0x%x sz[1]=0x%x", + ctx->id, ctx->picinfo.buf_w, ctx->picinfo.buf_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + q_data->sizeimage[0], q_data->sizeimage[1]); + + return ret; +} + +static int mtk_vdec_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct mtk_vcodec_dec_ctx *ctx = ctrl_to_dec_ctx(ctrl); + struct v4l2_ctrl_h264_sps *h264; + struct v4l2_ctrl_hevc_sps *h265; + struct v4l2_ctrl_vp9_frame *frame; + struct v4l2_ctrl_av1_sequence *seq; + struct v4l2_ctrl *hdr_ctrl; + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + const struct mtk_video_fmt *fmt; + int i = 0, ret = 0; + + hdr_ctrl = ctrl; + if (!hdr_ctrl || !hdr_ctrl->p_new.p) + return -EINVAL; + + switch (hdr_ctrl->id) { + case V4L2_CID_STATELESS_H264_SPS: + h264 = (struct v4l2_ctrl_h264_sps *)hdr_ctrl->p_new.p; + + if (h264->bit_depth_chroma_minus8 == 2 && h264->bit_depth_luma_minus8 == 2) { + ctx->is_10bit_bitstream = true; + } else if (h264->bit_depth_chroma_minus8 != 0 && + h264->bit_depth_luma_minus8 != 0) { + mtk_v4l2_vdec_err(ctx, "H264: chroma_minus8:%d, luma_minus8:%d", + h264->bit_depth_chroma_minus8, + h264->bit_depth_luma_minus8); + return -EINVAL; + } + break; + case V4L2_CID_STATELESS_HEVC_SPS: + h265 = (struct v4l2_ctrl_hevc_sps *)hdr_ctrl->p_new.p; + + if (h265->bit_depth_chroma_minus8 == 2 && h265->bit_depth_luma_minus8 == 2) { + ctx->is_10bit_bitstream = true; + } else if (h265->bit_depth_chroma_minus8 != 0 && + h265->bit_depth_luma_minus8 != 0) { + mtk_v4l2_vdec_err(ctx, "HEVC: chroma_minus8:%d, luma_minus8:%d", + h265->bit_depth_chroma_minus8, + h265->bit_depth_luma_minus8); + return -EINVAL; + } + break; + case V4L2_CID_STATELESS_VP9_FRAME: + frame = (struct v4l2_ctrl_vp9_frame *)hdr_ctrl->p_new.p; + + if (frame->bit_depth == 10) { + ctx->is_10bit_bitstream = true; + } else if (frame->bit_depth != 8) { + mtk_v4l2_vdec_err(ctx, "VP9: bit_depth:%d", frame->bit_depth); + return -EINVAL; + } + break; + case V4L2_CID_STATELESS_AV1_SEQUENCE: + seq = (struct v4l2_ctrl_av1_sequence *)hdr_ctrl->p_new.p; + + if (seq->bit_depth == 10) { + ctx->is_10bit_bitstream = true; + } else if (seq->bit_depth != 8) { + mtk_v4l2_vdec_err(ctx, "AV1: bit_depth:%d", seq->bit_depth); + return -EINVAL; + } + break; + default: + mtk_v4l2_vdec_dbg(3, ctx, "Not supported to set ctrl id: 0x%x\n", hdr_ctrl->id); + return ret; + } + + if (!ctx->is_10bit_bitstream) + return ret; + + for (i = 0; i < *dec_pdata->num_formats; i++) { + fmt = &dec_pdata->vdec_formats[i]; + if (fmt->fourcc == V4L2_PIX_FMT_MT2110R && + hdr_ctrl->id == V4L2_CID_STATELESS_H264_SPS) { + ctx->q_data[MTK_Q_DATA_DST].fmt = fmt; + break; + } + + if (fmt->fourcc == V4L2_PIX_FMT_MT2110T && + (hdr_ctrl->id == V4L2_CID_STATELESS_HEVC_SPS || + hdr_ctrl->id == V4L2_CID_STATELESS_VP9_FRAME || + hdr_ctrl->id == V4L2_CID_STATELESS_AV1_SEQUENCE)) { + ctx->q_data[MTK_Q_DATA_DST].fmt = fmt; + break; + } + } + ret = mtk_vcodec_get_pic_info(ctx); + + return ret; +} + +static const struct v4l2_ctrl_ops mtk_vcodec_dec_ctrl_ops = { + .s_ctrl = mtk_vdec_s_ctrl, +}; + +static int mtk_vcodec_dec_ctrls_setup(struct mtk_vcodec_dec_ctx *ctx) +{ + unsigned int i; + + v4l2_ctrl_handler_init(&ctx->ctrl_hdl, NUM_CTRLS); + if (ctx->ctrl_hdl.error) { + mtk_v4l2_vdec_err(ctx, "v4l2_ctrl_handler_init failed\n"); + return ctx->ctrl_hdl.error; + } + + for (i = 0; i < NUM_CTRLS; i++) { + struct v4l2_ctrl_config cfg = mtk_stateless_controls[i].cfg; + cfg.ops = &mtk_vcodec_dec_ctrl_ops; + v4l2_ctrl_new_custom(&ctx->ctrl_hdl, &cfg, NULL); + if (ctx->ctrl_hdl.error) { + mtk_v4l2_vdec_err(ctx, "Adding control %d failed %d", i, + ctx->ctrl_hdl.error); + return ctx->ctrl_hdl.error; + } + } + + v4l2_ctrl_handler_setup(&ctx->ctrl_hdl); + + return 0; +} + +static int fops_media_request_validate(struct media_request *mreq) +{ + const unsigned int buffer_cnt = vb2_request_buffer_cnt(mreq); + + switch (buffer_cnt) { + case 1: + /* We expect exactly one buffer with the request */ + break; + case 0: + pr_debug(MTK_DBG_VCODEC_STR "No buffer provided with the request."); + return -ENOENT; + default: + pr_debug(MTK_DBG_VCODEC_STR "Too many buffers (%d) provided with the request.", + buffer_cnt); + return -EINVAL; + } + + return vb2_request_validate(mreq); +} + +const struct media_device_ops mtk_vcodec_media_ops = { + .req_validate = fops_media_request_validate, + .req_queue = v4l2_m2m_request_queue, +}; + +static void mtk_vcodec_add_formats(unsigned int fourcc, + struct mtk_vcodec_dec_ctx *ctx) +{ + struct mtk_vcodec_dec_dev *dev = ctx->dev; + const struct mtk_vcodec_dec_pdata *pdata = dev->vdec_pdata; + int count_formats = *pdata->num_formats; + + switch (fourcc) { + case V4L2_PIX_FMT_H264_SLICE: + case V4L2_PIX_FMT_VP8_FRAME: + case V4L2_PIX_FMT_VP9_FRAME: + case V4L2_PIX_FMT_HEVC_SLICE: + case V4L2_PIX_FMT_AV1_FRAME: + mtk_video_formats[count_formats].fourcc = fourcc; + mtk_video_formats[count_formats].type = MTK_FMT_DEC; + mtk_video_formats[count_formats].num_planes = 1; + mtk_video_formats[count_formats].frmsize = stepwise_fhd; + + if (!(ctx->dev->dec_capability & VCODEC_CAPABILITY_4K_DISABLED) && + fourcc != V4L2_PIX_FMT_VP8_FRAME) { + mtk_video_formats[count_formats].frmsize.max_width = + VCODEC_DEC_4K_CODED_WIDTH; + mtk_video_formats[count_formats].frmsize.max_height = + VCODEC_DEC_4K_CODED_HEIGHT; + } + break; + case V4L2_PIX_FMT_MM21: + case V4L2_PIX_FMT_MT21C: + case V4L2_PIX_FMT_MT2110T: + case V4L2_PIX_FMT_MT2110R: + mtk_video_formats[count_formats].fourcc = fourcc; + mtk_video_formats[count_formats].type = MTK_FMT_FRAME; + mtk_video_formats[count_formats].num_planes = 2; + break; + default: + mtk_v4l2_vdec_err(ctx, "Can not add unsupported format type"); + return; + } + + num_formats++; + mtk_v4l2_vdec_dbg(3, ctx, "num_formats: %d dec_capability: 0x%x", + count_formats, ctx->dev->dec_capability); +} + +static void mtk_vcodec_get_supported_formats(struct mtk_vcodec_dec_ctx *ctx) +{ + int cap_format_count = 0, out_format_count = 0; + + if (num_formats) + return; + + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_MT21C) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_MT21C, ctx); + cap_format_count++; + } + if (ctx->dev->dec_capability & MTK_VDEC_IS_SUPPORT_10BIT) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_MT2110T, ctx); + cap_format_count++; + mtk_vcodec_add_formats(V4L2_PIX_FMT_MT2110R, ctx); + cap_format_count++; + } + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_MM21) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_MM21, ctx); + cap_format_count++; + } + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_H264_SLICE) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_H264_SLICE, ctx); + out_format_count++; + } + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_VP8_FRAME) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_VP8_FRAME, ctx); + out_format_count++; + } + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_VP9_FRAME) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_VP9_FRAME, ctx); + out_format_count++; + } + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_HEVC_FRAME) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_HEVC_SLICE, ctx); + out_format_count++; + } + if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_AV1_FRAME) { + mtk_vcodec_add_formats(V4L2_PIX_FMT_AV1_FRAME, ctx); + out_format_count++; + } + + if (cap_format_count) + default_cap_format = mtk_video_formats[cap_format_count - 1]; + if (out_format_count) + default_out_format = + mtk_video_formats[cap_format_count + out_format_count - 1]; +} + +static void mtk_init_vdec_params(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vb2_queue *src_vq; + + src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + + if (!ctx->dev->vdec_pdata->is_subdev_supported) + ctx->dev->dec_capability |= + MTK_VDEC_FORMAT_H264_SLICE | MTK_VDEC_FORMAT_MM21; + mtk_vcodec_get_supported_formats(ctx); + + /* Support request api for output plane */ + src_vq->supports_requests = true; + src_vq->requires_requests = true; +} + +static int vb2ops_vdec_out_buf_validate(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + + vbuf->field = V4L2_FIELD_NONE; + return 0; +} + +static struct vb2_ops mtk_vdec_request_vb2_ops = { + .queue_setup = vb2ops_vdec_queue_setup, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, + .start_streaming = vb2ops_vdec_start_streaming, + .stop_streaming = vb2ops_vdec_stop_streaming, + + .buf_queue = vb2ops_vdec_stateless_buf_queue, + .buf_out_validate = vb2ops_vdec_out_buf_validate, + .buf_init = vb2ops_vdec_buf_init, + .buf_prepare = vb2ops_vdec_buf_prepare, + .buf_finish = vb2ops_vdec_buf_finish, + .buf_request_complete = vb2ops_vdec_buf_request_complete, +}; + +const struct mtk_vcodec_dec_pdata mtk_vdec_8183_pdata = { + .init_vdec_params = mtk_init_vdec_params, + .ctrls_setup = mtk_vcodec_dec_ctrls_setup, + .vdec_vb2_ops = &mtk_vdec_request_vb2_ops, + .vdec_formats = mtk_video_formats, + .num_formats = &num_formats, + .default_out_fmt = &default_out_format, + .default_cap_fmt = &default_cap_format, + .uses_stateless_api = true, + .worker = mtk_vdec_worker, + .flush_decoder = mtk_vdec_flush_decoder, + .cap_to_disp = mtk_vdec_stateless_cap_to_disp, + .get_cap_buffer = vdec_get_cap_buffer, + .is_subdev_supported = false, + .hw_arch = MTK_VDEC_PURE_SINGLE_CORE, +}; + +/* This platform data is used for one lat and one core architecture. */ +const struct mtk_vcodec_dec_pdata mtk_lat_sig_core_pdata = { + .init_vdec_params = mtk_init_vdec_params, + .ctrls_setup = mtk_vcodec_dec_ctrls_setup, + .vdec_vb2_ops = &mtk_vdec_request_vb2_ops, + .vdec_formats = mtk_video_formats, + .num_formats = &num_formats, + .default_out_fmt = &default_out_format, + .default_cap_fmt = &default_cap_format, + .uses_stateless_api = true, + .worker = mtk_vdec_worker, + .flush_decoder = mtk_vdec_flush_decoder, + .cap_to_disp = mtk_vdec_stateless_cap_to_disp, + .get_cap_buffer = vdec_get_cap_buffer, + .is_subdev_supported = true, + .hw_arch = MTK_VDEC_LAT_SINGLE_CORE, +}; + +const struct mtk_vcodec_dec_pdata mtk_vdec_single_core_pdata = { + .init_vdec_params = mtk_init_vdec_params, + .ctrls_setup = mtk_vcodec_dec_ctrls_setup, + .vdec_vb2_ops = &mtk_vdec_request_vb2_ops, + .vdec_formats = mtk_video_formats, + .num_formats = &num_formats, + .default_out_fmt = &default_out_format, + .default_cap_fmt = &default_cap_format, + .uses_stateless_api = true, + .worker = mtk_vdec_worker, + .flush_decoder = mtk_vdec_flush_decoder, + .cap_to_disp = mtk_vdec_stateless_cap_to_disp, + .get_cap_buffer = vdec_get_cap_buffer, + .is_subdev_supported = true, + .hw_arch = MTK_VDEC_PURE_SINGLE_CORE, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c new file mode 100644 index 0000000000..2b6a5adbc4 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_av1_req_lat_if.c @@ -0,0 +1,2209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 MediaTek Inc. + * Author: Xiaoyong Lu <xiaoyong.lu@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" + +#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1) +#define AV1_TILE_BUF_SIZE 64 +#define AV1_SCALE_SUBPEL_BITS 10 +#define AV1_REF_SCALE_SHIFT 14 +#define AV1_REF_NO_SCALE BIT(AV1_REF_SCALE_SHIFT) +#define AV1_REF_INVALID_SCALE -1 +#define AV1_CDF_TABLE_BUFFER_SIZE 16384 +#define AV1_PRIMARY_REF_NONE 7 + +#define AV1_INVALID_IDX -1 + +#define AV1_DIV_ROUND_UP_POW2(value, n) \ +({ \ + typeof(n) _n = n; \ + typeof(value) _value = value; \ + (_value + (BIT(_n) >> 1)) >> _n; \ +}) + +#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \ +({ \ + typeof(n) _n_ = n; \ + typeof(value) _value_ = value; \ + (((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \ + : AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \ +}) + +#define BIT_FLAG(x, bit) (!!((x)->flags & (bit))) +#define SEGMENTATION_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEGMENTATION_FLAG_##name)) +#define QUANT_FLAG(x, name) (!!((x)->flags & V4L2_AV1_QUANTIZATION_FLAG_##name)) +#define SEQUENCE_FLAG(x, name) (!!((x)->flags & V4L2_AV1_SEQUENCE_FLAG_##name)) +#define FH_FLAG(x, name) (!!((x)->flags & V4L2_AV1_FRAME_FLAG_##name)) + +#define MINQ 0 +#define MAXQ 255 + +#define DIV_LUT_PREC_BITS 14 +#define DIV_LUT_BITS 8 +#define DIV_LUT_NUM BIT(DIV_LUT_BITS) +#define WARP_PARAM_REDUCE_BITS 6 +#define WARPEDMODEL_PREC_BITS 16 + +#define SEG_LVL_ALT_Q 0 +#define SECONDARY_FILTER_STRENGTH_NUM_BITS 2 + +static const short div_lut[DIV_LUT_NUM + 1] = { + 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768, + 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142, + 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564, + 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028, + 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530, + 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066, + 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633, + 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228, + 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848, + 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491, + 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155, + 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838, + 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538, + 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255, + 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986, + 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732, + 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489, + 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259, + 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039, + 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830, + 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630, + 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439, + 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257, + 8240, 8224, 8208, 8192, +}; + +/** + * struct vdec_av1_slice_init_vsi - VSI used to initialize instance + * @architecture: architecture type + * @reserved: reserved + * @core_vsi: for core vsi + * @cdf_table_addr: cdf table addr + * @cdf_table_size: cdf table size + * @iq_table_addr: iq table addr + * @iq_table_size: iq table size + * @vsi_size: share vsi structure size + */ +struct vdec_av1_slice_init_vsi { + u32 architecture; + u32 reserved; + u64 core_vsi; + u64 cdf_table_addr; + u32 cdf_table_size; + u64 iq_table_addr; + u32 iq_table_size; + u32 vsi_size; +}; + +/** + * struct vdec_av1_slice_mem - memory address and size + * @buf: dma_addr padding + * @dma_addr: buffer address + * @size: buffer size + * @dma_addr_end: buffer end address + * @padding: for padding + */ +struct vdec_av1_slice_mem { + union { + u64 buf; + dma_addr_t dma_addr; + }; + union { + size_t size; + dma_addr_t dma_addr_end; + u64 padding; + }; +}; + +/** + * struct vdec_av1_slice_state - decoding state + * @err : err type for decode + * @full : transcoded buffer is full or not + * @timeout : decode timeout or not + * @perf : performance enable + * @crc : hw checksum + * @out_size : hw output size + */ +struct vdec_av1_slice_state { + int err; + u32 full; + u32 timeout; + u32 perf; + u32 crc[16]; + u32 out_size; +}; + +/* + * enum vdec_av1_slice_resolution_level - resolution level + */ +enum vdec_av1_slice_resolution_level { + AV1_RES_NONE, + AV1_RES_FHD, + AV1_RES_4K, + AV1_RES_8K, +}; + +/* + * enum vdec_av1_slice_frame_type - av1 frame type + */ +enum vdec_av1_slice_frame_type { + AV1_KEY_FRAME = 0, + AV1_INTER_FRAME, + AV1_INTRA_ONLY_FRAME, + AV1_SWITCH_FRAME, + AV1_FRAME_TYPES, +}; + +/* + * enum vdec_av1_slice_reference_mode - reference mode type + */ +enum vdec_av1_slice_reference_mode { + AV1_SINGLE_REFERENCE = 0, + AV1_COMPOUND_REFERENCE, + AV1_REFERENCE_MODE_SELECT, + AV1_REFERENCE_MODES, +}; + +/** + * struct vdec_av1_slice_tile_group - info for each tile + * @num_tiles: tile number + * @tile_size: input size for each tile + * @tile_start_offset: tile offset to input buffer + */ +struct vdec_av1_slice_tile_group { + u32 num_tiles; + u32 tile_size[V4L2_AV1_MAX_TILE_COUNT]; + u32 tile_start_offset[V4L2_AV1_MAX_TILE_COUNT]; +}; + +/** + * struct vdec_av1_slice_scale_factors - scale info for each ref frame + * @is_scaled: frame is scaled or not + * @x_scale: frame width scale coefficient + * @y_scale: frame height scale coefficient + * @x_step: width step for x_scale + * @y_step: height step for y_scale + */ +struct vdec_av1_slice_scale_factors { + u8 is_scaled; + int x_scale; + int y_scale; + int x_step; + int y_step; +}; + +/** + * struct vdec_av1_slice_frame_refs - ref frame info + * @ref_fb_idx: ref slot index + * @ref_map_idx: ref frame index + * @scale_factors: scale factors for each ref frame + */ +struct vdec_av1_slice_frame_refs { + int ref_fb_idx; + int ref_map_idx; + struct vdec_av1_slice_scale_factors scale_factors; +}; + +/** + * struct vdec_av1_slice_gm - AV1 Global Motion parameters + * @wmtype: The type of global motion transform used + * @wmmat: gm_params + * @alpha: alpha info + * @beta: beta info + * @gamma: gamma info + * @delta: delta info + * @invalid: is invalid or not + */ +struct vdec_av1_slice_gm { + int wmtype; + int wmmat[8]; + short alpha; + short beta; + short gamma; + short delta; + char invalid; +}; + +/** + * struct vdec_av1_slice_sm - AV1 Skip Mode parameters + * @skip_mode_allowed: Skip Mode is allowed or not + * @skip_mode_present: specified that the skip_mode will be present or not + * @skip_mode_frame: specifies the frames to use for compound prediction + */ +struct vdec_av1_slice_sm { + u8 skip_mode_allowed; + u8 skip_mode_present; + int skip_mode_frame[2]; +}; + +/** + * struct vdec_av1_slice_seg - AV1 Segmentation params + * @segmentation_enabled: this frame makes use of the segmentation tool or not + * @segmentation_update_map: segmentation map are updated during the decoding frame + * @segmentation_temporal_update:segmentation map are coded relative the existing segmentaion map + * @segmentation_update_data: new parameters are about to be specified for each segment + * @feature_data: specifies the feature data for a segment feature + * @feature_enabled_mask: the corresponding feature value is coded or not. + * @segid_preskip: segment id will be read before the skip syntax element. + * @last_active_segid: the highest numbered segment id that has some enabled feature + */ +struct vdec_av1_slice_seg { + u8 segmentation_enabled; + u8 segmentation_update_map; + u8 segmentation_temporal_update; + u8 segmentation_update_data; + int feature_data[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX]; + u16 feature_enabled_mask[V4L2_AV1_MAX_SEGMENTS]; + int segid_preskip; + int last_active_segid; +}; + +/** + * struct vdec_av1_slice_delta_q_lf - AV1 Loop Filter delta parameters + * @delta_q_present: specified whether quantizer index delta values are present + * @delta_q_res: specifies the left shift which should be applied to decoded quantizer index + * @delta_lf_present: specifies whether loop filter delta values are present + * @delta_lf_res: specifies the left shift which should be applied to decoded + * loop filter delta values + * @delta_lf_multi: specifies that separate loop filter deltas are sent for horizontal + * luma edges,vertical luma edges,the u edges, and the v edges. + */ +struct vdec_av1_slice_delta_q_lf { + u8 delta_q_present; + u8 delta_q_res; + u8 delta_lf_present; + u8 delta_lf_res; + u8 delta_lf_multi; +}; + +/** + * struct vdec_av1_slice_quantization - AV1 Quantization params + * @base_q_idx: indicates the base frame qindex. This is used for Y AC + * coefficients and as the base value for the other quantizers. + * @qindex: qindex + * @delta_qydc: indicates the Y DC quantizer relative to base_q_idx + * @delta_qudc: indicates the U DC quantizer relative to base_q_idx. + * @delta_quac: indicates the U AC quantizer relative to base_q_idx + * @delta_qvdc: indicates the V DC quantizer relative to base_q_idx + * @delta_qvac: indicates the V AC quantizer relative to base_q_idx + * @using_qmatrix: specifies that the quantizer matrix will be used to + * compute quantizers + * @qm_y: specifies the level in the quantizer matrix that should + * be used for luma plane decoding + * @qm_u: specifies the level in the quantizer matrix that should + * be used for chroma U plane decoding. + * @qm_v: specifies the level in the quantizer matrix that should be + * used for chroma V plane decoding + */ +struct vdec_av1_slice_quantization { + int base_q_idx; + int qindex[V4L2_AV1_MAX_SEGMENTS]; + int delta_qydc; + int delta_qudc; + int delta_quac; + int delta_qvdc; + int delta_qvac; + u8 using_qmatrix; + u8 qm_y; + u8 qm_u; + u8 qm_v; +}; + +/** + * struct vdec_av1_slice_lr - AV1 Loop Restauration parameters + * @use_lr: whether to use loop restoration + * @use_chroma_lr: whether to use chroma loop restoration + * @frame_restoration_type: specifies the type of restoration used for each plane + * @loop_restoration_size: pecifies the size of loop restoration units in units + * of samples in the current plane + */ +struct vdec_av1_slice_lr { + u8 use_lr; + u8 use_chroma_lr; + u8 frame_restoration_type[V4L2_AV1_NUM_PLANES_MAX]; + u32 loop_restoration_size[V4L2_AV1_NUM_PLANES_MAX]; +}; + +/** + * struct vdec_av1_slice_loop_filter - AV1 Loop filter parameters + * @loop_filter_level: an array containing loop filter strength values. + * @loop_filter_ref_deltas: contains the adjustment needed for the filter + * level based on the chosen reference frame + * @loop_filter_mode_deltas: contains the adjustment needed for the filter + * level based on the chosen mode + * @loop_filter_sharpness: indicates the sharpness level. The loop_filter_level + * and loop_filter_sharpness together determine when + * a block edge is filtered, and by how much the + * filtering can change the sample values + * @loop_filter_delta_enabled: filetr level depends on the mode and reference + * frame used to predict a block + */ +struct vdec_av1_slice_loop_filter { + u8 loop_filter_level[4]; + int loop_filter_ref_deltas[V4L2_AV1_TOTAL_REFS_PER_FRAME]; + int loop_filter_mode_deltas[2]; + u8 loop_filter_sharpness; + u8 loop_filter_delta_enabled; +}; + +/** + * struct vdec_av1_slice_cdef - AV1 CDEF parameters + * @cdef_damping: controls the amount of damping in the deringing filter + * @cdef_y_strength: specifies the strength of the primary filter and secondary filter + * @cdef_uv_strength: specifies the strength of the primary filter and secondary filter + * @cdef_bits: specifies the number of bits needed to specify which + * CDEF filter to apply + */ +struct vdec_av1_slice_cdef { + u8 cdef_damping; + u8 cdef_y_strength[8]; + u8 cdef_uv_strength[8]; + u8 cdef_bits; +}; + +/** + * struct vdec_av1_slice_mfmv - AV1 mfmv parameters + * @mfmv_valid_ref: mfmv_valid_ref + * @mfmv_dir: mfmv_dir + * @mfmv_ref_to_cur: mfmv_ref_to_cur + * @mfmv_ref_frame_idx: mfmv_ref_frame_idx + * @mfmv_count: mfmv_count + */ +struct vdec_av1_slice_mfmv { + u32 mfmv_valid_ref[3]; + u32 mfmv_dir[3]; + int mfmv_ref_to_cur[3]; + int mfmv_ref_frame_idx[3]; + int mfmv_count; +}; + +/** + * struct vdec_av1_slice_tile - AV1 Tile info + * @tile_cols: specifies the number of tiles across the frame + * @tile_rows: pecifies the number of tiles down the frame + * @mi_col_starts: an array specifying the start column + * @mi_row_starts: an array specifying the start row + * @context_update_tile_id: specifies which tile to use for the CDF update + * @uniform_tile_spacing_flag: tiles are uniformly spaced across the frame + * or the tile sizes are coded + */ +struct vdec_av1_slice_tile { + u8 tile_cols; + u8 tile_rows; + int mi_col_starts[V4L2_AV1_MAX_TILE_COLS + 1]; + int mi_row_starts[V4L2_AV1_MAX_TILE_ROWS + 1]; + u8 context_update_tile_id; + u8 uniform_tile_spacing_flag; +}; + +/** + * struct vdec_av1_slice_uncompressed_header - Represents an AV1 Frame Header OBU + * @use_ref_frame_mvs: use_ref_frame_mvs flag + * @order_hint: specifies OrderHintBits least significant bits of the expected + * @gm: global motion param + * @upscaled_width: the upscaled width + * @frame_width: frame's width + * @frame_height: frame's height + * @reduced_tx_set: frame is restricted to a reduced subset of the full + * set of transform types + * @tx_mode: specifies how the transform size is determined + * @uniform_tile_spacing_flag: tiles are uniformly spaced across the frame + * or the tile sizes are coded + * @interpolation_filter: specifies the filter selection used for performing inter prediction + * @allow_warped_motion: motion_mode may be present or not + * @is_motion_mode_switchable : euqlt to 0 specifies that only the SIMPLE motion mode will be used + * @reference_mode : frame reference mode selected + * @allow_high_precision_mv: specifies that motion vectors are specified to + * quarter pel precision or to eighth pel precision + * @allow_intra_bc: ubducates that intra block copy may be used in this frame + * @force_integer_mv: specifies motion vectors will always be integers or + * can contain fractional bits + * @allow_screen_content_tools: intra blocks may use palette encoding + * @error_resilient_mode: error resislent mode is enable/disable + * @frame_type: specifies the AV1 frame type + * @primary_ref_frame: specifies which reference frame contains the CDF values + * and other state that should be loaded at the start of the frame + * slots will be updated with the current frame after it is decoded + * @disable_frame_end_update_cdf:indicates the end of frame CDF update is disable or enable + * @disable_cdf_update: specified whether the CDF update in the symbol + * decoding process should be disables + * @skip_mode: av1 skip mode parameters + * @seg: av1 segmentaon parameters + * @delta_q_lf: av1 delta loop fileter + * @quant: av1 Quantization params + * @lr: av1 Loop Restauration parameters + * @superres_denom: the denominator for the upscaling ratio + * @loop_filter: av1 Loop filter parameters + * @cdef: av1 CDEF parameters + * @mfmv: av1 mfmv parameters + * @tile: av1 Tile info + * @frame_is_intra: intra frame + * @loss_less_array: loss less array + * @coded_loss_less: coded lsss less + * @mi_rows: size of mi unit in rows + * @mi_cols: size of mi unit in cols + */ +struct vdec_av1_slice_uncompressed_header { + u8 use_ref_frame_mvs; + int order_hint; + struct vdec_av1_slice_gm gm[V4L2_AV1_TOTAL_REFS_PER_FRAME]; + u32 upscaled_width; + u32 frame_width; + u32 frame_height; + u8 reduced_tx_set; + u8 tx_mode; + u8 uniform_tile_spacing_flag; + u8 interpolation_filter; + u8 allow_warped_motion; + u8 is_motion_mode_switchable; + u8 reference_mode; + u8 allow_high_precision_mv; + u8 allow_intra_bc; + u8 force_integer_mv; + u8 allow_screen_content_tools; + u8 error_resilient_mode; + u8 frame_type; + u8 primary_ref_frame; + u8 disable_frame_end_update_cdf; + u32 disable_cdf_update; + struct vdec_av1_slice_sm skip_mode; + struct vdec_av1_slice_seg seg; + struct vdec_av1_slice_delta_q_lf delta_q_lf; + struct vdec_av1_slice_quantization quant; + struct vdec_av1_slice_lr lr; + u32 superres_denom; + struct vdec_av1_slice_loop_filter loop_filter; + struct vdec_av1_slice_cdef cdef; + struct vdec_av1_slice_mfmv mfmv; + struct vdec_av1_slice_tile tile; + u8 frame_is_intra; + u8 loss_less_array[V4L2_AV1_MAX_SEGMENTS]; + u8 coded_loss_less; + u32 mi_rows; + u32 mi_cols; +}; + +/** + * struct vdec_av1_slice_seq_header - Represents an AV1 Sequence OBU + * @bitdepth: the bitdepth to use for the sequence + * @enable_superres: specifies whether the use_superres syntax element may be present + * @enable_filter_intra: specifies the use_filter_intra syntax element may be present + * @enable_intra_edge_filter: whether the intra edge filtering process should be enabled + * @enable_interintra_compound: specifies the mode info fo rinter blocks may + * contain the syntax element interintra + * @enable_masked_compound: specifies the mode info fo rinter blocks may + * contain the syntax element compound_type + * @enable_dual_filter: the inter prediction filter type may be specified independently + * @enable_jnt_comp: distance weights process may be used for inter prediction + * @mono_chrome: indicates the video does not contain U and V color planes + * @enable_order_hint: tools based on the values of order hints may be used + * @order_hint_bits: the number of bits used for the order_hint field at each frame + * @use_128x128_superblock: indicates superblocks contain 128*128 luma samples + * @subsampling_x: the chroma subsamling format + * @subsampling_y: the chroma subsamling format + * @max_frame_width: the maximum frame width for the frames represented by sequence + * @max_frame_height: the maximum frame height for the frames represented by sequence + */ +struct vdec_av1_slice_seq_header { + u8 bitdepth; + u8 enable_superres; + u8 enable_filter_intra; + u8 enable_intra_edge_filter; + u8 enable_interintra_compound; + u8 enable_masked_compound; + u8 enable_dual_filter; + u8 enable_jnt_comp; + u8 mono_chrome; + u8 enable_order_hint; + u8 order_hint_bits; + u8 use_128x128_superblock; + u8 subsampling_x; + u8 subsampling_y; + u32 max_frame_width; + u32 max_frame_height; +}; + +/** + * struct vdec_av1_slice_frame - Represents current Frame info + * @uh: uncompressed header info + * @seq: sequence header info + * @large_scale_tile: is large scale mode + * @cur_ts: current frame timestamp + * @prev_fb_idx: prev slot id + * @ref_frame_sign_bias: arrays for ref_frame sign bias + * @order_hints: arrays for ref_frame order hint + * @ref_frame_valid: arrays for valid ref_frame + * @ref_frame_map: map to slot frame info + * @frame_refs: ref_frame info + */ +struct vdec_av1_slice_frame { + struct vdec_av1_slice_uncompressed_header uh; + struct vdec_av1_slice_seq_header seq; + u8 large_scale_tile; + u64 cur_ts; + int prev_fb_idx; + u8 ref_frame_sign_bias[V4L2_AV1_TOTAL_REFS_PER_FRAME]; + u32 order_hints[V4L2_AV1_REFS_PER_FRAME]; + u32 ref_frame_valid[V4L2_AV1_REFS_PER_FRAME]; + int ref_frame_map[V4L2_AV1_TOTAL_REFS_PER_FRAME]; + struct vdec_av1_slice_frame_refs frame_refs[V4L2_AV1_REFS_PER_FRAME]; +}; + +/** + * struct vdec_av1_slice_work_buffer - work buffer for lat + * @mv_addr: mv buffer memory info + * @cdf_addr: cdf buffer memory info + * @segid_addr: segid buffer memory info + */ +struct vdec_av1_slice_work_buffer { + struct vdec_av1_slice_mem mv_addr; + struct vdec_av1_slice_mem cdf_addr; + struct vdec_av1_slice_mem segid_addr; +}; + +/** + * struct vdec_av1_slice_frame_info - frame info for each slot + * @frame_type: frame type + * @frame_is_intra: is intra frame + * @order_hint: order hint + * @order_hints: referece frame order hint + * @upscaled_width: upscale width + * @pic_pitch: buffer pitch + * @frame_width: frane width + * @frame_height: frame height + * @mi_rows: rows in mode info + * @mi_cols: cols in mode info + * @ref_count: mark to reference frame counts + */ +struct vdec_av1_slice_frame_info { + u8 frame_type; + u8 frame_is_intra; + int order_hint; + u32 order_hints[V4L2_AV1_REFS_PER_FRAME]; + u32 upscaled_width; + u32 pic_pitch; + u32 frame_width; + u32 frame_height; + u32 mi_rows; + u32 mi_cols; + int ref_count; +}; + +/** + * struct vdec_av1_slice_slot - slot info that needs to be saved in the global instance + * @frame_info: frame info for each slot + * @timestamp: time stamp info + */ +struct vdec_av1_slice_slot { + struct vdec_av1_slice_frame_info frame_info[AV1_MAX_FRAME_BUF_COUNT]; + u64 timestamp[AV1_MAX_FRAME_BUF_COUNT]; +}; + +/** + * struct vdec_av1_slice_fb - frame buffer for decoding + * @y: current y buffer address info + * @c: current c buffer address info + */ +struct vdec_av1_slice_fb { + struct vdec_av1_slice_mem y; + struct vdec_av1_slice_mem c; +}; + +/** + * struct vdec_av1_slice_vsi - exchange frame information between Main CPU and MicroP + * @bs: input buffer info + * @work_buffer: working buffe for hw + * @cdf_table: cdf_table buffer + * @cdf_tmp: cdf temp buffer + * @rd_mv: mv buffer for lat output , core input + * @ube: ube buffer + * @trans: transcoded buffer + * @err_map: err map buffer + * @row_info: row info buffer + * @fb: current y/c buffer + * @ref: ref y/c buffer + * @iq_table: iq table buffer + * @tile: tile buffer + * @slots: slots info for each frame + * @slot_id: current frame slot id + * @frame: current frame info + * @state: status after decode done + * @cur_lst_tile_id: tile id for large scale + */ +struct vdec_av1_slice_vsi { + /* lat */ + struct vdec_av1_slice_mem bs; + struct vdec_av1_slice_work_buffer work_buffer[AV1_MAX_FRAME_BUF_COUNT]; + struct vdec_av1_slice_mem cdf_table; + struct vdec_av1_slice_mem cdf_tmp; + /* LAT stage's output, Core stage's input */ + struct vdec_av1_slice_mem rd_mv; + struct vdec_av1_slice_mem ube; + struct vdec_av1_slice_mem trans; + struct vdec_av1_slice_mem err_map; + struct vdec_av1_slice_mem row_info; + /* core */ + struct vdec_av1_slice_fb fb; + struct vdec_av1_slice_fb ref[V4L2_AV1_REFS_PER_FRAME]; + struct vdec_av1_slice_mem iq_table; + /* lat and core share*/ + struct vdec_av1_slice_mem tile; + struct vdec_av1_slice_slot slots; + s8 slot_id; + struct vdec_av1_slice_frame frame; + struct vdec_av1_slice_state state; + u32 cur_lst_tile_id; +}; + +/** + * struct vdec_av1_slice_pfc - per-frame context that contains a local vsi. + * pass it from lat to core + * @vsi: local vsi. copy to/from remote vsi before/after decoding + * @ref_idx: reference buffer timestamp + * @seq: picture sequence + */ +struct vdec_av1_slice_pfc { + struct vdec_av1_slice_vsi vsi; + u64 ref_idx[V4L2_AV1_REFS_PER_FRAME]; + int seq; +}; + +/** + * struct vdec_av1_slice_instance - represent one av1 instance + * @ctx: pointer to codec's context + * @vpu: VPU instance + * @iq_table: iq table buffer + * @cdf_table: cdf table buffer + * @mv: mv working buffer + * @cdf: cdf working buffer + * @seg: segmentation working buffer + * @cdf_temp: cdf temp buffer + * @tile: tile buffer + * @slots: slots info + * @tile_group: tile_group entry + * @level: level of current resolution + * @width: width of last picture + * @height: height of last picture + * @frame_type: frame_type of last picture + * @irq_enabled: irq to Main CPU or MicroP + * @inneracing_mode: is inneracing mode + * @init_vsi: vsi used for initialized AV1 instance + * @vsi: vsi used for decoding/flush ... + * @core_vsi: vsi used for Core stage + * @seq: global picture sequence + */ +struct vdec_av1_slice_instance { + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_vpu_inst vpu; + + struct mtk_vcodec_mem iq_table; + struct mtk_vcodec_mem cdf_table; + + struct mtk_vcodec_mem mv[AV1_MAX_FRAME_BUF_COUNT]; + struct mtk_vcodec_mem cdf[AV1_MAX_FRAME_BUF_COUNT]; + struct mtk_vcodec_mem seg[AV1_MAX_FRAME_BUF_COUNT]; + struct mtk_vcodec_mem cdf_temp; + struct mtk_vcodec_mem tile; + struct vdec_av1_slice_slot slots; + struct vdec_av1_slice_tile_group tile_group; + + /* for resolution change and get_pic_info */ + enum vdec_av1_slice_resolution_level level; + u32 width; + u32 height; + + u32 frame_type; + u32 irq_enabled; + u32 inneracing_mode; + + /* MicroP vsi */ + union { + struct vdec_av1_slice_init_vsi *init_vsi; + struct vdec_av1_slice_vsi *vsi; + }; + struct vdec_av1_slice_vsi *core_vsi; + int seq; +}; + +static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf); + +static inline int vdec_av1_slice_get_msb(u32 n) +{ + if (n == 0) + return 0; + return 31 ^ __builtin_clz(n); +} + +static inline bool vdec_av1_slice_need_scale(u32 ref_width, u32 ref_height, + u32 this_width, u32 this_height) +{ + return ((this_width << 1) >= ref_width) && + ((this_height << 1) >= ref_height) && + (this_width <= (ref_width << 4)) && + (this_height <= (ref_height << 4)); +} + +static void *vdec_av1_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id) +{ + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); + + if (!ctrl) + return ERR_PTR(-EINVAL); + + return ctrl->p_cur.p; +} + +static int vdec_av1_slice_init_cdf_table(struct vdec_av1_slice_instance *instance) +{ + u8 *remote_cdf_table; + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_av1_slice_init_vsi *vsi; + int ret; + + ctx = instance->ctx; + vsi = instance->vpu.vsi; + remote_cdf_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, + (u32)vsi->cdf_table_addr); + if (IS_ERR(remote_cdf_table)) { + mtk_vdec_err(ctx, "failed to map cdf table\n"); + return PTR_ERR(remote_cdf_table); + } + + mtk_vdec_debug(ctx, "map cdf table to 0x%p\n", remote_cdf_table); + + if (instance->cdf_table.va) + mtk_vcodec_mem_free(ctx, &instance->cdf_table); + instance->cdf_table.size = vsi->cdf_table_size; + + ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_table); + if (ret) + return ret; + + memcpy(instance->cdf_table.va, remote_cdf_table, vsi->cdf_table_size); + + return 0; +} + +static int vdec_av1_slice_init_iq_table(struct vdec_av1_slice_instance *instance) +{ + u8 *remote_iq_table; + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_av1_slice_init_vsi *vsi; + int ret; + + ctx = instance->ctx; + vsi = instance->vpu.vsi; + remote_iq_table = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, + (u32)vsi->iq_table_addr); + if (IS_ERR(remote_iq_table)) { + mtk_vdec_err(ctx, "failed to map iq table\n"); + return PTR_ERR(remote_iq_table); + } + + mtk_vdec_debug(ctx, "map iq table to 0x%p\n", remote_iq_table); + + if (instance->iq_table.va) + mtk_vcodec_mem_free(ctx, &instance->iq_table); + instance->iq_table.size = vsi->iq_table_size; + + ret = mtk_vcodec_mem_alloc(ctx, &instance->iq_table); + if (ret) + return ret; + + memcpy(instance->iq_table.va, remote_iq_table, vsi->iq_table_size); + + return 0; +} + +static int vdec_av1_slice_get_new_slot(struct vdec_av1_slice_vsi *vsi) +{ + struct vdec_av1_slice_slot *slots = &vsi->slots; + int new_slot_idx = AV1_INVALID_IDX; + int i; + + for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { + if (slots->frame_info[i].ref_count == 0) { + new_slot_idx = i; + break; + } + } + + if (new_slot_idx != AV1_INVALID_IDX) { + slots->frame_info[new_slot_idx].ref_count++; + slots->timestamp[new_slot_idx] = vsi->frame.cur_ts; + } + + return new_slot_idx; +} + +static inline void vdec_av1_slice_clear_fb(struct vdec_av1_slice_frame_info *frame_info) +{ + memset((void *)frame_info, 0, sizeof(struct vdec_av1_slice_frame_info)); +} + +static void vdec_av1_slice_decrease_ref_count(struct vdec_av1_slice_slot *slots, int fb_idx) +{ + struct vdec_av1_slice_frame_info *frame_info = slots->frame_info; + + frame_info[fb_idx].ref_count--; + if (frame_info[fb_idx].ref_count < 0) { + frame_info[fb_idx].ref_count = 0; + pr_err(MTK_DBG_V4L2_STR "av1_error: %s() fb_idx %d decrease ref_count error\n", + __func__, fb_idx); + } + + vdec_av1_slice_clear_fb(&frame_info[fb_idx]); +} + +static void vdec_av1_slice_cleanup_slots(struct vdec_av1_slice_slot *slots, + struct vdec_av1_slice_frame *frame, + struct v4l2_ctrl_av1_frame *ctrl_fh) +{ + int slot_id, ref_id; + + for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) + frame->ref_frame_map[ref_id] = AV1_INVALID_IDX; + + for (slot_id = 0; slot_id < AV1_MAX_FRAME_BUF_COUNT; slot_id++) { + u64 timestamp = slots->timestamp[slot_id]; + bool ref_used = false; + + /* ignored unused slots */ + if (slots->frame_info[slot_id].ref_count == 0) + continue; + + for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) { + if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) { + frame->ref_frame_map[ref_id] = slot_id; + ref_used = true; + } + } + + if (!ref_used) + vdec_av1_slice_decrease_ref_count(slots, slot_id); + } +} + +static void vdec_av1_slice_setup_slot(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_vsi *vsi, + struct v4l2_ctrl_av1_frame *ctrl_fh) +{ + struct vdec_av1_slice_frame_info *cur_frame_info; + struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; + int ref_id; + + memcpy(&vsi->slots, &instance->slots, sizeof(instance->slots)); + vdec_av1_slice_cleanup_slots(&vsi->slots, &vsi->frame, ctrl_fh); + vsi->slot_id = vdec_av1_slice_get_new_slot(vsi); + + if (vsi->slot_id == AV1_INVALID_IDX) { + mtk_v4l2_vdec_err(instance->ctx, "warning:av1 get invalid index slot\n"); + vsi->slot_id = 0; + } + cur_frame_info = &vsi->slots.frame_info[vsi->slot_id]; + cur_frame_info->frame_type = uh->frame_type; + cur_frame_info->frame_is_intra = ((uh->frame_type == AV1_INTRA_ONLY_FRAME) || + (uh->frame_type == AV1_KEY_FRAME)); + cur_frame_info->order_hint = uh->order_hint; + cur_frame_info->upscaled_width = uh->upscaled_width; + cur_frame_info->pic_pitch = 0; + cur_frame_info->frame_width = uh->frame_width; + cur_frame_info->frame_height = uh->frame_height; + cur_frame_info->mi_cols = ((uh->frame_width + 7) >> 3) << 1; + cur_frame_info->mi_rows = ((uh->frame_height + 7) >> 3) << 1; + + /* ensure current frame is properly mapped if referenced */ + for (ref_id = 0; ref_id < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref_id++) { + u64 timestamp = vsi->slots.timestamp[vsi->slot_id]; + + if (ctrl_fh->reference_frame_ts[ref_id] == timestamp) + vsi->frame.ref_frame_map[ref_id] = vsi->slot_id; + } +} + +static int vdec_av1_slice_alloc_working_buffer(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_vsi *vsi) +{ + struct mtk_vcodec_dec_ctx *ctx = instance->ctx; + enum vdec_av1_slice_resolution_level level; + u32 max_sb_w, max_sb_h, max_w, max_h, w, h; + int i, ret; + + w = vsi->frame.uh.frame_width; + h = vsi->frame.uh.frame_height; + + if (w > VCODEC_DEC_4K_CODED_WIDTH || h > VCODEC_DEC_4K_CODED_HEIGHT) + /* 8K */ + return -EINVAL; + + if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { + /* 4K */ + level = AV1_RES_4K; + max_w = VCODEC_DEC_4K_CODED_WIDTH; + max_h = VCODEC_DEC_4K_CODED_HEIGHT; + } else { + /* FHD */ + level = AV1_RES_FHD; + max_w = MTK_VDEC_MAX_W; + max_h = MTK_VDEC_MAX_H; + } + + if (level == instance->level) + return 0; + + mtk_vdec_debug(ctx, "resolution level changed from %u to %u, %ux%u", + instance->level, level, w, h); + + max_sb_w = DIV_ROUND_UP(max_w, 128); + max_sb_h = DIV_ROUND_UP(max_h, 128); + + for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { + if (instance->mv[i].va) + mtk_vcodec_mem_free(ctx, &instance->mv[i]); + instance->mv[i].size = max_sb_w * max_sb_h * SZ_1K; + ret = mtk_vcodec_mem_alloc(ctx, &instance->mv[i]); + if (ret) + goto err; + + if (instance->seg[i].va) + mtk_vcodec_mem_free(ctx, &instance->seg[i]); + instance->seg[i].size = max_sb_w * max_sb_h * 512; + ret = mtk_vcodec_mem_alloc(ctx, &instance->seg[i]); + if (ret) + goto err; + + if (instance->cdf[i].va) + mtk_vcodec_mem_free(ctx, &instance->cdf[i]); + instance->cdf[i].size = AV1_CDF_TABLE_BUFFER_SIZE; + ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf[i]); + if (ret) + goto err; + } + + if (!instance->cdf_temp.va) { + instance->cdf_temp.size = (SZ_1K * 16 * 100); + ret = mtk_vcodec_mem_alloc(ctx, &instance->cdf_temp); + if (ret) + goto err; + vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr; + vsi->cdf_tmp.size = instance->cdf_temp.size; + } + + if (instance->tile.va) + mtk_vcodec_mem_free(ctx, &instance->tile); + + instance->tile.size = AV1_TILE_BUF_SIZE * V4L2_AV1_MAX_TILE_COUNT; + ret = mtk_vcodec_mem_alloc(ctx, &instance->tile); + if (ret) + goto err; + + instance->level = level; + return 0; + +err: + instance->level = AV1_RES_NONE; + return ret; +} + +static void vdec_av1_slice_free_working_buffer(struct vdec_av1_slice_instance *instance) +{ + struct mtk_vcodec_dec_ctx *ctx = instance->ctx; + int i; + + for (i = 0; i < ARRAY_SIZE(instance->mv); i++) + mtk_vcodec_mem_free(ctx, &instance->mv[i]); + + for (i = 0; i < ARRAY_SIZE(instance->seg); i++) + mtk_vcodec_mem_free(ctx, &instance->seg[i]); + + for (i = 0; i < ARRAY_SIZE(instance->cdf); i++) + mtk_vcodec_mem_free(ctx, &instance->cdf[i]); + + mtk_vcodec_mem_free(ctx, &instance->tile); + mtk_vcodec_mem_free(ctx, &instance->cdf_temp); + mtk_vcodec_mem_free(ctx, &instance->cdf_table); + mtk_vcodec_mem_free(ctx, &instance->iq_table); + + instance->level = AV1_RES_NONE; +} + +static inline void vdec_av1_slice_vsi_from_remote(struct vdec_av1_slice_vsi *vsi, + struct vdec_av1_slice_vsi *remote_vsi) +{ + memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); + memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); +} + +static inline void vdec_av1_slice_vsi_to_remote(struct vdec_av1_slice_vsi *vsi, + struct vdec_av1_slice_vsi *remote_vsi) +{ + memcpy(remote_vsi, vsi, sizeof(*vsi)); +} + +static int vdec_av1_slice_setup_lat_from_src_buf(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_vsi *vsi, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_v4l2_buffer *src; + struct vb2_v4l2_buffer *dst; + + src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); + if (!src) + return -EINVAL; + + lat_buf->src_buf_req = src->vb2_buf.req_obj.req; + dst = &lat_buf->ts_info; + v4l2_m2m_buf_copy_metadata(src, dst, true); + vsi->frame.cur_ts = dst->vb2_buf.timestamp; + + return 0; +} + +static short vdec_av1_slice_resolve_divisor_32(u32 D, short *shift) +{ + int f; + int e; + + *shift = vdec_av1_slice_get_msb(D); + /* e is obtained from D after resetting the most significant 1 bit. */ + e = D - ((u32)1 << *shift); + /* Get the most significant DIV_LUT_BITS (8) bits of e into f */ + if (*shift > DIV_LUT_BITS) + f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS); + else + f = e << (DIV_LUT_BITS - *shift); + if (f > DIV_LUT_NUM) + return -1; + *shift += DIV_LUT_PREC_BITS; + /* Use f as lookup into the precomputed table of multipliers */ + return div_lut[f]; +} + +static void vdec_av1_slice_get_shear_params(struct vdec_av1_slice_gm *gm_params) +{ + const int *mat = gm_params->wmmat; + short shift; + short y; + long long gv, dv; + + if (gm_params->wmmat[2] <= 0) + return; + + gm_params->alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX); + gm_params->beta = clamp_val(mat[3], S16_MIN, S16_MAX); + + y = vdec_av1_slice_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1); + + gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y; + gm_params->gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), + S16_MIN, S16_MAX); + + dv = ((long long)mat[3] * mat[4]) * y; + gm_params->delta = clamp_val(mat[5] - (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - + (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX); + + gm_params->alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->alpha, WARP_PARAM_REDUCE_BITS) * + (1 << WARP_PARAM_REDUCE_BITS); + gm_params->beta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->beta, WARP_PARAM_REDUCE_BITS) * + (1 << WARP_PARAM_REDUCE_BITS); + gm_params->gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->gamma, WARP_PARAM_REDUCE_BITS) * + (1 << WARP_PARAM_REDUCE_BITS); + gm_params->delta = AV1_DIV_ROUND_UP_POW2_SIGNED(gm_params->delta, WARP_PARAM_REDUCE_BITS) * + (1 << WARP_PARAM_REDUCE_BITS); +} + +static void vdec_av1_slice_setup_gm(struct vdec_av1_slice_gm *gm, + struct v4l2_av1_global_motion *ctrl_gm) +{ + u32 i, j; + + for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) { + gm[i].wmtype = ctrl_gm->type[i]; + for (j = 0; j < 6; j++) + gm[i].wmmat[j] = ctrl_gm->params[i][j]; + + gm[i].invalid = !!(ctrl_gm->invalid & BIT(i)); + gm[i].alpha = 0; + gm[i].beta = 0; + gm[i].gamma = 0; + gm[i].delta = 0; + if (gm[i].wmtype <= V4L2_AV1_WARP_MODEL_AFFINE) + vdec_av1_slice_get_shear_params(&gm[i]); + } +} + +static void vdec_av1_slice_setup_seg(struct vdec_av1_slice_seg *seg, + struct v4l2_av1_segmentation *ctrl_seg) +{ + u32 i, j; + + seg->segmentation_enabled = SEGMENTATION_FLAG(ctrl_seg, ENABLED); + seg->segmentation_update_map = SEGMENTATION_FLAG(ctrl_seg, UPDATE_MAP); + seg->segmentation_temporal_update = SEGMENTATION_FLAG(ctrl_seg, TEMPORAL_UPDATE); + seg->segmentation_update_data = SEGMENTATION_FLAG(ctrl_seg, UPDATE_DATA); + seg->segid_preskip = SEGMENTATION_FLAG(ctrl_seg, SEG_ID_PRE_SKIP); + seg->last_active_segid = ctrl_seg->last_active_seg_id; + + for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) { + seg->feature_enabled_mask[i] = ctrl_seg->feature_enabled[i]; + for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) + seg->feature_data[i][j] = ctrl_seg->feature_data[i][j]; + } +} + +static void vdec_av1_slice_setup_quant(struct vdec_av1_slice_quantization *quant, + struct v4l2_av1_quantization *ctrl_quant) +{ + quant->base_q_idx = ctrl_quant->base_q_idx; + quant->delta_qydc = ctrl_quant->delta_q_y_dc; + quant->delta_qudc = ctrl_quant->delta_q_u_dc; + quant->delta_quac = ctrl_quant->delta_q_u_ac; + quant->delta_qvdc = ctrl_quant->delta_q_v_dc; + quant->delta_qvac = ctrl_quant->delta_q_v_ac; + quant->qm_y = ctrl_quant->qm_y; + quant->qm_u = ctrl_quant->qm_u; + quant->qm_v = ctrl_quant->qm_v; + quant->using_qmatrix = QUANT_FLAG(ctrl_quant, USING_QMATRIX); +} + +static int vdec_av1_slice_get_qindex(struct vdec_av1_slice_uncompressed_header *uh, + int segmentation_id) +{ + struct vdec_av1_slice_seg *seg = &uh->seg; + struct vdec_av1_slice_quantization *quant = &uh->quant; + int data = 0, qindex = 0; + + if (seg->segmentation_enabled && + (seg->feature_enabled_mask[segmentation_id] & BIT(SEG_LVL_ALT_Q))) { + data = seg->feature_data[segmentation_id][SEG_LVL_ALT_Q]; + qindex = quant->base_q_idx + data; + return clamp_val(qindex, 0, MAXQ); + } + + return quant->base_q_idx; +} + +static void vdec_av1_slice_setup_lr(struct vdec_av1_slice_lr *lr, + struct v4l2_av1_loop_restoration *ctrl_lr) +{ + int i; + + lr->use_lr = 0; + lr->use_chroma_lr = 0; + for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) { + lr->frame_restoration_type[i] = ctrl_lr->frame_restoration_type[i]; + lr->loop_restoration_size[i] = ctrl_lr->loop_restoration_size[i]; + if (lr->frame_restoration_type[i]) { + lr->use_lr = 1; + if (i > 0) + lr->use_chroma_lr = 1; + } + } +} + +static void vdec_av1_slice_setup_lf(struct vdec_av1_slice_loop_filter *lf, + struct v4l2_av1_loop_filter *ctrl_lf) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lf->loop_filter_level); i++) + lf->loop_filter_level[i] = ctrl_lf->level[i]; + + for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) + lf->loop_filter_ref_deltas[i] = ctrl_lf->ref_deltas[i]; + + for (i = 0; i < ARRAY_SIZE(lf->loop_filter_mode_deltas); i++) + lf->loop_filter_mode_deltas[i] = ctrl_lf->mode_deltas[i]; + + lf->loop_filter_sharpness = ctrl_lf->sharpness; + lf->loop_filter_delta_enabled = + BIT_FLAG(ctrl_lf, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED); +} + +static void vdec_av1_slice_setup_cdef(struct vdec_av1_slice_cdef *cdef, + struct v4l2_av1_cdef *ctrl_cdef) +{ + int i; + + cdef->cdef_damping = ctrl_cdef->damping_minus_3 + 3; + cdef->cdef_bits = ctrl_cdef->bits; + + for (i = 0; i < V4L2_AV1_CDEF_MAX; i++) { + if (ctrl_cdef->y_sec_strength[i] == 4) + ctrl_cdef->y_sec_strength[i] -= 1; + + if (ctrl_cdef->uv_sec_strength[i] == 4) + ctrl_cdef->uv_sec_strength[i] -= 1; + + cdef->cdef_y_strength[i] = + ctrl_cdef->y_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS | + ctrl_cdef->y_sec_strength[i]; + cdef->cdef_uv_strength[i] = + ctrl_cdef->uv_pri_strength[i] << SECONDARY_FILTER_STRENGTH_NUM_BITS | + ctrl_cdef->uv_sec_strength[i]; + } +} + +static void vdec_av1_slice_setup_seq(struct vdec_av1_slice_seq_header *seq, + struct v4l2_ctrl_av1_sequence *ctrl_seq) +{ + seq->bitdepth = ctrl_seq->bit_depth; + seq->max_frame_width = ctrl_seq->max_frame_width_minus_1 + 1; + seq->max_frame_height = ctrl_seq->max_frame_height_minus_1 + 1; + seq->enable_superres = SEQUENCE_FLAG(ctrl_seq, ENABLE_SUPERRES); + seq->enable_filter_intra = SEQUENCE_FLAG(ctrl_seq, ENABLE_FILTER_INTRA); + seq->enable_intra_edge_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTRA_EDGE_FILTER); + seq->enable_interintra_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_INTERINTRA_COMPOUND); + seq->enable_masked_compound = SEQUENCE_FLAG(ctrl_seq, ENABLE_MASKED_COMPOUND); + seq->enable_dual_filter = SEQUENCE_FLAG(ctrl_seq, ENABLE_DUAL_FILTER); + seq->enable_jnt_comp = SEQUENCE_FLAG(ctrl_seq, ENABLE_JNT_COMP); + seq->mono_chrome = SEQUENCE_FLAG(ctrl_seq, MONO_CHROME); + seq->enable_order_hint = SEQUENCE_FLAG(ctrl_seq, ENABLE_ORDER_HINT); + seq->order_hint_bits = ctrl_seq->order_hint_bits; + seq->use_128x128_superblock = SEQUENCE_FLAG(ctrl_seq, USE_128X128_SUPERBLOCK); + seq->subsampling_x = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_X); + seq->subsampling_y = SEQUENCE_FLAG(ctrl_seq, SUBSAMPLING_Y); +} + +static void vdec_av1_slice_setup_tile(struct vdec_av1_slice_frame *frame, + struct v4l2_av1_tile_info *ctrl_tile) +{ + struct vdec_av1_slice_seq_header *seq = &frame->seq; + struct vdec_av1_slice_tile *tile = &frame->uh.tile; + u32 mib_size_log2 = seq->use_128x128_superblock ? 5 : 4; + int i; + + tile->tile_cols = ctrl_tile->tile_cols; + tile->tile_rows = ctrl_tile->tile_rows; + tile->context_update_tile_id = ctrl_tile->context_update_tile_id; + tile->uniform_tile_spacing_flag = + BIT_FLAG(ctrl_tile, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING); + + for (i = 0; i < tile->tile_cols + 1; i++) + tile->mi_col_starts[i] = + ALIGN(ctrl_tile->mi_col_starts[i], BIT(mib_size_log2)) >> mib_size_log2; + + for (i = 0; i < tile->tile_rows + 1; i++) + tile->mi_row_starts[i] = + ALIGN(ctrl_tile->mi_row_starts[i], BIT(mib_size_log2)) >> mib_size_log2; +} + +static void vdec_av1_slice_setup_uh(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_frame *frame, + struct v4l2_ctrl_av1_frame *ctrl_fh) +{ + struct vdec_av1_slice_uncompressed_header *uh = &frame->uh; + int i; + + uh->use_ref_frame_mvs = FH_FLAG(ctrl_fh, USE_REF_FRAME_MVS); + uh->order_hint = ctrl_fh->order_hint; + vdec_av1_slice_setup_gm(uh->gm, &ctrl_fh->global_motion); + uh->upscaled_width = ctrl_fh->upscaled_width; + uh->frame_width = ctrl_fh->frame_width_minus_1 + 1; + uh->frame_height = ctrl_fh->frame_height_minus_1 + 1; + uh->mi_cols = ((uh->frame_width + 7) >> 3) << 1; + uh->mi_rows = ((uh->frame_height + 7) >> 3) << 1; + uh->reduced_tx_set = FH_FLAG(ctrl_fh, REDUCED_TX_SET); + uh->tx_mode = ctrl_fh->tx_mode; + uh->uniform_tile_spacing_flag = + BIT_FLAG(&ctrl_fh->tile_info, V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING); + uh->interpolation_filter = ctrl_fh->interpolation_filter; + uh->allow_warped_motion = FH_FLAG(ctrl_fh, ALLOW_WARPED_MOTION); + uh->is_motion_mode_switchable = FH_FLAG(ctrl_fh, IS_MOTION_MODE_SWITCHABLE); + uh->frame_type = ctrl_fh->frame_type; + uh->frame_is_intra = (uh->frame_type == V4L2_AV1_INTRA_ONLY_FRAME || + uh->frame_type == V4L2_AV1_KEY_FRAME); + + if (!uh->frame_is_intra && FH_FLAG(ctrl_fh, REFERENCE_SELECT)) + uh->reference_mode = AV1_REFERENCE_MODE_SELECT; + else + uh->reference_mode = AV1_SINGLE_REFERENCE; + + uh->allow_high_precision_mv = FH_FLAG(ctrl_fh, ALLOW_HIGH_PRECISION_MV); + uh->allow_intra_bc = FH_FLAG(ctrl_fh, ALLOW_INTRABC); + uh->force_integer_mv = FH_FLAG(ctrl_fh, FORCE_INTEGER_MV); + uh->allow_screen_content_tools = FH_FLAG(ctrl_fh, ALLOW_SCREEN_CONTENT_TOOLS); + uh->error_resilient_mode = FH_FLAG(ctrl_fh, ERROR_RESILIENT_MODE); + uh->primary_ref_frame = ctrl_fh->primary_ref_frame; + uh->disable_frame_end_update_cdf = + FH_FLAG(ctrl_fh, DISABLE_FRAME_END_UPDATE_CDF); + uh->disable_cdf_update = FH_FLAG(ctrl_fh, DISABLE_CDF_UPDATE); + uh->skip_mode.skip_mode_present = FH_FLAG(ctrl_fh, SKIP_MODE_PRESENT); + uh->skip_mode.skip_mode_frame[0] = + ctrl_fh->skip_mode_frame[0] - V4L2_AV1_REF_LAST_FRAME; + uh->skip_mode.skip_mode_frame[1] = + ctrl_fh->skip_mode_frame[1] - V4L2_AV1_REF_LAST_FRAME; + uh->skip_mode.skip_mode_allowed = ctrl_fh->skip_mode_frame[0] ? 1 : 0; + + vdec_av1_slice_setup_seg(&uh->seg, &ctrl_fh->segmentation); + uh->delta_q_lf.delta_q_present = QUANT_FLAG(&ctrl_fh->quantization, DELTA_Q_PRESENT); + uh->delta_q_lf.delta_q_res = 1 << ctrl_fh->quantization.delta_q_res; + uh->delta_q_lf.delta_lf_present = + BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT); + uh->delta_q_lf.delta_lf_res = ctrl_fh->loop_filter.delta_lf_res; + uh->delta_q_lf.delta_lf_multi = + BIT_FLAG(&ctrl_fh->loop_filter, V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI); + vdec_av1_slice_setup_quant(&uh->quant, &ctrl_fh->quantization); + + uh->coded_loss_less = 1; + for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) { + uh->quant.qindex[i] = vdec_av1_slice_get_qindex(uh, i); + uh->loss_less_array[i] = + (uh->quant.qindex[i] == 0 && uh->quant.delta_qydc == 0 && + uh->quant.delta_quac == 0 && uh->quant.delta_qudc == 0 && + uh->quant.delta_qvac == 0 && uh->quant.delta_qvdc == 0); + + if (!uh->loss_less_array[i]) + uh->coded_loss_less = 0; + } + + vdec_av1_slice_setup_lr(&uh->lr, &ctrl_fh->loop_restoration); + uh->superres_denom = ctrl_fh->superres_denom; + vdec_av1_slice_setup_lf(&uh->loop_filter, &ctrl_fh->loop_filter); + vdec_av1_slice_setup_cdef(&uh->cdef, &ctrl_fh->cdef); + vdec_av1_slice_setup_tile(frame, &ctrl_fh->tile_info); +} + +static int vdec_av1_slice_setup_tile_group(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_vsi *vsi) +{ + struct v4l2_ctrl_av1_tile_group_entry *ctrl_tge; + struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group; + struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; + struct vdec_av1_slice_tile *tile = &uh->tile; + struct v4l2_ctrl *ctrl; + u32 tge_size; + int i; + + ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY); + if (!ctrl) + return -EINVAL; + + tge_size = ctrl->elems; + ctrl_tge = (struct v4l2_ctrl_av1_tile_group_entry *)ctrl->p_cur.p; + + tile_group->num_tiles = tile->tile_cols * tile->tile_rows; + + if (tile_group->num_tiles != tge_size || + tile_group->num_tiles > V4L2_AV1_MAX_TILE_COUNT) { + mtk_vdec_err(instance->ctx, "invalid tge_size %d, tile_num:%d\n", + tge_size, tile_group->num_tiles); + return -EINVAL; + } + + for (i = 0; i < tge_size; i++) { + if (i != ctrl_tge[i].tile_row * vsi->frame.uh.tile.tile_cols + + ctrl_tge[i].tile_col) { + mtk_vdec_err(instance->ctx, "invalid tge info %d, %d %d %d\n", + i, ctrl_tge[i].tile_row, ctrl_tge[i].tile_col, + vsi->frame.uh.tile.tile_rows); + return -EINVAL; + } + tile_group->tile_size[i] = ctrl_tge[i].tile_size; + tile_group->tile_start_offset[i] = ctrl_tge[i].tile_offset; + } + + return 0; +} + +static inline void vdec_av1_slice_setup_state(struct vdec_av1_slice_vsi *vsi) +{ + memset(&vsi->state, 0, sizeof(vsi->state)); +} + +static void vdec_av1_slice_setup_scale_factors(struct vdec_av1_slice_frame_refs *frame_ref, + struct vdec_av1_slice_frame_info *ref_frame_info, + struct vdec_av1_slice_uncompressed_header *uh) +{ + struct vdec_av1_slice_scale_factors *scale_factors = &frame_ref->scale_factors; + u32 ref_upscaled_width = ref_frame_info->upscaled_width; + u32 ref_frame_height = ref_frame_info->frame_height; + u32 frame_width = uh->frame_width; + u32 frame_height = uh->frame_height; + + if (!vdec_av1_slice_need_scale(ref_upscaled_width, ref_frame_height, + frame_width, frame_height)) { + scale_factors->x_scale = -1; + scale_factors->y_scale = -1; + scale_factors->is_scaled = 0; + return; + } + + scale_factors->x_scale = + ((ref_upscaled_width << AV1_REF_SCALE_SHIFT) + (frame_width >> 1)) / frame_width; + scale_factors->y_scale = + ((ref_frame_height << AV1_REF_SCALE_SHIFT) + (frame_height >> 1)) / frame_height; + scale_factors->is_scaled = + (scale_factors->x_scale != AV1_REF_INVALID_SCALE) && + (scale_factors->y_scale != AV1_REF_INVALID_SCALE) && + (scale_factors->x_scale != AV1_REF_NO_SCALE || + scale_factors->y_scale != AV1_REF_NO_SCALE); + scale_factors->x_step = + AV1_DIV_ROUND_UP_POW2(scale_factors->x_scale, + AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS); + scale_factors->y_step = + AV1_DIV_ROUND_UP_POW2(scale_factors->y_scale, + AV1_REF_SCALE_SHIFT - AV1_SCALE_SUBPEL_BITS); +} + +static unsigned char vdec_av1_slice_get_sign_bias(int a, + int b, + u8 enable_order_hint, + u8 order_hint_bits) +{ + int diff = 0; + int m = 0; + unsigned char result = 0; + + if (!enable_order_hint) + return 0; + + diff = a - b; + m = 1 << (order_hint_bits - 1); + diff = (diff & (m - 1)) - (diff & m); + + if (diff > 0) + result = 1; + + return result; +} + +static void vdec_av1_slice_setup_ref(struct vdec_av1_slice_pfc *pfc, + struct v4l2_ctrl_av1_frame *ctrl_fh) +{ + struct vdec_av1_slice_vsi *vsi = &pfc->vsi; + struct vdec_av1_slice_frame *frame = &vsi->frame; + struct vdec_av1_slice_slot *slots = &vsi->slots; + struct vdec_av1_slice_uncompressed_header *uh = &frame->uh; + struct vdec_av1_slice_seq_header *seq = &frame->seq; + struct vdec_av1_slice_frame_info *cur_frame_info = + &slots->frame_info[vsi->slot_id]; + struct vdec_av1_slice_frame_info *frame_info; + int i, slot_id; + + if (uh->frame_is_intra) + return; + + for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) { + int ref_idx = ctrl_fh->ref_frame_idx[i]; + + pfc->ref_idx[i] = ctrl_fh->reference_frame_ts[ref_idx]; + slot_id = frame->ref_frame_map[ref_idx]; + frame_info = &slots->frame_info[slot_id]; + if (slot_id == AV1_INVALID_IDX) { + pr_err(MTK_DBG_V4L2_STR "cannot match reference[%d] 0x%llx\n", i, + ctrl_fh->reference_frame_ts[ref_idx]); + frame->order_hints[i] = 0; + frame->ref_frame_valid[i] = 0; + continue; + } + + frame->frame_refs[i].ref_fb_idx = slot_id; + vdec_av1_slice_setup_scale_factors(&frame->frame_refs[i], + frame_info, uh); + if (!seq->enable_order_hint) + frame->ref_frame_sign_bias[i + 1] = 0; + else + frame->ref_frame_sign_bias[i + 1] = + vdec_av1_slice_get_sign_bias(frame_info->order_hint, + uh->order_hint, + seq->enable_order_hint, + seq->order_hint_bits); + + frame->order_hints[i] = ctrl_fh->order_hints[i + 1]; + cur_frame_info->order_hints[i] = frame->order_hints[i]; + frame->ref_frame_valid[i] = 1; + } +} + +static void vdec_av1_slice_get_previous(struct vdec_av1_slice_vsi *vsi) +{ + struct vdec_av1_slice_frame *frame = &vsi->frame; + + if (frame->uh.primary_ref_frame == AV1_PRIMARY_REF_NONE) + frame->prev_fb_idx = AV1_INVALID_IDX; + else + frame->prev_fb_idx = frame->frame_refs[frame->uh.primary_ref_frame].ref_fb_idx; +} + +static inline void vdec_av1_slice_setup_operating_mode(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_frame *frame) +{ + frame->large_scale_tile = 0; +} + +static int vdec_av1_slice_setup_pfc(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_pfc *pfc) +{ + struct v4l2_ctrl_av1_frame *ctrl_fh; + struct v4l2_ctrl_av1_sequence *ctrl_seq; + struct vdec_av1_slice_vsi *vsi = &pfc->vsi; + int ret = 0; + + /* frame header */ + ctrl_fh = (struct v4l2_ctrl_av1_frame *) + vdec_av1_get_ctrl_ptr(instance->ctx, + V4L2_CID_STATELESS_AV1_FRAME); + if (IS_ERR(ctrl_fh)) + return PTR_ERR(ctrl_fh); + + ctrl_seq = (struct v4l2_ctrl_av1_sequence *) + vdec_av1_get_ctrl_ptr(instance->ctx, + V4L2_CID_STATELESS_AV1_SEQUENCE); + if (IS_ERR(ctrl_seq)) + return PTR_ERR(ctrl_seq); + + /* setup vsi information */ + vdec_av1_slice_setup_seq(&vsi->frame.seq, ctrl_seq); + vdec_av1_slice_setup_uh(instance, &vsi->frame, ctrl_fh); + vdec_av1_slice_setup_operating_mode(instance, &vsi->frame); + + vdec_av1_slice_setup_state(vsi); + vdec_av1_slice_setup_slot(instance, vsi, ctrl_fh); + vdec_av1_slice_setup_ref(pfc, ctrl_fh); + vdec_av1_slice_get_previous(vsi); + + pfc->seq = instance->seq; + instance->seq++; + + return ret; +} + +static void vdec_av1_slice_setup_lat_buffer(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_vsi *vsi, + struct mtk_vcodec_mem *bs, + struct vdec_lat_buf *lat_buf) +{ + struct vdec_av1_slice_work_buffer *work_buffer; + int i; + + vsi->bs.dma_addr = bs->dma_addr; + vsi->bs.size = bs->size; + + vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; + vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr; + /* used to store trans end */ + vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr; + vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; + vsi->err_map.size = lat_buf->wdma_err_addr.size; + vsi->rd_mv.dma_addr = lat_buf->rd_mv_addr.dma_addr; + vsi->rd_mv.size = lat_buf->rd_mv_addr.size; + + vsi->row_info.buf = 0; + vsi->row_info.size = 0; + + work_buffer = vsi->work_buffer; + + for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) { + work_buffer[i].mv_addr.buf = instance->mv[i].dma_addr; + work_buffer[i].mv_addr.size = instance->mv[i].size; + work_buffer[i].segid_addr.buf = instance->seg[i].dma_addr; + work_buffer[i].segid_addr.size = instance->seg[i].size; + work_buffer[i].cdf_addr.buf = instance->cdf[i].dma_addr; + work_buffer[i].cdf_addr.size = instance->cdf[i].size; + } + + vsi->cdf_tmp.buf = instance->cdf_temp.dma_addr; + vsi->cdf_tmp.size = instance->cdf_temp.size; + + vsi->tile.buf = instance->tile.dma_addr; + vsi->tile.size = instance->tile.size; + memcpy(lat_buf->tile_addr.va, instance->tile.va, 64 * instance->tile_group.num_tiles); + + vsi->cdf_table.buf = instance->cdf_table.dma_addr; + vsi->cdf_table.size = instance->cdf_table.size; + vsi->iq_table.buf = instance->iq_table.dma_addr; + vsi->iq_table.size = instance->iq_table.size; +} + +static void vdec_av1_slice_setup_seg_buffer(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_vsi *vsi) +{ + struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; + struct mtk_vcodec_mem *buf; + + /* reset segment buffer */ + if (uh->primary_ref_frame == AV1_PRIMARY_REF_NONE || !uh->seg.segmentation_enabled) { + mtk_vdec_debug(instance->ctx, "reset seg %d\n", vsi->slot_id); + if (vsi->slot_id != AV1_INVALID_IDX) { + buf = &instance->seg[vsi->slot_id]; + memset(buf->va, 0, buf->size); + } + } +} + +static void vdec_av1_slice_setup_tile_buffer(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_vsi *vsi, + struct mtk_vcodec_mem *bs) +{ + struct vdec_av1_slice_tile_group *tile_group = &instance->tile_group; + struct vdec_av1_slice_uncompressed_header *uh = &vsi->frame.uh; + struct vdec_av1_slice_tile *tile = &uh->tile; + u32 tile_num, tile_row, tile_col; + u32 allow_update_cdf = 0; + u32 sb_boundary_x_m1 = 0, sb_boundary_y_m1 = 0; + int tile_info_base; + u64 tile_buf_pa; + u32 *tile_info_buf = instance->tile.va; + u64 pa = (u64)bs->dma_addr; + + if (uh->disable_cdf_update == 0) + allow_update_cdf = 1; + + for (tile_num = 0; tile_num < tile_group->num_tiles; tile_num++) { + /* each uint32 takes place of 4 bytes */ + tile_info_base = (AV1_TILE_BUF_SIZE * tile_num) >> 2; + tile_row = tile_num / tile->tile_cols; + tile_col = tile_num % tile->tile_cols; + tile_info_buf[tile_info_base + 0] = (tile_group->tile_size[tile_num] << 3); + tile_buf_pa = pa + tile_group->tile_start_offset[tile_num]; + + /* save av1 tile high 4bits(bit 32-35) address in lower 4 bits position + * and clear original for hw requirement. + */ + tile_info_buf[tile_info_base + 1] = (tile_buf_pa & 0xFFFFFFF0ull) | + ((tile_buf_pa & 0xF00000000ull) >> 32); + tile_info_buf[tile_info_base + 2] = (tile_buf_pa & 0xFull) << 3; + + sb_boundary_x_m1 = + (tile->mi_col_starts[tile_col + 1] - tile->mi_col_starts[tile_col] - 1) & + 0x3f; + sb_boundary_y_m1 = + (tile->mi_row_starts[tile_row + 1] - tile->mi_row_starts[tile_row] - 1) & + 0x1ff; + + tile_info_buf[tile_info_base + 3] = (sb_boundary_y_m1 << 7) | sb_boundary_x_m1; + tile_info_buf[tile_info_base + 4] = ((allow_update_cdf << 18) | (1 << 16)); + + if (tile_num == tile->context_update_tile_id && + uh->disable_frame_end_update_cdf == 0) + tile_info_buf[tile_info_base + 4] |= (1 << 17); + + mtk_vdec_debug(instance->ctx, "// tile buf %d pos(%dx%d) offset 0x%x\n", + tile_num, tile_row, tile_col, tile_info_base); + mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n", + tile_info_buf[tile_info_base + 0], + tile_info_buf[tile_info_base + 1], + tile_info_buf[tile_info_base + 2], + tile_info_buf[tile_info_base + 3]); + mtk_vdec_debug(instance->ctx, "// %08x %08x %08x %08x\n", + tile_info_buf[tile_info_base + 4], + tile_info_buf[tile_info_base + 5], + tile_info_buf[tile_info_base + 6], + tile_info_buf[tile_info_base + 7]); + } +} + +static int vdec_av1_slice_setup_lat(struct vdec_av1_slice_instance *instance, + struct mtk_vcodec_mem *bs, + struct vdec_lat_buf *lat_buf, + struct vdec_av1_slice_pfc *pfc) +{ + struct vdec_av1_slice_vsi *vsi = &pfc->vsi; + int ret; + + ret = vdec_av1_slice_setup_lat_from_src_buf(instance, vsi, lat_buf); + if (ret) + return ret; + + ret = vdec_av1_slice_setup_pfc(instance, pfc); + if (ret) + return ret; + + ret = vdec_av1_slice_setup_tile_group(instance, vsi); + if (ret) + return ret; + + ret = vdec_av1_slice_alloc_working_buffer(instance, vsi); + if (ret) + return ret; + + vdec_av1_slice_setup_seg_buffer(instance, vsi); + vdec_av1_slice_setup_tile_buffer(instance, vsi, bs); + vdec_av1_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); + + return 0; +} + +static int vdec_av1_slice_update_lat(struct vdec_av1_slice_instance *instance, + struct vdec_lat_buf *lat_buf, + struct vdec_av1_slice_pfc *pfc) +{ + struct vdec_av1_slice_vsi *vsi; + + vsi = &pfc->vsi; + mtk_vdec_debug(instance->ctx, "frame %u LAT CRC 0x%08x, output size is %d\n", + pfc->seq, vsi->state.crc[0], vsi->state.out_size); + + /* buffer full, need to re-decode */ + if (vsi->state.full) { + /* buffer not enough */ + if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == vsi->ube.size) + return -ENOMEM; + return -EAGAIN; + } + + instance->width = vsi->frame.uh.upscaled_width; + instance->height = vsi->frame.uh.frame_height; + instance->frame_type = vsi->frame.uh.frame_type; + + return 0; +} + +static int vdec_av1_slice_setup_core_to_dst_buf(struct vdec_av1_slice_instance *instance, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_v4l2_buffer *dst; + + dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); + if (!dst) + return -EINVAL; + + v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true); + + return 0; +} + +static int vdec_av1_slice_setup_core_buffer(struct vdec_av1_slice_instance *instance, + struct vdec_av1_slice_pfc *pfc, + struct vdec_av1_slice_vsi *vsi, + struct vdec_fb *fb, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_buffer *vb; + struct vb2_queue *vq; + int w, h, plane, size; + int i; + + plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + w = vsi->frame.uh.upscaled_width; + h = vsi->frame.uh.frame_height; + size = ALIGN(w, VCODEC_DEC_ALIGNED_64) * ALIGN(h, VCODEC_DEC_ALIGNED_64); + + /* frame buffer */ + vsi->fb.y.dma_addr = fb->base_y.dma_addr; + if (plane == 1) + vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; + else + vsi->fb.c.dma_addr = fb->base_c.dma_addr; + + /* reference buffers */ + vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + if (!vq) + return -EINVAL; + + /* get current output buffer */ + vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf; + if (!vb) + return -EINVAL; + + /* get buffer address from vb2buf */ + for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) { + struct vdec_av1_slice_fb *vref = &vsi->ref[i]; + + vb = vb2_find_buffer(vq, pfc->ref_idx[i]); + if (!vb) { + memset(vref, 0, sizeof(*vref)); + continue; + } + + vref->y.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0); + if (plane == 1) + vref->c.dma_addr = vref->y.dma_addr + size; + else + vref->c.dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1); + } + vsi->tile.dma_addr = lat_buf->tile_addr.dma_addr; + vsi->tile.size = lat_buf->tile_addr.size; + + return 0; +} + +static int vdec_av1_slice_setup_core(struct vdec_av1_slice_instance *instance, + struct vdec_fb *fb, + struct vdec_lat_buf *lat_buf, + struct vdec_av1_slice_pfc *pfc) +{ + struct vdec_av1_slice_vsi *vsi = &pfc->vsi; + int ret; + + ret = vdec_av1_slice_setup_core_to_dst_buf(instance, lat_buf); + if (ret) + return ret; + + ret = vdec_av1_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); + if (ret) + return ret; + + return 0; +} + +static int vdec_av1_slice_update_core(struct vdec_av1_slice_instance *instance, + struct vdec_lat_buf *lat_buf, + struct vdec_av1_slice_pfc *pfc) +{ + struct vdec_av1_slice_vsi *vsi = instance->core_vsi; + + mtk_vdec_debug(instance->ctx, "frame %u Y_CRC %08x %08x %08x %08x\n", + pfc->seq, vsi->state.crc[0], vsi->state.crc[1], + vsi->state.crc[2], vsi->state.crc[3]); + mtk_vdec_debug(instance->ctx, "frame %u C_CRC %08x %08x %08x %08x\n", + pfc->seq, vsi->state.crc[8], vsi->state.crc[9], + vsi->state.crc[10], vsi->state.crc[11]); + + return 0; +} + +static int vdec_av1_slice_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_av1_slice_instance *instance; + struct vdec_av1_slice_init_vsi *vsi; + int ret; + + instance = kzalloc(sizeof(*instance), GFP_KERNEL); + if (!instance) + return -ENOMEM; + + instance->ctx = ctx; + instance->vpu.id = SCP_IPI_VDEC_LAT; + instance->vpu.core_id = SCP_IPI_VDEC_CORE; + instance->vpu.ctx = ctx; + instance->vpu.codec_type = ctx->current_codec; + + ret = vpu_dec_init(&instance->vpu); + if (ret) { + mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret); + goto error_vpu_init; + } + + /* init vsi and global flags */ + vsi = instance->vpu.vsi; + if (!vsi) { + mtk_vdec_err(ctx, "failed to get AV1 vsi\n"); + ret = -EINVAL; + goto error_vsi; + } + instance->init_vsi = vsi; + instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, (u32)vsi->core_vsi); + + if (!instance->core_vsi) { + mtk_vdec_err(ctx, "failed to get AV1 core vsi\n"); + ret = -EINVAL; + goto error_vsi; + } + + if (vsi->vsi_size != sizeof(struct vdec_av1_slice_vsi)) + mtk_vdec_err(ctx, "remote vsi size 0x%x mismatch! expected: 0x%zx\n", + vsi->vsi_size, sizeof(struct vdec_av1_slice_vsi)); + + instance->irq_enabled = 1; + instance->inneracing_mode = IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability); + + mtk_vdec_debug(ctx, "vsi 0x%p core_vsi 0x%llx 0x%p, inneracing_mode %d\n", + vsi, vsi->core_vsi, instance->core_vsi, instance->inneracing_mode); + + ret = vdec_av1_slice_init_cdf_table(instance); + if (ret) + goto error_vsi; + + ret = vdec_av1_slice_init_iq_table(instance); + if (ret) + goto error_vsi; + + ctx->drv_handle = instance; + + return 0; +error_vsi: + vpu_dec_deinit(&instance->vpu); +error_vpu_init: + kfree(instance); + + return ret; +} + +static void vdec_av1_slice_deinit(void *h_vdec) +{ + struct vdec_av1_slice_instance *instance = h_vdec; + + if (!instance) + return; + mtk_vdec_debug(instance->ctx, "h_vdec 0x%p\n", h_vdec); + vpu_dec_deinit(&instance->vpu); + vdec_av1_slice_free_working_buffer(instance); + vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx); + kfree(instance); +} + +static int vdec_av1_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_av1_slice_instance *instance = h_vdec; + int i; + + mtk_vdec_debug(instance->ctx, "flush ...\n"); + + vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue); + + for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) + vdec_av1_slice_clear_fb(&instance->slots.frame_info[i]); + + return vpu_dec_reset(&instance->vpu); +} + +static void vdec_av1_slice_get_pic_info(struct vdec_av1_slice_instance *instance) +{ + struct mtk_vcodec_dec_ctx *ctx = instance->ctx; + u32 data[3]; + + mtk_vdec_debug(ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h); + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; +} + +static inline void vdec_av1_slice_get_dpb_size(struct vdec_av1_slice_instance *instance, + u32 *dpb_sz) +{ + /* refer av1 specification */ + *dpb_sz = V4L2_AV1_TOTAL_REFS_PER_FRAME + 1; +} + +static void vdec_av1_slice_get_crop_info(struct vdec_av1_slice_instance *instance, + struct v4l2_rect *cr) +{ + struct mtk_vcodec_dec_ctx *ctx = instance->ctx; + + cr->left = 0; + cr->top = 0; + cr->width = ctx->picinfo.pic_w; + cr->height = ctx->picinfo.pic_h; + + mtk_vdec_debug(ctx, "l=%d, t=%d, w=%d, h=%d\n", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_av1_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) +{ + struct vdec_av1_slice_instance *instance = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_av1_slice_get_pic_info(instance); + break; + case GET_PARAM_DPB_SIZE: + vdec_av1_slice_get_dpb_size(instance, out); + break; + case GET_PARAM_CROP_INFO: + vdec_av1_slice_get_crop_info(instance, out); + break; + default: + mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type); + return -EINVAL; + } + + return 0; +} + +static int vdec_av1_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_av1_slice_instance *instance = h_vdec; + struct vdec_lat_buf *lat_buf; + struct vdec_av1_slice_pfc *pfc; + struct vdec_av1_slice_vsi *vsi; + struct mtk_vcodec_dec_ctx *ctx; + int ret; + + if (!instance || !instance->ctx) + return -EINVAL; + + ctx = instance->ctx; + /* init msgQ for the first time */ + if (vdec_msg_queue_init(&ctx->msg_queue, ctx, + vdec_av1_slice_core_decode, sizeof(*pfc))) { + mtk_vdec_err(ctx, "failed to init AV1 msg queue\n"); + return -ENOMEM; + } + + /* bs NULL means flush decoder */ + if (!bs) + return vdec_av1_slice_flush(h_vdec, bs, fb, res_chg); + + lat_buf = vdec_msg_queue_dqbuf(&ctx->msg_queue.lat_ctx); + if (!lat_buf) { + mtk_vdec_err(ctx, "failed to get AV1 lat buf\n"); + return -EAGAIN; + } + pfc = (struct vdec_av1_slice_pfc *)lat_buf->private_data; + if (!pfc) { + ret = -EINVAL; + goto err_free_fb_out; + } + vsi = &pfc->vsi; + + ret = vdec_av1_slice_setup_lat(instance, bs, lat_buf, pfc); + if (ret) { + mtk_vdec_err(ctx, "failed to setup AV1 lat ret %d\n", ret); + goto err_free_fb_out; + } + + vdec_av1_slice_vsi_to_remote(vsi, instance->vsi); + ret = vpu_dec_start(&instance->vpu, NULL, 0); + if (ret) { + mtk_vdec_err(ctx, "failed to dec AV1 ret %d\n", ret); + goto err_free_fb_out; + } + if (instance->inneracing_mode) + vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); + + if (instance->irq_enabled) { + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, + MTK_VDEC_LAT0); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vdec_err(ctx, "AV1 Frame %d decode timeout %d\n", pfc->seq, ret); + WRITE_ONCE(instance->vsi->state.timeout, 1); + } + vpu_dec_end(&instance->vpu); + } + + vdec_av1_slice_vsi_from_remote(vsi, instance->vsi); + ret = vdec_av1_slice_update_lat(instance, lat_buf, pfc); + + /* LAT trans full, re-decode */ + if (ret == -EAGAIN) { + mtk_vdec_err(ctx, "AV1 Frame %d trans full\n", pfc->seq); + if (!instance->inneracing_mode) + vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); + return 0; + } + + /* LAT trans full, no more UBE or decode timeout */ + if (ret == -ENOMEM || vsi->state.timeout) { + mtk_vdec_err(ctx, "AV1 Frame %d insufficient buffer or timeout\n", pfc->seq); + if (!instance->inneracing_mode) + vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); + return -EBUSY; + } + vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; + mtk_vdec_debug(ctx, "lat dma 1 0x%pad 0x%pad\n", + &pfc->vsi.trans.dma_addr, &pfc->vsi.trans.dma_addr_end); + + vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end); + + if (!instance->inneracing_mode) + vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); + memcpy(&instance->slots, &vsi->slots, sizeof(instance->slots)); + + return 0; + +err_free_fb_out: + vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); + + if (pfc) + mtk_vdec_err(ctx, "slice dec number: %d err: %d", pfc->seq, ret); + + return ret; +} + +static int vdec_av1_slice_core_decode(struct vdec_lat_buf *lat_buf) +{ + struct vdec_av1_slice_instance *instance; + struct vdec_av1_slice_pfc *pfc; + struct mtk_vcodec_dec_ctx *ctx = NULL; + struct vdec_fb *fb = NULL; + int ret = -EINVAL; + + if (!lat_buf) + return -EINVAL; + + pfc = lat_buf->private_data; + ctx = lat_buf->ctx; + if (!pfc || !ctx) + return -EINVAL; + + instance = ctx->drv_handle; + if (!instance) + goto err; + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) { + ret = -EBUSY; + goto err; + } + + ret = vdec_av1_slice_setup_core(instance, fb, lat_buf, pfc); + if (ret) { + mtk_vdec_err(ctx, "vdec_av1_slice_setup_core\n"); + goto err; + } + vdec_av1_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi); + ret = vpu_dec_core(&instance->vpu); + if (ret) { + mtk_vdec_err(ctx, "vpu_dec_core\n"); + goto err; + } + + if (instance->irq_enabled) { + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, + MTK_VDEC_CORE); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vdec_err(ctx, "AV1 frame %d core timeout\n", pfc->seq); + WRITE_ONCE(instance->vsi->state.timeout, 1); + } + vpu_dec_core_end(&instance->vpu); + } + + ret = vdec_av1_slice_update_core(instance, lat_buf, pfc); + if (ret) { + mtk_vdec_err(ctx, "vdec_av1_slice_update_core\n"); + goto err; + } + + mtk_vdec_debug(ctx, "core dma_addr_end 0x%pad\n", + &instance->core_vsi->trans.dma_addr_end); + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, instance->core_vsi->trans.dma_addr_end); + + ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); + + return 0; + +err: + /* always update read pointer */ + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); + + if (fb) + ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); + + return ret; +} + +const struct vdec_common_if vdec_av1_slice_lat_if = { + .init = vdec_av1_slice_init, + .decode = vdec_av1_slice_lat_decode, + .get_param = vdec_av1_slice_get_param, + .deinit = vdec_av1_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_if.c new file mode 100644 index 0000000000..bf7dffe60d --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_if.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> + +#include "../vdec_drv_if.h" +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_vpu_if.h" +#include "../vdec_drv_base.h" + +#define NAL_NON_IDR_SLICE 0x01 +#define NAL_IDR_SLICE 0x05 +#define NAL_H264_PPS 0x08 +#define NAL_TYPE(value) ((value) & 0x1F) + +#define BUF_PREDICTION_SZ (32 * 1024) + +#define MB_UNIT_LEN 16 + +/* motion vector size (bytes) for every macro block */ +#define HW_MB_STORE_SZ 64 + +#define H264_MAX_FB_NUM 17 +#define HDR_PARSING_BUF_SZ 1024 + +#define DEC_ERR_RET(ret) ((ret) >> 16) +#define H264_ERR_NOT_VALID 3 + +/** + * struct h264_fb - h264 decode frame buffer information + * @vdec_fb_va : virtual address of struct vdec_fb + * @y_fb_dma : dma address of Y frame buffer (luma) + * @c_fb_dma : dma address of C frame buffer (chroma) + * @poc : picture order count of frame buffer + * @reserved : for 8 bytes alignment + */ +struct h264_fb { + uint64_t vdec_fb_va; + uint64_t y_fb_dma; + uint64_t c_fb_dma; + int32_t poc; + uint32_t reserved; +}; + +/** + * struct h264_ring_fb_list - ring frame buffer list + * @fb_list : frame buffer array + * @read_idx : read index + * @write_idx : write index + * @count : buffer count in list + * @reserved : for 8 bytes alignment + */ +struct h264_ring_fb_list { + struct h264_fb fb_list[H264_MAX_FB_NUM]; + unsigned int read_idx; + unsigned int write_idx; + unsigned int count; + unsigned int reserved; +}; + +/** + * struct vdec_h264_dec_info - decode information + * @dpb_sz : decoding picture buffer size + * @resolution_changed : resolution change happen + * @realloc_mv_buf : flag to notify driver to re-allocate mv buffer + * @reserved : for 8 bytes alignment + * @bs_dma : Input bit-stream buffer dma address + * @y_fb_dma : Y frame buffer dma address + * @c_fb_dma : C frame buffer dma address + * @vdec_fb_va : VDEC frame buffer struct virtual address + */ +struct vdec_h264_dec_info { + uint32_t dpb_sz; + uint32_t resolution_changed; + uint32_t realloc_mv_buf; + uint32_t reserved; + uint64_t bs_dma; + uint64_t y_fb_dma; + uint64_t c_fb_dma; + uint64_t vdec_fb_va; +}; + +/** + * struct vdec_h264_vsi - shared memory for decode information exchange + * between VPU and Host. + * The memory is allocated by VPU then mapping to Host + * in vpu_dec_init() and freed in vpu_dec_deinit() + * by VPU. + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item + * @hdr_buf : Header parsing buffer (AP-W, VPU-R) + * @pred_buf_dma : HW working predication buffer dma address (AP-W, VPU-R) + * @mv_buf_dma : HW working motion vector buffer dma address (AP-W, VPU-R) + * @list_free : free frame buffer ring list (AP-W/R, VPU-W) + * @list_disp : display frame buffer ring list (AP-R, VPU-W) + * @dec : decode information (AP-R, VPU-W) + * @pic : picture information (AP-R, VPU-W) + * @crop : crop information (AP-R, VPU-W) + */ +struct vdec_h264_vsi { + unsigned char hdr_buf[HDR_PARSING_BUF_SZ]; + uint64_t pred_buf_dma; + uint64_t mv_buf_dma[H264_MAX_FB_NUM]; + struct h264_ring_fb_list list_free; + struct h264_ring_fb_list list_disp; + struct vdec_h264_dec_info dec; + struct vdec_pic_info pic; + struct v4l2_rect crop; +}; + +/** + * struct vdec_h264_inst - h264 decoder instance + * @num_nalu : how many nalus be decoded + * @ctx : point to mtk_vcodec_dec_ctx + * @pred_buf : HW working predication buffer + * @mv_buf : HW working motion vector buffer + * @vpu : VPU instance + * @vsi : VPU shared information + */ +struct vdec_h264_inst { + unsigned int num_nalu; + struct mtk_vcodec_dec_ctx *ctx; + struct mtk_vcodec_mem pred_buf; + struct mtk_vcodec_mem mv_buf[H264_MAX_FB_NUM]; + struct vdec_vpu_inst vpu; + struct vdec_h264_vsi *vsi; +}; + +static unsigned int get_mv_buf_size(unsigned int width, unsigned int height) +{ + return HW_MB_STORE_SZ * (width/MB_UNIT_LEN) * (height/MB_UNIT_LEN); +} + +static int allocate_predication_buf(struct vdec_h264_inst *inst) +{ + int err = 0; + + inst->pred_buf.size = BUF_PREDICTION_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, &inst->pred_buf); + if (err) { + mtk_vdec_err(inst->ctx, "failed to allocate ppl buf"); + return err; + } + + inst->vsi->pred_buf_dma = inst->pred_buf.dma_addr; + return 0; +} + +static void free_predication_buf(struct vdec_h264_inst *inst) +{ + struct mtk_vcodec_mem *mem = NULL; + + inst->vsi->pred_buf_dma = 0; + mem = &inst->pred_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); +} + +static int alloc_mv_buf(struct vdec_h264_inst *inst, struct vdec_pic_info *pic) +{ + int i; + int err; + struct mtk_vcodec_mem *mem = NULL; + unsigned int buf_sz = get_mv_buf_size(pic->buf_w, pic->buf_h); + + for (i = 0; i < H264_MAX_FB_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + mem->size = buf_sz; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "failed to allocate mv buf"); + return err; + } + inst->vsi->mv_buf_dma[i] = mem->dma_addr; + } + + return 0; +} + +static void free_mv_buf(struct vdec_h264_inst *inst) +{ + int i; + struct mtk_vcodec_mem *mem = NULL; + + for (i = 0; i < H264_MAX_FB_NUM; i++) { + inst->vsi->mv_buf_dma[i] = 0; + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + } +} + +static int check_list_validity(struct vdec_h264_inst *inst, bool disp_list) +{ + struct h264_ring_fb_list *list; + + list = disp_list ? &inst->vsi->list_disp : &inst->vsi->list_free; + + if (list->count > H264_MAX_FB_NUM || + list->read_idx >= H264_MAX_FB_NUM || + list->write_idx >= H264_MAX_FB_NUM) { + mtk_vdec_err(inst->ctx, "%s list err: cnt=%d r_idx=%d w_idx=%d", + disp_list ? "disp" : "free", list->count, + list->read_idx, list->write_idx); + return -EINVAL; + } + + return 0; +} + +static void put_fb_to_free(struct vdec_h264_inst *inst, struct vdec_fb *fb) +{ + struct h264_ring_fb_list *list; + + if (fb) { + if (check_list_validity(inst, false)) + return; + + list = &inst->vsi->list_free; + if (list->count == H264_MAX_FB_NUM) { + mtk_vdec_err(inst->ctx, "[FB] put fb free_list full"); + return; + } + + mtk_vdec_debug(inst->ctx, "[FB] put fb into free_list @(%p, %llx)", + fb->base_y.va, (u64)fb->base_y.dma_addr); + + list->fb_list[list->write_idx].vdec_fb_va = (u64)(uintptr_t)fb; + list->write_idx = (list->write_idx == H264_MAX_FB_NUM - 1) ? + 0 : list->write_idx + 1; + list->count++; + } +} + +static void get_pic_info(struct vdec_h264_inst *inst, + struct vdec_pic_info *pic) +{ + *pic = inst->vsi->pic; + mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)", + pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h); + mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)", pic->fb_sz[0], pic->fb_sz[1]); +} + +static void get_crop_info(struct vdec_h264_inst *inst, struct v4l2_rect *cr) +{ + cr->left = inst->vsi->crop.left; + cr->top = inst->vsi->crop.top; + cr->width = inst->vsi->crop.width; + cr->height = inst->vsi->crop.height; + + mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d", cr->left, cr->top, + cr->width, cr->height); +} + +static void get_dpb_size(struct vdec_h264_inst *inst, unsigned int *dpb_sz) +{ + *dpb_sz = inst->vsi->dec.dpb_sz; + mtk_vdec_debug(inst->ctx, "sz=%d", *dpb_sz); +} + +static int vdec_h264_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_h264_inst *inst = NULL; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = IPI_VDEC_H264; + inst->vpu.ctx = ctx; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vdec_err(ctx, "vdec_h264 init err=%d", err); + goto error_free_inst; + } + + inst->vsi = (struct vdec_h264_vsi *)inst->vpu.vsi; + err = allocate_predication_buf(inst); + if (err) + goto error_deinit; + + mtk_vdec_debug(ctx, "H264 Instance >> %p", inst); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); + +error_free_inst: + kfree(inst); + return err; +} + +static void vdec_h264_deinit(void *h_vdec) +{ + struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec; + + vpu_dec_deinit(&inst->vpu); + free_predication_buf(inst); + free_mv_buf(inst); + + kfree(inst); +} + +static int find_start_code(unsigned char *data, unsigned int data_sz) +{ + if (data_sz > 3 && data[0] == 0 && data[1] == 0 && data[2] == 1) + return 3; + + if (data_sz > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && + data[3] == 1) + return 4; + + return -1; +} + +static int vdec_h264_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + int nal_start_idx = 0; + int err = 0; + unsigned int nal_start; + unsigned int nal_type; + unsigned char *buf; + unsigned int buf_sz; + unsigned int data[2]; + uint64_t vdec_fb_va = (u64)(uintptr_t)fb; + uint64_t y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + uint64_t c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + mtk_vdec_debug(inst->ctx, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p", + ++inst->num_nalu, y_fb_dma, c_fb_dma, fb); + + /* bs NULL means flush decoder */ + if (bs == NULL) + return vpu_dec_reset(vpu); + + buf = (unsigned char *)bs->va; + buf_sz = bs->size; + nal_start_idx = find_start_code(buf, buf_sz); + if (nal_start_idx < 0) { + mtk_vdec_err(inst->ctx, "invalid nal start code"); + err = -EIO; + goto err_free_fb_out; + } + + nal_start = buf[nal_start_idx]; + nal_type = NAL_TYPE(buf[nal_start_idx]); + mtk_vdec_debug(inst->ctx, "\n + NALU[%d] type %d +\n", inst->num_nalu, + nal_type); + + if (nal_type == NAL_H264_PPS) { + buf_sz -= nal_start_idx; + if (buf_sz > HDR_PARSING_BUF_SZ) { + err = -EILSEQ; + goto err_free_fb_out; + } + memcpy(inst->vsi->hdr_buf, buf + nal_start_idx, buf_sz); + } + + inst->vsi->dec.bs_dma = (uint64_t)bs->dma_addr; + inst->vsi->dec.y_fb_dma = y_fb_dma; + inst->vsi->dec.c_fb_dma = c_fb_dma; + inst->vsi->dec.vdec_fb_va = vdec_fb_va; + + data[0] = buf_sz; + data[1] = nal_start; + err = vpu_dec_start(vpu, data, 2); + if (err) { + if (err > 0 && (DEC_ERR_RET(err) == H264_ERR_NOT_VALID)) { + mtk_vdec_err(inst->ctx, "- error bitstream - err = %d -", err); + err = -EIO; + } + goto err_free_fb_out; + } + + *res_chg = inst->vsi->dec.resolution_changed; + if (*res_chg) { + struct vdec_pic_info pic; + + mtk_vdec_debug(inst->ctx, "- resolution changed -"); + get_pic_info(inst, &pic); + + if (inst->vsi->dec.realloc_mv_buf) { + err = alloc_mv_buf(inst, &pic); + if (err) + goto err_free_fb_out; + } + } + + if (nal_type == NAL_NON_IDR_SLICE || nal_type == NAL_IDR_SLICE) { + /* wait decoder done interrupt */ + err = mtk_vcodec_wait_for_done_ctx(inst->ctx, + MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + if (err) + goto err_free_fb_out; + + vpu_dec_end(vpu); + } + + mtk_vdec_debug(inst->ctx, "\n - NALU[%d] type=%d -\n", inst->num_nalu, nal_type); + return 0; + +err_free_fb_out: + put_fb_to_free(inst, fb); + mtk_vdec_err(inst->ctx, "\n - NALU[%d] err=%d -\n", inst->num_nalu, err); + return err; +} + +static void vdec_h264_get_fb(struct vdec_h264_inst *inst, + struct h264_ring_fb_list *list, + bool disp_list, struct vdec_fb **out_fb) +{ + struct vdec_fb *fb; + + if (check_list_validity(inst, disp_list)) + return; + + if (list->count == 0) { + mtk_vdec_debug(inst->ctx, "[FB] there is no %s fb", disp_list ? "disp" : "free"); + *out_fb = NULL; + return; + } + + fb = (struct vdec_fb *) + (uintptr_t)list->fb_list[list->read_idx].vdec_fb_va; + fb->status |= (disp_list ? FB_ST_DISPLAY : FB_ST_FREE); + + *out_fb = fb; + mtk_vdec_debug(inst->ctx, "[FB] get %s fb st=%d poc=%d %llx", + disp_list ? "disp" : "free", + fb->status, list->fb_list[list->read_idx].poc, + list->fb_list[list->read_idx].vdec_fb_va); + + list->read_idx = (list->read_idx == H264_MAX_FB_NUM - 1) ? + 0 : list->read_idx + 1; + list->count--; +} + +static int vdec_h264_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_h264_inst *inst = (struct vdec_h264_inst *)h_vdec; + + switch (type) { + case GET_PARAM_DISP_FRAME_BUFFER: + vdec_h264_get_fb(inst, &inst->vsi->list_disp, true, out); + break; + + case GET_PARAM_FREE_FRAME_BUFFER: + vdec_h264_get_fb(inst, &inst->vsi->list_free, false, out); + break; + + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + + case GET_PARAM_DPB_SIZE: + get_dpb_size(inst, out); + break; + + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + + default: + mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +const struct vdec_common_if vdec_h264_if = { + .init = vdec_h264_init, + .decode = vdec_h264_decode, + .get_param = vdec_h264_get_param, + .deinit = vdec_h264_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.c new file mode 100644 index 0000000000..5ca20d75dc --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include "vdec_h264_req_common.h" + +/* get used parameters for sps/pps */ +#define GET_MTK_VDEC_FLAG(cond, flag) \ + { dst_param->cond = ((src_param->flags & flag) ? (1) : (0)); } +#define GET_MTK_VDEC_PARAM(param) \ + { dst_param->param = src_param->param; } + +void mtk_vdec_h264_get_ref_list(u8 *ref_list, + const struct v4l2_h264_reference *v4l2_ref_list, + int num_valid) +{ + u32 i; + + /* + * TODO The firmware does not support field decoding. Future + * implementation must use v4l2_ref_list[i].fields to obtain + * the reference field parity. + */ + + for (i = 0; i < num_valid; i++) + ref_list[i] = v4l2_ref_list[i].index; + + /* + * The firmware expects unused reflist entries to have the value 0x20. + */ + memset(&ref_list[num_valid], 0x20, 32 - num_valid); +} + +void *mtk_vdec_h264_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id) +{ + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); + + if (!ctrl) + return ERR_PTR(-EINVAL); + + return ctrl->p_cur.p; +} + +void mtk_vdec_h264_fill_dpb_info(struct mtk_vcodec_dec_ctx *ctx, + struct slice_api_h264_decode_param *decode_params, + struct mtk_h264_dpb_info *h264_dpb_info) +{ + const struct slice_h264_dpb_entry *dpb; + struct vb2_queue *vq; + struct vb2_buffer *vb; + struct vb2_v4l2_buffer *vb2_v4l2; + int index; + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + + for (index = 0; index < V4L2_H264_NUM_DPB_ENTRIES; index++) { + dpb = &decode_params->dpb[index]; + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) { + h264_dpb_info[index].reference_flag = 0; + continue; + } + + vb = vb2_find_buffer(vq, dpb->reference_ts); + if (!vb) { + dev_err(&ctx->dev->plat_dev->dev, + "Reference invalid: dpb_index(%d) reference_ts(%lld)", + index, dpb->reference_ts); + continue; + } + + /* 1 for short term reference, 2 for long term reference */ + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) + h264_dpb_info[index].reference_flag = 1; + else + h264_dpb_info[index].reference_flag = 2; + + vb2_v4l2 = container_of(vb, struct vb2_v4l2_buffer, vb2_buf); + h264_dpb_info[index].field = vb2_v4l2->field; + + h264_dpb_info[index].y_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 0); + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) + h264_dpb_info[index].c_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 1); + else + h264_dpb_info[index].c_dma_addr = + h264_dpb_info[index].y_dma_addr + + ctx->picinfo.fb_sz[0]; + } +} + +void mtk_vdec_h264_copy_sps_params(struct mtk_h264_sps_param *dst_param, + const struct v4l2_ctrl_h264_sps *src_param) +{ + GET_MTK_VDEC_PARAM(chroma_format_idc); + GET_MTK_VDEC_PARAM(bit_depth_luma_minus8); + GET_MTK_VDEC_PARAM(bit_depth_chroma_minus8); + GET_MTK_VDEC_PARAM(log2_max_frame_num_minus4); + GET_MTK_VDEC_PARAM(pic_order_cnt_type); + GET_MTK_VDEC_PARAM(log2_max_pic_order_cnt_lsb_minus4); + GET_MTK_VDEC_PARAM(max_num_ref_frames); + GET_MTK_VDEC_PARAM(pic_width_in_mbs_minus1); + GET_MTK_VDEC_PARAM(pic_height_in_map_units_minus1); + + GET_MTK_VDEC_FLAG(separate_colour_plane_flag, + V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE); + GET_MTK_VDEC_FLAG(qpprime_y_zero_transform_bypass_flag, + V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS); + GET_MTK_VDEC_FLAG(delta_pic_order_always_zero_flag, + V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO); + GET_MTK_VDEC_FLAG(frame_mbs_only_flag, + V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY); + GET_MTK_VDEC_FLAG(mb_adaptive_frame_field_flag, + V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD); + GET_MTK_VDEC_FLAG(direct_8x8_inference_flag, + V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE); +} + +void mtk_vdec_h264_copy_pps_params(struct mtk_h264_pps_param *dst_param, + const struct v4l2_ctrl_h264_pps *src_param) +{ + GET_MTK_VDEC_PARAM(num_ref_idx_l0_default_active_minus1); + GET_MTK_VDEC_PARAM(num_ref_idx_l1_default_active_minus1); + GET_MTK_VDEC_PARAM(weighted_bipred_idc); + GET_MTK_VDEC_PARAM(pic_init_qp_minus26); + GET_MTK_VDEC_PARAM(chroma_qp_index_offset); + GET_MTK_VDEC_PARAM(second_chroma_qp_index_offset); + + GET_MTK_VDEC_FLAG(entropy_coding_mode_flag, + V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE); + GET_MTK_VDEC_FLAG(pic_order_present_flag, + V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT); + GET_MTK_VDEC_FLAG(weighted_pred_flag, + V4L2_H264_PPS_FLAG_WEIGHTED_PRED); + GET_MTK_VDEC_FLAG(deblocking_filter_control_present_flag, + V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT); + GET_MTK_VDEC_FLAG(constrained_intra_pred_flag, + V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED); + GET_MTK_VDEC_FLAG(redundant_pic_cnt_present_flag, + V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT); + GET_MTK_VDEC_FLAG(transform_8x8_mode_flag, + V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE); + GET_MTK_VDEC_FLAG(scaling_matrix_present_flag, + V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT); +} + +void mtk_vdec_h264_copy_slice_hd_params(struct mtk_h264_slice_hd_param *dst_param, + const struct v4l2_ctrl_h264_slice_params *src_param, + const struct v4l2_ctrl_h264_decode_params *dec_param) +{ + int temp; + + GET_MTK_VDEC_PARAM(first_mb_in_slice); + GET_MTK_VDEC_PARAM(slice_type); + GET_MTK_VDEC_PARAM(cabac_init_idc); + GET_MTK_VDEC_PARAM(slice_qp_delta); + GET_MTK_VDEC_PARAM(disable_deblocking_filter_idc); + GET_MTK_VDEC_PARAM(slice_alpha_c0_offset_div2); + GET_MTK_VDEC_PARAM(slice_beta_offset_div2); + GET_MTK_VDEC_PARAM(num_ref_idx_l0_active_minus1); + GET_MTK_VDEC_PARAM(num_ref_idx_l1_active_minus1); + + dst_param->frame_num = dec_param->frame_num; + dst_param->pic_order_cnt_lsb = dec_param->pic_order_cnt_lsb; + + dst_param->delta_pic_order_cnt_bottom = + dec_param->delta_pic_order_cnt_bottom; + dst_param->delta_pic_order_cnt0 = + dec_param->delta_pic_order_cnt0; + dst_param->delta_pic_order_cnt1 = + dec_param->delta_pic_order_cnt1; + + temp = dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC; + dst_param->field_pic_flag = temp ? 1 : 0; + + temp = dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD; + dst_param->bottom_field_flag = temp ? 1 : 0; + + GET_MTK_VDEC_FLAG(direct_spatial_mv_pred_flag, + V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED); +} + +void mtk_vdec_h264_copy_scaling_matrix(struct slice_api_h264_scaling_matrix *dst_matrix, + const struct v4l2_ctrl_h264_scaling_matrix *src_matrix) +{ + memcpy(dst_matrix->scaling_list_4x4, src_matrix->scaling_list_4x4, + sizeof(dst_matrix->scaling_list_4x4)); + + memcpy(dst_matrix->scaling_list_8x8, src_matrix->scaling_list_8x8, + sizeof(dst_matrix->scaling_list_8x8)); +} + +void +mtk_vdec_h264_copy_decode_params(struct slice_api_h264_decode_param *dst_params, + const struct v4l2_ctrl_h264_decode_params *src_params, + const struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES]) +{ + struct slice_h264_dpb_entry *dst_entry; + const struct v4l2_h264_dpb_entry *src_entry; + int i; + + for (i = 0; i < ARRAY_SIZE(dst_params->dpb); i++) { + dst_entry = &dst_params->dpb[i]; + src_entry = &dpb[i]; + + dst_entry->reference_ts = src_entry->reference_ts; + dst_entry->frame_num = src_entry->frame_num; + dst_entry->pic_num = src_entry->pic_num; + dst_entry->top_field_order_cnt = src_entry->top_field_order_cnt; + dst_entry->bottom_field_order_cnt = + src_entry->bottom_field_order_cnt; + dst_entry->flags = src_entry->flags; + } + + /* num_slices is a leftover from the old H.264 support and is ignored + * by the firmware. + */ + dst_params->num_slices = 0; + dst_params->nal_ref_idc = src_params->nal_ref_idc; + dst_params->top_field_order_cnt = src_params->top_field_order_cnt; + dst_params->bottom_field_order_cnt = src_params->bottom_field_order_cnt; + dst_params->flags = src_params->flags; +} + +static bool mtk_vdec_h264_dpb_entry_match(const struct v4l2_h264_dpb_entry *a, + const struct v4l2_h264_dpb_entry *b) +{ + return a->top_field_order_cnt == b->top_field_order_cnt && + a->bottom_field_order_cnt == b->bottom_field_order_cnt; +} + +/* + * Move DPB entries of dec_param that refer to a frame already existing in dpb + * into the already existing slot in dpb, and move other entries into new slots. + * + * This function is an adaptation of the similarly-named function in + * hantro_h264.c. + */ +void mtk_vdec_h264_update_dpb(const struct v4l2_ctrl_h264_decode_params *dec_param, + struct v4l2_h264_dpb_entry *dpb) +{ + DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + DECLARE_BITMAP(in_use, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + unsigned int i, j; + + /* Disable all entries by default, and mark the ones in use. */ + for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) { + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) + set_bit(i, in_use); + dpb[i].flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE; + } + + /* Try to match new DPB entries with existing ones by their POCs. */ + for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) { + const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; + + if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + continue; + + /* + * To cut off some comparisons, iterate only on target DPB + * entries were already used. + */ + for_each_set_bit(j, in_use, ARRAY_SIZE(dec_param->dpb)) { + struct v4l2_h264_dpb_entry *cdpb; + + cdpb = &dpb[j]; + if (!mtk_vdec_h264_dpb_entry_match(cdpb, ndpb)) + continue; + + *cdpb = *ndpb; + set_bit(j, used); + /* Don't reiterate on this one. */ + clear_bit(j, in_use); + break; + } + + if (j == ARRAY_SIZE(dec_param->dpb)) + set_bit(i, new); + } + + /* For entries that could not be matched, use remaining free slots. */ + for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) { + const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; + struct v4l2_h264_dpb_entry *cdpb; + + /* + * Both arrays are of the same sizes, so there is no way + * we can end up with no space in target array, unless + * something is buggy. + */ + j = find_first_zero_bit(used, ARRAY_SIZE(dec_param->dpb)); + if (WARN_ON(j >= ARRAY_SIZE(dec_param->dpb))) + return; + + cdpb = &dpb[j]; + *cdpb = *ndpb; + set_bit(j, used); + } +} + +unsigned int mtk_vdec_h264_get_mv_buf_size(unsigned int width, unsigned int height) +{ + int unit_size = (width / MB_UNIT_LEN) * (height / MB_UNIT_LEN) + 8; + + return HW_MB_STORE_SZ * unit_size; +} + +int mtk_vdec_h264_find_start_code(unsigned char *data, unsigned int data_sz) +{ + if (data_sz > 3 && data[0] == 0 && data[1] == 0 && data[2] == 1) + return 3; + + if (data_sz > 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && + data[3] == 1) + return 4; + + return -1; +} diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.h b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.h new file mode 100644 index 0000000000..ac82be3360 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_common.h @@ -0,0 +1,277 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#ifndef _VDEC_H264_REQ_COMMON_H_ +#define _VDEC_H264_REQ_COMMON_H_ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/v4l2-h264.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_dec_drv.h" + +#define NAL_NON_IDR_SLICE 0x01 +#define NAL_IDR_SLICE 0x05 +#define NAL_TYPE(value) ((value) & 0x1F) + +#define BUF_PREDICTION_SZ (64 * 4096) +#define MB_UNIT_LEN 16 + +/* motion vector size (bytes) for every macro block */ +#define HW_MB_STORE_SZ 64 + +#define H264_MAX_MV_NUM 32 + +/** + * struct mtk_h264_dpb_info - h264 dpb information + * + * @y_dma_addr: Y bitstream physical address + * @c_dma_addr: CbCr bitstream physical address + * @reference_flag: reference picture flag (short/long term reference picture) + * @field: field picture flag + */ +struct mtk_h264_dpb_info { + dma_addr_t y_dma_addr; + dma_addr_t c_dma_addr; + int reference_flag; + int field; +}; + +/* + * struct mtk_h264_sps_param - parameters for sps + */ +struct mtk_h264_sps_param { + unsigned char chroma_format_idc; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char log2_max_frame_num_minus4; + unsigned char pic_order_cnt_type; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + unsigned char max_num_ref_frames; + unsigned char separate_colour_plane_flag; + unsigned short pic_width_in_mbs_minus1; + unsigned short pic_height_in_map_units_minus1; + unsigned int max_frame_nums; + unsigned char qpprime_y_zero_transform_bypass_flag; + unsigned char delta_pic_order_always_zero_flag; + unsigned char frame_mbs_only_flag; + unsigned char mb_adaptive_frame_field_flag; + unsigned char direct_8x8_inference_flag; + unsigned char reserved[3]; +}; + +/* + * struct mtk_h264_pps_param - parameters for pps + */ +struct mtk_h264_pps_param { + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + unsigned char weighted_bipred_idc; + char pic_init_qp_minus26; + char chroma_qp_index_offset; + char second_chroma_qp_index_offset; + unsigned char entropy_coding_mode_flag; + unsigned char pic_order_present_flag; + unsigned char deblocking_filter_control_present_flag; + unsigned char constrained_intra_pred_flag; + unsigned char weighted_pred_flag; + unsigned char redundant_pic_cnt_present_flag; + unsigned char transform_8x8_mode_flag; + unsigned char scaling_matrix_present_flag; + unsigned char reserved[2]; +}; + +/* + * struct mtk_h264_slice_hd_param - parameters for slice header + */ +struct mtk_h264_slice_hd_param { + unsigned int first_mb_in_slice; + unsigned int field_pic_flag; + unsigned int slice_type; + unsigned int frame_num; + int pic_order_cnt_lsb; + int delta_pic_order_cnt_bottom; + unsigned int bottom_field_flag; + unsigned int direct_spatial_mv_pred_flag; + int delta_pic_order_cnt0; + int delta_pic_order_cnt1; + unsigned int cabac_init_idc; + int slice_qp_delta; + unsigned int disable_deblocking_filter_idc; + int slice_alpha_c0_offset_div2; + int slice_beta_offset_div2; + unsigned int num_ref_idx_l0_active_minus1; + unsigned int num_ref_idx_l1_active_minus1; + unsigned int reserved; +}; + +/* + * struct slice_api_h264_scaling_matrix - parameters for scaling list + */ +struct slice_api_h264_scaling_matrix { + unsigned char scaling_list_4x4[6][16]; + unsigned char scaling_list_8x8[6][64]; +}; + +/* + * struct slice_h264_dpb_entry - each dpb information + */ +struct slice_h264_dpb_entry { + unsigned long long reference_ts; + unsigned short frame_num; + unsigned short pic_num; + /* Note that field is indicated by v4l2_buffer.field */ + int top_field_order_cnt; + int bottom_field_order_cnt; + unsigned int flags; +}; + +/* + * struct slice_api_h264_decode_param - parameters for decode. + */ +struct slice_api_h264_decode_param { + struct slice_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES]; + unsigned short num_slices; + unsigned short nal_ref_idc; + unsigned char ref_pic_list_p0[32]; + unsigned char ref_pic_list_b0[32]; + unsigned char ref_pic_list_b1[32]; + int top_field_order_cnt; + int bottom_field_order_cnt; + unsigned int flags; +}; + +/** + * struct h264_fb - h264 decode frame buffer information + * + * @vdec_fb_va: virtual address of struct vdec_fb + * @y_fb_dma: dma address of Y frame buffer (luma) + * @c_fb_dma: dma address of C frame buffer (chroma) + * @poc: picture order count of frame buffer + * @reserved: for 8 bytes alignment + */ +struct h264_fb { + u64 vdec_fb_va; + u64 y_fb_dma; + u64 c_fb_dma; + s32 poc; + u32 reserved; +}; + +/** + * mtk_vdec_h264_get_ref_list - translate V4L2 reference list + * + * @ref_list: Mediatek reference picture list + * @v4l2_ref_list: V4L2 reference picture list + * @num_valid: used reference number + */ +void mtk_vdec_h264_get_ref_list(u8 *ref_list, + const struct v4l2_h264_reference *v4l2_ref_list, + int num_valid); + +/** + * mtk_vdec_h264_get_ctrl_ptr - get each CID contrl address. + * + * @ctx: v4l2 ctx + * @id: CID control ID + * + * Return: returns CID ctrl address. + */ +void *mtk_vdec_h264_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id); + +/** + * mtk_vdec_h264_fill_dpb_info - get each CID contrl address. + * + * @ctx: v4l2 ctx + * @decode_params: slice decode params + * @h264_dpb_info: dpb buffer information + */ +void mtk_vdec_h264_fill_dpb_info(struct mtk_vcodec_dec_ctx *ctx, + struct slice_api_h264_decode_param *decode_params, + struct mtk_h264_dpb_info *h264_dpb_info); + +/** + * mtk_vdec_h264_copy_sps_params - get sps params. + * + * @dst_param: sps params for hw decoder + * @src_param: sps params from user driver + */ +void mtk_vdec_h264_copy_sps_params(struct mtk_h264_sps_param *dst_param, + const struct v4l2_ctrl_h264_sps *src_param); + +/** + * mtk_vdec_h264_copy_pps_params - get pps params. + * + * @dst_param: pps params for hw decoder + * @src_param: pps params from user driver + */ +void mtk_vdec_h264_copy_pps_params(struct mtk_h264_pps_param *dst_param, + const struct v4l2_ctrl_h264_pps *src_param); + +/** + * mtk_vdec_h264_copy_slice_hd_params - get slice header params. + * + * @dst_param: slice params for hw decoder + * @src_param: slice params from user driver + * @dec_param: decode params from user driver + */ +void mtk_vdec_h264_copy_slice_hd_params(struct mtk_h264_slice_hd_param *dst_param, + const struct v4l2_ctrl_h264_slice_params *src_param, + const struct v4l2_ctrl_h264_decode_params *dec_param); + +/** + * mtk_vdec_h264_copy_scaling_matrix - get each CID contrl address. + * + * @dst_matrix: scaling list params for hw decoder + * @src_matrix: scaling list params from user driver + */ +void mtk_vdec_h264_copy_scaling_matrix(struct slice_api_h264_scaling_matrix *dst_matrix, + const struct v4l2_ctrl_h264_scaling_matrix *src_matrix); + +/** + * mtk_vdec_h264_copy_decode_params - get decode params. + * + * @dst_params: dst params for hw decoder + * @src_params: decode params from user driver + * @dpb: dpb information + */ +void +mtk_vdec_h264_copy_decode_params(struct slice_api_h264_decode_param *dst_params, + const struct v4l2_ctrl_h264_decode_params *src_params, + const struct v4l2_h264_dpb_entry dpb[V4L2_H264_NUM_DPB_ENTRIES]); + +/** + * mtk_vdec_h264_update_dpb - updata dpb list. + * + * @dec_param: v4l2 control decode params + * @dpb: dpb entry informaton + */ +void mtk_vdec_h264_update_dpb(const struct v4l2_ctrl_h264_decode_params *dec_param, + struct v4l2_h264_dpb_entry *dpb); + +/** + * mtk_vdec_h264_find_start_code - find h264 start code using sofeware. + * + * @data: input buffer address + * @data_sz: input buffer size + * + * Return: returns start code position. + */ +int mtk_vdec_h264_find_start_code(unsigned char *data, unsigned int data_sz); + +/** + * mtk_vdec_h264_get_mv_buf_size - get mv buffer size. + * + * @width: picture width + * @height: picture height + * + * Return: returns mv buffer size. + */ +unsigned int mtk_vdec_h264_get_mv_buf_size(unsigned int width, unsigned int height); + +#endif diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c new file mode 100644 index 0000000000..5600f1df65 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_if.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/v4l2-mem2mem.h> +#include <media/v4l2-h264.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" +#include "vdec_h264_req_common.h" + +/* + * struct mtk_h264_dec_slice_param - parameters for decode current frame + */ +struct mtk_h264_dec_slice_param { + struct mtk_h264_sps_param sps; + struct mtk_h264_pps_param pps; + struct slice_api_h264_scaling_matrix scaling_matrix; + struct slice_api_h264_decode_param decode_params; + struct mtk_h264_dpb_info h264_dpb_info[16]; +}; + +/** + * struct vdec_h264_dec_info - decode information + * @dpb_sz : decoding picture buffer size + * @resolution_changed : resoltion change happen + * @realloc_mv_buf : flag to notify driver to re-allocate mv buffer + * @cap_num_planes : number planes of capture buffer + * @bs_dma : Input bit-stream buffer dma address + * @y_fb_dma : Y frame buffer dma address + * @c_fb_dma : C frame buffer dma address + * @vdec_fb_va : VDEC frame buffer struct virtual address + */ +struct vdec_h264_dec_info { + u32 dpb_sz; + u32 resolution_changed; + u32 realloc_mv_buf; + u32 cap_num_planes; + u64 bs_dma; + u64 y_fb_dma; + u64 c_fb_dma; + u64 vdec_fb_va; +}; + +/** + * struct vdec_h264_vsi - shared memory for decode information exchange + * between VPU and Host. + * The memory is allocated by VPU then mapping to Host + * in vpu_dec_init() and freed in vpu_dec_deinit() + * by VPU. + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item + * @pred_buf_dma : HW working predication buffer dma address (AP-W, VPU-R) + * @mv_buf_dma : HW working motion vector buffer dma address (AP-W, VPU-R) + * @dec : decode information (AP-R, VPU-W) + * @pic : picture information (AP-R, VPU-W) + * @crop : crop information (AP-R, VPU-W) + * @h264_slice_params : the parameters that hardware use to decode + */ +struct vdec_h264_vsi { + u64 pred_buf_dma; + u64 mv_buf_dma[H264_MAX_MV_NUM]; + struct vdec_h264_dec_info dec; + struct vdec_pic_info pic; + struct v4l2_rect crop; + struct mtk_h264_dec_slice_param h264_slice_params; +}; + +/** + * struct vdec_h264_slice_inst - h264 decoder instance + * @num_nalu : how many nalus be decoded + * @ctx : point to mtk_vcodec_dec_ctx + * @pred_buf : HW working predication buffer + * @mv_buf : HW working motion vector buffer + * @vpu : VPU instance + * @vsi_ctx : Local VSI data for this decoding context + * @h264_slice_param : the parameters that hardware use to decode + * @dpb : decoded picture buffer used to store reference buffer information + */ +struct vdec_h264_slice_inst { + unsigned int num_nalu; + struct mtk_vcodec_dec_ctx *ctx; + struct mtk_vcodec_mem pred_buf; + struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM]; + struct vdec_vpu_inst vpu; + struct vdec_h264_vsi vsi_ctx; + struct mtk_h264_dec_slice_param h264_slice_param; + + struct v4l2_h264_dpb_entry dpb[16]; +}; + +static int get_vdec_decode_parameters(struct vdec_h264_slice_inst *inst) +{ + const struct v4l2_ctrl_h264_decode_params *dec_params; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix; + struct mtk_h264_dec_slice_param *slice_param = &inst->h264_slice_param; + struct v4l2_h264_reflist_builder reflist_builder; + struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN]; + u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0; + u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0; + u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1; + + dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + if (IS_ERR(dec_params)) + return PTR_ERR(dec_params); + + sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS); + if (IS_ERR(sps)) + return PTR_ERR(sps); + + pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS); + if (IS_ERR(pps)) + return PTR_ERR(pps); + + scaling_matrix = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); + if (IS_ERR(scaling_matrix)) + return PTR_ERR(scaling_matrix); + + mtk_vdec_h264_update_dpb(dec_params, inst->dpb); + + mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps); + mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps); + mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, scaling_matrix); + mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, + dec_params, inst->dpb); + mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params, + slice_param->h264_dpb_info); + + /* Build the reference lists */ + v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps, + inst->dpb); + v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist); + v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, + v4l2_b1_reflist); + + /* Adapt the built lists to the firmware's expectations */ + mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid); + + memcpy(&inst->vsi_ctx.h264_slice_params, slice_param, + sizeof(inst->vsi_ctx.h264_slice_params)); + + return 0; +} + +static int allocate_predication_buf(struct vdec_h264_slice_inst *inst) +{ + int err; + + inst->pred_buf.size = BUF_PREDICTION_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, &inst->pred_buf); + if (err) { + mtk_vdec_err(inst->ctx, "failed to allocate ppl buf"); + return err; + } + + inst->vsi_ctx.pred_buf_dma = inst->pred_buf.dma_addr; + return 0; +} + +static void free_predication_buf(struct vdec_h264_slice_inst *inst) +{ + struct mtk_vcodec_mem *mem = &inst->pred_buf; + + inst->vsi_ctx.pred_buf_dma = 0; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); +} + +static int alloc_mv_buf(struct vdec_h264_slice_inst *inst, + struct vdec_pic_info *pic) +{ + int i; + int err; + struct mtk_vcodec_mem *mem = NULL; + unsigned int buf_sz = mtk_vdec_h264_get_mv_buf_size(pic->buf_w, pic->buf_h); + + mtk_v4l2_vdec_dbg(3, inst->ctx, "size = 0x%x", buf_sz); + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + mem->size = buf_sz; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "failed to allocate mv buf"); + return err; + } + inst->vsi_ctx.mv_buf_dma[i] = mem->dma_addr; + } + + return 0; +} + +static void free_mv_buf(struct vdec_h264_slice_inst *inst) +{ + int i; + struct mtk_vcodec_mem *mem; + + for (i = 0; i < H264_MAX_MV_NUM; i++) { + inst->vsi_ctx.mv_buf_dma[i] = 0; + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + } +} + +static void get_pic_info(struct vdec_h264_slice_inst *inst, + struct vdec_pic_info *pic) +{ + struct mtk_vcodec_dec_ctx *ctx = inst->ctx; + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.fb_sz[0] = ctx->picinfo.buf_w * ctx->picinfo.buf_h; + ctx->picinfo.fb_sz[1] = ctx->picinfo.fb_sz[0] >> 1; + inst->vsi_ctx.dec.cap_num_planes = + ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + + *pic = ctx->picinfo; + mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)", + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->picinfo.buf_w, ctx->picinfo.buf_h); + mtk_vdec_debug(inst->ctx, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0], + ctx->picinfo.fb_sz[1]); + + if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w || + ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) { + inst->vsi_ctx.dec.resolution_changed = true; + if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w || + ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h) + inst->vsi_ctx.dec.realloc_mv_buf = true; + + mtk_v4l2_vdec_dbg(1, inst->ctx, "ResChg: (%d %d) : old(%d, %d) -> new(%d, %d)", + inst->vsi_ctx.dec.resolution_changed, + inst->vsi_ctx.dec.realloc_mv_buf, + ctx->last_decoded_picinfo.pic_w, + ctx->last_decoded_picinfo.pic_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h); + } +} + +static void get_crop_info(struct vdec_h264_slice_inst *inst, struct v4l2_rect *cr) +{ + cr->left = inst->vsi_ctx.crop.left; + cr->top = inst->vsi_ctx.crop.top; + cr->width = inst->vsi_ctx.crop.width; + cr->height = inst->vsi_ctx.crop.height; + + mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static void get_dpb_size(struct vdec_h264_slice_inst *inst, unsigned int *dpb_sz) +{ + *dpb_sz = inst->vsi_ctx.dec.dpb_sz; + mtk_vdec_debug(inst->ctx, "sz=%d", *dpb_sz); +} + +static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_h264_slice_inst *inst; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = SCP_IPI_VDEC_H264; + inst->vpu.ctx = ctx; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vdec_err(ctx, "vdec_h264 init err=%d", err); + goto error_free_inst; + } + + memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); + inst->vsi_ctx.dec.resolution_changed = true; + inst->vsi_ctx.dec.realloc_mv_buf = true; + + err = allocate_predication_buf(inst); + if (err) + goto error_deinit; + + mtk_vdec_debug(ctx, "struct size = %zu,%zu,%zu,%zu\n", + sizeof(struct mtk_h264_sps_param), + sizeof(struct mtk_h264_pps_param), + sizeof(struct mtk_h264_dec_slice_param), + sizeof(struct mtk_h264_dpb_info)); + + mtk_vdec_debug(ctx, "H264 Instance >> %p", inst); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); + +error_free_inst: + kfree(inst); + return err; +} + +static void vdec_h264_slice_deinit(void *h_vdec) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + vpu_dec_deinit(&inst->vpu); + free_predication_buf(inst); + free_mv_buf(inst); + + kfree(inst); +} + +static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *unused, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + const struct v4l2_ctrl_h264_decode_params *dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info; + struct mtk_video_dec_buf *dst_buf_info; + struct vdec_fb *fb; + u32 data[2]; + u64 y_fb_dma; + u64 c_fb_dma; + int err; + + inst->num_nalu++; + /* bs NULL means flush decoder */ + if (!bs) + return vpu_dec_reset(vpu); + + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + mtk_vdec_debug(inst->ctx, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p", + inst->num_nalu, y_fb_dma, c_fb_dma, fb); + + inst->vsi_ctx.dec.bs_dma = (uint64_t)bs->dma_addr; + inst->vsi_ctx.dec.y_fb_dma = y_fb_dma; + inst->vsi_ctx.dec.c_fb_dma = c_fb_dma; + inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb; + + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, + &dst_buf_info->m2m_buf.vb, true); + err = get_vdec_decode_parameters(inst); + if (err) + goto err_free_fb_out; + + data[0] = bs->size; + /* + * Reconstruct the first byte of the NAL unit, as the firmware requests + * that information to be passed even though it is present in the stream + * itself... + */ + data[1] = (dec_params->nal_ref_idc << 5) | + ((dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) + ? 0x5 : 0x1); + + *res_chg = inst->vsi_ctx.dec.resolution_changed; + if (*res_chg) { + mtk_vdec_debug(inst->ctx, "- resolution changed -"); + if (inst->vsi_ctx.dec.realloc_mv_buf) { + err = alloc_mv_buf(inst, &inst->ctx->picinfo); + inst->vsi_ctx.dec.realloc_mv_buf = false; + if (err) + goto err_free_fb_out; + } + *res_chg = false; + } + + memcpy(inst->vpu.vsi, &inst->vsi_ctx, sizeof(inst->vsi_ctx)); + err = vpu_dec_start(vpu, data, 2); + if (err) + goto err_free_fb_out; + + /* wait decoder done interrupt */ + err = mtk_vcodec_wait_for_done_ctx(inst->ctx, + MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + if (err) + goto err_free_fb_out; + vpu_dec_end(vpu); + + memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); + mtk_vdec_debug(inst->ctx, "\n - NALU[%d]", inst->num_nalu); + return 0; + +err_free_fb_out: + mtk_vdec_err(inst->ctx, "\n - NALU[%d] err=%d -\n", inst->num_nalu, err); + return err; +} + +static int vdec_h264_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + + case GET_PARAM_DPB_SIZE: + get_dpb_size(inst, out); + break; + + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + + default: + mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +const struct vdec_common_if vdec_h264_slice_if = { + .init = vdec_h264_slice_init, + .decode = vdec_h264_slice_decode, + .get_param = vdec_h264_slice_get_param, + .deinit = vdec_h264_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c new file mode 100644 index 0000000000..0e741e0dc8 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c @@ -0,0 +1,848 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/v4l2-h264.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" +#include "vdec_h264_req_common.h" + +/** + * enum vdec_h264_core_dec_err_type - core decode error type + * + * @TRANS_BUFFER_FULL: trans buffer is full + * @SLICE_HEADER_FULL: slice header buffer is full + */ +enum vdec_h264_core_dec_err_type { + TRANS_BUFFER_FULL = 1, + SLICE_HEADER_FULL, +}; + +/** + * struct vdec_h264_slice_lat_dec_param - parameters for decode current frame + * + * @sps: h264 sps syntax parameters + * @pps: h264 pps syntax parameters + * @slice_header: h264 slice header syntax parameters + * @scaling_matrix: h264 scaling list parameters + * @decode_params: decoder parameters of each frame used for hardware decode + * @h264_dpb_info: dpb reference list + */ +struct vdec_h264_slice_lat_dec_param { + struct mtk_h264_sps_param sps; + struct mtk_h264_pps_param pps; + struct mtk_h264_slice_hd_param slice_header; + struct slice_api_h264_scaling_matrix scaling_matrix; + struct slice_api_h264_decode_param decode_params; + struct mtk_h264_dpb_info h264_dpb_info[V4L2_H264_NUM_DPB_ENTRIES]; +}; + +/** + * struct vdec_h264_slice_info - decode information + * + * @nal_info: nal info of current picture + * @timeout: Decode timeout: 1 timeout, 0 no timeount + * @bs_buf_size: bitstream size + * @bs_buf_addr: bitstream buffer dma address + * @y_fb_dma: Y frame buffer dma address + * @c_fb_dma: C frame buffer dma address + * @vdec_fb_va: VDEC frame buffer struct virtual address + * @crc: Used to check whether hardware's status is right + */ +struct vdec_h264_slice_info { + u16 nal_info; + u16 timeout; + u32 bs_buf_size; + u64 bs_buf_addr; + u64 y_fb_dma; + u64 c_fb_dma; + u64 vdec_fb_va; + u32 crc[8]; +}; + +/** + * struct vdec_h264_slice_vsi - shared memory for decode information exchange + * between SCP and Host. + * + * @wdma_err_addr: wdma error dma address + * @wdma_start_addr: wdma start dma address + * @wdma_end_addr: wdma end dma address + * @slice_bc_start_addr: slice bc start dma address + * @slice_bc_end_addr: slice bc end dma address + * @row_info_start_addr: row info start dma address + * @row_info_end_addr: row info end dma address + * @trans_start: trans start dma address + * @trans_end: trans end dma address + * @wdma_end_addr_offset: wdma end address offset + * + * @mv_buf_dma: HW working motion vector buffer + * dma address (AP-W, VPU-R) + * @dec: decode information (AP-R, VPU-W) + * @h264_slice_params: decode parameters for hw used + */ +struct vdec_h264_slice_vsi { + /* LAT dec addr */ + u64 wdma_err_addr; + u64 wdma_start_addr; + u64 wdma_end_addr; + u64 slice_bc_start_addr; + u64 slice_bc_end_addr; + u64 row_info_start_addr; + u64 row_info_end_addr; + u64 trans_start; + u64 trans_end; + u64 wdma_end_addr_offset; + + u64 mv_buf_dma[H264_MAX_MV_NUM]; + struct vdec_h264_slice_info dec; + struct vdec_h264_slice_lat_dec_param h264_slice_params; +}; + +/** + * struct vdec_h264_slice_share_info - shared information used to exchange + * message between lat and core + * + * @sps: sequence header information from user space + * @dec_params: decoder params from user space + * @h264_slice_params: decoder params used for hardware + * @trans_start: trans start dma address + * @trans_end: trans end dma address + * @nal_info: nal info of current picture + */ +struct vdec_h264_slice_share_info { + struct v4l2_ctrl_h264_sps sps; + struct v4l2_ctrl_h264_decode_params dec_params; + struct vdec_h264_slice_lat_dec_param h264_slice_params; + u64 trans_start; + u64 trans_end; + u16 nal_info; +}; + +/** + * struct vdec_h264_slice_inst - h264 decoder instance + * + * @slice_dec_num: how many picture be decoded + * @ctx: point to mtk_vcodec_dec_ctx + * @pred_buf: HW working predication buffer + * @mv_buf: HW working motion vector buffer + * @vpu: VPU instance + * @vsi: vsi used for lat + * @vsi_core: vsi used for core + * + * @vsi_ctx: Local VSI data for this decoding context + * @h264_slice_param: the parameters that hardware use to decode + * + * @resolution_changed:resolution changed + * @realloc_mv_buf: reallocate mv buffer + * @cap_num_planes: number of capture queue plane + * + * @dpb: decoded picture buffer used to store reference + * buffer information + *@is_field_bitstream: is field bitstream + */ +struct vdec_h264_slice_inst { + unsigned int slice_dec_num; + struct mtk_vcodec_dec_ctx *ctx; + struct mtk_vcodec_mem pred_buf; + struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM]; + struct vdec_vpu_inst vpu; + struct vdec_h264_slice_vsi *vsi; + struct vdec_h264_slice_vsi *vsi_core; + + struct vdec_h264_slice_vsi vsi_ctx; + struct vdec_h264_slice_lat_dec_param h264_slice_param; + + unsigned int resolution_changed; + unsigned int realloc_mv_buf; + unsigned int cap_num_planes; + + struct v4l2_h264_dpb_entry dpb[16]; + bool is_field_bitstream; +}; + +static int vdec_h264_slice_fill_decode_parameters(struct vdec_h264_slice_inst *inst, + struct vdec_h264_slice_share_info *share_info) +{ + struct vdec_h264_slice_lat_dec_param *slice_param = &inst->vsi->h264_slice_params; + const struct v4l2_ctrl_h264_decode_params *dec_params; + const struct v4l2_ctrl_h264_scaling_matrix *src_matrix; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + + dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + if (IS_ERR(dec_params)) + return PTR_ERR(dec_params); + + src_matrix = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); + if (IS_ERR(src_matrix)) + return PTR_ERR(src_matrix); + + sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS); + if (IS_ERR(sps)) + return PTR_ERR(sps); + + pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS); + if (IS_ERR(pps)) + return PTR_ERR(pps); + + if (dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) { + mtk_vdec_err(inst->ctx, "No support for H.264 field decoding."); + inst->is_field_bitstream = true; + return -EINVAL; + } + + mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps); + mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps); + mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, src_matrix); + + memcpy(&share_info->sps, sps, sizeof(*sps)); + memcpy(&share_info->dec_params, dec_params, sizeof(*dec_params)); + + return 0; +} + +static int get_vdec_sig_decode_parameters(struct vdec_h264_slice_inst *inst) +{ + const struct v4l2_ctrl_h264_decode_params *dec_params; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; + const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix; + struct vdec_h264_slice_lat_dec_param *slice_param = &inst->h264_slice_param; + struct v4l2_h264_reflist_builder reflist_builder; + struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN]; + u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0; + u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0; + u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1; + + dec_params = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + if (IS_ERR(dec_params)) + return PTR_ERR(dec_params); + + sps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SPS); + if (IS_ERR(sps)) + return PTR_ERR(sps); + + pps = mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_PPS); + if (IS_ERR(pps)) + return PTR_ERR(pps); + + scaling_matrix = + mtk_vdec_h264_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); + if (IS_ERR(scaling_matrix)) + return PTR_ERR(scaling_matrix); + + mtk_vdec_h264_update_dpb(dec_params, inst->dpb); + + mtk_vdec_h264_copy_sps_params(&slice_param->sps, sps); + mtk_vdec_h264_copy_pps_params(&slice_param->pps, pps); + mtk_vdec_h264_copy_scaling_matrix(&slice_param->scaling_matrix, scaling_matrix); + + mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, dec_params, inst->dpb); + mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params, + slice_param->h264_dpb_info); + + /* Build the reference lists */ + v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps, inst->dpb); + v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist); + v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, v4l2_b1_reflist); + + /* Adapt the built lists to the firmware's expectations */ + mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid); + + memcpy(&inst->vsi_ctx.h264_slice_params, slice_param, + sizeof(inst->vsi_ctx.h264_slice_params)); + + return 0; +} + +static void vdec_h264_slice_fill_decode_reflist(struct vdec_h264_slice_inst *inst, + struct vdec_h264_slice_lat_dec_param *slice_param, + struct vdec_h264_slice_share_info *share_info) +{ + struct v4l2_ctrl_h264_decode_params *dec_params = &share_info->dec_params; + struct v4l2_ctrl_h264_sps *sps = &share_info->sps; + struct v4l2_h264_reflist_builder reflist_builder; + struct v4l2_h264_reference v4l2_p0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b0_reflist[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference v4l2_b1_reflist[V4L2_H264_REF_LIST_LEN]; + u8 *p0_reflist = slice_param->decode_params.ref_pic_list_p0; + u8 *b0_reflist = slice_param->decode_params.ref_pic_list_b0; + u8 *b1_reflist = slice_param->decode_params.ref_pic_list_b1; + + mtk_vdec_h264_update_dpb(dec_params, inst->dpb); + + mtk_vdec_h264_copy_decode_params(&slice_param->decode_params, dec_params, + inst->dpb); + mtk_vdec_h264_fill_dpb_info(inst->ctx, &slice_param->decode_params, + slice_param->h264_dpb_info); + + mtk_v4l2_vdec_dbg(3, inst->ctx, "cur poc = %d\n", dec_params->bottom_field_order_cnt); + /* Build the reference lists */ + v4l2_h264_init_reflist_builder(&reflist_builder, dec_params, sps, + inst->dpb); + v4l2_h264_build_p_ref_list(&reflist_builder, v4l2_p0_reflist); + v4l2_h264_build_b_ref_lists(&reflist_builder, v4l2_b0_reflist, v4l2_b1_reflist); + + /* Adapt the built lists to the firmware's expectations */ + mtk_vdec_h264_get_ref_list(p0_reflist, v4l2_p0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b0_reflist, v4l2_b0_reflist, reflist_builder.num_valid); + mtk_vdec_h264_get_ref_list(b1_reflist, v4l2_b1_reflist, reflist_builder.num_valid); +} + +static int vdec_h264_slice_alloc_mv_buf(struct vdec_h264_slice_inst *inst, + struct vdec_pic_info *pic) +{ + unsigned int buf_sz = mtk_vdec_h264_get_mv_buf_size(pic->buf_w, pic->buf_h); + struct mtk_vcodec_mem *mem; + int i, err; + + mtk_v4l2_vdec_dbg(3, inst->ctx, "size = 0x%x", buf_sz); + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + mem->size = buf_sz; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "failed to allocate mv buf"); + return err; + } + } + + return 0; +} + +static void vdec_h264_slice_free_mv_buf(struct vdec_h264_slice_inst *inst) +{ + int i; + struct mtk_vcodec_mem *mem; + + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + } +} + +static void vdec_h264_slice_get_pic_info(struct vdec_h264_slice_inst *inst) +{ + struct mtk_vcodec_dec_ctx *ctx = inst->ctx; + u32 data[3]; + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1]; + inst->cap_num_planes = + ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + + mtk_vdec_debug(ctx, "pic(%d, %d), buf(%d, %d)", + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->picinfo.buf_w, ctx->picinfo.buf_h); + mtk_vdec_debug(ctx, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0], + ctx->picinfo.fb_sz[1]); + + if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w || + ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) { + inst->resolution_changed = true; + if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w || + ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h) + inst->realloc_mv_buf = true; + + mtk_v4l2_vdec_dbg(1, inst->ctx, "resChg: (%d %d) : old(%d, %d) -> new(%d, %d)", + inst->resolution_changed, + inst->realloc_mv_buf, + ctx->last_decoded_picinfo.pic_w, + ctx->last_decoded_picinfo.pic_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h); + } +} + +static void vdec_h264_slice_get_crop_info(struct vdec_h264_slice_inst *inst, + struct v4l2_rect *cr) +{ + cr->left = 0; + cr->top = 0; + cr->width = inst->ctx->picinfo.pic_w; + cr->height = inst->ctx->picinfo.pic_h; + + mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_h264_slice_inst *inst; + int err, vsi_size; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = SCP_IPI_VDEC_LAT; + inst->vpu.core_id = SCP_IPI_VDEC_CORE; + inst->vpu.ctx = ctx; + inst->vpu.codec_type = ctx->current_codec; + inst->vpu.capture_type = ctx->capture_fourcc; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vdec_err(ctx, "vdec_h264 init err=%d", err); + goto error_free_inst; + } + + vsi_size = round_up(sizeof(struct vdec_h264_slice_vsi), VCODEC_DEC_ALIGNED_64); + inst->vsi = inst->vpu.vsi; + inst->vsi_core = + (struct vdec_h264_slice_vsi *)(((char *)inst->vpu.vsi) + vsi_size); + inst->resolution_changed = true; + inst->realloc_mv_buf = true; + + mtk_vdec_debug(ctx, "lat struct size = %d,%d,%d,%d vsi: %d\n", + (int)sizeof(struct mtk_h264_sps_param), + (int)sizeof(struct mtk_h264_pps_param), + (int)sizeof(struct vdec_h264_slice_lat_dec_param), + (int)sizeof(struct mtk_h264_dpb_info), + vsi_size); + mtk_vdec_debug(ctx, "lat H264 instance >> %p, codec_type = 0x%x", + inst, inst->vpu.codec_type); + + ctx->drv_handle = inst; + return 0; + +error_free_inst: + kfree(inst); + return err; +} + +static void vdec_h264_slice_deinit(void *h_vdec) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + vpu_dec_deinit(&inst->vpu); + vdec_h264_slice_free_mv_buf(inst); + vdec_msg_queue_deinit(&inst->ctx->msg_queue, inst->ctx); + + kfree(inst); +} + +static int vdec_h264_slice_core_decode(struct vdec_lat_buf *lat_buf) +{ + struct vdec_fb *fb; + u64 vdec_fb_va; + u64 y_fb_dma, c_fb_dma; + int err, timeout, i; + struct mtk_vcodec_dec_ctx *ctx = lat_buf->ctx; + struct vdec_h264_slice_inst *inst = ctx->drv_handle; + struct vb2_v4l2_buffer *vb2_v4l2; + struct vdec_h264_slice_share_info *share_info = lat_buf->private_data; + struct mtk_vcodec_mem *mem; + struct vdec_vpu_inst *vpu = &inst->vpu; + + mtk_vdec_debug(ctx, "[h264-core] vdec_h264 core decode"); + memcpy(&inst->vsi_core->h264_slice_params, &share_info->h264_slice_params, + sizeof(share_info->h264_slice_params)); + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) { + err = -EBUSY; + mtk_vdec_err(ctx, "fb buffer is NULL"); + goto vdec_dec_end; + } + + vdec_fb_va = (unsigned long)fb; + y_fb_dma = (u64)fb->base_y.dma_addr; + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1) + c_fb_dma = + y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h; + else + c_fb_dma = (u64)fb->base_c.dma_addr; + + mtk_vdec_debug(ctx, "[h264-core] y/c addr = 0x%llx 0x%llx", y_fb_dma, c_fb_dma); + + inst->vsi_core->dec.y_fb_dma = y_fb_dma; + inst->vsi_core->dec.c_fb_dma = c_fb_dma; + inst->vsi_core->dec.vdec_fb_va = vdec_fb_va; + inst->vsi_core->dec.nal_info = share_info->nal_info; + inst->vsi_core->wdma_start_addr = + lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + inst->vsi_core->wdma_end_addr = + lat_buf->ctx->msg_queue.wdma_addr.dma_addr + + lat_buf->ctx->msg_queue.wdma_addr.size; + inst->vsi_core->wdma_err_addr = lat_buf->wdma_err_addr.dma_addr; + inst->vsi_core->slice_bc_start_addr = lat_buf->slice_bc_addr.dma_addr; + inst->vsi_core->slice_bc_end_addr = lat_buf->slice_bc_addr.dma_addr + + lat_buf->slice_bc_addr.size; + inst->vsi_core->trans_start = share_info->trans_start; + inst->vsi_core->trans_end = share_info->trans_end; + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi_core->mv_buf_dma[i] = mem->dma_addr; + } + + vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); + v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, vb2_v4l2, true); + + vdec_h264_slice_fill_decode_reflist(inst, &inst->vsi_core->h264_slice_params, + share_info); + + err = vpu_dec_core(vpu); + if (err) { + mtk_vdec_err(ctx, "core decode err=%d", err); + goto vdec_dec_end; + } + + /* wait decoder done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + if (timeout) + mtk_vdec_err(ctx, "core decode timeout: pic_%d", ctx->decoded_frame_cnt); + inst->vsi_core->dec.timeout = !!timeout; + + vpu_dec_core_end(vpu); + mtk_vdec_debug(ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x", + ctx->decoded_frame_cnt, + inst->vsi_core->dec.crc[0], inst->vsi_core->dec.crc[1], + inst->vsi_core->dec.crc[2], inst->vsi_core->dec.crc[3], + inst->vsi_core->dec.crc[4], inst->vsi_core->dec.crc[5], + inst->vsi_core->dec.crc[6], inst->vsi_core->dec.crc[7]); + +vdec_dec_end: + vdec_msg_queue_update_ube_rptr(&lat_buf->ctx->msg_queue, share_info->trans_end); + ctx->dev->vdec_pdata->cap_to_disp(ctx, !!err, lat_buf->src_buf_req); + mtk_vdec_debug(ctx, "core decode done err=%d", err); + ctx->decoded_frame_cnt++; + return 0; +} + +static void vdec_h264_insert_startcode(struct mtk_vcodec_dec_dev *vcodec_dev, unsigned char *buf, + size_t *bs_size, struct mtk_h264_pps_param *pps) +{ + struct device *dev = &vcodec_dev->plat_dev->dev; + + /* Need to add pending data at the end of bitstream when bs_sz is small than + * 20 bytes for cavlc bitstream, or lat will decode fail. This pending data is + * useful for mt8192 and mt8195 platform. + * + * cavlc bitstream when entropy_coding_mode_flag is false. + */ + if (pps->entropy_coding_mode_flag || *bs_size > 20 || + !(of_device_is_compatible(dev->of_node, "mediatek,mt8192-vcodec-dec") || + of_device_is_compatible(dev->of_node, "mediatek,mt8195-vcodec-dec"))) + return; + + buf[*bs_size] = 0; + buf[*bs_size + 1] = 0; + buf[*bs_size + 2] = 1; + buf[*bs_size + 3] = 0xff; + (*bs_size) += 4; +} + +static int vdec_h264_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info; + int nal_start_idx, err, timeout = 0, i; + unsigned int data[2]; + struct vdec_lat_buf *lat_buf; + struct vdec_h264_slice_share_info *share_info; + unsigned char *buf; + struct mtk_vcodec_mem *mem; + + if (vdec_msg_queue_init(&inst->ctx->msg_queue, inst->ctx, + vdec_h264_slice_core_decode, + sizeof(*share_info))) + return -ENOMEM; + + /* bs NULL means flush decoder */ + if (!bs) { + vdec_msg_queue_wait_lat_buf_full(&inst->ctx->msg_queue); + return vpu_dec_reset(vpu); + } + + if (inst->is_field_bitstream) + return -EINVAL; + + lat_buf = vdec_msg_queue_dqbuf(&inst->ctx->msg_queue.lat_ctx); + if (!lat_buf) { + mtk_vdec_debug(inst->ctx, "failed to get lat buffer"); + return -EAGAIN; + } + share_info = lat_buf->private_data; + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + + buf = (unsigned char *)bs->va; + nal_start_idx = mtk_vdec_h264_find_start_code(buf, bs->size); + if (nal_start_idx < 0) { + err = -EINVAL; + goto err_free_fb_out; + } + + inst->vsi->dec.nal_info = buf[nal_start_idx]; + lat_buf->src_buf_req = src_buf_info->m2m_buf.vb.vb2_buf.req_obj.req; + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, &lat_buf->ts_info, true); + + err = vdec_h264_slice_fill_decode_parameters(inst, share_info); + if (err) + goto err_free_fb_out; + + vdec_h264_insert_startcode(inst->ctx->dev, buf, &bs->size, + &share_info->h264_slice_params.pps); + + inst->vsi->dec.bs_buf_addr = (uint64_t)bs->dma_addr; + inst->vsi->dec.bs_buf_size = bs->size; + + *res_chg = inst->resolution_changed; + if (inst->resolution_changed) { + mtk_vdec_debug(inst->ctx, "- resolution changed -"); + if (inst->realloc_mv_buf) { + err = vdec_h264_slice_alloc_mv_buf(inst, &inst->ctx->picinfo); + inst->realloc_mv_buf = false; + if (err) + goto err_free_fb_out; + } + inst->resolution_changed = false; + } + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi->mv_buf_dma[i] = mem->dma_addr; + } + inst->vsi->wdma_start_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + inst->vsi->wdma_end_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr + + lat_buf->ctx->msg_queue.wdma_addr.size; + inst->vsi->wdma_err_addr = lat_buf->wdma_err_addr.dma_addr; + inst->vsi->slice_bc_start_addr = lat_buf->slice_bc_addr.dma_addr; + inst->vsi->slice_bc_end_addr = lat_buf->slice_bc_addr.dma_addr + + lat_buf->slice_bc_addr.size; + + inst->vsi->trans_end = inst->ctx->msg_queue.wdma_rptr_addr; + inst->vsi->trans_start = inst->ctx->msg_queue.wdma_wptr_addr; + mtk_vdec_debug(inst->ctx, "lat:trans(0x%llx 0x%llx) err:0x%llx", + inst->vsi->wdma_start_addr, + inst->vsi->wdma_end_addr, + inst->vsi->wdma_err_addr); + + mtk_vdec_debug(inst->ctx, "slice(0x%llx 0x%llx) rprt((0x%llx 0x%llx))", + inst->vsi->slice_bc_start_addr, + inst->vsi->slice_bc_end_addr, + inst->vsi->trans_start, + inst->vsi->trans_end); + err = vpu_dec_start(vpu, data, 2); + if (err) { + mtk_vdec_debug(inst->ctx, "lat decode err: %d", err); + goto err_free_fb_out; + } + + share_info->trans_end = inst->ctx->msg_queue.wdma_addr.dma_addr + + inst->vsi->wdma_end_addr_offset; + share_info->trans_start = inst->ctx->msg_queue.wdma_wptr_addr; + share_info->nal_info = inst->vsi->dec.nal_info; + + if (IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) { + memcpy(&share_info->h264_slice_params, &inst->vsi->h264_slice_params, + sizeof(share_info->h264_slice_params)); + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf); + } + + /* wait decoder done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0); + if (timeout) + mtk_vdec_err(inst->ctx, "lat decode timeout: pic_%d", inst->slice_dec_num); + inst->vsi->dec.timeout = !!timeout; + + err = vpu_dec_end(vpu); + if (err == SLICE_HEADER_FULL || err == TRANS_BUFFER_FULL) { + if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf); + inst->slice_dec_num++; + mtk_vdec_err(inst->ctx, "lat dec fail: pic_%d err:%d", inst->slice_dec_num, err); + return -EINVAL; + } + + share_info->trans_end = inst->ctx->msg_queue.wdma_addr.dma_addr + + inst->vsi->wdma_end_addr_offset; + vdec_msg_queue_update_ube_wptr(&lat_buf->ctx->msg_queue, share_info->trans_end); + + if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) { + memcpy(&share_info->h264_slice_params, &inst->vsi->h264_slice_params, + sizeof(share_info->h264_slice_params)); + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf); + } + mtk_vdec_debug(inst->ctx, "dec num: %d lat crc: 0x%x 0x%x 0x%x", inst->slice_dec_num, + inst->vsi->dec.crc[0], inst->vsi->dec.crc[1], inst->vsi->dec.crc[2]); + + inst->slice_dec_num++; + return 0; +err_free_fb_out: + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf); + mtk_vdec_err(inst->ctx, "slice dec number: %d err: %d", inst->slice_dec_num, err); + return err; +} + +static int vdec_h264_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *unused, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info, *dst_buf_info; + struct vdec_fb *fb; + unsigned char *buf; + unsigned int data[2], i; + u64 y_fb_dma, c_fb_dma; + struct mtk_vcodec_mem *mem; + int err, nal_start_idx; + + /* bs NULL means flush decoder */ + if (!bs) + return vpu_dec_reset(vpu); + + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + mtk_vdec_debug(inst->ctx, "[h264-dec] [%d] y_dma=%llx c_dma=%llx", + inst->ctx->decoded_frame_cnt, y_fb_dma, c_fb_dma); + + inst->vsi_ctx.dec.bs_buf_addr = (u64)bs->dma_addr; + inst->vsi_ctx.dec.bs_buf_size = bs->size; + inst->vsi_ctx.dec.y_fb_dma = y_fb_dma; + inst->vsi_ctx.dec.c_fb_dma = c_fb_dma; + inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb; + + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, + &dst_buf_info->m2m_buf.vb, true); + err = get_vdec_sig_decode_parameters(inst); + if (err) + goto err_free_fb_out; + + buf = (unsigned char *)bs->va; + nal_start_idx = mtk_vdec_h264_find_start_code(buf, bs->size); + if (nal_start_idx < 0) { + err = -EINVAL; + goto err_free_fb_out; + } + inst->vsi_ctx.dec.nal_info = buf[nal_start_idx]; + + *res_chg = inst->resolution_changed; + if (inst->resolution_changed) { + mtk_vdec_debug(inst->ctx, "- resolution changed -"); + if (inst->realloc_mv_buf) { + err = vdec_h264_slice_alloc_mv_buf(inst, &inst->ctx->picinfo); + inst->realloc_mv_buf = false; + if (err) + goto err_free_fb_out; + } + inst->resolution_changed = false; + + for (i = 0; i < H264_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi_ctx.mv_buf_dma[i] = mem->dma_addr; + } + } + + memcpy(inst->vpu.vsi, &inst->vsi_ctx, sizeof(inst->vsi_ctx)); + err = vpu_dec_start(vpu, data, 2); + if (err) + goto err_free_fb_out; + + /* wait decoder done interrupt */ + err = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + if (err) + mtk_vdec_err(inst->ctx, "decode timeout: pic_%d", inst->ctx->decoded_frame_cnt); + + inst->vsi->dec.timeout = !!err; + err = vpu_dec_end(vpu); + if (err) + goto err_free_fb_out; + + memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); + mtk_vdec_debug(inst->ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x", + inst->ctx->decoded_frame_cnt, + inst->vsi_ctx.dec.crc[0], inst->vsi_ctx.dec.crc[1], + inst->vsi_ctx.dec.crc[2], inst->vsi_ctx.dec.crc[3], + inst->vsi_ctx.dec.crc[4], inst->vsi_ctx.dec.crc[5], + inst->vsi_ctx.dec.crc[6], inst->vsi_ctx.dec.crc[7]); + + inst->ctx->decoded_frame_cnt++; + return 0; + +err_free_fb_out: + mtk_vdec_err(inst->ctx, "dec frame number: %d err: %d", inst->ctx->decoded_frame_cnt, err); + return err; +} + +static int vdec_h264_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *unused, bool *res_chg) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + int ret; + + if (!h_vdec) + return -EINVAL; + + if (inst->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE) + ret = vdec_h264_slice_single_decode(h_vdec, bs, unused, res_chg); + else + ret = vdec_h264_slice_lat_decode(h_vdec, bs, unused, res_chg); + + return ret; +} + +static int vdec_h264_slice_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_h264_slice_inst *inst = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_h264_slice_get_pic_info(inst); + break; + case GET_PARAM_DPB_SIZE: + *(unsigned int *)out = 6; + break; + case GET_PARAM_CROP_INFO: + vdec_h264_slice_get_crop_info(inst, out); + break; + default: + mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type); + return -EINVAL; + } + return 0; +} + +const struct vdec_common_if vdec_h264_slice_multi_if = { + .init = vdec_h264_slice_init, + .decode = vdec_h264_slice_decode, + .get_param = vdec_h264_slice_get_param, + .deinit = vdec_h264_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c new file mode 100644 index 0000000000..06ed47df69 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c @@ -0,0 +1,1092 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/videobuf2-dma-contig.h> + +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" + +/* the size used to store hevc wrap information */ +#define VDEC_HEVC_WRAP_SZ (532 * SZ_1K) + +#define HEVC_MAX_MV_NUM 32 + +/* get used parameters for sps/pps */ +#define GET_HEVC_VDEC_FLAG(cond, flag) \ + { dst_param->cond = ((src_param->flags & (flag)) ? (1) : (0)); } +#define GET_HEVC_VDEC_PARAM(param) \ + { dst_param->param = src_param->param; } + +/** + * enum vdec_hevc_core_dec_err_type - core decode error type + * + * @TRANS_BUFFER_FULL: trans buffer is full + * @SLICE_HEADER_FULL: slice header buffer is full + */ +enum vdec_hevc_core_dec_err_type { + TRANS_BUFFER_FULL = 1, + SLICE_HEADER_FULL, +}; + +/** + * struct mtk_hevc_dpb_info - hevc dpb information + * + * @y_dma_addr: Y plane physical address + * @c_dma_addr: CbCr plane physical address + * @reference_flag: reference picture flag (short/long term reference picture) + * @field: field picture flag + */ +struct mtk_hevc_dpb_info { + dma_addr_t y_dma_addr; + dma_addr_t c_dma_addr; + int reference_flag; + int field; +}; + +/* + * struct mtk_hevc_sps_param - parameters for sps + */ +struct mtk_hevc_sps_param { + unsigned char video_parameter_set_id; + unsigned char seq_parameter_set_id; + unsigned short pic_width_in_luma_samples; + unsigned short pic_height_in_luma_samples; + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + unsigned char sps_max_dec_pic_buffering_minus1; + unsigned char sps_max_num_reorder_pics; + unsigned char sps_max_latency_increase_plus1; + unsigned char log2_min_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_luma_coding_block_size; + unsigned char log2_min_luma_transform_block_size_minus2; + unsigned char log2_diff_max_min_luma_transform_block_size; + unsigned char max_transform_hierarchy_depth_inter; + unsigned char max_transform_hierarchy_depth_intra; + unsigned char pcm_sample_bit_depth_luma_minus1; + unsigned char pcm_sample_bit_depth_chroma_minus1; + unsigned char log2_min_pcm_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_pcm_luma_coding_block_size; + unsigned char num_short_term_ref_pic_sets; + unsigned char num_long_term_ref_pics_sps; + unsigned char chroma_format_idc; + unsigned char sps_max_sub_layers_minus1; + unsigned char separate_colour_plane; + unsigned char scaling_list_enabled; + unsigned char amp_enabled; + unsigned char sample_adaptive_offset; + unsigned char pcm_enabled; + unsigned char pcm_loop_filter_disabled; + unsigned char long_term_ref_pics_enabled; + unsigned char sps_temporal_mvp_enabled; + unsigned char strong_intra_smoothing_enabled; + unsigned char reserved[5]; +}; + +/* + * struct mtk_hevc_pps_param - parameters for pps + */ +struct mtk_hevc_pps_param { + unsigned char pic_parameter_set_id; + unsigned char num_extra_slice_header_bits; + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + char init_qp_minus26; + unsigned char diff_cu_qp_delta_depth; + char pps_cb_qp_offset; + char pps_cr_qp_offset; + unsigned char num_tile_columns_minus1; + unsigned char num_tile_rows_minus1; + unsigned char column_width_minus1[20]; + unsigned char row_height_minus1[22]; + char pps_beta_offset_div2; + char pps_tc_offset_div2; + unsigned char log2_parallel_merge_level_minus2; + char dependent_slice_segment_enabled; + char output_flag_present; + char sign_data_hiding_enabled; + char cabac_init_present; + char constrained_intra_pred; + char transform_skip_enabled; + char cu_qp_delta_enabled; + char pps_slice_chroma_qp_offsets_present; + char weighted_pred; + char weighted_bipred; + char transquant_bypass_enabled; + char pps_flag_tiles_enabled; + char entropy_coding_sync_enabled; + char loop_filter_across_tiles_enabled; + char pps_loop_filter_across_slices_enabled; + char deblocking_filter_override_enabled; + char pps_disable_deflocking_filter; + char lists_modification_present; + char slice_segment_header_extersion_present; + char deblocking_filter_control_present; + char uniform_spacing; + char reserved[6]; +}; + +/* + * struct mtk_hevc_slice_header_param - parameters for slice header + */ +struct mtk_hevc_slice_header_param { + unsigned int slice_type; + unsigned int num_active_ref_layer_pics; + int slice_qp; + int slice_qp_delta_cb; + int slice_qp_delta_cr; + int num_ref_idx[3]; + unsigned int col_ref_idx; + unsigned int five_minus_max_num_merge_cand; + int slice_deblocking_filter_beta_offset_div2; + int slice_deblocking_filter_tc_offset_div2; + unsigned char sao_enable_flag; + unsigned char sao_enable_flag_chroma; + unsigned char cabac_init_flag; + unsigned char slice_tmvp_flags_present; + unsigned char col_from_l0_flag; + unsigned char mvd_l1_zero_flag; + unsigned char slice_loop_filter_across_slices_enabled_flag; + unsigned char deblocking_filter_disable_flag; + unsigned int slice_reg0; + unsigned int slice_reg1; + unsigned int slice_reg2; + unsigned int num_rps_curr_temp_list; + unsigned int ref_list_mode; + int str_num_delta_pocs; + int str_num_negtive_pos_pics; + int num_long_term; + int num_long_term_sps; + unsigned int max_cu_width; + unsigned int max_cu_height; + unsigned int num_entry_point_offsets; + unsigned int last_lcu_x_in_tile[17]; + unsigned int last_lcu_y_in_tile[17]; + unsigned char nal_unit_type; +}; + +/* + * struct slice_api_hevc_scaling_matrix - parameters for scaling list + */ +struct slice_api_hevc_scaling_matrix { + unsigned char scaling_list_4x4[6][16]; + unsigned char scaling_list_8x8[6][64]; + unsigned char scaling_list_16x16[6][64]; + unsigned char scaling_list_32x32[2][64]; + unsigned char scaling_list_dc_coef_16x16[6]; + unsigned char scaling_list_dc_coef_32x32[2]; +}; + +/* + * struct slice_hevc_dpb_entry - each dpb information + */ +struct slice_hevc_dpb_entry { + u64 timestamp; + unsigned char flags; + unsigned char field_pic; + int pic_order_cnt_val; +}; + +/* + * struct slice_api_hevc_decode_param - parameters for decode. + */ +struct slice_api_hevc_decode_param { + struct slice_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + int pic_order_cnt_val; + unsigned short short_term_ref_pic_set_size; + unsigned short long_term_ref_pic_set_size; + unsigned char num_active_dpb_entries; + unsigned char num_poc_st_curr_before; + unsigned char num_poc_st_curr_after; + unsigned char num_poc_lt_curr; + unsigned char poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + unsigned char poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + unsigned char poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + unsigned char num_delta_pocs_of_ref_rps_idx; + int flags; +}; + +/** + * struct hevc_fb - hevc decode frame buffer information + * + * @vdec_fb_va: virtual address of struct vdec_fb + * @y_fb_dma: dma address of Y frame buffer (luma) + * @c_fb_dma: dma address of C frame buffer (chroma) + * @poc: picture order count of frame buffer + * @reserved: for 8 bytes alignment + */ +struct hevc_fb { + u64 vdec_fb_va; + u64 y_fb_dma; + u64 c_fb_dma; + s32 poc; + u32 reserved; +}; + +/** + * struct vdec_hevc_slice_lat_dec_param - parameters for decode current frame + * + * @sps: hevc sps syntax parameters + * @pps: hevc pps syntax parameters + * @slice_header: hevc slice header syntax parameters + * @scaling_matrix: hevc scaling list parameters + * @decode_params: decoder parameters of each frame used for hardware decode + * @hevc_dpb_info: dpb reference list + */ +struct vdec_hevc_slice_lat_dec_param { + struct mtk_hevc_sps_param sps; + struct mtk_hevc_pps_param pps; + struct mtk_hevc_slice_header_param slice_header; + struct slice_api_hevc_scaling_matrix scaling_matrix; + struct slice_api_hevc_decode_param decode_params; + struct mtk_hevc_dpb_info hevc_dpb_info[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; +}; + +/** + * struct vdec_hevc_slice_info - decode information + * + * @wdma_end_addr_offset: wdma end address offset + * @timeout: Decode timeout: 1 timeout, 0 no timeount + * @vdec_fb_va: VDEC frame buffer struct virtual address + * @crc: Used to check whether hardware's status is right + */ +struct vdec_hevc_slice_info { + u64 wdma_end_addr_offset; + u64 timeout; + u64 vdec_fb_va; + u32 crc[8]; +}; + +/* + * struct vdec_hevc_slice_mem - memory address and size + */ +struct vdec_hevc_slice_mem { + union { + u64 buf; + dma_addr_t dma_addr; + }; + union { + size_t size; + dma_addr_t dma_addr_end; + u64 padding; + }; +}; + +/** + * struct vdec_hevc_slice_fb - frame buffer for decoding + * @y: current y buffer address info + * @c: current c buffer address info + */ +struct vdec_hevc_slice_fb { + struct vdec_hevc_slice_mem y; + struct vdec_hevc_slice_mem c; +}; + +/** + * struct vdec_hevc_slice_vsi - shared memory for decode information exchange + * between SCP and Host. + * + * @bs: input buffer info + * + * @ube: ube buffer + * @trans: transcoded buffer + * @err_map: err map buffer + * @slice_bc: slice bc buffer + * @wrap: temp buffer + * + * @fb: current y/c buffer + * @mv_buf_dma: HW working motion vector buffer + * @dec: decode information (AP-R, VPU-W) + * @hevc_slice_params: decode parameters for hw used + */ +struct vdec_hevc_slice_vsi { + /* used in LAT stage */ + struct vdec_hevc_slice_mem bs; + + struct vdec_hevc_slice_mem ube; + struct vdec_hevc_slice_mem trans; + struct vdec_hevc_slice_mem err_map; + struct vdec_hevc_slice_mem slice_bc; + struct vdec_hevc_slice_mem wrap; + + struct vdec_hevc_slice_fb fb; + struct vdec_hevc_slice_mem mv_buf_dma[HEVC_MAX_MV_NUM]; + struct vdec_hevc_slice_info dec; + struct vdec_hevc_slice_lat_dec_param hevc_slice_params; +}; + +/** + * struct vdec_hevc_slice_share_info - shared information used to exchange + * message between lat and core + * + * @sps: sequence header information from user space + * @dec_params: decoder params from user space + * @hevc_slice_params: decoder params used for hardware + * @trans: trans buffer dma address + */ +struct vdec_hevc_slice_share_info { + struct v4l2_ctrl_hevc_sps sps; + struct v4l2_ctrl_hevc_decode_params dec_params; + struct vdec_hevc_slice_lat_dec_param hevc_slice_params; + struct vdec_hevc_slice_mem trans; +}; + +/** + * struct vdec_hevc_slice_inst - hevc decoder instance + * + * @slice_dec_num: how many picture be decoded + * @ctx: point to mtk_vcodec_dec_ctx + * @mv_buf: HW working motion vector buffer + * @vpu: VPU instance + * @vsi: vsi used for lat + * @vsi_core: vsi used for core + * @wrap_addr: wrap address used for hevc + * + * @hevc_slice_param: the parameters that hardware use to decode + * + * @resolution_changed: resolution changed + * @realloc_mv_buf: reallocate mv buffer + * @cap_num_planes: number of capture queue plane + */ +struct vdec_hevc_slice_inst { + unsigned int slice_dec_num; + struct mtk_vcodec_dec_ctx *ctx; + struct mtk_vcodec_mem mv_buf[HEVC_MAX_MV_NUM]; + struct vdec_vpu_inst vpu; + struct vdec_hevc_slice_vsi *vsi; + struct vdec_hevc_slice_vsi *vsi_core; + struct mtk_vcodec_mem wrap_addr; + + struct vdec_hevc_slice_lat_dec_param hevc_slice_param; + + unsigned int resolution_changed; + unsigned int realloc_mv_buf; + unsigned int cap_num_planes; +}; + +static unsigned int vdec_hevc_get_mv_buf_size(unsigned int width, unsigned int height) +{ + const unsigned int unit_size = (width / 16) * (height / 16) + 8; + + return 64 * unit_size; +} + +static void *vdec_hevc_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id) +{ + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); + + if (!ctrl) + return ERR_PTR(-EINVAL); + + return ctrl->p_cur.p; +} + +static void vdec_hevc_fill_dpb_info(struct mtk_vcodec_dec_ctx *ctx, + struct slice_api_hevc_decode_param *decode_params, + struct mtk_hevc_dpb_info *hevc_dpb_info) +{ + const struct slice_hevc_dpb_entry *dpb; + struct vb2_queue *vq; + struct vb2_buffer *vb; + int index; + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + for (index = 0; index < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; index++) { + dpb = &decode_params->dpb[index]; + if (index >= decode_params->num_active_dpb_entries) + continue; + + vb = vb2_find_buffer(vq, dpb->timestamp); + if (!vb) { + dev_err(&ctx->dev->plat_dev->dev, + "Reference invalid: dpb_index(%d) timestamp(%lld)", + index, dpb->timestamp); + continue; + } + + hevc_dpb_info[index].field = dpb->field_pic; + + hevc_dpb_info[index].y_dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0); + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) + hevc_dpb_info[index].c_dma_addr = vb2_dma_contig_plane_dma_addr(vb, 1); + else + hevc_dpb_info[index].c_dma_addr = + hevc_dpb_info[index].y_dma_addr + ctx->picinfo.fb_sz[0]; + } +} + +static void vdec_hevc_copy_sps_params(struct mtk_hevc_sps_param *dst_param, + const struct v4l2_ctrl_hevc_sps *src_param) +{ + GET_HEVC_VDEC_PARAM(video_parameter_set_id); + GET_HEVC_VDEC_PARAM(seq_parameter_set_id); + GET_HEVC_VDEC_PARAM(pic_width_in_luma_samples); + GET_HEVC_VDEC_PARAM(pic_height_in_luma_samples); + GET_HEVC_VDEC_PARAM(bit_depth_luma_minus8); + GET_HEVC_VDEC_PARAM(bit_depth_chroma_minus8); + GET_HEVC_VDEC_PARAM(log2_max_pic_order_cnt_lsb_minus4); + GET_HEVC_VDEC_PARAM(sps_max_dec_pic_buffering_minus1); + GET_HEVC_VDEC_PARAM(sps_max_num_reorder_pics); + GET_HEVC_VDEC_PARAM(sps_max_latency_increase_plus1); + GET_HEVC_VDEC_PARAM(log2_min_luma_coding_block_size_minus3); + GET_HEVC_VDEC_PARAM(log2_diff_max_min_luma_coding_block_size); + GET_HEVC_VDEC_PARAM(log2_min_luma_transform_block_size_minus2); + GET_HEVC_VDEC_PARAM(log2_diff_max_min_luma_transform_block_size); + GET_HEVC_VDEC_PARAM(max_transform_hierarchy_depth_inter); + GET_HEVC_VDEC_PARAM(max_transform_hierarchy_depth_intra); + GET_HEVC_VDEC_PARAM(pcm_sample_bit_depth_luma_minus1); + GET_HEVC_VDEC_PARAM(pcm_sample_bit_depth_chroma_minus1); + GET_HEVC_VDEC_PARAM(log2_min_pcm_luma_coding_block_size_minus3); + GET_HEVC_VDEC_PARAM(log2_diff_max_min_pcm_luma_coding_block_size); + GET_HEVC_VDEC_PARAM(num_short_term_ref_pic_sets); + GET_HEVC_VDEC_PARAM(num_long_term_ref_pics_sps); + GET_HEVC_VDEC_PARAM(chroma_format_idc); + GET_HEVC_VDEC_PARAM(sps_max_sub_layers_minus1); + + GET_HEVC_VDEC_FLAG(separate_colour_plane, + V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE); + GET_HEVC_VDEC_FLAG(scaling_list_enabled, + V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED); + GET_HEVC_VDEC_FLAG(amp_enabled, + V4L2_HEVC_SPS_FLAG_AMP_ENABLED); + GET_HEVC_VDEC_FLAG(sample_adaptive_offset, + V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET); + GET_HEVC_VDEC_FLAG(pcm_enabled, + V4L2_HEVC_SPS_FLAG_PCM_ENABLED); + GET_HEVC_VDEC_FLAG(pcm_loop_filter_disabled, + V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED); + GET_HEVC_VDEC_FLAG(long_term_ref_pics_enabled, + V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT); + GET_HEVC_VDEC_FLAG(sps_temporal_mvp_enabled, + V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED); + GET_HEVC_VDEC_FLAG(strong_intra_smoothing_enabled, + V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED); +} + +static void vdec_hevc_copy_pps_params(struct mtk_hevc_pps_param *dst_param, + const struct v4l2_ctrl_hevc_pps *src_param) +{ + int i; + + GET_HEVC_VDEC_PARAM(pic_parameter_set_id); + GET_HEVC_VDEC_PARAM(num_extra_slice_header_bits); + GET_HEVC_VDEC_PARAM(num_ref_idx_l0_default_active_minus1); + GET_HEVC_VDEC_PARAM(num_ref_idx_l1_default_active_minus1); + GET_HEVC_VDEC_PARAM(init_qp_minus26); + GET_HEVC_VDEC_PARAM(diff_cu_qp_delta_depth); + GET_HEVC_VDEC_PARAM(pps_cb_qp_offset); + GET_HEVC_VDEC_PARAM(pps_cr_qp_offset); + GET_HEVC_VDEC_PARAM(num_tile_columns_minus1); + GET_HEVC_VDEC_PARAM(num_tile_rows_minus1); + GET_HEVC_VDEC_PARAM(init_qp_minus26); + GET_HEVC_VDEC_PARAM(diff_cu_qp_delta_depth); + GET_HEVC_VDEC_PARAM(pic_parameter_set_id); + GET_HEVC_VDEC_PARAM(num_extra_slice_header_bits); + GET_HEVC_VDEC_PARAM(num_ref_idx_l0_default_active_minus1); + GET_HEVC_VDEC_PARAM(num_ref_idx_l1_default_active_minus1); + GET_HEVC_VDEC_PARAM(pps_beta_offset_div2); + GET_HEVC_VDEC_PARAM(pps_tc_offset_div2); + GET_HEVC_VDEC_PARAM(log2_parallel_merge_level_minus2); + + for (i = 0; i < ARRAY_SIZE(src_param->column_width_minus1); i++) + GET_HEVC_VDEC_PARAM(column_width_minus1[i]); + for (i = 0; i < ARRAY_SIZE(src_param->row_height_minus1); i++) + GET_HEVC_VDEC_PARAM(row_height_minus1[i]); + + GET_HEVC_VDEC_FLAG(dependent_slice_segment_enabled, + V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED); + GET_HEVC_VDEC_FLAG(output_flag_present, + V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT); + GET_HEVC_VDEC_FLAG(sign_data_hiding_enabled, + V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED); + GET_HEVC_VDEC_FLAG(cabac_init_present, + V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT); + GET_HEVC_VDEC_FLAG(constrained_intra_pred, + V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED); + GET_HEVC_VDEC_FLAG(transform_skip_enabled, + V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED); + GET_HEVC_VDEC_FLAG(cu_qp_delta_enabled, + V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED); + GET_HEVC_VDEC_FLAG(pps_slice_chroma_qp_offsets_present, + V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT); + GET_HEVC_VDEC_FLAG(weighted_pred, + V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED); + GET_HEVC_VDEC_FLAG(weighted_bipred, + V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED); + GET_HEVC_VDEC_FLAG(transquant_bypass_enabled, + V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED); + GET_HEVC_VDEC_FLAG(pps_flag_tiles_enabled, + V4L2_HEVC_PPS_FLAG_TILES_ENABLED); + GET_HEVC_VDEC_FLAG(entropy_coding_sync_enabled, + V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED); + GET_HEVC_VDEC_FLAG(loop_filter_across_tiles_enabled, + V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED); + GET_HEVC_VDEC_FLAG(pps_loop_filter_across_slices_enabled, + V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED); + GET_HEVC_VDEC_FLAG(deblocking_filter_override_enabled, + V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED); + GET_HEVC_VDEC_FLAG(pps_disable_deflocking_filter, + V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER); + GET_HEVC_VDEC_FLAG(lists_modification_present, + V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT); + GET_HEVC_VDEC_FLAG(slice_segment_header_extersion_present, + V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT); + GET_HEVC_VDEC_FLAG(deblocking_filter_control_present, + V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT); + GET_HEVC_VDEC_FLAG(uniform_spacing, + V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING); +} + +static void vdec_hevc_copy_scaling_matrix(struct slice_api_hevc_scaling_matrix *dst_matrix, + const struct v4l2_ctrl_hevc_scaling_matrix *src_matrix) +{ + memcpy(dst_matrix, src_matrix, sizeof(*src_matrix)); +} + +static void +vdec_hevc_copy_decode_params(struct slice_api_hevc_decode_param *dst_param, + const struct v4l2_ctrl_hevc_decode_params *src_param, + const struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]) +{ + struct slice_hevc_dpb_entry *dst_entry; + const struct v4l2_hevc_dpb_entry *src_entry; + int i; + + for (i = 0; i < ARRAY_SIZE(dst_param->dpb); i++) { + dst_entry = &dst_param->dpb[i]; + src_entry = &dpb[i]; + + dst_entry->timestamp = src_entry->timestamp; + dst_entry->flags = src_entry->flags; + dst_entry->field_pic = src_entry->field_pic; + dst_entry->pic_order_cnt_val = src_entry->pic_order_cnt_val; + + GET_HEVC_VDEC_PARAM(poc_st_curr_before[i]); + GET_HEVC_VDEC_PARAM(poc_st_curr_after[i]); + GET_HEVC_VDEC_PARAM(poc_lt_curr[i]); + } + + GET_HEVC_VDEC_PARAM(pic_order_cnt_val); + GET_HEVC_VDEC_PARAM(short_term_ref_pic_set_size); + GET_HEVC_VDEC_PARAM(long_term_ref_pic_set_size); + GET_HEVC_VDEC_PARAM(num_active_dpb_entries); + GET_HEVC_VDEC_PARAM(num_poc_st_curr_before); + GET_HEVC_VDEC_PARAM(num_poc_st_curr_after); + GET_HEVC_VDEC_PARAM(num_delta_pocs_of_ref_rps_idx); + GET_HEVC_VDEC_PARAM(num_poc_lt_curr); + GET_HEVC_VDEC_PARAM(flags); +} + +static int vdec_hevc_slice_fill_decode_parameters(struct vdec_hevc_slice_inst *inst, + struct vdec_hevc_slice_share_info *share_info) +{ + struct vdec_hevc_slice_lat_dec_param *slice_param = &inst->vsi->hevc_slice_params; + const struct v4l2_ctrl_hevc_decode_params *dec_params; + const struct v4l2_ctrl_hevc_scaling_matrix *src_matrix; + const struct v4l2_ctrl_hevc_sps *sps; + const struct v4l2_ctrl_hevc_pps *pps; + + dec_params = + vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS); + if (IS_ERR(dec_params)) + return PTR_ERR(dec_params); + + src_matrix = + vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); + if (IS_ERR(src_matrix)) + return PTR_ERR(src_matrix); + + sps = vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_SPS); + if (IS_ERR(sps)) + return PTR_ERR(sps); + + pps = vdec_hevc_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_HEVC_PPS); + if (IS_ERR(pps)) + return PTR_ERR(pps); + + vdec_hevc_copy_sps_params(&slice_param->sps, sps); + vdec_hevc_copy_pps_params(&slice_param->pps, pps); + vdec_hevc_copy_scaling_matrix(&slice_param->scaling_matrix, src_matrix); + + memcpy(&share_info->sps, sps, sizeof(*sps)); + memcpy(&share_info->dec_params, dec_params, sizeof(*dec_params)); + + slice_param->decode_params.num_poc_st_curr_before = dec_params->num_poc_st_curr_before; + slice_param->decode_params.num_poc_st_curr_after = dec_params->num_poc_st_curr_after; + slice_param->decode_params.num_poc_lt_curr = dec_params->num_poc_lt_curr; + slice_param->decode_params.num_delta_pocs_of_ref_rps_idx = + dec_params->num_delta_pocs_of_ref_rps_idx; + + return 0; +} + +static void vdec_hevc_slice_fill_decode_reflist(struct vdec_hevc_slice_inst *inst, + struct vdec_hevc_slice_lat_dec_param *slice_param, + struct vdec_hevc_slice_share_info *share_info) +{ + struct v4l2_ctrl_hevc_decode_params *dec_params = &share_info->dec_params; + + vdec_hevc_copy_decode_params(&slice_param->decode_params, dec_params, + share_info->dec_params.dpb); + + vdec_hevc_fill_dpb_info(inst->ctx, &slice_param->decode_params, + slice_param->hevc_dpb_info); +} + +static int vdec_hevc_slice_alloc_mv_buf(struct vdec_hevc_slice_inst *inst, + struct vdec_pic_info *pic) +{ + unsigned int buf_sz = vdec_hevc_get_mv_buf_size(pic->buf_w, pic->buf_h); + struct mtk_vcodec_mem *mem; + int i, err; + + mtk_v4l2_vdec_dbg(3, inst->ctx, "allocate mv buffer size = 0x%x", buf_sz); + for (i = 0; i < HEVC_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + mem->size = buf_sz; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "failed to allocate mv buf"); + return err; + } + } + + return 0; +} + +static void vdec_hevc_slice_free_mv_buf(struct vdec_hevc_slice_inst *inst) +{ + int i; + struct mtk_vcodec_mem *mem; + + for (i = 0; i < HEVC_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + } +} + +static void vdec_hevc_slice_get_pic_info(struct vdec_hevc_slice_inst *inst) +{ + struct mtk_vcodec_dec_ctx *ctx = inst->ctx; + u32 data[3]; + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, VCODEC_DEC_ALIGNED_64); + ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1]; + inst->cap_num_planes = + ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + + mtk_vdec_debug(ctx, "pic(%d, %d), buf(%d, %d)", + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->picinfo.buf_w, ctx->picinfo.buf_h); + mtk_vdec_debug(ctx, "Y/C(%d, %d)", ctx->picinfo.fb_sz[0], + ctx->picinfo.fb_sz[1]); + + if (ctx->last_decoded_picinfo.pic_w != ctx->picinfo.pic_w || + ctx->last_decoded_picinfo.pic_h != ctx->picinfo.pic_h) { + inst->resolution_changed = true; + if (ctx->last_decoded_picinfo.buf_w != ctx->picinfo.buf_w || + ctx->last_decoded_picinfo.buf_h != ctx->picinfo.buf_h) + inst->realloc_mv_buf = true; + + mtk_v4l2_vdec_dbg(1, inst->ctx, "resChg: (%d %d) : old(%d, %d) -> new(%d, %d)", + inst->resolution_changed, + inst->realloc_mv_buf, + ctx->last_decoded_picinfo.pic_w, + ctx->last_decoded_picinfo.pic_h, + ctx->picinfo.pic_w, ctx->picinfo.pic_h); + } +} + +static void vdec_hevc_slice_get_crop_info(struct vdec_hevc_slice_inst *inst, + struct v4l2_rect *cr) +{ + cr->left = 0; + cr->top = 0; + cr->width = inst->ctx->picinfo.pic_w; + cr->height = inst->ctx->picinfo.pic_h; + + mtk_vdec_debug(inst->ctx, "l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_hevc_slice_setup_lat_buffer(struct vdec_hevc_slice_inst *inst, + struct mtk_vcodec_mem *bs, + struct vdec_lat_buf *lat_buf, + bool *res_chg) +{ + struct mtk_vcodec_mem *mem; + struct mtk_video_dec_buf *src_buf_info; + struct vdec_hevc_slice_share_info *share_info; + int i, err; + + inst->vsi->bs.dma_addr = (u64)bs->dma_addr; + inst->vsi->bs.size = bs->size; + + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + lat_buf->src_buf_req = src_buf_info->m2m_buf.vb.vb2_buf.req_obj.req; + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, &lat_buf->ts_info, true); + + *res_chg = inst->resolution_changed; + if (inst->resolution_changed) { + mtk_vdec_debug(inst->ctx, "- resolution changed -"); + if (inst->realloc_mv_buf) { + err = vdec_hevc_slice_alloc_mv_buf(inst, &inst->ctx->picinfo); + inst->realloc_mv_buf = false; + if (err) + return err; + } + inst->resolution_changed = false; + } + + for (i = 0; i < HEVC_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi->mv_buf_dma[i].dma_addr = mem->dma_addr; + inst->vsi->mv_buf_dma[i].size = mem->size; + } + + inst->vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + inst->vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; + + inst->vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; + inst->vsi->err_map.size = lat_buf->wdma_err_addr.size; + + inst->vsi->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr; + inst->vsi->slice_bc.size = lat_buf->slice_bc_addr.size; + + inst->vsi->trans.dma_addr_end = inst->ctx->msg_queue.wdma_rptr_addr; + inst->vsi->trans.dma_addr = inst->ctx->msg_queue.wdma_wptr_addr; + + share_info = lat_buf->private_data; + share_info->trans.dma_addr = inst->vsi->trans.dma_addr; + share_info->trans.dma_addr_end = inst->vsi->trans.dma_addr_end; + + mtk_vdec_debug(inst->ctx, "lat: ube addr/size(0x%llx 0x%llx) err:0x%llx", + inst->vsi->ube.buf, + inst->vsi->ube.padding, + inst->vsi->err_map.buf); + + mtk_vdec_debug(inst->ctx, "slice addr/size(0x%llx 0x%llx) trans start/end((0x%llx 0x%llx))", + inst->vsi->slice_bc.buf, + inst->vsi->slice_bc.padding, + inst->vsi->trans.buf, + inst->vsi->trans.padding); + + return 0; +} + +static int vdec_hevc_slice_setup_core_buffer(struct vdec_hevc_slice_inst *inst, + struct vdec_hevc_slice_share_info *share_info, + struct vdec_lat_buf *lat_buf) +{ + struct mtk_vcodec_mem *mem; + struct mtk_vcodec_dec_ctx *ctx = inst->ctx; + struct vb2_v4l2_buffer *vb2_v4l2; + struct vdec_fb *fb; + u64 y_fb_dma, c_fb_dma; + int i; + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) { + mtk_vdec_err(inst->ctx, "fb buffer is NULL"); + return -EBUSY; + } + + y_fb_dma = (u64)fb->base_y.dma_addr; + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1) + c_fb_dma = + y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h; + else + c_fb_dma = (u64)fb->base_c.dma_addr; + + mtk_vdec_debug(inst->ctx, "[hevc-core] y/c addr = 0x%llx 0x%llx", y_fb_dma, c_fb_dma); + + inst->vsi_core->fb.y.dma_addr = y_fb_dma; + inst->vsi_core->fb.y.size = ctx->picinfo.fb_sz[0]; + inst->vsi_core->fb.c.dma_addr = c_fb_dma; + inst->vsi_core->fb.y.size = ctx->picinfo.fb_sz[1]; + + inst->vsi_core->dec.vdec_fb_va = (unsigned long)fb; + + inst->vsi_core->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + inst->vsi_core->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; + + inst->vsi_core->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; + inst->vsi_core->err_map.size = lat_buf->wdma_err_addr.size; + + inst->vsi_core->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr; + inst->vsi_core->slice_bc.size = lat_buf->slice_bc_addr.size; + + inst->vsi_core->trans.dma_addr = share_info->trans.dma_addr; + inst->vsi_core->trans.dma_addr_end = share_info->trans.dma_addr_end; + + inst->vsi_core->wrap.dma_addr = inst->wrap_addr.dma_addr; + inst->vsi_core->wrap.size = inst->wrap_addr.size; + + for (i = 0; i < HEVC_MAX_MV_NUM; i++) { + mem = &inst->mv_buf[i]; + inst->vsi_core->mv_buf_dma[i].dma_addr = mem->dma_addr; + inst->vsi_core->mv_buf_dma[i].size = mem->size; + } + + vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); + v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, vb2_v4l2, true); + + return 0; +} + +static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_hevc_slice_inst *inst; + int err, vsi_size; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = SCP_IPI_VDEC_LAT; + inst->vpu.core_id = SCP_IPI_VDEC_CORE; + inst->vpu.ctx = ctx; + inst->vpu.codec_type = ctx->current_codec; + inst->vpu.capture_type = ctx->capture_fourcc; + + ctx->drv_handle = inst; + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vdec_err(ctx, "vdec_hevc init err=%d", err); + goto error_free_inst; + } + + vsi_size = round_up(sizeof(struct vdec_hevc_slice_vsi), VCODEC_DEC_ALIGNED_64); + inst->vsi = inst->vpu.vsi; + inst->vsi_core = + (struct vdec_hevc_slice_vsi *)(((char *)inst->vpu.vsi) + vsi_size); + + inst->resolution_changed = true; + inst->realloc_mv_buf = true; + + inst->wrap_addr.size = VDEC_HEVC_WRAP_SZ; + err = mtk_vcodec_mem_alloc(ctx, &inst->wrap_addr); + if (err) + goto error_free_inst; + + mtk_vdec_debug(ctx, "lat struct size = %d,%d,%d,%d vsi: %d\n", + (int)sizeof(struct mtk_hevc_sps_param), + (int)sizeof(struct mtk_hevc_pps_param), + (int)sizeof(struct vdec_hevc_slice_lat_dec_param), + (int)sizeof(struct mtk_hevc_dpb_info), + vsi_size); + mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x", + inst, inst->vpu.codec_type); + + return 0; +error_free_inst: + kfree(inst); + return err; +} + +static void vdec_hevc_slice_deinit(void *h_vdec) +{ + struct vdec_hevc_slice_inst *inst = h_vdec; + struct mtk_vcodec_mem *mem; + + vpu_dec_deinit(&inst->vpu); + vdec_hevc_slice_free_mv_buf(inst); + + mem = &inst->wrap_addr; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + vdec_msg_queue_deinit(&inst->ctx->msg_queue, inst->ctx); + kfree(inst); +} + +static int vdec_hevc_slice_core_decode(struct vdec_lat_buf *lat_buf) +{ + int err, timeout; + struct mtk_vcodec_dec_ctx *ctx = lat_buf->ctx; + struct vdec_hevc_slice_inst *inst = ctx->drv_handle; + struct vdec_hevc_slice_share_info *share_info = lat_buf->private_data; + struct vdec_vpu_inst *vpu = &inst->vpu; + + mtk_vdec_debug(ctx, "[hevc-core] vdec_hevc core decode"); + memcpy(&inst->vsi_core->hevc_slice_params, &share_info->hevc_slice_params, + sizeof(share_info->hevc_slice_params)); + + err = vdec_hevc_slice_setup_core_buffer(inst, share_info, lat_buf); + if (err) + goto vdec_dec_end; + + vdec_hevc_slice_fill_decode_reflist(inst, &inst->vsi_core->hevc_slice_params, + share_info); + err = vpu_dec_core(vpu); + if (err) { + mtk_vdec_err(ctx, "core decode err=%d", err); + goto vdec_dec_end; + } + + /* wait decoder done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + if (timeout) + mtk_vdec_err(ctx, "core decode timeout: pic_%d", ctx->decoded_frame_cnt); + inst->vsi_core->dec.timeout = !!timeout; + + vpu_dec_core_end(vpu); + mtk_vdec_debug(ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x", + ctx->decoded_frame_cnt, + inst->vsi_core->dec.crc[0], inst->vsi_core->dec.crc[1], + inst->vsi_core->dec.crc[2], inst->vsi_core->dec.crc[3], + inst->vsi_core->dec.crc[4], inst->vsi_core->dec.crc[5], + inst->vsi_core->dec.crc[6], inst->vsi_core->dec.crc[7]); + +vdec_dec_end: + vdec_msg_queue_update_ube_rptr(&lat_buf->ctx->msg_queue, share_info->trans.dma_addr_end); + ctx->dev->vdec_pdata->cap_to_disp(ctx, !!err, lat_buf->src_buf_req); + mtk_vdec_debug(ctx, "core decode done err=%d", err); + ctx->decoded_frame_cnt++; + return 0; +} + +static int vdec_hevc_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_hevc_slice_inst *inst = h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + int err, timeout = 0; + unsigned int data[2]; + struct vdec_lat_buf *lat_buf; + struct vdec_hevc_slice_share_info *share_info; + + if (vdec_msg_queue_init(&inst->ctx->msg_queue, inst->ctx, + vdec_hevc_slice_core_decode, + sizeof(*share_info))) + return -ENOMEM; + + /* bs NULL means flush decoder */ + if (!bs) { + vdec_msg_queue_wait_lat_buf_full(&inst->ctx->msg_queue); + return vpu_dec_reset(vpu); + } + + lat_buf = vdec_msg_queue_dqbuf(&inst->ctx->msg_queue.lat_ctx); + if (!lat_buf) { + mtk_vdec_debug(inst->ctx, "failed to get lat buffer"); + return -EAGAIN; + } + + share_info = lat_buf->private_data; + err = vdec_hevc_slice_fill_decode_parameters(inst, share_info); + if (err) + goto err_free_fb_out; + + err = vdec_hevc_slice_setup_lat_buffer(inst, bs, lat_buf, res_chg); + if (err) + goto err_free_fb_out; + + err = vpu_dec_start(vpu, data, 2); + if (err) { + mtk_vdec_debug(inst->ctx, "lat decode err: %d", err); + goto err_free_fb_out; + } + + if (IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) { + memcpy(&share_info->hevc_slice_params, &inst->vsi->hevc_slice_params, + sizeof(share_info->hevc_slice_params)); + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf); + } + + /* wait decoder done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0); + if (timeout) + mtk_vdec_err(inst->ctx, "lat decode timeout: pic_%d", inst->slice_dec_num); + inst->vsi->dec.timeout = !!timeout; + + err = vpu_dec_end(vpu); + if (err == SLICE_HEADER_FULL || err == TRANS_BUFFER_FULL) { + if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf); + inst->slice_dec_num++; + mtk_vdec_err(inst->ctx, "lat dec fail: pic_%d err:%d", inst->slice_dec_num, err); + return -EINVAL; + } + + share_info->trans.dma_addr_end = inst->ctx->msg_queue.wdma_addr.dma_addr + + inst->vsi->dec.wdma_end_addr_offset; + vdec_msg_queue_update_ube_wptr(&lat_buf->ctx->msg_queue, share_info->trans.dma_addr_end); + + if (!IS_VDEC_INNER_RACING(inst->ctx->dev->dec_capability)) { + memcpy(&share_info->hevc_slice_params, &inst->vsi->hevc_slice_params, + sizeof(share_info->hevc_slice_params)); + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.core_ctx, lat_buf); + } + mtk_vdec_debug(inst->ctx, "dec num: %d lat crc: 0x%x 0x%x 0x%x", inst->slice_dec_num, + inst->vsi->dec.crc[0], inst->vsi->dec.crc[1], inst->vsi->dec.crc[2]); + + inst->slice_dec_num++; + return 0; +err_free_fb_out: + vdec_msg_queue_qbuf(&inst->ctx->msg_queue.lat_ctx, lat_buf); + mtk_vdec_err(inst->ctx, "slice dec number: %d err: %d", inst->slice_dec_num, err); + return err; +} + +static int vdec_hevc_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *unused, bool *res_chg) +{ + struct vdec_hevc_slice_inst *inst = h_vdec; + + if (!h_vdec || inst->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE) + return -EINVAL; + + return vdec_hevc_slice_lat_decode(h_vdec, bs, unused, res_chg); +} + +static int vdec_hevc_slice_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_hevc_slice_inst *inst = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_hevc_slice_get_pic_info(inst); + break; + case GET_PARAM_DPB_SIZE: + *(unsigned int *)out = 6; + break; + case GET_PARAM_CROP_INFO: + vdec_hevc_slice_get_crop_info(inst, out); + break; + default: + mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type); + return -EINVAL; + } + return 0; +} + +const struct vdec_common_if vdec_hevc_slice_multi_if = { + .init = vdec_hevc_slice_init, + .decode = vdec_hevc_slice_decode, + .get_param = vdec_hevc_slice_get_param, + .deinit = vdec_hevc_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c new file mode 100644 index 0000000000..19407f9bc7 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c @@ -0,0 +1,608 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Jungchang Tsao <jungchang.tsao@mediatek.com> + * PC Chen <pc.chen@mediatek.com> + */ + +#include <linux/slab.h> +#include "../vdec_drv_if.h" +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_vpu_if.h" +#include "../vdec_drv_base.h" + +/* Decoding picture buffer size (3 reference frames plus current frame) */ +#define VP8_DPB_SIZE 4 + +/* HW working buffer size (bytes) */ +#define VP8_WORKING_BUF_SZ (45 * 4096) + +/* HW control register address */ +#define VP8_SEGID_DRAM_ADDR 0x3c +#define VP8_HW_VLD_ADDR 0x93C +#define VP8_HW_VLD_VALUE 0x940 +#define VP8_BSASET 0x100 +#define VP8_BSDSET 0x104 +#define VP8_RW_CKEN_SET 0x0 +#define VP8_RW_DCM_CON 0x18 +#define VP8_WO_VLD_SRST 0x108 +#define VP8_RW_MISC_SYS_SEL 0x84 +#define VP8_RW_MISC_SPEC_CON 0xC8 +#define VP8_WO_VLD_SRST 0x108 +#define VP8_RW_VP8_CTRL 0xA4 +#define VP8_RW_MISC_DCM_CON 0xEC +#define VP8_RW_MISC_SRST 0xF4 +#define VP8_RW_MISC_FUNC_CON 0xCC + +#define VP8_MAX_FRM_BUF_NUM 5 +#define VP8_MAX_FRM_BUF_NODE_NUM (VP8_MAX_FRM_BUF_NUM * 2) + +/* required buffer size (bytes) to store decode information */ +#define VP8_HW_SEGMENT_DATA_SZ 272 +#define VP8_HW_SEGMENT_UINT 4 + +#define VP8_DEC_TABLE_PROC_LOOP 96 +#define VP8_DEC_TABLE_UNIT 3 +#define VP8_DEC_TABLE_SZ 300 +#define VP8_DEC_TABLE_OFFSET 2 +#define VP8_DEC_TABLE_RW_UNIT 4 + +/** + * struct vdec_vp8_dec_info - decode misc information + * @working_buf_dma : working buffer dma address + * @prev_y_dma : previous decoded frame buffer Y plane address + * @cur_y_fb_dma : current plane Y frame buffer dma address + * @cur_c_fb_dma : current plane C frame buffer dma address + * @bs_dma : bitstream dma address + * @bs_sz : bitstream size + * @resolution_changed: resolution change flag 1 - changed, 0 - not change + * @show_frame : display this frame or not + * @wait_key_frame : wait key frame coming + */ +struct vdec_vp8_dec_info { + uint64_t working_buf_dma; + uint64_t prev_y_dma; + uint64_t cur_y_fb_dma; + uint64_t cur_c_fb_dma; + uint64_t bs_dma; + uint32_t bs_sz; + uint32_t resolution_changed; + uint32_t show_frame; + uint32_t wait_key_frame; +}; + +/** + * struct vdec_vp8_vsi - VPU shared information + * @dec : decoding information + * @pic : picture information + * @dec_table : decoder coefficient table + * @segment_buf : segmentation buffer + * @load_data : flag to indicate reload decode data + */ +struct vdec_vp8_vsi { + struct vdec_vp8_dec_info dec; + struct vdec_pic_info pic; + uint32_t dec_table[VP8_DEC_TABLE_SZ]; + uint32_t segment_buf[VP8_HW_SEGMENT_DATA_SZ][VP8_HW_SEGMENT_UINT]; + uint32_t load_data; +}; + +/** + * struct vdec_vp8_hw_reg_base - HW register base + * @misc : base address for misc + * @ld : base address for ld + * @top : base address for top + * @cm : base address for cm + * @hwd : base address for hwd + * @hwb : base address for hwb + */ +struct vdec_vp8_hw_reg_base { + void __iomem *misc; + void __iomem *ld; + void __iomem *top; + void __iomem *cm; + void __iomem *hwd; + void __iomem *hwb; +}; + +/** + * struct vdec_vp8_vpu_inst - VPU instance for VP8 decode + * @wq_hd : Wait queue to wait VPU message ack + * @signaled : 1 - Host has received ack message from VPU, 0 - not receive + * @failure : VPU execution result status 0 - success, others - fail + * @inst_addr : VPU decoder instance address + */ +struct vdec_vp8_vpu_inst { + wait_queue_head_t wq_hd; + int signaled; + int failure; + uint32_t inst_addr; +}; + +/* frame buffer (fb) list + * [available_fb_node_list] - decode fb are initialized to 0 and populated in + * [fb_use_list] - fb is set after decode and is moved to this list + * [fb_free_list] - fb is not needed for reference will be moved from + * [fb_use_list] to [fb_free_list] and + * once user remove fb from [fb_free_list], + * it is circulated back to [available_fb_node_list] + * [fb_disp_list] - fb is set after decode and is moved to this list + * once user remove fb from [fb_disp_list] it is + * circulated back to [available_fb_node_list] + */ + +/** + * struct vdec_vp8_inst - VP8 decoder instance + * @cur_fb : current frame buffer + * @dec_fb : decode frame buffer node + * @available_fb_node_list : list to store available frame buffer node + * @fb_use_list : list to store frame buffer in use + * @fb_free_list : list to store free frame buffer + * @fb_disp_list : list to store display ready frame buffer + * @working_buf : HW decoder working buffer + * @reg_base : HW register base address + * @frm_cnt : decode frame count + * @ctx : V4L2 context + * @vpu : VPU instance for decoder + * @vsi : VPU share information + */ +struct vdec_vp8_inst { + struct vdec_fb *cur_fb; + struct vdec_fb_node dec_fb[VP8_MAX_FRM_BUF_NODE_NUM]; + struct list_head available_fb_node_list; + struct list_head fb_use_list; + struct list_head fb_free_list; + struct list_head fb_disp_list; + struct mtk_vcodec_mem working_buf; + struct vdec_vp8_hw_reg_base reg_base; + unsigned int frm_cnt; + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_vpu_inst vpu; + struct vdec_vp8_vsi *vsi; +}; + +static void get_hw_reg_base(struct vdec_vp8_inst *inst) +{ + void __iomem **reg_base = inst->ctx->dev->reg_base; + + inst->reg_base.top = mtk_vcodec_get_reg_addr(reg_base, VDEC_TOP); + inst->reg_base.cm = mtk_vcodec_get_reg_addr(reg_base, VDEC_CM); + inst->reg_base.hwd = mtk_vcodec_get_reg_addr(reg_base, VDEC_HWD); + inst->reg_base.misc = mtk_vcodec_get_reg_addr(reg_base, VDEC_MISC); + inst->reg_base.ld = mtk_vcodec_get_reg_addr(reg_base, VDEC_LD); + inst->reg_base.hwb = mtk_vcodec_get_reg_addr(reg_base, VDEC_HWB); +} + +static void write_hw_segmentation_data(struct vdec_vp8_inst *inst) +{ + int i, j; + u32 seg_id_addr; + u32 val; + void __iomem *cm = inst->reg_base.cm; + struct vdec_vp8_vsi *vsi = inst->vsi; + + seg_id_addr = readl(inst->reg_base.top + VP8_SEGID_DRAM_ADDR) >> 4; + + for (i = 0; i < ARRAY_SIZE(vsi->segment_buf); i++) { + for (j = ARRAY_SIZE(vsi->segment_buf[i]) - 1; j >= 0; j--) { + val = (1 << 16) + ((seg_id_addr + i) << 2) + j; + writel(val, cm + VP8_HW_VLD_ADDR); + + val = vsi->segment_buf[i][j]; + writel(val, cm + VP8_HW_VLD_VALUE); + } + } +} + +static void read_hw_segmentation_data(struct vdec_vp8_inst *inst) +{ + int i, j; + u32 seg_id_addr; + u32 val; + void __iomem *cm = inst->reg_base.cm; + struct vdec_vp8_vsi *vsi = inst->vsi; + + seg_id_addr = readl(inst->reg_base.top + VP8_SEGID_DRAM_ADDR) >> 4; + + for (i = 0; i < ARRAY_SIZE(vsi->segment_buf); i++) { + for (j = ARRAY_SIZE(vsi->segment_buf[i]) - 1; j >= 0; j--) { + val = ((seg_id_addr + i) << 2) + j; + writel(val, cm + VP8_HW_VLD_ADDR); + + val = readl(cm + VP8_HW_VLD_VALUE); + vsi->segment_buf[i][j] = val; + } + } +} + +/* reset HW and enable HW read/write data function */ +static void enable_hw_rw_function(struct vdec_vp8_inst *inst) +{ + u32 val = 0; + void __iomem *misc = inst->reg_base.misc; + void __iomem *ld = inst->reg_base.ld; + void __iomem *hwb = inst->reg_base.hwb; + void __iomem *hwd = inst->reg_base.hwd; + + mtk_vcodec_write_vdecsys(inst->ctx, VP8_RW_CKEN_SET, 0x1); + writel(0x101, ld + VP8_WO_VLD_SRST); + writel(0x101, hwb + VP8_WO_VLD_SRST); + + mtk_vcodec_write_vdecsys(inst->ctx, 0, 0x1); + val = readl(misc + VP8_RW_MISC_SRST); + writel((val & 0xFFFFFFFE), misc + VP8_RW_MISC_SRST); + + writel(0x1, misc + VP8_RW_MISC_SYS_SEL); + writel(0x17F, misc + VP8_RW_MISC_SPEC_CON); + writel(0x71201100, misc + VP8_RW_MISC_FUNC_CON); + writel(0x0, ld + VP8_WO_VLD_SRST); + writel(0x0, hwb + VP8_WO_VLD_SRST); + mtk_vcodec_write_vdecsys(inst->ctx, VP8_RW_DCM_CON, 0x1); + writel(0x1, misc + VP8_RW_MISC_DCM_CON); + writel(0x1, hwd + VP8_RW_VP8_CTRL); +} + +static void store_dec_table(struct vdec_vp8_inst *inst) +{ + int i, j; + u32 addr = 0, val = 0; + void __iomem *hwd = inst->reg_base.hwd; + u32 *p = &inst->vsi->dec_table[VP8_DEC_TABLE_OFFSET]; + + for (i = 0; i < VP8_DEC_TABLE_PROC_LOOP; i++) { + writel(addr, hwd + VP8_BSASET); + for (j = 0; j < VP8_DEC_TABLE_UNIT ; j++) { + val = *p++; + writel(val, hwd + VP8_BSDSET); + } + addr += VP8_DEC_TABLE_RW_UNIT; + } +} + +static void load_dec_table(struct vdec_vp8_inst *inst) +{ + int i; + u32 addr = 0; + u32 *p = &inst->vsi->dec_table[VP8_DEC_TABLE_OFFSET]; + void __iomem *hwd = inst->reg_base.hwd; + + for (i = 0; i < VP8_DEC_TABLE_PROC_LOOP; i++) { + writel(addr, hwd + VP8_BSASET); + /* read total 11 bytes */ + *p++ = readl(hwd + VP8_BSDSET); + *p++ = readl(hwd + VP8_BSDSET); + *p++ = readl(hwd + VP8_BSDSET) & 0xFFFFFF; + addr += VP8_DEC_TABLE_RW_UNIT; + } +} + +static void get_pic_info(struct vdec_vp8_inst *inst, struct vdec_pic_info *pic) +{ + *pic = inst->vsi->pic; + + mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)", + pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h); + mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)", + pic->fb_sz[0], pic->fb_sz[1]); +} + +static void vp8_dec_finish(struct vdec_vp8_inst *inst) +{ + struct vdec_fb_node *node; + uint64_t prev_y_dma = inst->vsi->dec.prev_y_dma; + + mtk_vdec_debug(inst->ctx, "prev fb base dma=%llx", prev_y_dma); + + /* put last decode ok frame to fb_free_list */ + if (prev_y_dma != 0) { + list_for_each_entry(node, &inst->fb_use_list, list) { + struct vdec_fb *fb = (struct vdec_fb *)node->fb; + + if (prev_y_dma == (uint64_t)fb->base_y.dma_addr) { + list_move_tail(&node->list, + &inst->fb_free_list); + break; + } + } + } + + /* available_fb_node_list -> fb_use_list */ + node = list_first_entry(&inst->available_fb_node_list, + struct vdec_fb_node, list); + node->fb = inst->cur_fb; + list_move_tail(&node->list, &inst->fb_use_list); + + /* available_fb_node_list -> fb_disp_list */ + if (inst->vsi->dec.show_frame) { + node = list_first_entry(&inst->available_fb_node_list, + struct vdec_fb_node, list); + node->fb = inst->cur_fb; + list_move_tail(&node->list, &inst->fb_disp_list); + } +} + +static void move_fb_list_use_to_free(struct vdec_vp8_inst *inst) +{ + struct vdec_fb_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &inst->fb_use_list, list) + list_move_tail(&node->list, &inst->fb_free_list); +} + +static void init_list(struct vdec_vp8_inst *inst) +{ + int i; + + INIT_LIST_HEAD(&inst->available_fb_node_list); + INIT_LIST_HEAD(&inst->fb_use_list); + INIT_LIST_HEAD(&inst->fb_free_list); + INIT_LIST_HEAD(&inst->fb_disp_list); + + for (i = 0; i < ARRAY_SIZE(inst->dec_fb); i++) { + INIT_LIST_HEAD(&inst->dec_fb[i].list); + inst->dec_fb[i].fb = NULL; + list_add_tail(&inst->dec_fb[i].list, + &inst->available_fb_node_list); + } +} + +static void add_fb_to_free_list(struct vdec_vp8_inst *inst, void *fb) +{ + struct vdec_fb_node *node; + + if (fb) { + node = list_first_entry(&inst->available_fb_node_list, + struct vdec_fb_node, list); + node->fb = fb; + list_move_tail(&node->list, &inst->fb_free_list); + } +} + +static int alloc_working_buf(struct vdec_vp8_inst *inst) +{ + int err; + struct mtk_vcodec_mem *mem = &inst->working_buf; + + mem->size = VP8_WORKING_BUF_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "Cannot allocate working buffer"); + return err; + } + + inst->vsi->dec.working_buf_dma = (uint64_t)mem->dma_addr; + return 0; +} + +static void free_working_buf(struct vdec_vp8_inst *inst) +{ + struct mtk_vcodec_mem *mem = &inst->working_buf; + + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + inst->vsi->dec.working_buf_dma = 0; +} + +static int vdec_vp8_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_vp8_inst *inst; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = IPI_VDEC_VP8; + inst->vpu.ctx = ctx; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vdec_err(ctx, "vdec_vp8 init err=%d", err); + goto error_free_inst; + } + + inst->vsi = (struct vdec_vp8_vsi *)inst->vpu.vsi; + init_list(inst); + err = alloc_working_buf(inst); + if (err) + goto error_deinit; + + get_hw_reg_base(inst); + mtk_vdec_debug(ctx, "VP8 Instance >> %p", inst); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); +error_free_inst: + kfree(inst); + return err; +} + +static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec; + struct vdec_vp8_dec_info *dec = &inst->vsi->dec; + struct vdec_vpu_inst *vpu = &inst->vpu; + unsigned char *bs_va; + unsigned int data; + int err = 0; + uint64_t y_fb_dma; + uint64_t c_fb_dma; + + /* bs NULL means flush decoder */ + if (bs == NULL) { + move_fb_list_use_to_free(inst); + return vpu_dec_reset(vpu); + } + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + mtk_vdec_debug(inst->ctx, "+ [%d] FB y_dma=%llx c_dma=%llx fb=%p", + inst->frm_cnt, y_fb_dma, c_fb_dma, fb); + + inst->cur_fb = fb; + dec->bs_dma = (unsigned long)bs->dma_addr; + dec->bs_sz = bs->size; + dec->cur_y_fb_dma = y_fb_dma; + dec->cur_c_fb_dma = c_fb_dma; + + mtk_vdec_debug(inst->ctx, "\n + FRAME[%d] +\n", inst->frm_cnt); + + write_hw_segmentation_data(inst); + enable_hw_rw_function(inst); + store_dec_table(inst); + + bs_va = (unsigned char *)bs->va; + + /* retrieve width/hight and scale info from header */ + data = (*(bs_va + 9) << 24) | (*(bs_va + 8) << 16) | + (*(bs_va + 7) << 8) | *(bs_va + 6); + err = vpu_dec_start(vpu, &data, 1); + if (err) { + add_fb_to_free_list(inst, fb); + if (dec->wait_key_frame) { + mtk_vdec_debug(inst->ctx, "wait key frame !"); + return 0; + } + + goto error; + } + + if (dec->resolution_changed) { + mtk_vdec_debug(inst->ctx, "- resolution_changed -"); + *res_chg = true; + add_fb_to_free_list(inst, fb); + return 0; + } + + /* wait decoder done interrupt */ + mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + + if (inst->vsi->load_data) + load_dec_table(inst); + + vp8_dec_finish(inst); + read_hw_segmentation_data(inst); + + err = vpu_dec_end(vpu); + if (err) + goto error; + + mtk_vdec_debug(inst->ctx, "\n - FRAME[%d] - show=%d\n", inst->frm_cnt, dec->show_frame); + inst->frm_cnt++; + *res_chg = false; + return 0; + +error: + mtk_vdec_err(inst->ctx, "\n - FRAME[%d] - err=%d\n", inst->frm_cnt, err); + return err; +} + +static void get_disp_fb(struct vdec_vp8_inst *inst, struct vdec_fb **out_fb) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb; + + node = list_first_entry_or_null(&inst->fb_disp_list, + struct vdec_fb_node, list); + if (node) { + list_move_tail(&node->list, &inst->available_fb_node_list); + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_DISPLAY; + mtk_vdec_debug(inst->ctx, "[FB] get disp fb %p st=%d", node->fb, fb->status); + } else { + fb = NULL; + mtk_vdec_debug(inst->ctx, "[FB] there is no disp fb"); + } + + *out_fb = fb; +} + +static void get_free_fb(struct vdec_vp8_inst *inst, struct vdec_fb **out_fb) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb; + + node = list_first_entry_or_null(&inst->fb_free_list, + struct vdec_fb_node, list); + if (node) { + list_move_tail(&node->list, &inst->available_fb_node_list); + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_FREE; + mtk_vdec_debug(inst->ctx, "[FB] get free fb %p st=%d", node->fb, fb->status); + } else { + fb = NULL; + mtk_vdec_debug(inst->ctx, "[FB] there is no free fb"); + } + + *out_fb = fb; +} + +static void get_crop_info(struct vdec_vp8_inst *inst, struct v4l2_rect *cr) +{ + cr->left = 0; + cr->top = 0; + cr->width = inst->vsi->pic.pic_w; + cr->height = inst->vsi->pic.pic_h; + mtk_vdec_debug(inst->ctx, "get crop info l=%d, t=%d, w=%d, h=%d", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_vp8_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec; + + switch (type) { + case GET_PARAM_DISP_FRAME_BUFFER: + get_disp_fb(inst, out); + break; + + case GET_PARAM_FREE_FRAME_BUFFER: + get_free_fb(inst, out); + break; + + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + + case GET_PARAM_DPB_SIZE: + *((unsigned int *)out) = VP8_DPB_SIZE; + break; + + default: + mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +static void vdec_vp8_deinit(void *h_vdec) +{ + struct vdec_vp8_inst *inst = (struct vdec_vp8_inst *)h_vdec; + + vpu_dec_deinit(&inst->vpu); + free_working_buf(inst); + kfree(inst); +} + +const struct vdec_common_if vdec_vp8_if = { + .init = vdec_vp8_init, + .decode = vdec_vp8_decode, + .get_param = vdec_vp8_get_param, + .deinit = vdec_vp8_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c new file mode 100644 index 0000000000..f64b21c071 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include <linux/slab.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-dma-contig.h> +#include <uapi/linux/v4l2-controls.h> + +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" + +/* Decoding picture buffer size (3 reference frames plus current frame) */ +#define VP8_DPB_SIZE 4 + +/* HW working buffer size (bytes) */ +#define VP8_SEG_ID_SZ SZ_256K +#define VP8_PP_WRAPY_SZ SZ_64K +#define VP8_PP_WRAPC_SZ SZ_64K +#define VP8_VLD_PRED_SZ SZ_64K + +/** + * struct vdec_vp8_slice_info - decode misc information + * + * @vld_wrapper_dma: vld wrapper dma address + * @seg_id_buf_dma: seg id dma address + * @wrap_y_dma: wrap y dma address + * @wrap_c_dma: wrap y dma address + * @cur_y_fb_dma: current plane Y frame buffer dma address + * @cur_c_fb_dma: current plane C frame buffer dma address + * @bs_dma: bitstream dma address + * @bs_sz: bitstream size + * @resolution_changed:resolution change flag 1 - changed, 0 - not change + * @frame_header_type: current frame header type + * @wait_key_frame: wait key frame coming + * @crc: used to check whether hardware's status is right + * @reserved: reserved, currently unused + */ +struct vdec_vp8_slice_info { + u64 vld_wrapper_dma; + u64 seg_id_buf_dma; + u64 wrap_y_dma; + u64 wrap_c_dma; + u64 cur_y_fb_dma; + u64 cur_c_fb_dma; + u64 bs_dma; + u32 bs_sz; + u32 resolution_changed; + u32 frame_header_type; + u32 crc[8]; + u32 reserved; +}; + +/** + * struct vdec_vp8_slice_dpb_info - vp8 reference information + * + * @y_dma_addr: Y bitstream physical address + * @c_dma_addr: CbCr bitstream physical address + * @reference_flag: reference picture flag + * @reserved: 64bit align + */ +struct vdec_vp8_slice_dpb_info { + dma_addr_t y_dma_addr; + dma_addr_t c_dma_addr; + int reference_flag; + int reserved; +}; + +/** + * struct vdec_vp8_slice_vsi - VPU shared information + * + * @dec: decoding information + * @pic: picture information + * @vp8_dpb_info: reference buffer information + */ +struct vdec_vp8_slice_vsi { + struct vdec_vp8_slice_info dec; + struct vdec_pic_info pic; + struct vdec_vp8_slice_dpb_info vp8_dpb_info[3]; +}; + +/** + * struct vdec_vp8_slice_inst - VP8 decoder instance + * + * @seg_id_buf: seg buffer + * @wrap_y_buf: wrapper y buffer + * @wrap_c_buf: wrapper c buffer + * @vld_wrapper_buf: vld wrapper buffer + * @ctx: V4L2 context + * @vpu: VPU instance for decoder + * @vsi: VPU share information + */ +struct vdec_vp8_slice_inst { + struct mtk_vcodec_mem seg_id_buf; + struct mtk_vcodec_mem wrap_y_buf; + struct mtk_vcodec_mem wrap_c_buf; + struct mtk_vcodec_mem vld_wrapper_buf; + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_vpu_inst vpu; + struct vdec_vp8_slice_vsi *vsi; +}; + +static void *vdec_vp8_slice_get_ctrl_ptr(struct mtk_vcodec_dec_ctx *ctx, int id) +{ + struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, id); + + if (!ctrl) + return ERR_PTR(-EINVAL); + + return ctrl->p_cur.p; +} + +static void vdec_vp8_slice_get_pic_info(struct vdec_vp8_slice_inst *inst) +{ + struct mtk_vcodec_dec_ctx *ctx = inst->ctx; + unsigned int data[3]; + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&inst->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64); + ctx->picinfo.fb_sz[0] = inst->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = inst->vpu.fb_sz[1]; + + inst->vsi->pic.pic_w = ctx->picinfo.pic_w; + inst->vsi->pic.pic_h = ctx->picinfo.pic_h; + inst->vsi->pic.buf_w = ctx->picinfo.buf_w; + inst->vsi->pic.buf_h = ctx->picinfo.buf_h; + inst->vsi->pic.fb_sz[0] = ctx->picinfo.fb_sz[0]; + inst->vsi->pic.fb_sz[1] = ctx->picinfo.fb_sz[1]; + mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)", + ctx->picinfo.pic_w, ctx->picinfo.pic_h, + ctx->picinfo.buf_w, ctx->picinfo.buf_h); + mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)", + ctx->picinfo.fb_sz[0], ctx->picinfo.fb_sz[1]); +} + +static int vdec_vp8_slice_alloc_working_buf(struct vdec_vp8_slice_inst *inst) +{ + int err; + struct mtk_vcodec_mem *mem; + + mem = &inst->seg_id_buf; + mem->size = VP8_SEG_ID_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "Cannot allocate working buffer"); + return err; + } + inst->vsi->dec.seg_id_buf_dma = (u64)mem->dma_addr; + + mem = &inst->wrap_y_buf; + mem->size = VP8_PP_WRAPY_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "cannot allocate WRAP Y buffer"); + return err; + } + inst->vsi->dec.wrap_y_dma = (u64)mem->dma_addr; + + mem = &inst->wrap_c_buf; + mem->size = VP8_PP_WRAPC_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "cannot allocate WRAP C buffer"); + return err; + } + inst->vsi->dec.wrap_c_dma = (u64)mem->dma_addr; + + mem = &inst->vld_wrapper_buf; + mem->size = VP8_VLD_PRED_SZ; + err = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (err) { + mtk_vdec_err(inst->ctx, "cannot allocate vld wrapper buffer"); + return err; + } + inst->vsi->dec.vld_wrapper_dma = (u64)mem->dma_addr; + + return 0; +} + +static void vdec_vp8_slice_free_working_buf(struct vdec_vp8_slice_inst *inst) +{ + struct mtk_vcodec_mem *mem; + + mem = &inst->seg_id_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.seg_id_buf_dma = 0; + + mem = &inst->wrap_y_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.wrap_y_dma = 0; + + mem = &inst->wrap_c_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.wrap_c_dma = 0; + + mem = &inst->vld_wrapper_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + inst->vsi->dec.vld_wrapper_dma = 0; +} + +static u64 vdec_vp8_slice_get_ref_by_ts(const struct v4l2_ctrl_vp8_frame *frame_header, + int index) +{ + switch (index) { + case 0: + return frame_header->last_frame_ts; + case 1: + return frame_header->golden_frame_ts; + case 2: + return frame_header->alt_frame_ts; + default: + break; + } + + return -1; +} + +static int vdec_vp8_slice_get_decode_parameters(struct vdec_vp8_slice_inst *inst) +{ + const struct v4l2_ctrl_vp8_frame *frame_header; + struct mtk_vcodec_dec_ctx *ctx = inst->ctx; + struct vb2_queue *vq; + struct vb2_buffer *vb; + u64 referenct_ts; + int index; + + frame_header = vdec_vp8_slice_get_ctrl_ptr(inst->ctx, V4L2_CID_STATELESS_VP8_FRAME); + if (IS_ERR(frame_header)) + return PTR_ERR(frame_header); + + vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + for (index = 0; index < 3; index++) { + referenct_ts = vdec_vp8_slice_get_ref_by_ts(frame_header, index); + vb = vb2_find_buffer(vq, referenct_ts); + if (!vb) { + if (!V4L2_VP8_FRAME_IS_KEY_FRAME(frame_header)) + mtk_vdec_err(inst->ctx, "reference invalid: index(%d) ts(%lld)", + index, referenct_ts); + inst->vsi->vp8_dpb_info[index].reference_flag = 0; + continue; + } + inst->vsi->vp8_dpb_info[index].reference_flag = 1; + + inst->vsi->vp8_dpb_info[index].y_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 0); + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) + inst->vsi->vp8_dpb_info[index].c_dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 1); + else + inst->vsi->vp8_dpb_info[index].c_dma_addr = + inst->vsi->vp8_dpb_info[index].y_dma_addr + + ctx->picinfo.fb_sz[0]; + } + + inst->vsi->dec.frame_header_type = frame_header->flags >> 1; + + return 0; +} + +static int vdec_vp8_slice_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_vp8_slice_inst *inst; + int err; + + inst = kzalloc(sizeof(*inst), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + inst->ctx = ctx; + + inst->vpu.id = SCP_IPI_VDEC_LAT; + inst->vpu.core_id = SCP_IPI_VDEC_CORE; + inst->vpu.ctx = ctx; + inst->vpu.codec_type = ctx->current_codec; + inst->vpu.capture_type = ctx->capture_fourcc; + + err = vpu_dec_init(&inst->vpu); + if (err) { + mtk_vdec_err(ctx, "vdec_vp8 init err=%d", err); + goto error_free_inst; + } + + inst->vsi = inst->vpu.vsi; + err = vdec_vp8_slice_alloc_working_buf(inst); + if (err) + goto error_deinit; + + mtk_vdec_debug(ctx, "vp8 struct size = %d vsi: %d\n", + (int)sizeof(struct v4l2_ctrl_vp8_frame), + (int)sizeof(struct vdec_vp8_slice_vsi)); + mtk_vdec_debug(ctx, "vp8:%p, codec_type = 0x%x vsi: 0x%p", + inst, inst->vpu.codec_type, inst->vpu.vsi); + + ctx->drv_handle = inst; + return 0; + +error_deinit: + vpu_dec_deinit(&inst->vpu); +error_free_inst: + kfree(inst); + return err; +} + +static int vdec_vp8_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp8_slice_inst *inst = h_vdec; + struct vdec_vpu_inst *vpu = &inst->vpu; + struct mtk_video_dec_buf *src_buf_info, *dst_buf_info; + unsigned int data; + u64 y_fb_dma, c_fb_dma; + int err, timeout; + + /* Resolution changes are never initiated by us */ + *res_chg = false; + + /* bs NULL means flush decoder */ + if (!bs) + return vpu_dec_reset(vpu); + + src_buf_info = container_of(bs, struct mtk_video_dec_buf, bs_buffer); + + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, frame_buffer); + + y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; + if (inst->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1) + c_fb_dma = y_fb_dma + + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h; + else + c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; + + inst->vsi->dec.bs_dma = (u64)bs->dma_addr; + inst->vsi->dec.bs_sz = bs->size; + inst->vsi->dec.cur_y_fb_dma = y_fb_dma; + inst->vsi->dec.cur_c_fb_dma = c_fb_dma; + + mtk_vdec_debug(inst->ctx, "frame[%d] bs(%zu 0x%llx) y/c(0x%llx 0x%llx)", + inst->ctx->decoded_frame_cnt, + bs->size, (u64)bs->dma_addr, + y_fb_dma, c_fb_dma); + + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, + &dst_buf_info->m2m_buf.vb, true); + + err = vdec_vp8_slice_get_decode_parameters(inst); + if (err) + goto error; + + err = vpu_dec_start(vpu, &data, 1); + if (err) { + mtk_vdec_debug(inst->ctx, "vp8 dec start err!"); + goto error; + } + + if (inst->vsi->dec.resolution_changed) { + mtk_vdec_debug(inst->ctx, "- resolution_changed -"); + *res_chg = true; + return 0; + } + + /* wait decode done interrupt */ + timeout = mtk_vcodec_wait_for_done_ctx(inst->ctx, MTK_INST_IRQ_RECEIVED, + 50, MTK_VDEC_CORE); + + err = vpu_dec_end(vpu); + if (err || timeout) + mtk_vdec_debug(inst->ctx, "vp8 dec error timeout:%d err: %d pic_%d", + timeout, err, inst->ctx->decoded_frame_cnt); + + mtk_vdec_debug(inst->ctx, "pic[%d] crc: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x", + inst->ctx->decoded_frame_cnt, + inst->vsi->dec.crc[0], inst->vsi->dec.crc[1], + inst->vsi->dec.crc[2], inst->vsi->dec.crc[3], + inst->vsi->dec.crc[4], inst->vsi->dec.crc[5], + inst->vsi->dec.crc[6], inst->vsi->dec.crc[7]); + + inst->ctx->decoded_frame_cnt++; +error: + return err; +} + +static int vdec_vp8_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) +{ + struct vdec_vp8_slice_inst *inst = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_vp8_slice_get_pic_info(inst); + break; + case GET_PARAM_CROP_INFO: + mtk_vdec_debug(inst->ctx, "No need to get vp8 crop information."); + break; + case GET_PARAM_DPB_SIZE: + *((unsigned int *)out) = VP8_DPB_SIZE; + break; + default: + mtk_vdec_err(inst->ctx, "invalid get parameter type=%d", type); + return -EINVAL; + } + + return 0; +} + +static void vdec_vp8_slice_deinit(void *h_vdec) +{ + struct vdec_vp8_slice_inst *inst = h_vdec; + + vpu_dec_deinit(&inst->vpu); + vdec_vp8_slice_free_working_buf(inst); + kfree(inst); +} + +const struct vdec_common_if vdec_vp8_slice_if = { + .init = vdec_vp8_slice_init, + .decode = vdec_vp8_slice_decode, + .get_param = vdec_vp8_slice_get_param, + .deinit = vdec_vp8_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c new file mode 100644 index 0000000000..55355fa700 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c @@ -0,0 +1,1017 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: Daniel Hsiao <daniel.hsiao@mediatek.com> + * Kai-Sean Yang <kai-sean.yang@mediatek.com> + * Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/delay.h> +#include <linux/time.h> + +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_vpu_if.h" + +#define VP9_SUPER_FRAME_BS_SZ 64 +#define MAX_VP9_DPB_SIZE 9 + +#define REFS_PER_FRAME 3 +#define MAX_NUM_REF_FRAMES 8 +#define VP9_MAX_FRM_BUF_NUM 9 +#define VP9_MAX_FRM_BUF_NODE_NUM (VP9_MAX_FRM_BUF_NUM * 2) +#define VP9_SEG_ID_SZ 0x12000 + +/** + * struct vp9_dram_buf - contains buffer info for vpu + * @va : cpu address + * @pa : iova address + * @sz : buffer size + * @padding : for 64 bytes alignment + */ +struct vp9_dram_buf { + unsigned long va; + unsigned long pa; + unsigned int sz; + unsigned int padding; +}; + +/** + * struct vp9_fb_info - contains frame buffer info + * @fb : frmae buffer + * @reserved : reserved field used by vpu + */ +struct vp9_fb_info { + struct vdec_fb *fb; + unsigned int reserved[32]; +}; + +/** + * struct vp9_ref_cnt_buf - contains reference buffer information + * @buf : referenced frame buffer + * @ref_cnt : referenced frame buffer's reference count. + * When reference count=0, remove it from reference list + */ +struct vp9_ref_cnt_buf { + struct vp9_fb_info buf; + unsigned int ref_cnt; +}; + +/** + * struct vp9_ref_buf - contains current frame's reference buffer information + * @buf : reference buffer + * @idx : reference buffer index to frm_bufs + * @reserved : reserved field used by vpu + */ +struct vp9_ref_buf { + struct vp9_fb_info *buf; + unsigned int idx; + unsigned int reserved[6]; +}; + +/** + * struct vp9_sf_ref_fb - contains frame buffer info + * @fb : super frame reference frame buffer + * @used : this reference frame info entry is used + * @padding : for 64 bytes size align + */ +struct vp9_sf_ref_fb { + struct vdec_fb fb; + int used; + int padding; +}; + +/* + * struct vdec_vp9_vsi - shared buffer between host and VPU firmware + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item + * @sf_bs_buf : super frame backup buffer (AP-W, VPU-R) + * @sf_ref_fb : record supoer frame reference buffer information + * (AP-R/W, VPU-R/W) + * @sf_next_ref_fb_idx : next available super frame (AP-W, VPU-R) + * @sf_frm_cnt : super frame count, filled by vpu (AP-R, VPU-W) + * @sf_frm_offset : super frame offset, filled by vpu (AP-R, VPU-W) + * @sf_frm_sz : super frame size, filled by vpu (AP-R, VPU-W) + * @sf_frm_idx : current super frame (AP-R, VPU-W) + * @sf_init : inform super frame info already parsed by vpu (AP-R, VPU-W) + * @fb : capture buffer (AP-W, VPU-R) + * @bs : bs buffer (AP-W, VPU-R) + * @cur_fb : current show capture buffer (AP-R/W, VPU-R/W) + * @pic_w : picture width (AP-R, VPU-W) + * @pic_h : picture height (AP-R, VPU-W) + * @buf_w : codec width (AP-R, VPU-W) + * @buf_h : coded height (AP-R, VPU-W) + * @buf_sz_y_bs : ufo compressed y plane size (AP-R, VPU-W) + * @buf_sz_c_bs : ufo compressed cbcr plane size (AP-R, VPU-W) + * @buf_len_sz_y : size used to store y plane ufo info (AP-R, VPU-W) + * @buf_len_sz_c : size used to store cbcr plane ufo info (AP-R, VPU-W) + + * @profile : profile sparsed from vpu (AP-R, VPU-W) + * @show_frame : [BIT(0)] display this frame or not (AP-R, VPU-W) + * [BIT(1)] reset segment data or not (AP-R, VPU-W) + * [BIT(2)] trig decoder hardware or not (AP-R, VPU-W) + * [BIT(3)] ask VPU to set bits(0~4) accordingly (AP-W, VPU-R) + * [BIT(4)] do not reset segment data before every frame (AP-R, VPU-W) + * @show_existing_frame : inform this frame is show existing frame + * (AP-R, VPU-W) + * @frm_to_show_idx : index to show frame (AP-R, VPU-W) + + * @refresh_frm_flags : indicate when frame need to refine reference count + * (AP-R, VPU-W) + * @resolution_changed : resolution change in this frame (AP-R, VPU-W) + + * @frm_bufs : maintain reference buffer info (AP-R/W, VPU-R/W) + * @ref_frm_map : maintain reference buffer map info (AP-R/W, VPU-R/W) + * @new_fb_idx : index to frm_bufs array (AP-R, VPU-W) + * @frm_num : decoded frame number, include sub-frame count (AP-R, VPU-W) + * @mv_buf : motion vector working buffer (AP-W, VPU-R) + * @frm_refs : maintain three reference buffer info (AP-R/W, VPU-R/W) + * @seg_id_buf : segmentation map working buffer (AP-W, VPU-R) + */ +struct vdec_vp9_vsi { + unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ]; + struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1]; + int sf_next_ref_fb_idx; + unsigned int sf_frm_cnt; + unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1]; + unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1]; + unsigned int sf_frm_idx; + unsigned int sf_init; + struct vdec_fb fb; + struct mtk_vcodec_mem bs; + struct vdec_fb cur_fb; + unsigned int pic_w; + unsigned int pic_h; + unsigned int buf_w; + unsigned int buf_h; + unsigned int buf_sz_y_bs; + unsigned int buf_sz_c_bs; + unsigned int buf_len_sz_y; + unsigned int buf_len_sz_c; + unsigned int profile; + unsigned int show_frame; + unsigned int show_existing_frame; + unsigned int frm_to_show_idx; + unsigned int refresh_frm_flags; + unsigned int resolution_changed; + + struct vp9_ref_cnt_buf frm_bufs[VP9_MAX_FRM_BUF_NUM]; + int ref_frm_map[MAX_NUM_REF_FRAMES]; + unsigned int new_fb_idx; + unsigned int frm_num; + struct vp9_dram_buf mv_buf; + + struct vp9_ref_buf frm_refs[REFS_PER_FRAME]; + struct vp9_dram_buf seg_id_buf; + +}; + +/* + * struct vdec_vp9_inst - vp9 decode instance + * @mv_buf : working buffer for mv + * @seg_id_buf : working buffer for segmentation map + * @dec_fb : vdec_fb node to link fb to different fb_xxx_list + * @available_fb_node_list : current available vdec_fb node + * @fb_use_list : current used or referenced vdec_fb + * @fb_free_list : current available to free vdec_fb + * @fb_disp_list : current available to display vdec_fb + * @cur_fb : current frame buffer + * @ctx : current decode context + * @vpu : vpu instance information + * @vsi : shared buffer between host and VPU firmware + * @total_frm_cnt : total frame count, it do not include sub-frames in super + * frame + * @mem : instance memory information + */ +struct vdec_vp9_inst { + struct mtk_vcodec_mem mv_buf; + struct mtk_vcodec_mem seg_id_buf; + + struct vdec_fb_node dec_fb[VP9_MAX_FRM_BUF_NODE_NUM]; + struct list_head available_fb_node_list; + struct list_head fb_use_list; + struct list_head fb_free_list; + struct list_head fb_disp_list; + struct vdec_fb *cur_fb; + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_vpu_inst vpu; + struct vdec_vp9_vsi *vsi; + unsigned int total_frm_cnt; + struct mtk_vcodec_mem mem; +}; + +static bool vp9_is_sf_ref_fb(struct vdec_vp9_inst *inst, struct vdec_fb *fb) +{ + int i; + struct vdec_vp9_vsi *vsi = inst->vsi; + + for (i = 0; i < ARRAY_SIZE(vsi->sf_ref_fb); i++) { + if (fb == &vsi->sf_ref_fb[i].fb) + return true; + } + return false; +} + +static struct vdec_fb *vp9_rm_from_fb_use_list(struct vdec_vp9_inst + *inst, void *addr) +{ + struct vdec_fb *fb = NULL; + struct vdec_fb_node *node; + + list_for_each_entry(node, &inst->fb_use_list, list) { + fb = (struct vdec_fb *)node->fb; + if (fb->base_y.va == addr) { + list_move_tail(&node->list, + &inst->available_fb_node_list); + return fb; + } + } + + return NULL; +} + +static void vp9_add_to_fb_free_list(struct vdec_vp9_inst *inst, + struct vdec_fb *fb) +{ + struct vdec_fb_node *node; + + if (fb) { + node = list_first_entry_or_null(&inst->available_fb_node_list, + struct vdec_fb_node, list); + + if (node) { + node->fb = fb; + list_move_tail(&node->list, &inst->fb_free_list); + } + } else { + mtk_vdec_debug(inst->ctx, "No free fb node"); + } +} + +static void vp9_free_sf_ref_fb(struct vdec_fb *fb) +{ + struct vp9_sf_ref_fb *sf_ref_fb = + container_of(fb, struct vp9_sf_ref_fb, fb); + + sf_ref_fb->used = 0; +} + +static void vp9_ref_cnt_fb(struct vdec_vp9_inst *inst, int *idx, + int new_idx) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + int ref_idx = *idx; + + if (ref_idx >= 0 && vsi->frm_bufs[ref_idx].ref_cnt > 0) { + vsi->frm_bufs[ref_idx].ref_cnt--; + + if (vsi->frm_bufs[ref_idx].ref_cnt == 0) { + if (!vp9_is_sf_ref_fb(inst, + vsi->frm_bufs[ref_idx].buf.fb)) { + struct vdec_fb *fb; + + fb = vp9_rm_from_fb_use_list(inst, + vsi->frm_bufs[ref_idx].buf.fb->base_y.va); + vp9_add_to_fb_free_list(inst, fb); + } else + vp9_free_sf_ref_fb( + vsi->frm_bufs[ref_idx].buf.fb); + } + } + + *idx = new_idx; + vsi->frm_bufs[new_idx].ref_cnt++; +} + +static void vp9_free_all_sf_ref_fb(struct vdec_vp9_inst *inst) +{ + int i; + struct vdec_vp9_vsi *vsi = inst->vsi; + + for (i = 0; i < ARRAY_SIZE(vsi->sf_ref_fb); i++) { + if (vsi->sf_ref_fb[i].fb.base_y.va) { + mtk_vcodec_mem_free(inst->ctx, + &vsi->sf_ref_fb[i].fb.base_y); + mtk_vcodec_mem_free(inst->ctx, + &vsi->sf_ref_fb[i].fb.base_c); + vsi->sf_ref_fb[i].used = 0; + } + } +} + +/* For each sub-frame except the last one, the driver will dynamically + * allocate reference buffer by calling vp9_get_sf_ref_fb() + * The last sub-frame will use the original fb provided by the + * vp9_dec_decode() interface + */ +static int vp9_get_sf_ref_fb(struct vdec_vp9_inst *inst) +{ + int idx; + struct mtk_vcodec_mem *mem_basy_y; + struct mtk_vcodec_mem *mem_basy_c; + struct vdec_vp9_vsi *vsi = inst->vsi; + + for (idx = 0; + idx < ARRAY_SIZE(vsi->sf_ref_fb); + idx++) { + if (vsi->sf_ref_fb[idx].fb.base_y.va && + vsi->sf_ref_fb[idx].used == 0) { + return idx; + } + } + + for (idx = 0; + idx < ARRAY_SIZE(vsi->sf_ref_fb); + idx++) { + if (vsi->sf_ref_fb[idx].fb.base_y.va == NULL) + break; + } + + if (idx == ARRAY_SIZE(vsi->sf_ref_fb)) { + mtk_vdec_err(inst->ctx, "List Full"); + return -1; + } + + mem_basy_y = &vsi->sf_ref_fb[idx].fb.base_y; + mem_basy_y->size = vsi->buf_sz_y_bs + + vsi->buf_len_sz_y; + + if (mtk_vcodec_mem_alloc(inst->ctx, mem_basy_y)) { + mtk_vdec_err(inst->ctx, "Cannot allocate sf_ref_buf y_buf"); + return -1; + } + + mem_basy_c = &vsi->sf_ref_fb[idx].fb.base_c; + mem_basy_c->size = vsi->buf_sz_c_bs + + vsi->buf_len_sz_c; + + if (mtk_vcodec_mem_alloc(inst->ctx, mem_basy_c)) { + mtk_vdec_err(inst->ctx, "Cannot allocate sf_ref_fb c_buf"); + return -1; + } + vsi->sf_ref_fb[idx].used = 0; + + return idx; +} + +static bool vp9_alloc_work_buf(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + int result; + struct mtk_vcodec_mem *mem; + + unsigned int max_pic_w; + unsigned int max_pic_h; + + + if (!(inst->ctx->dev->dec_capability & + VCODEC_CAPABILITY_4K_DISABLED)) { + max_pic_w = VCODEC_DEC_4K_CODED_WIDTH; + max_pic_h = VCODEC_DEC_4K_CODED_HEIGHT; + } else { + max_pic_w = MTK_VDEC_MAX_W; + max_pic_h = MTK_VDEC_MAX_H; + } + + if ((vsi->pic_w > max_pic_w) || + (vsi->pic_h > max_pic_h)) { + mtk_vdec_err(inst->ctx, "Invalid w/h %d/%d", vsi->pic_w, vsi->pic_h); + return false; + } + + mtk_vdec_debug(inst->ctx, "BUF CHG(%d): w/h/sb_w/sb_h=%d/%d/%d/%d", + vsi->resolution_changed, vsi->pic_w, + vsi->pic_h, vsi->buf_w, vsi->buf_h); + + mem = &inst->mv_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + mem->size = ((vsi->buf_w / 64) * + (vsi->buf_h / 64) + 2) * 36 * 16; + result = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (result) { + mem->size = 0; + mtk_vdec_err(inst->ctx, "Cannot allocate mv_buf"); + return false; + } + /* Set the va again */ + vsi->mv_buf.va = (unsigned long)mem->va; + vsi->mv_buf.pa = (unsigned long)mem->dma_addr; + vsi->mv_buf.sz = (unsigned int)mem->size; + + + mem = &inst->seg_id_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + mem->size = VP9_SEG_ID_SZ; + result = mtk_vcodec_mem_alloc(inst->ctx, mem); + if (result) { + mem->size = 0; + mtk_vdec_err(inst->ctx, "Cannot allocate seg_id_buf"); + return false; + } + /* Set the va again */ + vsi->seg_id_buf.va = (unsigned long)mem->va; + vsi->seg_id_buf.pa = (unsigned long)mem->dma_addr; + vsi->seg_id_buf.sz = (unsigned int)mem->size; + + + vp9_free_all_sf_ref_fb(inst); + vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst); + + return true; +} + +static bool vp9_add_to_fb_disp_list(struct vdec_vp9_inst *inst, + struct vdec_fb *fb) +{ + struct vdec_fb_node *node; + + if (!fb) { + mtk_vdec_err(inst->ctx, "fb == NULL"); + return false; + } + + node = list_first_entry_or_null(&inst->available_fb_node_list, + struct vdec_fb_node, list); + if (node) { + node->fb = fb; + list_move_tail(&node->list, &inst->fb_disp_list); + } else { + mtk_vdec_err(inst->ctx, "No available fb node"); + return false; + } + + return true; +} + +/* If any buffer updating is signaled it should be done here. */ +static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + struct vp9_fb_info *frm_to_show; + int ref_index = 0, mask; + + for (mask = vsi->refresh_frm_flags; mask; mask >>= 1) { + if (mask & 1) + vp9_ref_cnt_fb(inst, &vsi->ref_frm_map[ref_index], + vsi->new_fb_idx); + ++ref_index; + } + + frm_to_show = &vsi->frm_bufs[vsi->new_fb_idx].buf; + vsi->frm_bufs[vsi->new_fb_idx].ref_cnt--; + + if (frm_to_show->fb != inst->cur_fb) { + /* This frame is show exist frame and no decode output + * copy frame data from frm_to_show to current CAPTURE + * buffer + */ + if ((frm_to_show->fb != NULL) && + (inst->cur_fb->base_y.size >= + frm_to_show->fb->base_y.size) && + (inst->cur_fb->base_c.size >= + frm_to_show->fb->base_c.size)) { + memcpy((void *)inst->cur_fb->base_y.va, + (void *)frm_to_show->fb->base_y.va, + frm_to_show->fb->base_y.size); + memcpy((void *)inst->cur_fb->base_c.va, + (void *)frm_to_show->fb->base_c.va, + frm_to_show->fb->base_c.size); + } else { + /* After resolution change case, current CAPTURE buffer + * may have less buffer size than frm_to_show buffer + * size + */ + if (frm_to_show->fb != NULL) + mtk_vdec_err(inst->ctx, + "base_y.size=%zu, frm_to_show: base_y.size=%zu", + inst->cur_fb->base_y.size, + frm_to_show->fb->base_y.size); + } + if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) { + if (vsi->show_frame & BIT(0)) + vp9_add_to_fb_disp_list(inst, inst->cur_fb); + } + } else { + if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) { + if (vsi->show_frame & BIT(0)) + vp9_add_to_fb_disp_list(inst, frm_to_show->fb); + } + } + + /* when ref_cnt ==0, move this fb to fb_free_list. v4l2 driver will + * clean fb_free_list + */ + if (vsi->frm_bufs[vsi->new_fb_idx].ref_cnt == 0) { + if (!vp9_is_sf_ref_fb( + inst, vsi->frm_bufs[vsi->new_fb_idx].buf.fb)) { + struct vdec_fb *fb; + + fb = vp9_rm_from_fb_use_list(inst, + vsi->frm_bufs[vsi->new_fb_idx].buf.fb->base_y.va); + + vp9_add_to_fb_free_list(inst, fb); + } else { + vp9_free_sf_ref_fb( + vsi->frm_bufs[vsi->new_fb_idx].buf.fb); + } + } + + /* if this super frame and it is not last sub-frame, get next fb for + * sub-frame decode + */ + if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1) + vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst); +} + +static bool vp9_wait_dec_end(struct vdec_vp9_inst *inst) +{ + struct mtk_vcodec_dec_ctx *ctx = inst->ctx; + + mtk_vcodec_wait_for_done_ctx(inst->ctx, + MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, 0); + + if (ctx->irq_status & MTK_VDEC_IRQ_STATUS_DEC_SUCCESS) + return true; + else + return false; +} + +static struct vdec_vp9_inst *vp9_alloc_inst(struct mtk_vcodec_dec_ctx *ctx) +{ + int result; + struct mtk_vcodec_mem mem; + struct vdec_vp9_inst *inst; + + memset(&mem, 0, sizeof(mem)); + mem.size = sizeof(struct vdec_vp9_inst); + result = mtk_vcodec_mem_alloc(ctx, &mem); + if (result) + return NULL; + + inst = mem.va; + inst->mem = mem; + + return inst; +} + +static void vp9_free_inst(struct vdec_vp9_inst *inst) +{ + struct mtk_vcodec_mem mem; + + mem = inst->mem; + if (mem.va) + mtk_vcodec_mem_free(inst->ctx, &mem); +} + +static bool vp9_decode_end_proc(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + bool ret = false; + + if (!vsi->show_existing_frame) { + ret = vp9_wait_dec_end(inst); + if (!ret) { + mtk_vdec_err(inst->ctx, "Decode failed, Decode Timeout @[%d]", + vsi->frm_num); + return false; + } + + if (vpu_dec_end(&inst->vpu)) { + mtk_vdec_err(inst->ctx, "vp9_dec_vpu_end failed"); + return false; + } + mtk_vdec_debug(inst->ctx, "Decode Ok @%d (%d/%d)", vsi->frm_num, + vsi->pic_w, vsi->pic_h); + } else { + mtk_vdec_debug(inst->ctx, "Decode Ok @%d (show_existing_frame)", vsi->frm_num); + } + + vp9_swap_frm_bufs(inst); + vsi->frm_num++; + return true; +} + +static bool vp9_is_last_sub_frm(struct vdec_vp9_inst *inst) +{ + struct vdec_vp9_vsi *vsi = inst->vsi; + + if (vsi->sf_frm_cnt <= 0 || vsi->sf_frm_idx == vsi->sf_frm_cnt) + return true; + + return false; +} + +static struct vdec_fb *vp9_rm_from_fb_disp_list(struct vdec_vp9_inst *inst) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb = NULL; + + node = list_first_entry_or_null(&inst->fb_disp_list, + struct vdec_fb_node, list); + if (node) { + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_DISPLAY; + list_move_tail(&node->list, &inst->available_fb_node_list); + mtk_vdec_debug(inst->ctx, "[FB] get disp fb %p st=%d", node->fb, fb->status); + } else + mtk_vdec_debug(inst->ctx, "[FB] there is no disp fb"); + + return fb; +} + +static bool vp9_add_to_fb_use_list(struct vdec_vp9_inst *inst, + struct vdec_fb *fb) +{ + struct vdec_fb_node *node; + + if (!fb) { + mtk_vdec_debug(inst->ctx, "fb == NULL"); + return false; + } + + node = list_first_entry_or_null(&inst->available_fb_node_list, + struct vdec_fb_node, list); + if (node) { + node->fb = fb; + list_move_tail(&node->list, &inst->fb_use_list); + } else { + mtk_vdec_err(inst->ctx, "No free fb node"); + return false; + } + return true; +} + +static void vp9_reset(struct vdec_vp9_inst *inst) +{ + struct vdec_fb_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &inst->fb_use_list, list) + list_move_tail(&node->list, &inst->fb_free_list); + + vp9_free_all_sf_ref_fb(inst); + inst->vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst); + + if (vpu_dec_reset(&inst->vpu)) + mtk_vdec_err(inst->ctx, "vp9_dec_vpu_reset failed"); + + /* Set the va again, since vpu_dec_reset will clear mv_buf in vpu */ + inst->vsi->mv_buf.va = (unsigned long)inst->mv_buf.va; + inst->vsi->mv_buf.pa = (unsigned long)inst->mv_buf.dma_addr; + inst->vsi->mv_buf.sz = (unsigned long)inst->mv_buf.size; + + /* Set the va again, since vpu_dec_reset will clear seg_id_buf in vpu */ + inst->vsi->seg_id_buf.va = (unsigned long)inst->seg_id_buf.va; + inst->vsi->seg_id_buf.pa = (unsigned long)inst->seg_id_buf.dma_addr; + inst->vsi->seg_id_buf.sz = (unsigned long)inst->seg_id_buf.size; + +} + +static void init_all_fb_lists(struct vdec_vp9_inst *inst) +{ + int i; + + INIT_LIST_HEAD(&inst->available_fb_node_list); + INIT_LIST_HEAD(&inst->fb_use_list); + INIT_LIST_HEAD(&inst->fb_free_list); + INIT_LIST_HEAD(&inst->fb_disp_list); + + for (i = 0; i < ARRAY_SIZE(inst->dec_fb); i++) { + INIT_LIST_HEAD(&inst->dec_fb[i].list); + inst->dec_fb[i].fb = NULL; + list_add_tail(&inst->dec_fb[i].list, + &inst->available_fb_node_list); + } +} + +static void get_pic_info(struct vdec_vp9_inst *inst, struct vdec_pic_info *pic) +{ + pic->fb_sz[0] = inst->vsi->buf_sz_y_bs + inst->vsi->buf_len_sz_y; + pic->fb_sz[1] = inst->vsi->buf_sz_c_bs + inst->vsi->buf_len_sz_c; + + pic->pic_w = inst->vsi->pic_w; + pic->pic_h = inst->vsi->pic_h; + pic->buf_w = inst->vsi->buf_w; + pic->buf_h = inst->vsi->buf_h; + + mtk_vdec_debug(inst->ctx, "pic(%d, %d), buf(%d, %d)", + pic->pic_w, pic->pic_h, pic->buf_w, pic->buf_h); + mtk_vdec_debug(inst->ctx, "fb size: Y(%d), C(%d)", pic->fb_sz[0], pic->fb_sz[1]); +} + +static void get_disp_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb) +{ + + *out_fb = vp9_rm_from_fb_disp_list(inst); + if (*out_fb) + (*out_fb)->status |= FB_ST_DISPLAY; +} + +static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb) +{ + struct vdec_fb_node *node; + struct vdec_fb *fb = NULL; + + node = list_first_entry_or_null(&inst->fb_free_list, + struct vdec_fb_node, list); + if (node) { + list_move_tail(&node->list, &inst->available_fb_node_list); + fb = (struct vdec_fb *)node->fb; + fb->status |= FB_ST_FREE; + mtk_vdec_debug(inst->ctx, "[FB] get free fb %p st=%d", node->fb, fb->status); + } else { + mtk_vdec_debug(inst->ctx, "[FB] there is no free fb"); + } + + *out_fb = fb; +} + +static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst, + struct vdec_vp9_vsi *vsi) { + if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) { + mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx); + return -EIO; + } + if (vsi->frm_to_show_idx >= VP9_MAX_FRM_BUF_NUM) { + mtk_vdec_err(inst->ctx, "Invalid vsi->frm_to_show_idx=%u.", vsi->frm_to_show_idx); + return -EIO; + } + if (vsi->new_fb_idx >= VP9_MAX_FRM_BUF_NUM) { + mtk_vdec_err(inst->ctx, "Invalid vsi->new_fb_idx=%u.", vsi->new_fb_idx); + return -EIO; + } + return 0; +} + +static void vdec_vp9_deinit(void *h_vdec) +{ + struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec; + struct mtk_vcodec_mem *mem; + int ret = 0; + + ret = vpu_dec_deinit(&inst->vpu); + if (ret) + mtk_vdec_err(inst->ctx, "vpu_dec_deinit failed"); + + mem = &inst->mv_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + mem = &inst->seg_id_buf; + if (mem->va) + mtk_vcodec_mem_free(inst->ctx, mem); + + vp9_free_all_sf_ref_fb(inst); + vp9_free_inst(inst); +} + +static int vdec_vp9_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_vp9_inst *inst; + + inst = vp9_alloc_inst(ctx); + if (!inst) + return -ENOMEM; + + inst->total_frm_cnt = 0; + inst->ctx = ctx; + + inst->vpu.id = IPI_VDEC_VP9; + inst->vpu.ctx = ctx; + + if (vpu_dec_init(&inst->vpu)) { + mtk_vdec_err(inst->ctx, "vp9_dec_vpu_init failed"); + goto err_deinit_inst; + } + + inst->vsi = (struct vdec_vp9_vsi *)inst->vpu.vsi; + + inst->vsi->show_frame |= BIT(3); + + init_all_fb_lists(inst); + + ctx->drv_handle = inst; + return 0; + +err_deinit_inst: + vp9_free_inst(inst); + + return -EINVAL; +} + +static int vdec_vp9_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + int ret = 0; + struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec; + struct vdec_vp9_vsi *vsi = inst->vsi; + u32 data[3]; + int i; + + *res_chg = false; + + if ((bs == NULL) && (fb == NULL)) { + mtk_vdec_debug(inst->ctx, "[EOS]"); + vp9_reset(inst); + return ret; + } + + if (bs == NULL) { + mtk_vdec_err(inst->ctx, "bs == NULL"); + return -EINVAL; + } + + mtk_vdec_debug(inst->ctx, "Input BS Size = %zu", bs->size); + + while (1) { + struct vdec_fb *cur_fb = NULL; + + data[0] = *((unsigned int *)bs->va); + data[1] = *((unsigned int *)(bs->va + 4)); + data[2] = *((unsigned int *)(bs->va + 8)); + + vsi->bs = *bs; + + if (fb) + vsi->fb = *fb; + + if (!vsi->sf_init) { + unsigned int sf_bs_sz; + unsigned int sf_bs_off; + unsigned char *sf_bs_src; + unsigned char *sf_bs_dst; + + sf_bs_sz = bs->size > VP9_SUPER_FRAME_BS_SZ ? + VP9_SUPER_FRAME_BS_SZ : bs->size; + sf_bs_off = VP9_SUPER_FRAME_BS_SZ - sf_bs_sz; + sf_bs_src = bs->va + bs->size - sf_bs_sz; + sf_bs_dst = vsi->sf_bs_buf + sf_bs_off; + memcpy(sf_bs_dst, sf_bs_src, sf_bs_sz); + } else { + if ((vsi->sf_frm_cnt > 0) && + (vsi->sf_frm_idx < vsi->sf_frm_cnt)) { + unsigned int idx = vsi->sf_frm_idx; + + memcpy((void *)bs->va, + (void *)(bs->va + + vsi->sf_frm_offset[idx]), + vsi->sf_frm_sz[idx]); + } + } + + if (!(vsi->show_frame & BIT(4))) + memset(inst->seg_id_buf.va, 0, inst->seg_id_buf.size); + + ret = vpu_dec_start(&inst->vpu, data, 3); + if (ret) { + mtk_vdec_err(inst->ctx, "vpu_dec_start failed"); + goto DECODE_ERROR; + } + + if (vsi->show_frame & BIT(1)) { + memset(inst->seg_id_buf.va, 0, inst->seg_id_buf.size); + + if (vsi->show_frame & BIT(2)) { + ret = vpu_dec_start(&inst->vpu, NULL, 0); + if (ret) { + mtk_vdec_err(inst->ctx, "vpu trig decoder failed"); + goto DECODE_ERROR; + } + } + } + + ret = validate_vsi_array_indexes(inst, vsi); + if (ret) { + mtk_vdec_err(inst->ctx, "Invalid values from VPU."); + goto DECODE_ERROR; + } + + if (vsi->resolution_changed) { + if (!vp9_alloc_work_buf(inst)) { + ret = -EIO; + goto DECODE_ERROR; + } + } + + if (vsi->sf_frm_cnt > 0) { + cur_fb = &vsi->sf_ref_fb[vsi->sf_next_ref_fb_idx].fb; + + if (vsi->sf_frm_idx < vsi->sf_frm_cnt) + inst->cur_fb = cur_fb; + else + inst->cur_fb = fb; + } else { + inst->cur_fb = fb; + } + + vsi->frm_bufs[vsi->new_fb_idx].buf.fb = inst->cur_fb; + if (!vp9_is_sf_ref_fb(inst, inst->cur_fb)) + vp9_add_to_fb_use_list(inst, inst->cur_fb); + + mtk_vdec_debug(inst->ctx, "[#pic %d]", vsi->frm_num); + + if (vsi->show_existing_frame) + mtk_vdec_debug(inst->ctx, + "drv->new_fb_idx=%d, drv->frm_to_show_idx=%d", + vsi->new_fb_idx, vsi->frm_to_show_idx); + + if (vsi->show_existing_frame && (vsi->frm_to_show_idx < + VP9_MAX_FRM_BUF_NUM)) { + mtk_vdec_debug(inst->ctx, + "Skip Decode drv->new_fb_idx=%d, drv->frm_to_show_idx=%d", + vsi->new_fb_idx, vsi->frm_to_show_idx); + + vp9_ref_cnt_fb(inst, &vsi->new_fb_idx, + vsi->frm_to_show_idx); + } + + /* VPU assign the buffer pointer in its address space, + * reassign here + */ + for (i = 0; i < ARRAY_SIZE(vsi->frm_refs); i++) { + unsigned int idx = vsi->frm_refs[i].idx; + + vsi->frm_refs[i].buf = &vsi->frm_bufs[idx].buf; + } + + if (vsi->resolution_changed) { + *res_chg = true; + mtk_vdec_debug(inst->ctx, "VDEC_ST_RESOLUTION_CHANGED"); + + ret = 0; + goto DECODE_ERROR; + } + + if (!vp9_decode_end_proc(inst)) { + mtk_vdec_err(inst->ctx, "vp9_decode_end_proc"); + ret = -EINVAL; + goto DECODE_ERROR; + } + + if (vp9_is_last_sub_frm(inst)) + break; + + } + inst->total_frm_cnt++; + +DECODE_ERROR: + if (ret < 0) + vp9_add_to_fb_free_list(inst, fb); + + return ret; +} + +static void get_crop_info(struct vdec_vp9_inst *inst, struct v4l2_rect *cr) +{ + cr->left = 0; + cr->top = 0; + cr->width = inst->vsi->pic_w; + cr->height = inst->vsi->pic_h; + mtk_vdec_debug(inst->ctx, "get crop info l=%d, t=%d, w=%d, h=%d\n", + cr->left, cr->top, cr->width, cr->height); +} + +static int vdec_vp9_get_param(void *h_vdec, enum vdec_get_param_type type, + void *out) +{ + struct vdec_vp9_inst *inst = (struct vdec_vp9_inst *)h_vdec; + int ret = 0; + + switch (type) { + case GET_PARAM_DISP_FRAME_BUFFER: + get_disp_fb(inst, out); + break; + case GET_PARAM_FREE_FRAME_BUFFER: + get_free_fb(inst, out); + break; + case GET_PARAM_PIC_INFO: + get_pic_info(inst, out); + break; + case GET_PARAM_DPB_SIZE: + *((unsigned int *)out) = MAX_VP9_DPB_SIZE; + break; + case GET_PARAM_CROP_INFO: + get_crop_info(inst, out); + break; + default: + mtk_vdec_err(inst->ctx, "not supported param type %d", type); + ret = -EINVAL; + break; + } + + return ret; +} + +const struct vdec_common_if vdec_vp9_if = { + .init = vdec_vp9_init, + .decode = vdec_vp9_decode, + .get_param = vdec_vp9_get_param, + .deinit = vdec_vp9_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c new file mode 100644 index 0000000000..e393e3e668 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c @@ -0,0 +1,2216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: George Sun <george.sun@mediatek.com> + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <media/videobuf2-dma-contig.h> +#include <media/v4l2-vp9.h> + +#include "../mtk_vcodec_dec.h" +#include "../../common/mtk_vcodec_intr.h" +#include "../vdec_drv_base.h" +#include "../vdec_drv_if.h" +#include "../vdec_vpu_if.h" + +/* reset_frame_context defined in VP9 spec */ +#define VP9_RESET_FRAME_CONTEXT_NONE0 0 +#define VP9_RESET_FRAME_CONTEXT_NONE1 1 +#define VP9_RESET_FRAME_CONTEXT_SPEC 2 +#define VP9_RESET_FRAME_CONTEXT_ALL 3 + +#define VP9_TILE_BUF_SIZE 4096 +#define VP9_PROB_BUF_SIZE 2560 +#define VP9_COUNTS_BUF_SIZE 16384 + +#define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x)) +#define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x)) +#define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x)) +#define VP9_BAND_6(band) ((band) == 0 ? 3 : 6) + +/* + * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint + */ +struct vdec_vp9_slice_frame_ctx { + struct { + u8 probs[6][3]; + u8 padding[2]; + } coef_probs[4][2][2][6]; + + u8 y_mode_prob[4][16]; + u8 switch_interp_prob[4][16]; + u8 seg[32]; /* ignore */ + u8 comp_inter_prob[16]; + u8 comp_ref_prob[16]; + u8 single_ref_prob[5][2]; + u8 single_ref_prob_padding[6]; + + u8 joint[3]; + u8 joint_padding[13]; + struct { + u8 sign; + u8 classes[10]; + u8 padding[5]; + } sign_classes[2]; + struct { + u8 class0[1]; + u8 bits[10]; + u8 padding[5]; + } class0_bits[2]; + struct { + u8 class0_fp[2][3]; + u8 fp[3]; + u8 class0_hp; + u8 hp; + u8 padding[5]; + } class0_fp_hp[2]; + + u8 uv_mode_prob[10][16]; + u8 uv_mode_prob_padding[2][16]; + + u8 partition_prob[16][4]; + + u8 inter_mode_probs[7][4]; + u8 skip_probs[4]; + + u8 tx_p8x8[2][4]; + u8 tx_p16x16[2][4]; + u8 tx_p32x32[2][4]; + u8 intra_inter_prob[8]; +}; + +/* + * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint + */ +struct vdec_vp9_slice_frame_counts { + union { + struct { + u32 band_0[3]; + u32 padding0[1]; + u32 band_1_5[5][6]; + u32 padding1[2]; + } eob_branch[4][2][2]; + u32 eob_branch_space[256 * 4]; + }; + + struct { + u32 band_0[3][4]; + u32 band_1_5[5][6][4]; + } coef_probs[4][2][2]; + + u32 intra_inter[4][2]; + u32 comp_inter[5][2]; + u32 comp_inter_padding[2]; + u32 comp_ref[5][2]; + u32 comp_ref_padding[2]; + u32 single_ref[5][2][2]; + u32 inter_mode[7][4]; + u32 y_mode[4][12]; + u32 uv_mode[10][10]; + u32 partition[16][4]; + u32 switchable_interp[4][4]; + + u32 tx_p8x8[2][2]; + u32 tx_p16x16[2][4]; + u32 tx_p32x32[2][4]; + + u32 skip[3][4]; + + u32 joint[4]; + + struct { + u32 sign[2]; + u32 class0[2]; + u32 classes[12]; + u32 bits[10][2]; + u32 padding[4]; + u32 class0_fp[2][4]; + u32 fp[4]; + u32 class0_hp[2]; + u32 hp[2]; + } mvcomp[2]; + + u32 reserved[126][4]; +}; + +/** + * struct vdec_vp9_slice_counts_map - vp9 counts tables to map + * v4l2_vp9_frame_symbol_counts + * @skip: skip counts. + * @y_mode: Y prediction mode counts. + * @filter: interpolation filter counts. + * @mv_joint: motion vector joint counts. + * @sign: motion vector sign counts. + * @classes: motion vector class counts. + * @class0: motion vector class0 bit counts. + * @bits: motion vector bits counts. + * @class0_fp: motion vector class0 fractional bit counts. + * @fp: motion vector fractional bit counts. + * @class0_hp: motion vector class0 high precision fractional bit counts. + * @hp: motion vector high precision fractional bit counts. + */ +struct vdec_vp9_slice_counts_map { + u32 skip[3][2]; + u32 y_mode[4][10]; + u32 filter[4][3]; + u32 sign[2][2]; + u32 classes[2][11]; + u32 class0[2][2]; + u32 bits[2][10][2]; + u32 class0_fp[2][2][4]; + u32 fp[2][4]; + u32 class0_hp[2][2]; + u32 hp[2][2]; +}; + +/* + * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax + * used for decoding + */ +struct vdec_vp9_slice_uncompressed_header { + u8 profile; + u8 last_frame_type; + u8 frame_type; + + u8 last_show_frame; + u8 show_frame; + u8 error_resilient_mode; + + u8 bit_depth; + u8 padding0[1]; + u16 last_frame_width; + u16 last_frame_height; + u16 frame_width; + u16 frame_height; + + u8 intra_only; + u8 reset_frame_context; + u8 ref_frame_sign_bias[4]; + u8 allow_high_precision_mv; + u8 interpolation_filter; + + u8 refresh_frame_context; + u8 frame_parallel_decoding_mode; + u8 frame_context_idx; + + /* loop_filter_params */ + u8 loop_filter_level; + u8 loop_filter_sharpness; + u8 loop_filter_delta_enabled; + s8 loop_filter_ref_deltas[4]; + s8 loop_filter_mode_deltas[2]; + + /* quantization_params */ + u8 base_q_idx; + s8 delta_q_y_dc; + s8 delta_q_uv_dc; + s8 delta_q_uv_ac; + + /* segmentation_params */ + u8 segmentation_enabled; + u8 segmentation_update_map; + u8 segmentation_tree_probs[7]; + u8 padding1[1]; + u8 segmentation_temporal_udpate; + u8 segmentation_pred_prob[3]; + u8 segmentation_update_data; + u8 segmentation_abs_or_delta_update; + u8 feature_enabled[8]; + s16 feature_value[8][4]; + + /* tile_info */ + u8 tile_cols_log2; + u8 tile_rows_log2; + u8 padding2[2]; + + u16 uncompressed_header_size; + u16 header_size_in_bytes; + + /* LAT OUT, CORE IN */ + u32 dequant[8][4]; +}; + +/* + * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax + * used for decoding. + */ +struct vdec_vp9_slice_compressed_header { + u8 tx_mode; + u8 ref_mode; + u8 comp_fixed_ref; + u8 comp_var_ref[2]; + u8 padding[3]; +}; + +/* + * struct vdec_vp9_slice_tiles - vp9 tile syntax + */ +struct vdec_vp9_slice_tiles { + u32 size[4][64]; + u32 mi_rows[4]; + u32 mi_cols[64]; + u8 actual_rows; + u8 padding[7]; +}; + +/* + * struct vdec_vp9_slice_reference - vp9 reference frame information + */ +struct vdec_vp9_slice_reference { + u16 frame_width; + u16 frame_height; + u8 bit_depth; + u8 subsampling_x; + u8 subsampling_y; + u8 padding; +}; + +/* + * struct vdec_vp9_slice_frame - vp9 syntax used for decoding + */ +struct vdec_vp9_slice_frame { + struct vdec_vp9_slice_uncompressed_header uh; + struct vdec_vp9_slice_compressed_header ch; + struct vdec_vp9_slice_tiles tiles; + struct vdec_vp9_slice_reference ref[3]; +}; + +/* + * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance + */ +struct vdec_vp9_slice_init_vsi { + unsigned int architecture; + unsigned int reserved; + u64 core_vsi; + /* default frame context's position in MicroP */ + u64 default_frame_ctx; +}; + +/* + * struct vdec_vp9_slice_mem - memory address and size + */ +struct vdec_vp9_slice_mem { + union { + u64 buf; + dma_addr_t dma_addr; + }; + union { + size_t size; + dma_addr_t dma_addr_end; + u64 padding; + }; +}; + +/* + * struct vdec_vp9_slice_bs - input buffer for decoding + */ +struct vdec_vp9_slice_bs { + struct vdec_vp9_slice_mem buf; + struct vdec_vp9_slice_mem frame; +}; + +/* + * struct vdec_vp9_slice_fb - frame buffer for decoding + */ +struct vdec_vp9_slice_fb { + struct vdec_vp9_slice_mem y; + struct vdec_vp9_slice_mem c; +}; + +/* + * struct vdec_vp9_slice_state - decoding state + */ +struct vdec_vp9_slice_state { + int err; + unsigned int full; + unsigned int timeout; + unsigned int perf; + + unsigned int crc[12]; +}; + +/** + * struct vdec_vp9_slice_vsi - exchange decoding information + * between Main CPU and MicroP + * + * @bs: input buffer + * @fb: output buffer + * @ref: 3 reference buffers + * @mv: mv working buffer + * @seg: segmentation working buffer + * @tile: tile buffer + * @prob: prob table buffer, used to set/update prob table + * @counts: counts table buffer, used to update prob table + * @ube: general buffer + * @trans: trans buffer position in general buffer + * @err_map: error buffer + * @row_info: row info buffer + * @frame: decoding syntax + * @state: decoding state + */ +struct vdec_vp9_slice_vsi { + /* used in LAT stage */ + struct vdec_vp9_slice_bs bs; + /* used in Core stage */ + struct vdec_vp9_slice_fb fb; + struct vdec_vp9_slice_fb ref[3]; + + struct vdec_vp9_slice_mem mv[2]; + struct vdec_vp9_slice_mem seg[2]; + struct vdec_vp9_slice_mem tile; + struct vdec_vp9_slice_mem prob; + struct vdec_vp9_slice_mem counts; + + /* LAT stage's output, Core stage's input */ + struct vdec_vp9_slice_mem ube; + struct vdec_vp9_slice_mem trans; + struct vdec_vp9_slice_mem err_map; + struct vdec_vp9_slice_mem row_info; + + /* decoding parameters */ + struct vdec_vp9_slice_frame frame; + + struct vdec_vp9_slice_state state; +}; + +/** + * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi. + * pass it from lat to core + * + * @vsi: local vsi. copy to/from remote vsi before/after decoding + * @ref_idx: reference buffer index + * @seq: picture sequence + * @state: decoding state + */ +struct vdec_vp9_slice_pfc { + struct vdec_vp9_slice_vsi vsi; + + u64 ref_idx[3]; + + int seq; + + /* LAT/Core CRC */ + struct vdec_vp9_slice_state state[2]; +}; + +/* + * enum vdec_vp9_slice_resolution_level + */ +enum vdec_vp9_slice_resolution_level { + VP9_RES_NONE, + VP9_RES_FHD, + VP9_RES_4K, + VP9_RES_8K, +}; + +/* + * struct vdec_vp9_slice_ref - picture's width & height should kept + * for later decoding as reference picture + */ +struct vdec_vp9_slice_ref { + unsigned int width; + unsigned int height; +}; + +/** + * struct vdec_vp9_slice_instance - represent one vp9 instance + * + * @ctx: pointer to codec's context + * @vpu: VPU instance + * @seq: global picture sequence + * @level: level of current resolution + * @width: width of last picture + * @height: height of last picture + * @frame_type: frame_type of last picture + * @irq: irq to Main CPU or MicroP + * @show_frame: show_frame of last picture + * @dpb: picture information (width/height) for reference + * @mv: mv working buffer + * @seg: segmentation working buffer + * @tile: tile buffer + * @prob: prob table buffer, used to set/update prob table + * @counts: counts table buffer, used to update prob table + * @frame_ctx: 4 frame context according to VP9 Spec + * @frame_ctx_helper: 4 frame context according to newest kernel spec + * @dirty: state of each frame context + * @init_vsi: vsi used for initialized VP9 instance + * @vsi: vsi used for decoding/flush ... + * @core_vsi: vsi used for Core stage + * + * @sc_pfc: per frame context single core + * @counts_map: used map to counts_helper + * @counts_helper: counts table according to newest kernel spec + */ +struct vdec_vp9_slice_instance { + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_vpu_inst vpu; + + int seq; + + enum vdec_vp9_slice_resolution_level level; + + /* for resolution change and get_pic_info */ + unsigned int width; + unsigned int height; + + /* for last_frame_type */ + unsigned int frame_type; + unsigned int irq; + + unsigned int show_frame; + + /* maintain vp9 reference frame state */ + struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME]; + + /* + * normal working buffers + * mv[0]/seg[0]/tile/prob/counts is used for LAT + * mv[1]/seg[1] is used for CORE + */ + struct mtk_vcodec_mem mv[2]; + struct mtk_vcodec_mem seg[2]; + struct mtk_vcodec_mem tile; + struct mtk_vcodec_mem prob; + struct mtk_vcodec_mem counts; + + /* 4 prob tables */ + struct vdec_vp9_slice_frame_ctx frame_ctx[4]; + /*4 helper tables */ + struct v4l2_vp9_frame_context frame_ctx_helper; + unsigned char dirty[4]; + + /* MicroP vsi */ + union { + struct vdec_vp9_slice_init_vsi *init_vsi; + struct vdec_vp9_slice_vsi *vsi; + }; + struct vdec_vp9_slice_vsi *core_vsi; + + struct vdec_vp9_slice_pfc sc_pfc; + struct vdec_vp9_slice_counts_map counts_map; + struct v4l2_vp9_frame_symbol_counts counts_helper; +}; + +/* + * all VP9 instances could share this default frame context. + */ +static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx; +static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock); + +static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf); + +static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance) +{ + struct vdec_vp9_slice_frame_ctx *remote_frame_ctx; + struct vdec_vp9_slice_frame_ctx *frame_ctx; + struct mtk_vcodec_dec_ctx *ctx; + struct vdec_vp9_slice_init_vsi *vsi; + int ret = 0; + + ctx = instance->ctx; + vsi = instance->vpu.vsi; + if (!ctx || !vsi) + return -EINVAL; + + remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, + (u32)vsi->default_frame_ctx); + if (!remote_frame_ctx) { + mtk_vdec_err(ctx, "failed to map default frame ctx\n"); + return -EINVAL; + } + + mutex_lock(&vdec_vp9_slice_frame_ctx_lock); + if (vdec_vp9_slice_default_frame_ctx) + goto out; + + frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL); + if (!frame_ctx) { + ret = -ENOMEM; + goto out; + } + + vdec_vp9_slice_default_frame_ctx = frame_ctx; + +out: + mutex_unlock(&vdec_vp9_slice_frame_ctx_lock); + + return ret; +} + +static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi) +{ + struct mtk_vcodec_dec_ctx *ctx = instance->ctx; + enum vdec_vp9_slice_resolution_level level; + /* super blocks */ + unsigned int max_sb_w; + unsigned int max_sb_h; + unsigned int max_w; + unsigned int max_h; + unsigned int w; + unsigned int h; + size_t size; + int ret; + int i; + + w = vsi->frame.uh.frame_width; + h = vsi->frame.uh.frame_height; + + if (w > VCODEC_DEC_4K_CODED_WIDTH || + h > VCODEC_DEC_4K_CODED_HEIGHT) { + return -EINVAL; + } else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) { + /* 4K */ + level = VP9_RES_4K; + max_w = VCODEC_DEC_4K_CODED_WIDTH; + max_h = VCODEC_DEC_4K_CODED_HEIGHT; + } else { + /* FHD */ + level = VP9_RES_FHD; + max_w = MTK_VDEC_MAX_W; + max_h = MTK_VDEC_MAX_H; + } + + if (level == instance->level) + return 0; + + mtk_vdec_debug(ctx, "resolution level changed, from %u to %u, %ux%u", + instance->level, level, w, h); + + max_sb_w = DIV_ROUND_UP(max_w, 64); + max_sb_h = DIV_ROUND_UP(max_h, 64); + ret = -ENOMEM; + + /* + * Lat-flush must wait core idle, otherwise core will + * use released buffers + */ + + size = (max_sb_w * max_sb_h + 2) * 576; + for (i = 0; i < 2; i++) { + if (instance->mv[i].va) + mtk_vcodec_mem_free(ctx, &instance->mv[i]); + instance->mv[i].size = size; + if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i])) + goto err; + } + + size = (max_sb_w * max_sb_h * 32) + 256; + for (i = 0; i < 2; i++) { + if (instance->seg[i].va) + mtk_vcodec_mem_free(ctx, &instance->seg[i]); + instance->seg[i].size = size; + if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i])) + goto err; + } + + if (!instance->tile.va) { + instance->tile.size = VP9_TILE_BUF_SIZE; + if (mtk_vcodec_mem_alloc(ctx, &instance->tile)) + goto err; + } + + if (!instance->prob.va) { + instance->prob.size = VP9_PROB_BUF_SIZE; + if (mtk_vcodec_mem_alloc(ctx, &instance->prob)) + goto err; + } + + if (!instance->counts.va) { + instance->counts.size = VP9_COUNTS_BUF_SIZE; + if (mtk_vcodec_mem_alloc(ctx, &instance->counts)) + goto err; + } + + instance->level = level; + return 0; + +err: + instance->level = VP9_RES_NONE; + return ret; +} + +static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance) +{ + struct mtk_vcodec_dec_ctx *ctx = instance->ctx; + int i; + + for (i = 0; i < ARRAY_SIZE(instance->mv); i++) { + if (instance->mv[i].va) + mtk_vcodec_mem_free(ctx, &instance->mv[i]); + } + for (i = 0; i < ARRAY_SIZE(instance->seg); i++) { + if (instance->seg[i].va) + mtk_vcodec_mem_free(ctx, &instance->seg[i]); + } + if (instance->tile.va) + mtk_vcodec_mem_free(ctx, &instance->tile); + if (instance->prob.va) + mtk_vcodec_mem_free(ctx, &instance->prob); + if (instance->counts.va) + mtk_vcodec_mem_free(ctx, &instance->counts); + + instance->level = VP9_RES_NONE; +} + +static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi, + struct vdec_vp9_slice_vsi *remote_vsi, + int skip) +{ + struct vdec_vp9_slice_frame *rf; + struct vdec_vp9_slice_frame *f; + + /* + * compressed header + * dequant + * buffer position + * decode state + */ + if (!skip) { + rf = &remote_vsi->frame; + f = &vsi->frame; + memcpy(&f->ch, &rf->ch, sizeof(f->ch)); + memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant)); + memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans)); + } + + memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state)); +} + +static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi, + struct vdec_vp9_slice_vsi *remote_vsi) +{ + memcpy(remote_vsi, vsi, sizeof(*vsi)); +} + +static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2) +{ + int sbs = (mi_num + 7) >> 3; + int offset = ((idx * sbs) >> tile_log2) << 3; + + return min(offset, mi_num); +} + +static +int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance) +{ + struct vb2_v4l2_buffer *src; + struct vb2_v4l2_buffer *dst; + + src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); + if (!src) + return -EINVAL; + + dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); + if (!dst) + return -EINVAL; + + v4l2_m2m_buf_copy_metadata(src, dst, true); + + return 0; +} + +static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_v4l2_buffer *src; + struct vb2_v4l2_buffer *dst; + + src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx); + if (!src) + return -EINVAL; + + lat_buf->src_buf_req = src->vb2_buf.req_obj.req; + + dst = &lat_buf->ts_info; + v4l2_m2m_buf_copy_metadata(src, dst, true); + return 0; +} + +static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_ctrl_vp9_frame *hdr) +{ + int i; + + uh->profile = hdr->profile; + uh->last_frame_type = instance->frame_type; + uh->frame_type = !HDR_FLAG(KEY_FRAME); + uh->last_show_frame = instance->show_frame; + uh->show_frame = HDR_FLAG(SHOW_FRAME); + uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); + uh->bit_depth = hdr->bit_depth; + uh->last_frame_width = instance->width; + uh->last_frame_height = instance->height; + uh->frame_width = hdr->frame_width_minus_1 + 1; + uh->frame_height = hdr->frame_height_minus_1 + 1; + uh->intra_only = HDR_FLAG(INTRA_ONLY); + /* map v4l2 enum to values defined in VP9 spec for firmware */ + switch (hdr->reset_frame_context) { + case V4L2_VP9_RESET_FRAME_CTX_NONE: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; + break; + case V4L2_VP9_RESET_FRAME_CTX_SPEC: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC; + break; + case V4L2_VP9_RESET_FRAME_CTX_ALL: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL; + break; + default: + uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0; + break; + } + /* + * ref_frame_sign_bias specifies the intended direction + * of the motion vector in time for each reference frame. + * - INTRA_FRAME = 0, + * - LAST_FRAME = 1, + * - GOLDEN_FRAME = 2, + * - ALTREF_FRAME = 3, + * ref_frame_sign_bias[INTRA_FRAME] is always 0 + * and VDA only passes another 3 directions + */ + uh->ref_frame_sign_bias[0] = 0; + for (i = 0; i < 3; i++) + uh->ref_frame_sign_bias[i + 1] = + !!(hdr->ref_frame_sign_bias & (1 << i)); + uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV); + uh->interpolation_filter = hdr->interpolation_filter; + uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX); + uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE); + uh->frame_context_idx = hdr->frame_context_idx; + + /* tile info */ + uh->tile_cols_log2 = hdr->tile_cols_log2; + uh->tile_rows_log2 = hdr->tile_rows_log2; + + uh->uncompressed_header_size = hdr->uncompressed_header_size; + uh->header_size_in_bytes = hdr->compressed_header_size; +} + +static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_ctrl_vp9_frame *hdr) +{ + int error_resilient_mode; + int reset_frame_context; + int key_frame; + int intra_only; + int i; + + key_frame = HDR_FLAG(KEY_FRAME); + intra_only = HDR_FLAG(INTRA_ONLY); + error_resilient_mode = HDR_FLAG(ERROR_RESILIENT); + reset_frame_context = uh->reset_frame_context; + + /* + * according to "6.2 Uncompressed header syntax" in + * "VP9 Bitstream & Decoding Process Specification", + * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode) + */ + if (key_frame || intra_only || error_resilient_mode) { + /* + * @reset_frame_context specifies + * whether the frame context should be + * reset to default values: + * 0 or 1 means do not reset any frame context + * 2 resets just the context specified in the frame header + * 3 resets all contexts + */ + if (key_frame || error_resilient_mode || + reset_frame_context == 3) { + /* use default table */ + for (i = 0; i < 4; i++) + instance->dirty[i] = 0; + } else if (reset_frame_context == 2) { + instance->dirty[uh->frame_context_idx] = 0; + } + uh->frame_context_idx = 0; + } +} + +static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_vp9_loop_filter *lf) +{ + int i; + + uh->loop_filter_level = lf->level; + uh->loop_filter_sharpness = lf->sharpness; + uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED); + for (i = 0; i < 4; i++) + uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i]; + for (i = 0; i < 2; i++) + uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i]; +} + +static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_vp9_quantization *quant) +{ + uh->base_q_idx = quant->base_q_idx; + uh->delta_q_y_dc = quant->delta_q_y_dc; + uh->delta_q_uv_dc = quant->delta_q_uv_dc; + uh->delta_q_uv_ac = quant->delta_q_uv_ac; +} + +static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh, + struct v4l2_vp9_segmentation *seg) +{ + int i; + int j; + + uh->segmentation_enabled = SEG_FLAG(ENABLED); + uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP); + for (i = 0; i < 7; i++) + uh->segmentation_tree_probs[i] = seg->tree_probs[i]; + uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE); + for (i = 0; i < 3; i++) + uh->segmentation_pred_prob[i] = seg->pred_probs[i]; + uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA); + uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE); + for (i = 0; i < 8; i++) { + uh->feature_enabled[i] = seg->feature_enabled[i]; + for (j = 0; j < 4; j++) + uh->feature_value[i][j] = seg->feature_data[i][j]; + } +} + +static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi, + struct v4l2_ctrl_vp9_frame *hdr) +{ + unsigned int rows_log2; + unsigned int cols_log2; + unsigned int rows; + unsigned int cols; + unsigned int mi_rows; + unsigned int mi_cols; + struct vdec_vp9_slice_tiles *tiles; + int offset; + int start; + int end; + int i; + + rows_log2 = hdr->tile_rows_log2; + cols_log2 = hdr->tile_cols_log2; + rows = 1 << rows_log2; + cols = 1 << cols_log2; + tiles = &vsi->frame.tiles; + tiles->actual_rows = 0; + + if (rows > 4 || cols > 64) + return -EINVAL; + + /* setup mi rows/cols information */ + mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3; + mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3; + + for (i = 0; i < rows; i++) { + start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2); + end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2); + offset = end - start; + tiles->mi_rows[i] = (offset + 7) >> 3; + if (tiles->mi_rows[i]) + tiles->actual_rows++; + } + + for (i = 0; i < cols; i++) { + start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2); + end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2); + offset = end - start; + tiles->mi_cols[i] = (offset + 7) >> 3; + } + + return 0; +} + +static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi) +{ + memset(&vsi->state, 0, sizeof(vsi->state)); +} + +static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc, + struct v4l2_ctrl_vp9_frame *hdr) +{ + pfc->ref_idx[0] = hdr->last_frame_ts; + pfc->ref_idx[1] = hdr->golden_frame_ts; + pfc->ref_idx[2] = hdr->alt_frame_ts; +} + +static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc) +{ + struct v4l2_ctrl_vp9_frame *hdr; + struct vdec_vp9_slice_uncompressed_header *uh; + struct v4l2_ctrl *hdr_ctrl; + struct vdec_vp9_slice_vsi *vsi; + int ret; + + /* frame header */ + hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME); + if (!hdr_ctrl || !hdr_ctrl->p_cur.p) + return -EINVAL; + + hdr = hdr_ctrl->p_cur.p; + vsi = &pfc->vsi; + uh = &vsi->frame.uh; + + /* setup vsi information */ + vdec_vp9_slice_setup_hdr(instance, uh, hdr); + vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr); + vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf); + vdec_vp9_slice_setup_quantization(uh, &hdr->quant); + vdec_vp9_slice_setup_segmentation(uh, &hdr->seg); + ret = vdec_vp9_slice_setup_tile(vsi, hdr); + if (ret) + return ret; + vdec_vp9_slice_setup_state(vsi); + + /* core stage needs buffer index to get ref y/c ... */ + vdec_vp9_slice_setup_ref_idx(pfc, hdr); + + pfc->seq = instance->seq; + instance->seq++; + + return 0; +} + +static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *bs, + struct vdec_lat_buf *lat_buf) +{ + int i; + + vsi->bs.buf.dma_addr = bs->dma_addr; + vsi->bs.buf.size = bs->size; + vsi->bs.frame.dma_addr = bs->dma_addr; + vsi->bs.frame.size = bs->size; + + for (i = 0; i < 2; i++) { + vsi->mv[i].dma_addr = instance->mv[i].dma_addr; + vsi->mv[i].size = instance->mv[i].size; + } + for (i = 0; i < 2; i++) { + vsi->seg[i].dma_addr = instance->seg[i].dma_addr; + vsi->seg[i].size = instance->seg[i].size; + } + vsi->tile.dma_addr = instance->tile.dma_addr; + vsi->tile.size = instance->tile.size; + vsi->prob.dma_addr = instance->prob.dma_addr; + vsi->prob.size = instance->prob.size; + vsi->counts.dma_addr = instance->counts.dma_addr; + vsi->counts.size = instance->counts.size; + + vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr; + vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size; + vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr; + /* used to store trans end */ + vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr; + vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr; + vsi->err_map.size = lat_buf->wdma_err_addr.size; + + vsi->row_info.buf = 0; + vsi->row_info.size = 0; + + return 0; +} + +static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi) +{ + struct vdec_vp9_slice_frame_ctx *frame_ctx; + struct vdec_vp9_slice_uncompressed_header *uh; + + uh = &vsi->frame.uh; + + mtk_vdec_debug(instance->ctx, "ctx dirty %u idx %d\n", + instance->dirty[uh->frame_context_idx], + uh->frame_context_idx); + + if (instance->dirty[uh->frame_context_idx]) + frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; + else + frame_ctx = vdec_vp9_slice_default_frame_ctx; + memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx)); + + return 0; +} + +static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *buf) +{ + struct vdec_vp9_slice_uncompressed_header *uh; + + /* reset segment buffer */ + uh = &vsi->frame.uh; + if (uh->frame_type == 0 || + uh->intra_only || + uh->error_resilient_mode || + uh->frame_width != instance->width || + uh->frame_height != instance->height) { + mtk_vdec_debug(instance->ctx, "reset seg\n"); + memset(buf->va, 0, buf->size); + } +} + +/* + * parse tiles according to `6.4 Decode tiles syntax` + * in "vp9-bitstream-specification" + * + * frame contains uncompress header, compressed header and several tiles. + * this function parses tiles' position and size, stores them to tile buffer + * for decoding. + */ +static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *bs) +{ + struct vdec_vp9_slice_uncompressed_header *uh; + unsigned int rows_log2; + unsigned int cols_log2; + unsigned int rows; + unsigned int cols; + unsigned int mi_row; + unsigned int mi_col; + unsigned int offset; + unsigned int pa; + unsigned int size; + struct vdec_vp9_slice_tiles *tiles; + unsigned char *pos; + unsigned char *end; + unsigned char *va; + unsigned int *tb; + int i; + int j; + + uh = &vsi->frame.uh; + rows_log2 = uh->tile_rows_log2; + cols_log2 = uh->tile_cols_log2; + rows = 1 << rows_log2; + cols = 1 << cols_log2; + + if (rows > 4 || cols > 64) { + mtk_vdec_err(instance->ctx, "tile_rows %u tile_cols %u\n", rows, cols); + return -EINVAL; + } + + offset = uh->uncompressed_header_size + + uh->header_size_in_bytes; + if (bs->size <= offset) { + mtk_vdec_err(instance->ctx, "bs size %zu tile offset %u\n", bs->size, offset); + return -EINVAL; + } + + tiles = &vsi->frame.tiles; + /* setup tile buffer */ + + va = (unsigned char *)bs->va; + pos = va + offset; + end = va + bs->size; + /* truncated */ + pa = (unsigned int)bs->dma_addr + offset; + tb = instance->tile.va; + for (i = 0; i < rows; i++) { + for (j = 0; j < cols; j++) { + if (i == rows - 1 && + j == cols - 1) { + size = (unsigned int)(end - pos); + } else { + if (end - pos < 4) + return -EINVAL; + + size = (pos[0] << 24) | (pos[1] << 16) | + (pos[2] << 8) | pos[3]; + pos += 4; + pa += 4; + offset += 4; + if (end - pos < size) + return -EINVAL; + } + tiles->size[i][j] = size; + if (tiles->mi_rows[i]) { + *tb++ = (size << 3) + ((offset << 3) & 0x7f); + *tb++ = pa & ~0xf; + *tb++ = (pa << 3) & 0x7f; + mi_row = (tiles->mi_rows[i] - 1) & 0x1ff; + mi_col = (tiles->mi_cols[j] - 1) & 0x3f; + *tb++ = (mi_row << 6) + mi_col; + } + pos += size; + pa += size; + offset += size; + } + } + + return 0; +} + +static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance, + struct mtk_vcodec_mem *bs, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; + int ret; + + ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_pfc(instance, pfc); + if (ret) + goto err; + + ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf); + if (ret) + goto err; + + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]); + + /* setup prob/tile buffers for LAT */ + + ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); + if (ret) + goto err; + + return 0; + +err: + return ret; +} + +static +void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k, + struct vdec_vp9_slice_frame_counts *counts, + struct v4l2_vp9_frame_symbol_counts *counts_helper) +{ + u32 l = 0, m; + + /* + * helper eo -> mtk eo + * helpre e1 -> mtk c3 + * helper c0 -> c0 + * helper c1 -> c1 + * helper c2 -> c2 + */ + for (m = 0; m < 3; m++) { + counts_helper->coeff[i][j][k][l][m] = + (u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m]; + counts_helper->eob[i][j][k][l][m][0] = + &counts->eob_branch[i][j][k].band_0[m]; + counts_helper->eob[i][j][k][l][m][1] = + &counts->coef_probs[i][j][k].band_0[m][3]; + } + + for (l = 1; l < 6; l++) { + for (m = 0; m < 6; m++) { + counts_helper->coeff[i][j][k][l][m] = + (u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m]; + counts_helper->eob[i][j][k][l][m][0] = + &counts->eob_branch[i][j][k].band_1_5[l - 1][m]; + counts_helper->eob[i][j][k][l][m][1] = + &counts->coef_probs[i][j][k].band_1_5[l - 1][m][3]; + } + } +} + +static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map, + struct vdec_vp9_slice_frame_counts *counts, + struct v4l2_vp9_frame_symbol_counts *counts_helper) +{ + int i, j, k; + + counts_helper->partition = &counts->partition; + counts_helper->intra_inter = &counts->intra_inter; + counts_helper->tx32p = &counts->tx_p32x32; + counts_helper->tx16p = &counts->tx_p16x16; + counts_helper->tx8p = &counts->tx_p8x8; + counts_helper->uv_mode = &counts->uv_mode; + + counts_helper->comp = &counts->comp_inter; + counts_helper->comp_ref = &counts->comp_ref; + counts_helper->single_ref = &counts->single_ref; + counts_helper->mv_mode = &counts->inter_mode; + counts_helper->mv_joint = &counts->joint; + + for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++) + memcpy(counts_map->skip[i], counts->skip[i], + sizeof(counts_map->skip[0])); + counts_helper->skip = &counts_map->skip; + + for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++) + memcpy(counts_map->y_mode[i], counts->y_mode[i], + sizeof(counts_map->y_mode[0])); + counts_helper->y_mode = &counts_map->y_mode; + + for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++) + memcpy(counts_map->filter[i], counts->switchable_interp[i], + sizeof(counts_map->filter[0])); + counts_helper->filter = &counts_map->filter; + + for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++) + memcpy(counts_map->sign[i], counts->mvcomp[i].sign, + sizeof(counts_map->sign[0])); + counts_helper->sign = &counts_map->sign; + + for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++) + memcpy(counts_map->classes[i], counts->mvcomp[i].classes, + sizeof(counts_map->classes[0])); + counts_helper->classes = &counts_map->classes; + + for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++) + memcpy(counts_map->class0[i], counts->mvcomp[i].class0, + sizeof(counts_map->class0[0])); + counts_helper->class0 = &counts_map->class0; + + for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++) + for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++) + memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j], + sizeof(counts_map->bits[0][0])); + counts_helper->bits = &counts_map->bits; + + for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++) + for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++) + memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j], + sizeof(counts_map->class0_fp[0][0])); + counts_helper->class0_fp = &counts_map->class0_fp; + + for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++) + memcpy(counts_map->fp[i], counts->mvcomp[i].fp, + sizeof(counts_map->fp[0])); + counts_helper->fp = &counts_map->fp; + + for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++) + memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp, + sizeof(counts_map->class0_hp[0])); + counts_helper->class0_hp = &counts_map->class0_hp; + + for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++) + memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0])); + + counts_helper->hp = &counts_map->hp; + + for (i = 0; i < 4; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper); +} + +static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k, + struct vdec_vp9_slice_frame_ctx *frame_ctx, + struct v4l2_vp9_frame_context *frame_ctx_helper) +{ + u32 l, m; + + for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { + for (m = 0; m < VP9_BAND_6(l); m++) { + memcpy(frame_ctx_helper->coef[i][j][k][l][m], + frame_ctx->coef_probs[i][j][k][l].probs[m], + sizeof(frame_ctx_helper->coef[i][j][k][l][0])); + } + } +} + +static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k, + struct vdec_vp9_slice_frame_ctx *frame_ctx, + struct v4l2_vp9_frame_context *frame_ctx_helper) +{ + u32 l, m; + + for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) { + for (m = 0; m < VP9_BAND_6(l); m++) { + memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m], + frame_ctx_helper->coef[i][j][k][l][m], + sizeof(frame_ctx_helper->coef[i][j][k][l][0])); + } + } +} + +static +void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra, + struct vdec_vp9_slice_frame_ctx *pre_frame_ctx, + struct vdec_vp9_slice_frame_ctx *frame_ctx, + struct v4l2_vp9_frame_context *frame_ctx_helper) +{ + struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; + u32 i, j, k; + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) + for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) + for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) + vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx, + frame_ctx_helper); + + /* + * use previous prob when frame is not intra or + * we should use the prob updated by the compressed header parse + */ + if (!frame_is_intra) + frame_ctx = pre_frame_ctx; + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++) + memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i], + sizeof(frame_ctx_helper->tx8[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++) + memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i], + sizeof(frame_ctx_helper->tx16[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++) + memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i], + sizeof(frame_ctx_helper->tx32[0])); + + memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++) + memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i], + sizeof(frame_ctx_helper->inter_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++) + memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i], + sizeof(frame_ctx_helper->interp_filter[0])); + + memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob, + sizeof(frame_ctx_helper->is_inter)); + + memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob, + sizeof(frame_ctx_helper->comp_mode)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++) + memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i], + sizeof(frame_ctx_helper->single_ref[0])); + + memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob, + sizeof(frame_ctx_helper->comp_ref)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++) + memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i], + sizeof(frame_ctx_helper->y_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++) + memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i], + sizeof(frame_ctx_helper->uv_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++) + memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i], + sizeof(frame_ctx_helper->partition[0])); + + memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint)); + + for (i = 0; i < ARRAY_SIZE(mv->sign); i++) + mv->sign[i] = frame_ctx->sign_classes[i].sign; + + for (i = 0; i < ARRAY_SIZE(mv->classes); i++) + memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes, + sizeof(mv->classes[i])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++) + mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0]; + + for (i = 0; i < ARRAY_SIZE(mv->bits); i++) + memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++) + for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++) + memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j], + sizeof(mv->class0_fr[0][0])); + + for (i = 0; i < ARRAY_SIZE(mv->fr); i++) + memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++) + mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp; + + for (i = 0; i < ARRAY_SIZE(mv->hp); i++) + mv->hp[i] = frame_ctx->class0_fp_hp[i].hp; +} + +static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper, + struct vdec_vp9_slice_frame_ctx *frame_ctx) +{ + struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv; + u32 i, j, k; + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++) + memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i], + sizeof(frame_ctx_helper->tx8[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++) + memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i], + sizeof(frame_ctx_helper->tx16[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++) + memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i], + sizeof(frame_ctx_helper->tx32[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++) + for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++) + for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++) + vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx, + frame_ctx_helper); + + memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++) + memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i], + sizeof(frame_ctx_helper->inter_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++) + memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i], + sizeof(frame_ctx_helper->interp_filter[0])); + + memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter, + sizeof(frame_ctx_helper->is_inter)); + + memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode, + sizeof(frame_ctx_helper->comp_mode)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++) + memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i], + sizeof(frame_ctx_helper->single_ref[0])); + + memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref, + sizeof(frame_ctx_helper->comp_ref)); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++) + memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i], + sizeof(frame_ctx_helper->y_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++) + memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i], + sizeof(frame_ctx_helper->uv_mode[0])); + + for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++) + memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i], + sizeof(frame_ctx_helper->partition[0])); + + memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint)); + + for (i = 0; i < ARRAY_SIZE(mv->sign); i++) + frame_ctx->sign_classes[i].sign = mv->sign[i]; + + for (i = 0; i < ARRAY_SIZE(mv->classes); i++) + memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i], + sizeof(mv->classes[i])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++) + frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i]; + + for (i = 0; i < ARRAY_SIZE(mv->bits); i++) + memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++) + for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++) + memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j], + sizeof(mv->class0_fr[0][0])); + + for (i = 0; i < ARRAY_SIZE(mv->fr); i++) + memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0])); + + for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++) + frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i]; + + for (i = 0; i < ARRAY_SIZE(mv->hp); i++) + frame_ctx->class0_fp_hp[i].hp = mv->hp[i]; +} + +static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_vsi *vsi) +{ + struct vdec_vp9_slice_frame_ctx *pre_frame_ctx; + struct v4l2_vp9_frame_context *pre_frame_ctx_helper; + struct vdec_vp9_slice_frame_ctx *frame_ctx; + struct vdec_vp9_slice_frame_counts *counts; + struct v4l2_vp9_frame_symbol_counts *counts_helper; + struct vdec_vp9_slice_uncompressed_header *uh; + bool frame_is_intra; + bool use_128; + + uh = &vsi->frame.uh; + pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx]; + pre_frame_ctx_helper = &instance->frame_ctx_helper; + frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va; + counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va; + counts_helper = &instance->counts_helper; + + if (!uh->refresh_frame_context) + return 0; + + if (!uh->frame_parallel_decoding_mode) { + vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper); + + frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only; + /* check default prob */ + if (!instance->dirty[uh->frame_context_idx]) + vdec_vp9_slice_framectx_map_helper(frame_is_intra, + vdec_vp9_slice_default_frame_ctx, + frame_ctx, + pre_frame_ctx_helper); + else + vdec_vp9_slice_framectx_map_helper(frame_is_intra, + pre_frame_ctx, + frame_ctx, + pre_frame_ctx_helper); + + use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type; + v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper, + counts_helper, + use_128, + frame_is_intra); + if (!frame_is_intra) + v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper, + counts_helper, + V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE, + vsi->frame.uh.interpolation_filter, + vsi->frame.ch.tx_mode, + vsi->frame.uh.allow_high_precision_mv ? + V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0); + vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx); + } else { + memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx)); + } + + instance->dirty[uh->frame_context_idx] = 1; + + return 0; +} + +static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi; + + vsi = &pfc->vsi; + memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); + + mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n", + pfc->seq, vsi->state.crc[0], vsi->state.crc[1], + vsi->state.crc[2], vsi->state.crc[3]); + mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n", + pfc->seq, vsi->state.crc[4], vsi->state.crc[5], + vsi->state.crc[6], vsi->state.crc[7]); + + vdec_vp9_slice_update_prob(instance, vsi); + + instance->width = vsi->frame.uh.frame_width; + instance->height = vsi->frame.uh.frame_height; + instance->frame_type = vsi->frame.uh.frame_type; + instance->show_frame = vsi->frame.uh.show_frame; + + return 0; +} + +static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi; + + vsi = &pfc->vsi; + memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); + + mtk_vdec_debug(instance->ctx, "Frame %u LAT CRC 0x%08x %lx %lx\n", + pfc->seq, vsi->state.crc[0], + (unsigned long)vsi->trans.dma_addr, + (unsigned long)vsi->trans.dma_addr_end); + + /* buffer full, need to re-decode */ + if (vsi->state.full) { + /* buffer not enough */ + if (vsi->trans.dma_addr_end - vsi->trans.dma_addr == + vsi->ube.size) + return -ENOMEM; + return -EAGAIN; + } + + vdec_vp9_slice_update_prob(instance, vsi); + + instance->width = vsi->frame.uh.frame_width; + instance->height = vsi->frame.uh.frame_height; + instance->frame_type = vsi->frame.uh.frame_type; + instance->show_frame = vsi->frame.uh.show_frame; + + return 0; +} + +static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_v4l2_buffer *dst; + + dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx); + if (!dst) + return -EINVAL; + + v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true); + return 0; +} + +static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc, + struct vdec_vp9_slice_vsi *vsi, + struct vdec_fb *fb, + struct vdec_lat_buf *lat_buf) +{ + struct vb2_buffer *vb; + struct vb2_queue *vq; + struct vdec_vp9_slice_reference *ref; + int plane; + int size; + int w; + int h; + int i; + + plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes; + w = vsi->frame.uh.frame_width; + h = vsi->frame.uh.frame_height; + size = ALIGN(w, 64) * ALIGN(h, 64); + + /* frame buffer */ + vsi->fb.y.dma_addr = fb->base_y.dma_addr; + if (plane == 1) + vsi->fb.c.dma_addr = fb->base_y.dma_addr + size; + else + vsi->fb.c.dma_addr = fb->base_c.dma_addr; + + /* reference buffers */ + vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + if (!vq) + return -EINVAL; + + /* get current output buffer */ + vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf; + if (!vb) + return -EINVAL; + + /* update internal buffer's width/height */ + for (i = 0; i < vq->num_buffers; i++) { + if (vb == vq->bufs[i]) { + instance->dpb[i].width = w; + instance->dpb[i].height = h; + break; + } + } + + /* + * get buffer's width/height from instance + * get buffer address from vb2buf + */ + for (i = 0; i < 3; i++) { + ref = &vsi->frame.ref[i]; + vb = vb2_find_buffer(vq, pfc->ref_idx[i]); + if (!vb) { + ref->frame_width = w; + ref->frame_height = h; + memset(&vsi->ref[i], 0, sizeof(vsi->ref[i])); + } else { + int idx = vb->index; + + ref->frame_width = instance->dpb[idx].width; + ref->frame_height = instance->dpb[idx].height; + vsi->ref[i].y.dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 0); + if (plane == 1) + vsi->ref[i].c.dma_addr = + vsi->ref[i].y.dma_addr + size; + else + vsi->ref[i].c.dma_addr = + vb2_dma_contig_plane_dma_addr(vb, 1); + } + } + + return 0; +} + +static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance, + struct vdec_vp9_slice_pfc *pfc, + struct vdec_vp9_slice_vsi *vsi, + struct mtk_vcodec_mem *bs, + struct vdec_fb *fb) +{ + int i; + + vsi->bs.buf.dma_addr = bs->dma_addr; + vsi->bs.buf.size = bs->size; + vsi->bs.frame.dma_addr = bs->dma_addr; + vsi->bs.frame.size = bs->size; + + for (i = 0; i < 2; i++) { + vsi->mv[i].dma_addr = instance->mv[i].dma_addr; + vsi->mv[i].size = instance->mv[i].size; + } + for (i = 0; i < 2; i++) { + vsi->seg[i].dma_addr = instance->seg[i].dma_addr; + vsi->seg[i].size = instance->seg[i].size; + } + vsi->tile.dma_addr = instance->tile.dma_addr; + vsi->tile.size = instance->tile.size; + vsi->prob.dma_addr = instance->prob.dma_addr; + vsi->prob.size = instance->prob.size; + vsi->counts.dma_addr = instance->counts.dma_addr; + vsi->counts.size = instance->counts.size; + + vsi->row_info.buf = 0; + vsi->row_info.size = 0; + + vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL); +} + +static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance, + struct vdec_fb *fb, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; + int ret; + + vdec_vp9_slice_setup_state(vsi); + + ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf); + if (ret) + goto err; + + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]); + + return 0; + +err: + return ret; +} + +static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance, + struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi = &pfc->vsi; + int ret; + + ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_pfc(instance, pfc); + if (ret) + goto err; + + ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi); + if (ret) + goto err; + + vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb); + vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]); + + ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi); + if (ret) + goto err; + + ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs); + if (ret) + goto err; + + return 0; + +err: + return ret; +} + +static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance, + struct vdec_lat_buf *lat_buf, + struct vdec_vp9_slice_pfc *pfc) +{ + struct vdec_vp9_slice_vsi *vsi; + + vsi = &pfc->vsi; + memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state)); + + mtk_vdec_debug(instance->ctx, "Frame %u Y_CRC %08x %08x %08x %08x\n", + pfc->seq, vsi->state.crc[0], vsi->state.crc[1], + vsi->state.crc[2], vsi->state.crc[3]); + mtk_vdec_debug(instance->ctx, "Frame %u C_CRC %08x %08x %08x %08x\n", + pfc->seq, vsi->state.crc[4], vsi->state.crc[5], + vsi->state.crc[6], vsi->state.crc[7]); + + return 0; +} + +static int vdec_vp9_slice_init(struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_vp9_slice_instance *instance; + struct vdec_vp9_slice_init_vsi *vsi; + int ret; + + instance = kzalloc(sizeof(*instance), GFP_KERNEL); + if (!instance) + return -ENOMEM; + + instance->ctx = ctx; + instance->vpu.id = SCP_IPI_VDEC_LAT; + instance->vpu.core_id = SCP_IPI_VDEC_CORE; + instance->vpu.ctx = ctx; + instance->vpu.codec_type = ctx->current_codec; + + ret = vpu_dec_init(&instance->vpu); + if (ret) { + mtk_vdec_err(ctx, "failed to init vpu dec, ret %d\n", ret); + goto error_vpu_init; + } + + /* init vsi and global flags */ + + vsi = instance->vpu.vsi; + if (!vsi) { + mtk_vdec_err(ctx, "failed to get VP9 vsi\n"); + ret = -EINVAL; + goto error_vsi; + } + instance->init_vsi = vsi; + instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler, + (u32)vsi->core_vsi); + if (!instance->core_vsi) { + mtk_vdec_err(ctx, "failed to get VP9 core vsi\n"); + ret = -EINVAL; + goto error_vsi; + } + + instance->irq = 1; + + ret = vdec_vp9_slice_init_default_frame_ctx(instance); + if (ret) + goto error_default_frame_ctx; + + ctx->drv_handle = instance; + + return 0; + +error_default_frame_ctx: +error_vsi: + vpu_dec_deinit(&instance->vpu); +error_vpu_init: + kfree(instance); + return ret; +} + +static void vdec_vp9_slice_deinit(void *h_vdec) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + + if (!instance) + return; + + vpu_dec_deinit(&instance->vpu); + vdec_vp9_slice_free_working_buffer(instance); + vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx); + kfree(instance); +} + +static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + + mtk_vdec_debug(instance->ctx, "flush ...\n"); + if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE) + vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue); + return vpu_dec_reset(&instance->vpu); +} + +static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance) +{ + struct mtk_vcodec_dec_ctx *ctx = instance->ctx; + unsigned int data[3]; + + mtk_vdec_debug(instance->ctx, "w %u h %u\n", ctx->picinfo.pic_w, ctx->picinfo.pic_h); + + data[0] = ctx->picinfo.pic_w; + data[1] = ctx->picinfo.pic_h; + data[2] = ctx->capture_fourcc; + vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO); + + ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64); + ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64); + ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0]; + ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1]; +} + +static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance, + unsigned int *dpb_sz) +{ + /* refer VP9 specification */ + *dpb_sz = 9; +} + +static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + + switch (type) { + case GET_PARAM_PIC_INFO: + vdec_vp9_slice_get_pic_info(instance); + break; + case GET_PARAM_DPB_SIZE: + vdec_vp9_slice_get_dpb_size(instance, out); + break; + case GET_PARAM_CROP_INFO: + mtk_vdec_debug(instance->ctx, "No need to get vp9 crop information."); + break; + default: + mtk_vdec_err(instance->ctx, "invalid get parameter type=%d\n", type); + return -EINVAL; + } + + return 0; +} + +static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc; + struct vdec_vp9_slice_vsi *vsi; + struct mtk_vcodec_dec_ctx *ctx; + int ret; + + if (!instance || !instance->ctx) + return -EINVAL; + ctx = instance->ctx; + + /* bs NULL means flush decoder */ + if (!bs) + return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) + return -EBUSY; + + vsi = &pfc->vsi; + + ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc); + if (ret) { + mtk_vdec_err(ctx, "Failed to setup VP9 single ret %d\n", ret); + return ret; + } + vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi); + + ret = vpu_dec_start(&instance->vpu, NULL, 0); + if (ret) { + mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret); + return ret; + } + + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vdec_err(ctx, "VP9 decode timeout %d\n", ret); + WRITE_ONCE(instance->vsi->state.timeout, 1); + } + + vpu_dec_end(&instance->vpu); + + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); + ret = vdec_vp9_slice_update_single(instance, pfc); + if (ret) { + mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret); + return ret; + } + + instance->ctx->decoded_frame_cnt++; + return 0; +} + +static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + struct vdec_lat_buf *lat_buf; + struct vdec_vp9_slice_pfc *pfc; + struct vdec_vp9_slice_vsi *vsi; + struct mtk_vcodec_dec_ctx *ctx; + int ret; + + if (!instance || !instance->ctx) + return -EINVAL; + ctx = instance->ctx; + + /* init msgQ for the first time */ + if (vdec_msg_queue_init(&ctx->msg_queue, ctx, + vdec_vp9_slice_core_decode, + sizeof(*pfc))) + return -ENOMEM; + + /* bs NULL means flush decoder */ + if (!bs) + return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg); + + lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx); + if (!lat_buf) { + mtk_vdec_debug(ctx, "Failed to get VP9 lat buf\n"); + return -EAGAIN; + } + pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data; + if (!pfc) { + ret = -EINVAL; + goto err_free_fb_out; + } + vsi = &pfc->vsi; + + ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc); + if (ret) { + mtk_vdec_err(ctx, "Failed to setup VP9 lat ret %d\n", ret); + goto err_free_fb_out; + } + vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi); + + ret = vpu_dec_start(&instance->vpu, NULL, 0); + if (ret) { + mtk_vdec_err(ctx, "Failed to dec VP9 ret %d\n", ret); + goto err_free_fb_out; + } + + if (instance->irq) { + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vdec_err(ctx, "VP9 decode timeout %d pic %d\n", ret, pfc->seq); + WRITE_ONCE(instance->vsi->state.timeout, 1); + } + vpu_dec_end(&instance->vpu); + } + + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); + ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc); + + /* LAT trans full, no more UBE or decode timeout */ + if (ret) { + mtk_vdec_err(ctx, "VP9 decode error: %d\n", ret); + goto err_free_fb_out; + } + + mtk_vdec_debug(ctx, "lat dma addr: 0x%lx 0x%lx\n", + (unsigned long)pfc->vsi.trans.dma_addr, + (unsigned long)pfc->vsi.trans.dma_addr_end); + + vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, + vsi->trans.dma_addr_end + + ctx->msg_queue.wdma_addr.dma_addr); + vdec_msg_queue_qbuf(&ctx->msg_queue.core_ctx, lat_buf); + + return 0; +err_free_fb_out: + vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); + return ret; +} + +static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + struct vdec_vp9_slice_instance *instance = h_vdec; + int ret; + + if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE) + ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg); + else + ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg); + + return ret; +} + +static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf) +{ + struct vdec_vp9_slice_instance *instance; + struct vdec_vp9_slice_pfc *pfc; + struct mtk_vcodec_dec_ctx *ctx = NULL; + struct vdec_fb *fb = NULL; + int ret = -EINVAL; + + if (!lat_buf) + goto err; + + pfc = lat_buf->private_data; + ctx = lat_buf->ctx; + if (!pfc || !ctx) + goto err; + + instance = ctx->drv_handle; + if (!instance) + goto err; + + fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx); + if (!fb) { + ret = -EBUSY; + goto err; + } + + ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc); + if (ret) { + mtk_vdec_err(ctx, "vdec_vp9_slice_setup_core\n"); + goto err; + } + vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi); + + ret = vpu_dec_core(&instance->vpu); + if (ret) { + mtk_vdec_err(ctx, "vpu_dec_core\n"); + goto err; + } + + if (instance->irq) { + ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, + WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE); + /* update remote vsi if decode timeout */ + if (ret) { + mtk_vdec_err(ctx, "VP9 core timeout pic %d\n", pfc->seq); + WRITE_ONCE(instance->core_vsi->state.timeout, 1); + } + vpu_dec_core_end(&instance->vpu); + } + + vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1); + ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc); + if (ret) { + mtk_vdec_err(ctx, "vdec_vp9_slice_update_core\n"); + goto err; + } + + pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; + mtk_vdec_debug(ctx, "core dma_addr_end 0x%lx\n", + (unsigned long)pfc->vsi.trans.dma_addr_end); + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); + ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req); + + return 0; + +err: + if (ctx && pfc) { + /* always update read pointer */ + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end); + + if (fb) + ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req); + } + return ret; +} + +const struct vdec_common_if vdec_vp9_slice_lat_if = { + .init = vdec_vp9_slice_init, + .decode = vdec_vp9_slice_decode, + .get_param = vdec_vp9_slice_get_param, + .deinit = vdec_vp9_slice_deinit, +}; diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_base.h b/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_base.h new file mode 100644 index 0000000000..f6abb93652 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_base.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + */ + +#ifndef _VDEC_DRV_BASE_ +#define _VDEC_DRV_BASE_ + +#include "vdec_drv_if.h" + +struct vdec_common_if { + /** + * (*init)() - initialize decode driver + * @ctx : [in] mtk v4l2 context + * @h_vdec : [out] driver handle + */ + int (*init)(struct mtk_vcodec_dec_ctx *ctx); + + /** + * (*decode)() - trigger decode + * @h_vdec : [in] driver handle + * @bs : [in] input bitstream + * @fb : [in] frame buffer to store decoded frame + * @res_chg : [out] resolution change happen + */ + int (*decode)(void *h_vdec, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg); + + /** + * (*get_param)() - get driver's parameter + * @h_vdec : [in] driver handle + * @type : [in] input parameter type + * @out : [out] buffer to store query result + */ + int (*get_param)(void *h_vdec, enum vdec_get_param_type type, + void *out); + + /** + * (*deinit)() - deinitialize driver. + * @h_vdec : [in] driver handle to be deinit + */ + void (*deinit)(void *h_vdec); +}; + +#endif diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_if.c new file mode 100644 index 0000000000..d0b459b160 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_if.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + * Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "vdec_drv_if.h" +#include "mtk_vcodec_dec.h" +#include "vdec_drv_base.h" +#include "mtk_vcodec_dec_pm.h" + +int vdec_if_init(struct mtk_vcodec_dec_ctx *ctx, unsigned int fourcc) +{ + enum mtk_vdec_hw_arch hw_arch = ctx->dev->vdec_pdata->hw_arch; + int ret = 0; + + switch (fourcc) { + case V4L2_PIX_FMT_H264_SLICE: + if (!ctx->dev->vdec_pdata->is_subdev_supported) { + ctx->dec_if = &vdec_h264_slice_if; + ctx->hw_id = MTK_VDEC_CORE; + } else { + ctx->dec_if = &vdec_h264_slice_multi_if; + ctx->hw_id = IS_VDEC_LAT_ARCH(hw_arch) ? MTK_VDEC_LAT0 : MTK_VDEC_CORE; + } + break; + case V4L2_PIX_FMT_H264: + ctx->dec_if = &vdec_h264_if; + ctx->hw_id = MTK_VDEC_CORE; + break; + case V4L2_PIX_FMT_VP8_FRAME: + ctx->dec_if = &vdec_vp8_slice_if; + ctx->hw_id = MTK_VDEC_CORE; + break; + case V4L2_PIX_FMT_VP8: + ctx->dec_if = &vdec_vp8_if; + ctx->hw_id = MTK_VDEC_CORE; + break; + case V4L2_PIX_FMT_VP9: + ctx->dec_if = &vdec_vp9_if; + ctx->hw_id = MTK_VDEC_CORE; + break; + case V4L2_PIX_FMT_VP9_FRAME: + ctx->dec_if = &vdec_vp9_slice_lat_if; + ctx->hw_id = IS_VDEC_LAT_ARCH(hw_arch) ? MTK_VDEC_LAT0 : MTK_VDEC_CORE; + break; + case V4L2_PIX_FMT_HEVC_SLICE: + ctx->dec_if = &vdec_hevc_slice_multi_if; + ctx->hw_id = MTK_VDEC_LAT0; + break; + case V4L2_PIX_FMT_AV1_FRAME: + ctx->dec_if = &vdec_av1_slice_lat_if; + ctx->hw_id = MTK_VDEC_LAT0; + break; + default: + return -EINVAL; + } + + mtk_vcodec_dec_enable_hardware(ctx, ctx->hw_id); + ret = ctx->dec_if->init(ctx); + mtk_vcodec_dec_disable_hardware(ctx, ctx->hw_id); + + return ret; +} + +int vdec_if_decode(struct mtk_vcodec_dec_ctx *ctx, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg) +{ + int ret = 0; + + if (bs) { + if ((bs->dma_addr & 63) != 0) { + mtk_v4l2_vdec_err(ctx, "bs dma_addr should 64 byte align"); + return -EINVAL; + } + } + + if (fb) { + if (((fb->base_y.dma_addr & 511) != 0) || + ((fb->base_c.dma_addr & 511) != 0)) { + mtk_v4l2_vdec_err(ctx, "frame buffer dma_addr should 512 byte align"); + return -EINVAL; + } + } + + if (!ctx->drv_handle) + return -EIO; + + mtk_vcodec_dec_enable_hardware(ctx, ctx->hw_id); + mtk_vcodec_set_curr_ctx(ctx->dev, ctx, ctx->hw_id); + ret = ctx->dec_if->decode(ctx->drv_handle, bs, fb, res_chg); + mtk_vcodec_set_curr_ctx(ctx->dev, NULL, ctx->hw_id); + mtk_vcodec_dec_disable_hardware(ctx, ctx->hw_id); + + return ret; +} + +int vdec_if_get_param(struct mtk_vcodec_dec_ctx *ctx, enum vdec_get_param_type type, + void *out) +{ + int ret = 0; + + if (!ctx->drv_handle) + return -EIO; + + mtk_vdec_lock(ctx); + ret = ctx->dec_if->get_param(ctx->drv_handle, type, out); + mtk_vdec_unlock(ctx); + + return ret; +} + +void vdec_if_deinit(struct mtk_vcodec_dec_ctx *ctx) +{ + if (!ctx->drv_handle) + return; + + mtk_vcodec_dec_enable_hardware(ctx, ctx->hw_id); + ctx->dec_if->deinit(ctx->drv_handle); + mtk_vcodec_dec_disable_hardware(ctx, ctx->hw_id); + + ctx->drv_handle = NULL; +} diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_if.h b/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_if.h new file mode 100644 index 0000000000..bfd297c968 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_drv_if.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + * Tiffany Lin <tiffany.lin@mediatek.com> + */ + +#ifndef _VDEC_DRV_IF_H_ +#define _VDEC_DRV_IF_H_ + +#include "mtk_vcodec_dec.h" + + +/** + * enum vdec_fb_status - decoder frame buffer status + * @FB_ST_NORMAL: initial state + * @FB_ST_DISPLAY: frame buffer is ready to be displayed + * @FB_ST_FREE: frame buffer is not used by decoder any more + */ +enum vdec_fb_status { + FB_ST_NORMAL = 0, + FB_ST_DISPLAY = (1 << 0), + FB_ST_FREE = (1 << 1) +}; + +/* For GET_PARAM_DISP_FRAME_BUFFER and GET_PARAM_FREE_FRAME_BUFFER, + * the caller does not own the returned buffer. The buffer will not be + * released before vdec_if_deinit. + * GET_PARAM_DISP_FRAME_BUFFER : get next displayable frame buffer, + * struct vdec_fb** + * GET_PARAM_FREE_FRAME_BUFFER : get non-referenced framebuffer, vdec_fb** + * GET_PARAM_PIC_INFO : get picture info, struct vdec_pic_info* + * GET_PARAM_CROP_INFO : get crop info, struct v4l2_crop* + * GET_PARAM_DPB_SIZE : get dpb size, unsigned int* + */ +enum vdec_get_param_type { + GET_PARAM_DISP_FRAME_BUFFER, + GET_PARAM_FREE_FRAME_BUFFER, + GET_PARAM_PIC_INFO, + GET_PARAM_CROP_INFO, + GET_PARAM_DPB_SIZE +}; + +/** + * struct vdec_fb_node - decoder frame buffer node + * @list : list to hold this node + * @fb : point to frame buffer (vdec_fb), fb could point to frame buffer and + * working buffer this is for maintain buffers in different state + */ +struct vdec_fb_node { + struct list_head list; + struct vdec_fb *fb; +}; + +extern const struct vdec_common_if vdec_h264_if; +extern const struct vdec_common_if vdec_h264_slice_if; +extern const struct vdec_common_if vdec_h264_slice_multi_if; +extern const struct vdec_common_if vdec_vp8_if; +extern const struct vdec_common_if vdec_vp8_slice_if; +extern const struct vdec_common_if vdec_vp9_if; +extern const struct vdec_common_if vdec_vp9_slice_lat_if; +extern const struct vdec_common_if vdec_hevc_slice_multi_if; +extern const struct vdec_common_if vdec_av1_slice_lat_if; + +/** + * vdec_if_init() - initialize decode driver + * @ctx : [in] v4l2 context + * @fourcc : [in] video format fourcc, V4L2_PIX_FMT_H264/VP8/VP9.. + */ +int vdec_if_init(struct mtk_vcodec_dec_ctx *ctx, unsigned int fourcc); + +/** + * vdec_if_deinit() - deinitialize decode driver + * @ctx : [in] v4l2 context + * + */ +void vdec_if_deinit(struct mtk_vcodec_dec_ctx *ctx); + +/** + * vdec_if_decode() - trigger decode + * @ctx : [in] v4l2 context + * @bs : [in] input bitstream + * @fb : [in] frame buffer to store decoded frame, when null means parse + * header only + * @res_chg : [out] resolution change happens if current bs have different + * picture width/height + * Note: To flush the decoder when reaching EOF, set input bitstream as NULL. + * + * Return: 0 on success. -EIO on unrecoverable error. + */ +int vdec_if_decode(struct mtk_vcodec_dec_ctx *ctx, struct mtk_vcodec_mem *bs, + struct vdec_fb *fb, bool *res_chg); + +/** + * vdec_if_get_param() - get driver's parameter + * @ctx : [in] v4l2 context + * @type : [in] input parameter type + * @out : [out] buffer to store query result + */ +int vdec_if_get_param(struct mtk_vcodec_dec_ctx *ctx, enum vdec_get_param_type type, + void *out); + +#endif diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_ipi_msg.h b/drivers/media/platform/mediatek/vcodec/decoder/vdec_ipi_msg.h new file mode 100644 index 0000000000..47070be2a9 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_ipi_msg.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + */ + +#ifndef _VDEC_IPI_MSG_H_ +#define _VDEC_IPI_MSG_H_ + +/* + * enum vdec_ipi_msgid - message id between AP and VPU + * @AP_IPIMSG_XXX : AP to VPU cmd message id + * @VPU_IPIMSG_XXX_ACK : VPU ack AP cmd message id + */ +enum vdec_ipi_msgid { + AP_IPIMSG_DEC_INIT = 0xA000, + AP_IPIMSG_DEC_START = 0xA001, + AP_IPIMSG_DEC_END = 0xA002, + AP_IPIMSG_DEC_DEINIT = 0xA003, + AP_IPIMSG_DEC_RESET = 0xA004, + AP_IPIMSG_DEC_CORE = 0xA005, + AP_IPIMSG_DEC_CORE_END = 0xA006, + AP_IPIMSG_DEC_GET_PARAM = 0xA007, + + VPU_IPIMSG_DEC_INIT_ACK = 0xB000, + VPU_IPIMSG_DEC_START_ACK = 0xB001, + VPU_IPIMSG_DEC_END_ACK = 0xB002, + VPU_IPIMSG_DEC_DEINIT_ACK = 0xB003, + VPU_IPIMSG_DEC_RESET_ACK = 0xB004, + VPU_IPIMSG_DEC_CORE_ACK = 0xB005, + VPU_IPIMSG_DEC_CORE_END_ACK = 0xB006, + VPU_IPIMSG_DEC_GET_PARAM_ACK = 0xB007, +}; + +/** + * struct vdec_ap_ipi_cmd - generic AP to VPU ipi command format + * @msg_id : vdec_ipi_msgid + * @vpu_inst_addr : VPU decoder instance address. Used if ABI version < 2. + * @inst_id : instance ID. Used if the ABI version >= 2. + * @codec_type : codec fourcc + * @reserved : reserved param + */ +struct vdec_ap_ipi_cmd { + uint32_t msg_id; + union { + uint32_t vpu_inst_addr; + uint32_t inst_id; + }; + u32 codec_type; + u32 reserved; +}; + +/** + * struct vdec_vpu_ipi_ack - generic VPU to AP ipi command format + * @msg_id : vdec_ipi_msgid + * @status : VPU exeuction result + * @ap_inst_addr : AP video decoder instance address + */ +struct vdec_vpu_ipi_ack { + uint32_t msg_id; + int32_t status; + uint64_t ap_inst_addr; +}; + +/** + * struct vdec_ap_ipi_init - for AP_IPIMSG_DEC_INIT + * @msg_id : AP_IPIMSG_DEC_INIT + * @codec_type : codec fourcc + * @ap_inst_addr : AP video decoder instance address + */ +struct vdec_ap_ipi_init { + uint32_t msg_id; + u32 codec_type; + uint64_t ap_inst_addr; +}; + +/** + * struct vdec_ap_ipi_dec_start - for AP_IPIMSG_DEC_START + * @msg_id : AP_IPIMSG_DEC_START + * @vpu_inst_addr : VPU decoder instance address. Used if ABI version < 2. + * @inst_id : instance ID. Used if the ABI version >= 2. + * @data : Header info + * H264 decoder [0]:buf_sz [1]:nal_start + * VP8 decoder [0]:width/height + * VP9 decoder [0]:profile, [1][2] width/height + * @codec_type : codec fourcc + */ +struct vdec_ap_ipi_dec_start { + uint32_t msg_id; + union { + uint32_t vpu_inst_addr; + uint32_t inst_id; + }; + uint32_t data[3]; + u32 codec_type; +}; + +/** + * struct vdec_vpu_ipi_init_ack - for VPU_IPIMSG_DEC_INIT_ACK + * @msg_id : VPU_IPIMSG_DEC_INIT_ACK + * @status : VPU exeuction result + * @ap_inst_addr : AP vcodec_vpu_inst instance address + * @vpu_inst_addr : VPU decoder instance address + * @vdec_abi_version: ABI version of the firmware. Kernel can use it to + * ensure that it is compatible with the firmware. + * This field is not valid for MT8173 and must not be + * accessed for this chip. + * @inst_id : instance ID. Valid only if the ABI version >= 2. + */ +struct vdec_vpu_ipi_init_ack { + uint32_t msg_id; + int32_t status; + uint64_t ap_inst_addr; + uint32_t vpu_inst_addr; + uint32_t vdec_abi_version; + uint32_t inst_id; +}; + +/** + * struct vdec_ap_ipi_get_param - for AP_IPIMSG_DEC_GET_PARAM + * @msg_id : AP_IPIMSG_DEC_GET_PARAM + * @inst_id : instance ID. Used if the ABI version >= 2. + * @data : picture information + * @param_type : get param type + * @codec_type : Codec fourcc + */ +struct vdec_ap_ipi_get_param { + u32 msg_id; + u32 inst_id; + u32 data[4]; + u32 param_type; + u32 codec_type; +}; + +/** + * struct vdec_vpu_ipi_get_param_ack - for VPU_IPIMSG_DEC_GET_PARAM_ACK + * @msg_id : VPU_IPIMSG_DEC_GET_PARAM_ACK + * @status : VPU execution result + * @ap_inst_addr : AP vcodec_vpu_inst instance address + * @data : picture information from SCP. + * @param_type : get param type + * @reserved : reserved param + */ +struct vdec_vpu_ipi_get_param_ack { + u32 msg_id; + s32 status; + u64 ap_inst_addr; + u32 data[4]; + u32 param_type; + u32 reserved; +}; + +#endif diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_msg_queue.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec_msg_queue.c new file mode 100644 index 0000000000..f283c4703d --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_msg_queue.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#include <linux/freezer.h> +#include <linux/interrupt.h> +#include <linux/kthread.h> + +#include "mtk_vcodec_dec_drv.h" +#include "mtk_vcodec_dec_pm.h" +#include "vdec_msg_queue.h" + +#define VDEC_MSG_QUEUE_TIMEOUT_MS 1500 + +/* the size used to store lat slice header information */ +#define VDEC_LAT_SLICE_HEADER_SZ (640 * SZ_1K) + +/* the size used to store avc error information */ +#define VDEC_ERR_MAP_SZ_AVC (17 * SZ_1K) + +#define VDEC_RD_MV_BUFFER_SZ (((SZ_4K * 2304 >> 4) + SZ_1K) << 1) +#define VDEC_LAT_TILE_SZ (64 * V4L2_AV1_MAX_TILE_COUNT) + +/* core will read the trans buffer which decoded by lat to decode again. + * The trans buffer size of FHD and 4K bitstreams are different. + */ +static int vde_msg_queue_get_trans_size(int width, int height) +{ + if (width > 1920 || height > 1088) + return 30 * SZ_1M; + else + return 6 * SZ_1M; +} + +void vdec_msg_queue_init_ctx(struct vdec_msg_queue_ctx *ctx, int hardware_index) +{ + init_waitqueue_head(&ctx->ready_to_use); + INIT_LIST_HEAD(&ctx->ready_queue); + spin_lock_init(&ctx->ready_lock); + ctx->ready_num = 0; + ctx->hardware_index = hardware_index; +} + +static struct list_head *vdec_get_buf_list(int hardware_index, struct vdec_lat_buf *buf) +{ + switch (hardware_index) { + case MTK_VDEC_CORE: + return &buf->core_list; + case MTK_VDEC_LAT0: + return &buf->lat_list; + default: + return NULL; + } +} + +static void vdec_msg_queue_inc(struct vdec_msg_queue *msg_queue, int hardware_index) +{ + if (hardware_index == MTK_VDEC_CORE) + atomic_inc(&msg_queue->core_list_cnt); + else + atomic_inc(&msg_queue->lat_list_cnt); +} + +static void vdec_msg_queue_dec(struct vdec_msg_queue *msg_queue, int hardware_index) +{ + if (hardware_index == MTK_VDEC_CORE) + atomic_dec(&msg_queue->core_list_cnt); + else + atomic_dec(&msg_queue->lat_list_cnt); +} + +int vdec_msg_queue_qbuf(struct vdec_msg_queue_ctx *msg_ctx, struct vdec_lat_buf *buf) +{ + struct list_head *head; + + head = vdec_get_buf_list(msg_ctx->hardware_index, buf); + if (!head) { + mtk_v4l2_vdec_err(buf->ctx, "fail to qbuf: %d", msg_ctx->hardware_index); + return -EINVAL; + } + + spin_lock(&msg_ctx->ready_lock); + list_add_tail(head, &msg_ctx->ready_queue); + msg_ctx->ready_num++; + + vdec_msg_queue_inc(&buf->ctx->msg_queue, msg_ctx->hardware_index); + if (msg_ctx->hardware_index != MTK_VDEC_CORE) { + wake_up_all(&msg_ctx->ready_to_use); + } else { + if (!(buf->ctx->msg_queue.status & CONTEXT_LIST_QUEUED)) { + queue_work(buf->ctx->dev->core_workqueue, &buf->ctx->msg_queue.core_work); + buf->ctx->msg_queue.status |= CONTEXT_LIST_QUEUED; + } + } + + mtk_v4l2_vdec_dbg(3, buf->ctx, "enqueue buf type: %d addr: 0x%p num: %d", + msg_ctx->hardware_index, buf, msg_ctx->ready_num); + spin_unlock(&msg_ctx->ready_lock); + + return 0; +} + +static bool vdec_msg_queue_wait_event(struct vdec_msg_queue_ctx *msg_ctx) +{ + int ret; + + ret = wait_event_timeout(msg_ctx->ready_to_use, + !list_empty(&msg_ctx->ready_queue), + msecs_to_jiffies(VDEC_MSG_QUEUE_TIMEOUT_MS)); + if (!ret) + return false; + + return true; +} + +struct vdec_lat_buf *vdec_msg_queue_dqbuf(struct vdec_msg_queue_ctx *msg_ctx) +{ + struct vdec_lat_buf *buf; + struct list_head *head; + int ret; + + spin_lock(&msg_ctx->ready_lock); + if (list_empty(&msg_ctx->ready_queue)) { + spin_unlock(&msg_ctx->ready_lock); + + if (msg_ctx->hardware_index == MTK_VDEC_CORE) + return NULL; + + ret = vdec_msg_queue_wait_event(msg_ctx); + if (!ret) + return NULL; + spin_lock(&msg_ctx->ready_lock); + } + + if (msg_ctx->hardware_index == MTK_VDEC_CORE) + buf = list_first_entry(&msg_ctx->ready_queue, + struct vdec_lat_buf, core_list); + else + buf = list_first_entry(&msg_ctx->ready_queue, + struct vdec_lat_buf, lat_list); + + head = vdec_get_buf_list(msg_ctx->hardware_index, buf); + if (!head) { + spin_unlock(&msg_ctx->ready_lock); + mtk_v4l2_vdec_err(buf->ctx, "fail to dqbuf: %d", msg_ctx->hardware_index); + return NULL; + } + list_del(head); + vdec_msg_queue_dec(&buf->ctx->msg_queue, msg_ctx->hardware_index); + + msg_ctx->ready_num--; + mtk_v4l2_vdec_dbg(3, buf->ctx, "dqueue buf type:%d addr: 0x%p num: %d", + msg_ctx->hardware_index, buf, msg_ctx->ready_num); + spin_unlock(&msg_ctx->ready_lock); + + return buf; +} + +void vdec_msg_queue_update_ube_rptr(struct vdec_msg_queue *msg_queue, uint64_t ube_rptr) +{ + spin_lock(&msg_queue->lat_ctx.ready_lock); + msg_queue->wdma_rptr_addr = ube_rptr; + mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "update ube rprt (0x%llx)", ube_rptr); + spin_unlock(&msg_queue->lat_ctx.ready_lock); +} + +void vdec_msg_queue_update_ube_wptr(struct vdec_msg_queue *msg_queue, uint64_t ube_wptr) +{ + spin_lock(&msg_queue->lat_ctx.ready_lock); + msg_queue->wdma_wptr_addr = ube_wptr; + mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "update ube wprt: (0x%llx 0x%llx) offset: 0x%llx", + msg_queue->wdma_rptr_addr, msg_queue->wdma_wptr_addr, + ube_wptr); + spin_unlock(&msg_queue->lat_ctx.ready_lock); +} + +bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue) +{ + if (atomic_read(&msg_queue->lat_list_cnt) == NUM_BUFFER_COUNT) { + mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "wait buf full: (%d %d) ready:%d status:%d", + atomic_read(&msg_queue->lat_list_cnt), + atomic_read(&msg_queue->core_list_cnt), + msg_queue->lat_ctx.ready_num, msg_queue->status); + return true; + } + + msg_queue->flush_done = false; + vdec_msg_queue_qbuf(&msg_queue->core_ctx, &msg_queue->empty_lat_buf); + wait_event(msg_queue->core_dec_done, msg_queue->flush_done); + + mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "flush done => ready_num:%d status:%d list(%d %d)", + msg_queue->lat_ctx.ready_num, msg_queue->status, + atomic_read(&msg_queue->lat_list_cnt), + atomic_read(&msg_queue->core_list_cnt)); + + return false; +} + +void vdec_msg_queue_deinit(struct vdec_msg_queue *msg_queue, + struct mtk_vcodec_dec_ctx *ctx) +{ + struct vdec_lat_buf *lat_buf; + struct mtk_vcodec_mem *mem; + int i; + + mem = &msg_queue->wdma_addr; + if (mem->va) + mtk_vcodec_mem_free(ctx, mem); + for (i = 0; i < NUM_BUFFER_COUNT; i++) { + lat_buf = &msg_queue->lat_buf[i]; + + mem = &lat_buf->wdma_err_addr; + if (mem->va) + mtk_vcodec_mem_free(ctx, mem); + + mem = &lat_buf->slice_bc_addr; + if (mem->va) + mtk_vcodec_mem_free(ctx, mem); + + mem = &lat_buf->rd_mv_addr; + if (mem->va) + mtk_vcodec_mem_free(ctx, mem); + + mem = &lat_buf->tile_addr; + if (mem->va) + mtk_vcodec_mem_free(ctx, mem); + + kfree(lat_buf->private_data); + lat_buf->private_data = NULL; + } + + if (msg_queue->wdma_addr.size) + cancel_work_sync(&msg_queue->core_work); +} + +static void vdec_msg_queue_core_work(struct work_struct *work) +{ + struct vdec_msg_queue *msg_queue = + container_of(work, struct vdec_msg_queue, core_work); + struct mtk_vcodec_dec_ctx *ctx = + container_of(msg_queue, struct mtk_vcodec_dec_ctx, msg_queue); + struct mtk_vcodec_dec_dev *dev = ctx->dev; + struct vdec_lat_buf *lat_buf; + + spin_lock(&msg_queue->core_ctx.ready_lock); + ctx->msg_queue.status &= ~CONTEXT_LIST_QUEUED; + spin_unlock(&msg_queue->core_ctx.ready_lock); + + lat_buf = vdec_msg_queue_dqbuf(&msg_queue->core_ctx); + if (!lat_buf) + return; + + if (lat_buf->is_last_frame) { + ctx->msg_queue.status = CONTEXT_LIST_DEC_DONE; + msg_queue->flush_done = true; + wake_up(&ctx->msg_queue.core_dec_done); + + return; + } + + ctx = lat_buf->ctx; + mtk_vcodec_dec_enable_hardware(ctx, MTK_VDEC_CORE); + mtk_vcodec_set_curr_ctx(dev, ctx, MTK_VDEC_CORE); + + lat_buf->core_decode(lat_buf); + + mtk_vcodec_set_curr_ctx(dev, NULL, MTK_VDEC_CORE); + mtk_vcodec_dec_disable_hardware(ctx, MTK_VDEC_CORE); + vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf); + + if (!(ctx->msg_queue.status & CONTEXT_LIST_QUEUED) && + atomic_read(&msg_queue->core_list_cnt)) { + spin_lock(&msg_queue->core_ctx.ready_lock); + ctx->msg_queue.status |= CONTEXT_LIST_QUEUED; + spin_unlock(&msg_queue->core_ctx.ready_lock); + queue_work(ctx->dev->core_workqueue, &msg_queue->core_work); + } +} + +int vdec_msg_queue_init(struct vdec_msg_queue *msg_queue, + struct mtk_vcodec_dec_ctx *ctx, core_decode_cb_t core_decode, + int private_size) +{ + struct vdec_lat_buf *lat_buf; + int i, err; + + /* already init msg queue */ + if (msg_queue->wdma_addr.size) + return 0; + + vdec_msg_queue_init_ctx(&msg_queue->lat_ctx, MTK_VDEC_LAT0); + vdec_msg_queue_init_ctx(&msg_queue->core_ctx, MTK_VDEC_CORE); + INIT_WORK(&msg_queue->core_work, vdec_msg_queue_core_work); + + atomic_set(&msg_queue->lat_list_cnt, 0); + atomic_set(&msg_queue->core_list_cnt, 0); + init_waitqueue_head(&msg_queue->core_dec_done); + msg_queue->status = CONTEXT_LIST_EMPTY; + + msg_queue->wdma_addr.size = + vde_msg_queue_get_trans_size(ctx->picinfo.buf_w, + ctx->picinfo.buf_h); + err = mtk_vcodec_mem_alloc(ctx, &msg_queue->wdma_addr); + if (err) { + mtk_v4l2_vdec_err(ctx, "failed to allocate wdma_addr buf"); + msg_queue->wdma_addr.size = 0; + return -ENOMEM; + } + msg_queue->wdma_rptr_addr = msg_queue->wdma_addr.dma_addr; + msg_queue->wdma_wptr_addr = msg_queue->wdma_addr.dma_addr; + + msg_queue->empty_lat_buf.ctx = ctx; + msg_queue->empty_lat_buf.core_decode = NULL; + msg_queue->empty_lat_buf.is_last_frame = true; + + msg_queue->ctx = ctx; + for (i = 0; i < NUM_BUFFER_COUNT; i++) { + lat_buf = &msg_queue->lat_buf[i]; + + lat_buf->wdma_err_addr.size = VDEC_ERR_MAP_SZ_AVC; + err = mtk_vcodec_mem_alloc(ctx, &lat_buf->wdma_err_addr); + if (err) { + mtk_v4l2_vdec_err(ctx, "failed to allocate wdma_err_addr buf[%d]", i); + goto mem_alloc_err; + } + + lat_buf->slice_bc_addr.size = VDEC_LAT_SLICE_HEADER_SZ; + err = mtk_vcodec_mem_alloc(ctx, &lat_buf->slice_bc_addr); + if (err) { + mtk_v4l2_vdec_err(ctx, "failed to allocate wdma_addr buf[%d]", i); + goto mem_alloc_err; + } + + if (ctx->current_codec == V4L2_PIX_FMT_AV1_FRAME) { + lat_buf->rd_mv_addr.size = VDEC_RD_MV_BUFFER_SZ; + err = mtk_vcodec_mem_alloc(ctx, &lat_buf->rd_mv_addr); + if (err) { + mtk_v4l2_vdec_err(ctx, "failed to allocate rd_mv_addr buf[%d]", i); + goto mem_alloc_err; + } + + lat_buf->tile_addr.size = VDEC_LAT_TILE_SZ; + err = mtk_vcodec_mem_alloc(ctx, &lat_buf->tile_addr); + if (err) { + mtk_v4l2_vdec_err(ctx, "failed to allocate tile_addr buf[%d]", i); + goto mem_alloc_err; + } + } + + lat_buf->private_data = kzalloc(private_size, GFP_KERNEL); + if (!lat_buf->private_data) { + err = -ENOMEM; + goto mem_alloc_err; + } + + lat_buf->ctx = ctx; + lat_buf->core_decode = core_decode; + lat_buf->is_last_frame = false; + err = vdec_msg_queue_qbuf(&msg_queue->lat_ctx, lat_buf); + if (err) { + mtk_v4l2_vdec_err(ctx, "failed to qbuf buf[%d]", i); + goto mem_alloc_err; + } + } + return 0; + +mem_alloc_err: + vdec_msg_queue_deinit(msg_queue, ctx); + return err; +} diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_msg_queue.h b/drivers/media/platform/mediatek/vcodec/decoder/vdec_msg_queue.h new file mode 100644 index 0000000000..1d9beb9e4a --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_msg_queue.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021 MediaTek Inc. + * Author: Yunfei Dong <yunfei.dong@mediatek.com> + */ + +#ifndef _VDEC_MSG_QUEUE_H_ +#define _VDEC_MSG_QUEUE_H_ + +#include <linux/sched.h> +#include <linux/semaphore.h> +#include <linux/slab.h> +#include <media/videobuf2-v4l2.h> + +#define NUM_BUFFER_COUNT 3 + +struct vdec_lat_buf; +struct mtk_vcodec_dec_ctx; +struct mtk_vcodec_dec_dev; +typedef int (*core_decode_cb_t)(struct vdec_lat_buf *lat_buf); + +/** + * enum core_ctx_status - Context decode status for core hardwre. + * @CONTEXT_LIST_EMPTY: No buffer queued on core hardware(must always be 0) + * @CONTEXT_LIST_QUEUED: Buffer queued to core work list + * @CONTEXT_LIST_DEC_DONE: context decode done + */ +enum core_ctx_status { + CONTEXT_LIST_EMPTY = 0, + CONTEXT_LIST_QUEUED, + CONTEXT_LIST_DEC_DONE, +}; + +/** + * struct vdec_msg_queue_ctx - represents a queue for buffers ready to be processed + * @ready_to_use: ready used queue used to signalize when get a job queue + * @ready_queue: list of ready lat buffer queues + * @ready_lock: spin lock to protect the lat buffer usage + * @ready_num: number of buffers ready to be processed + * @hardware_index: hardware id that this queue is used for + */ +struct vdec_msg_queue_ctx { + wait_queue_head_t ready_to_use; + struct list_head ready_queue; + /* protect lat buffer */ + spinlock_t ready_lock; + int ready_num; + int hardware_index; +}; + +/** + * struct vdec_lat_buf - lat buffer message used to store lat info for core decode + * @wdma_err_addr: wdma error address used for lat hardware + * @slice_bc_addr: slice bc address used for lat hardware + * @rd_mv_addr: mv addr for av1 lat hardware output, core hardware input + * @tile_addr: tile buffer for av1 core input + * @ts_info: need to set timestamp from output to capture + * @src_buf_req: output buffer media request object + * + * @private_data: shared information used to lat and core hardware + * @ctx: mtk vcodec context information + * @core_decode: different codec use different decode callback function + * @lat_list: add lat buffer to lat head list + * @core_list: add lat buffer to core head list + * + * @is_last_frame: meaning this buffer is the last frame + */ +struct vdec_lat_buf { + struct mtk_vcodec_mem wdma_err_addr; + struct mtk_vcodec_mem slice_bc_addr; + struct mtk_vcodec_mem rd_mv_addr; + struct mtk_vcodec_mem tile_addr; + struct vb2_v4l2_buffer ts_info; + struct media_request *src_buf_req; + + void *private_data; + struct mtk_vcodec_dec_ctx *ctx; + core_decode_cb_t core_decode; + struct list_head lat_list; + struct list_head core_list; + + bool is_last_frame; +}; + +/** + * struct vdec_msg_queue - used to store lat buffer message + * @lat_buf: lat buffer used to store lat buffer information + * @wdma_addr: wdma address used for ube + * @wdma_rptr_addr: ube read point + * @wdma_wptr_addr: ube write point + * @core_work: core hardware work + * @lat_ctx: used to store lat buffer list + * @core_ctx: used to store core buffer list + * + * @lat_list_cnt: used to record each instance lat list count + * @core_list_cnt: used to record each instance core list count + * @flush_done: core flush done status + * @empty_lat_buf: the last lat buf used to flush decode + * @core_dec_done: core work queue decode done event + * @status: current context decode status for core hardware + * @ctx: mtk vcodec context information + */ +struct vdec_msg_queue { + struct vdec_lat_buf lat_buf[NUM_BUFFER_COUNT]; + + struct mtk_vcodec_mem wdma_addr; + u64 wdma_rptr_addr; + u64 wdma_wptr_addr; + + struct work_struct core_work; + struct vdec_msg_queue_ctx lat_ctx; + struct vdec_msg_queue_ctx core_ctx; + + atomic_t lat_list_cnt; + atomic_t core_list_cnt; + bool flush_done; + struct vdec_lat_buf empty_lat_buf; + wait_queue_head_t core_dec_done; + int status; + struct mtk_vcodec_dec_ctx *ctx; +}; + +/** + * vdec_msg_queue_init - init lat buffer information. + * @msg_queue: used to store the lat buffer information + * @ctx: v4l2 ctx + * @core_decode: core decode callback for each codec + * @private_size: the private data size used to share with core + * + * Return: returns 0 if init successfully, or fail. + */ +int vdec_msg_queue_init(struct vdec_msg_queue *msg_queue, + struct mtk_vcodec_dec_ctx *ctx, core_decode_cb_t core_decode, + int private_size); + +/** + * vdec_msg_queue_init_ctx - used to init msg queue context information. + * @ctx: message queue context + * @hardware_index: hardware index + */ +void vdec_msg_queue_init_ctx(struct vdec_msg_queue_ctx *ctx, int hardware_index); + +/** + * vdec_msg_queue_qbuf - enqueue lat buffer to queue list. + * @ctx: message queue context + * @buf: current lat buffer + * + * Return: returns 0 if qbuf successfully, or fail. + */ +int vdec_msg_queue_qbuf(struct vdec_msg_queue_ctx *ctx, struct vdec_lat_buf *buf); + +/** + * vdec_msg_queue_dqbuf - dequeue lat buffer from queue list. + * @ctx: message queue context + * + * Return: returns not null if dq successfully, or fail. + */ +struct vdec_lat_buf *vdec_msg_queue_dqbuf(struct vdec_msg_queue_ctx *ctx); + +/** + * vdec_msg_queue_update_ube_rptr - used to updata the ube read point. + * @msg_queue: used to store the lat buffer information + * @ube_rptr: current ube read point + */ +void vdec_msg_queue_update_ube_rptr(struct vdec_msg_queue *msg_queue, uint64_t ube_rptr); + +/** + * vdec_msg_queue_update_ube_wptr - used to updata the ube write point. + * @msg_queue: used to store the lat buffer information + * @ube_wptr: current ube write point + */ +void vdec_msg_queue_update_ube_wptr(struct vdec_msg_queue *msg_queue, uint64_t ube_wptr); + +/** + * vdec_msg_queue_wait_lat_buf_full - used to check whether all lat buffer + * in lat list. + * @msg_queue: used to store the lat buffer information + * + * Return: returns true if successfully, or fail. + */ +bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue); + +/** + * vdec_msg_queue_deinit - deinit lat buffer information. + * @msg_queue: used to store the lat buffer information + * @ctx: v4l2 ctx + */ +void vdec_msg_queue_deinit(struct vdec_msg_queue *msg_queue, + struct mtk_vcodec_dec_ctx *ctx); + +#endif diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c new file mode 100644 index 0000000000..82e57ae983 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + */ + +#include "mtk_vcodec_dec_drv.h" +#include "vdec_drv_if.h" +#include "vdec_ipi_msg.h" +#include "vdec_vpu_if.h" + +static void handle_init_ack_msg(const struct vdec_vpu_ipi_init_ack *msg) +{ + struct vdec_vpu_inst *vpu = (struct vdec_vpu_inst *) + (unsigned long)msg->ap_inst_addr; + + mtk_vdec_debug(vpu->ctx, "+ ap_inst_addr = 0x%llx", msg->ap_inst_addr); + + /* mapping VPU address to kernel virtual address */ + /* the content in vsi is initialized to 0 in VPU */ + vpu->vsi = mtk_vcodec_fw_map_dm_addr(vpu->ctx->dev->fw_handler, + msg->vpu_inst_addr); + vpu->inst_addr = msg->vpu_inst_addr; + + mtk_vdec_debug(vpu->ctx, "- vpu_inst_addr = 0x%x", vpu->inst_addr); + + /* Set default ABI version if dealing with unversioned firmware. */ + vpu->fw_abi_version = 0; + /* + * Instance ID is only used if ABI version >= 2. Initialize it with + * garbage by default. + */ + vpu->inst_id = 0xdeadbeef; + + /* VPU firmware does not contain a version field. */ + if (mtk_vcodec_fw_get_type(vpu->ctx->dev->fw_handler) == VPU) + return; + + /* Check firmware version. */ + vpu->fw_abi_version = msg->vdec_abi_version; + mtk_vdec_debug(vpu->ctx, "firmware version 0x%x\n", vpu->fw_abi_version); + switch (vpu->fw_abi_version) { + case 1: + break; + case 2: + vpu->inst_id = msg->inst_id; + break; + default: + mtk_vdec_err(vpu->ctx, "unhandled firmware version 0x%x\n", vpu->fw_abi_version); + vpu->failure = 1; + break; + } +} + +static void handle_get_param_msg_ack(const struct vdec_vpu_ipi_get_param_ack *msg) +{ + struct vdec_vpu_inst *vpu = (struct vdec_vpu_inst *) + (unsigned long)msg->ap_inst_addr; + + mtk_vdec_debug(vpu->ctx, "+ ap_inst_addr = 0x%llx", msg->ap_inst_addr); + + /* param_type is enum vdec_get_param_type */ + switch (msg->param_type) { + case GET_PARAM_PIC_INFO: + vpu->fb_sz[0] = msg->data[0]; + vpu->fb_sz[1] = msg->data[1]; + break; + default: + mtk_vdec_err(vpu->ctx, "invalid get param type=%d", msg->param_type); + vpu->failure = 1; + break; + } +} + +static bool vpu_dec_check_ap_inst(struct mtk_vcodec_dec_dev *dec_dev, struct vdec_vpu_inst *vpu) +{ + struct mtk_vcodec_dec_ctx *ctx; + int ret = false; + + list_for_each_entry(ctx, &dec_dev->ctx_list, list) { + if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) { + ret = true; + break; + } + } + + return ret; +} + +/* + * vpu_dec_ipi_handler - Handler for VPU ipi message. + * + * @data: ipi message + * @len : length of ipi message + * @priv: callback private data which is passed by decoder when register. + * + * This function runs in interrupt context and it means there's an IPI MSG + * from VPU. + */ +static void vpu_dec_ipi_handler(void *data, unsigned int len, void *priv) +{ + struct mtk_vcodec_dec_dev *dec_dev; + const struct vdec_vpu_ipi_ack *msg = data; + struct vdec_vpu_inst *vpu; + + dec_dev = (struct mtk_vcodec_dec_dev *)priv; + vpu = (struct vdec_vpu_inst *)(unsigned long)msg->ap_inst_addr; + if (!priv || !vpu) { + pr_err(MTK_DBG_V4L2_STR "ap_inst_addr is NULL, did the SCP hang or crash?"); + return; + } + + if (!vpu_dec_check_ap_inst(dec_dev, vpu) || msg->msg_id < VPU_IPIMSG_DEC_INIT_ACK || + msg->msg_id > VPU_IPIMSG_DEC_GET_PARAM_ACK) { + mtk_v4l2_vdec_err(vpu->ctx, "vdec msg id not correctly => 0x%x", msg->msg_id); + vpu->failure = -EINVAL; + goto error; + } + + vpu->failure = msg->status; + if (msg->status != 0) + goto error; + + switch (msg->msg_id) { + case VPU_IPIMSG_DEC_INIT_ACK: + handle_init_ack_msg(data); + break; + + case VPU_IPIMSG_DEC_START_ACK: + case VPU_IPIMSG_DEC_END_ACK: + case VPU_IPIMSG_DEC_DEINIT_ACK: + case VPU_IPIMSG_DEC_RESET_ACK: + case VPU_IPIMSG_DEC_CORE_ACK: + case VPU_IPIMSG_DEC_CORE_END_ACK: + break; + + case VPU_IPIMSG_DEC_GET_PARAM_ACK: + handle_get_param_msg_ack(data); + break; + default: + mtk_vdec_err(vpu->ctx, "invalid msg=%X", msg->msg_id); + break; + } + +error: + vpu->signaled = 1; +} + +static int vcodec_vpu_send_msg(struct vdec_vpu_inst *vpu, void *msg, int len) +{ + int err, id, msgid; + + msgid = *(uint32_t *)msg; + mtk_vdec_debug(vpu->ctx, "id=%X", msgid); + + vpu->failure = 0; + vpu->signaled = 0; + + if (vpu->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_LAT_SINGLE_CORE) { + if (msgid == AP_IPIMSG_DEC_CORE || + msgid == AP_IPIMSG_DEC_CORE_END) + id = vpu->core_id; + else + id = vpu->id; + } else { + id = vpu->id; + } + + err = mtk_vcodec_fw_ipi_send(vpu->ctx->dev->fw_handler, id, msg, + len, 2000); + if (err) { + mtk_vdec_err(vpu->ctx, "send fail vpu_id=%d msg_id=%X status=%d", + id, msgid, err); + return err; + } + + return vpu->failure; +} + +static int vcodec_send_ap_ipi(struct vdec_vpu_inst *vpu, unsigned int msg_id) +{ + struct vdec_ap_ipi_cmd msg; + int err = 0; + + mtk_vdec_debug(vpu->ctx, "+ id=%X", msg_id); + + memset(&msg, 0, sizeof(msg)); + msg.msg_id = msg_id; + if (vpu->fw_abi_version < 2) + msg.vpu_inst_addr = vpu->inst_addr; + else + msg.inst_id = vpu->inst_id; + msg.codec_type = vpu->codec_type; + + err = vcodec_vpu_send_msg(vpu, &msg, sizeof(msg)); + mtk_vdec_debug(vpu->ctx, "- id=%X ret=%d", msg_id, err); + return err; +} + +int vpu_dec_init(struct vdec_vpu_inst *vpu) +{ + struct vdec_ap_ipi_init msg; + int err; + + init_waitqueue_head(&vpu->wq); + vpu->handler = vpu_dec_ipi_handler; + vpu->ctx->vpu_inst = vpu; + + err = mtk_vcodec_fw_ipi_register(vpu->ctx->dev->fw_handler, vpu->id, + vpu->handler, "vdec", vpu->ctx->dev); + if (err) { + mtk_vdec_err(vpu->ctx, "vpu_ipi_register fail status=%d", err); + return err; + } + + if (vpu->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_LAT_SINGLE_CORE) { + err = mtk_vcodec_fw_ipi_register(vpu->ctx->dev->fw_handler, + vpu->core_id, vpu->handler, + "vdec", vpu->ctx->dev); + if (err) { + mtk_vdec_err(vpu->ctx, "vpu_ipi_register core fail status=%d", err); + return err; + } + } + + memset(&msg, 0, sizeof(msg)); + msg.msg_id = AP_IPIMSG_DEC_INIT; + msg.ap_inst_addr = (unsigned long)vpu; + msg.codec_type = vpu->codec_type; + + mtk_vdec_debug(vpu->ctx, "vdec_inst=%p", vpu); + + err = vcodec_vpu_send_msg(vpu, (void *)&msg, sizeof(msg)); + mtk_vdec_debug(vpu->ctx, "- ret=%d", err); + return err; +} + +int vpu_dec_start(struct vdec_vpu_inst *vpu, uint32_t *data, unsigned int len) +{ + struct vdec_ap_ipi_dec_start msg; + int i; + int err = 0; + + if (len > ARRAY_SIZE(msg.data)) { + mtk_vdec_err(vpu->ctx, "invalid len = %d\n", len); + return -EINVAL; + } + + memset(&msg, 0, sizeof(msg)); + msg.msg_id = AP_IPIMSG_DEC_START; + if (vpu->fw_abi_version < 2) + msg.vpu_inst_addr = vpu->inst_addr; + else + msg.inst_id = vpu->inst_id; + + for (i = 0; i < len; i++) + msg.data[i] = data[i]; + msg.codec_type = vpu->codec_type; + + err = vcodec_vpu_send_msg(vpu, (void *)&msg, sizeof(msg)); + mtk_vdec_debug(vpu->ctx, "- ret=%d", err); + return err; +} + +int vpu_dec_get_param(struct vdec_vpu_inst *vpu, uint32_t *data, + unsigned int len, unsigned int param_type) +{ + struct vdec_ap_ipi_get_param msg; + int err; + + if (len > ARRAY_SIZE(msg.data)) { + mtk_vdec_err(vpu->ctx, "invalid len = %d\n", len); + return -EINVAL; + } + + memset(&msg, 0, sizeof(msg)); + msg.msg_id = AP_IPIMSG_DEC_GET_PARAM; + msg.inst_id = vpu->inst_id; + memcpy(msg.data, data, sizeof(unsigned int) * len); + msg.param_type = param_type; + msg.codec_type = vpu->codec_type; + + err = vcodec_vpu_send_msg(vpu, (void *)&msg, sizeof(msg)); + mtk_vdec_debug(vpu->ctx, "- ret=%d", err); + return err; +} + +int vpu_dec_core(struct vdec_vpu_inst *vpu) +{ + return vcodec_send_ap_ipi(vpu, AP_IPIMSG_DEC_CORE); +} + +int vpu_dec_end(struct vdec_vpu_inst *vpu) +{ + return vcodec_send_ap_ipi(vpu, AP_IPIMSG_DEC_END); +} + +int vpu_dec_core_end(struct vdec_vpu_inst *vpu) +{ + return vcodec_send_ap_ipi(vpu, AP_IPIMSG_DEC_CORE_END); +} + +int vpu_dec_deinit(struct vdec_vpu_inst *vpu) +{ + return vcodec_send_ap_ipi(vpu, AP_IPIMSG_DEC_DEINIT); +} + +int vpu_dec_reset(struct vdec_vpu_inst *vpu) +{ + return vcodec_send_ap_ipi(vpu, AP_IPIMSG_DEC_RESET); +} diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.h b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.h new file mode 100644 index 0000000000..fbb3f34a73 --- /dev/null +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016 MediaTek Inc. + * Author: PC Chen <pc.chen@mediatek.com> + */ + +#ifndef _VDEC_VPU_IF_H_ +#define _VDEC_VPU_IF_H_ + +struct mtk_vcodec_dec_ctx; + +/** + * struct vdec_vpu_inst - VPU instance for video codec + * @id : ipi msg id for each decoder + * @core_id : core id used to separate different hardware + * @vsi : driver structure allocated by VPU side and shared to AP side + * for control and info share + * @failure : VPU execution result status, 0: success, others: fail + * @inst_addr : VPU decoder instance address + * @fw_abi_version : ABI version of the firmware. + * @inst_id : if fw_abi_version >= 2, contains the instance ID to be given + * in place of inst_addr in messages. + * @signaled : 1 - Host has received ack message from VPU, 0 - not received + * @ctx : context for v4l2 layer integration + * @dev : platform device of VPU + * @wq : wait queue to wait VPU message ack + * @handler : ipi handler for each decoder + * @codec_type : use codec type to separate different codecs + * @capture_type: used capture type to separate different capture format + * @fb_sz : frame buffer size of each plane + */ +struct vdec_vpu_inst { + int id; + int core_id; + void *vsi; + int32_t failure; + uint32_t inst_addr; + uint32_t fw_abi_version; + uint32_t inst_id; + unsigned int signaled; + struct mtk_vcodec_dec_ctx *ctx; + wait_queue_head_t wq; + mtk_vcodec_ipi_handler handler; + unsigned int codec_type; + unsigned int capture_type; + unsigned int fb_sz[2]; +}; + +/** + * vpu_dec_init - init decoder instance and allocate required resource in VPU. + * + * @vpu: instance for vdec_vpu_inst + */ +int vpu_dec_init(struct vdec_vpu_inst *vpu); + +/** + * vpu_dec_start - start decoding, basically the function will be invoked once + * every frame. + * + * @vpu : instance for vdec_vpu_inst + * @data: meta data to pass bitstream info to VPU decoder + * @len : meta data length + */ +int vpu_dec_start(struct vdec_vpu_inst *vpu, uint32_t *data, unsigned int len); + +/** + * vpu_dec_end - end decoding, basically the function will be invoked once + * when HW decoding done interrupt received successfully. The + * decoder in VPU will continue to do reference frame management + * and check if there is a new decoded frame available to display. + * + * @vpu : instance for vdec_vpu_inst + */ +int vpu_dec_end(struct vdec_vpu_inst *vpu); + +/** + * vpu_dec_deinit - deinit decoder instance and resource freed in VPU. + * + * @vpu: instance for vdec_vpu_inst + */ +int vpu_dec_deinit(struct vdec_vpu_inst *vpu); + +/** + * vpu_dec_reset - reset decoder, use for flush decoder when end of stream or + * seek. Remainig non displayed frame will be pushed to display. + * + * @vpu: instance for vdec_vpu_inst + */ +int vpu_dec_reset(struct vdec_vpu_inst *vpu); + +/** + * vpu_dec_core - core start decoding, basically the function will be invoked once + * every frame. + * + * @vpu : instance for vdec_vpu_inst + */ +int vpu_dec_core(struct vdec_vpu_inst *vpu); + +/** + * vpu_dec_core_end - core end decoding, basically the function will be invoked once + * when core HW decoding done and receive interrupt successfully. The + * decoder in VPU will updata hardware information and deinit hardware + * and check if there is a new decoded frame available to display. + * + * @vpu : instance for vdec_vpu_inst + */ +int vpu_dec_core_end(struct vdec_vpu_inst *vpu); + +/** + * vpu_dec_get_param - get param from scp + * + * @vpu : instance for vdec_vpu_inst + * @data: meta data to pass bitstream info to VPU decoder + * @len : meta data length + * @param_type : get param type + */ +int vpu_dec_get_param(struct vdec_vpu_inst *vpu, uint32_t *data, + unsigned int len, unsigned int param_type); + +#endif |