diff options
Diffstat (limited to 'drivers/staging/media/sunxi/cedrus')
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/Kconfig | 18 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/Makefile | 6 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/TODO | 7 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus.c | 717 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus.h | 274 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_dec.c | 119 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_dec.h | 21 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_h264.c | 708 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_h265.c | 882 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_hw.c | 341 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_hw.h | 33 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c | 198 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_regs.h | 701 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_video.c | 596 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_video.h | 31 | ||||
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_vp8.c | 882 |
16 files changed, 5534 insertions, 0 deletions
diff --git a/drivers/staging/media/sunxi/cedrus/Kconfig b/drivers/staging/media/sunxi/cedrus/Kconfig new file mode 100644 index 000000000..621944f99 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/Kconfig @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +config VIDEO_SUNXI_CEDRUS + tristate "Allwinner Cedrus VPU driver" + depends on VIDEO_DEV + depends on RESET_CONTROLLER + depends on HAS_DMA + depends on OF + select MEDIA_CONTROLLER + select MEDIA_CONTROLLER_REQUEST_API + select SUNXI_SRAM + select VIDEOBUF2_DMA_CONTIG + select V4L2_MEM2MEM_DEV + help + Support for the VPU found in Allwinner SoCs, also known as the Cedar + video engine. + + To compile this driver as a module, choose M here: the module + will be called sunxi-cedrus. diff --git a/drivers/staging/media/sunxi/cedrus/Makefile b/drivers/staging/media/sunxi/cedrus/Makefile new file mode 100644 index 000000000..a647b3690 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VIDEO_SUNXI_CEDRUS) += sunxi-cedrus.o + +sunxi-cedrus-y = cedrus.o cedrus_video.o cedrus_hw.o cedrus_dec.o \ + cedrus_mpeg2.o cedrus_h264.o cedrus_h265.o \ + cedrus_vp8.o diff --git a/drivers/staging/media/sunxi/cedrus/TODO b/drivers/staging/media/sunxi/cedrus/TODO new file mode 100644 index 000000000..ec277ece4 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/TODO @@ -0,0 +1,7 @@ +Before this stateless decoder driver can leave the staging area: +* The Request API needs to be stabilized; +* The codec-specific controls need to be thoroughly reviewed to ensure they + cover all intended use cases; +* Userspace support for the Request API needs to be reviewed; +* Another stateless decoder driver should be submitted; +* At least one stateless encoder driver should be submitted. 
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c new file mode 100644 index 000000000..d2419319a --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. + * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/pm.h> + +#include <media/v4l2-device.h> +#include <media/v4l2-ioctl.h> +#include <media/v4l2-ctrls.h> +#include <media/v4l2-mem2mem.h> + +#include "cedrus.h" +#include "cedrus_video.h" +#include "cedrus_dec.h" +#include "cedrus_hw.h" + +static int cedrus_try_ctrl(struct v4l2_ctrl *ctrl) +{ + if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) { + const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps; + + if (sps->chroma_format_idc != 1) + /* Only 4:2:0 is supported */ + return -EINVAL; + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) + /* Luma and chroma bit depth mismatch */ + return -EINVAL; + if (sps->bit_depth_luma_minus8 != 0) + /* Only 8-bit is supported */ + return -EINVAL; + } else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) { + const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps; + struct cedrus_ctx *ctx = container_of(ctrl->handler, struct cedrus_ctx, hdl); + + if (sps->chroma_format_idc != 1) + /* Only 4:2:0 is supported */ + return -EINVAL; + + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) + /* Luma and chroma bit depth mismatch */ + return -EINVAL; + + if (ctx->dev->capabilities & CEDRUS_CAPABILITY_H265_10_DEC) { + if 
(sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2) + /* Only 8-bit and 10-bit are supported */ + return -EINVAL; + } else { + if (sps->bit_depth_luma_minus8 != 0) + /* Only 8-bit is supported */ + return -EINVAL; + } + } + + return 0; +} + +static const struct v4l2_ctrl_ops cedrus_ctrl_ops = { + .try_ctrl = cedrus_try_ctrl, +}; + +static const struct cedrus_control cedrus_controls[] = { + { + .cfg = { + .id = V4L2_CID_STATELESS_MPEG2_SEQUENCE, + }, + .codec = CEDRUS_CODEC_MPEG2, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_MPEG2_PICTURE, + }, + .codec = CEDRUS_CODEC_MPEG2, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_MPEG2_QUANTISATION, + }, + .codec = CEDRUS_CODEC_MPEG2, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS, + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_SLICE_PARAMS, + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_SPS, + .ops = &cedrus_ctrl_ops, + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_PPS, + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX, + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS, + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_DECODE_MODE, + .max = V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED, + .def = V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED, + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_H264_START_CODE, + .max = V4L2_STATELESS_H264_START_CODE_NONE, + .def = V4L2_STATELESS_H264_START_CODE_NONE, + }, + .codec = CEDRUS_CODEC_H264, + }, + /* + * We only expose supported profiles information, + * and not levels as it's not clear what is supported + * for each hardware/core version. + * In any case, TRY/S_FMT will clamp the format resolution + * to the maximum supported. 
+ */ + { + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_H264_PROFILE, + .min = V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE, + .def = V4L2_MPEG_VIDEO_H264_PROFILE_MAIN, + .max = V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, + .menu_skip_mask = + BIT(V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED), + }, + .codec = CEDRUS_CODEC_H264, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_SPS, + .ops = &cedrus_ctrl_ops, + }, + .codec = CEDRUS_CODEC_H265, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_PPS, + }, + .codec = CEDRUS_CODEC_H265, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, + /* The driver can only handle 1 entry per slice for now */ + .dims = { 1 }, + }, + .codec = CEDRUS_CODEC_H265, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, + }, + .codec = CEDRUS_CODEC_H265, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, + /* maximum 256 entry point offsets per slice */ + .dims = { 256 }, + .max = 0xffffffff, + .step = 1, + }, + .codec = CEDRUS_CODEC_H265, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, + .max = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, + .def = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, + }, + .codec = CEDRUS_CODEC_H265, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_START_CODE, + .max = V4L2_STATELESS_HEVC_START_CODE_NONE, + .def = V4L2_STATELESS_HEVC_START_CODE_NONE, + }, + .codec = CEDRUS_CODEC_H265, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_VP8_FRAME, + }, + .codec = CEDRUS_CODEC_VP8, + }, + { + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, + }, + .codec = CEDRUS_CODEC_H265, + }, +}; + +#define CEDRUS_CONTROLS_COUNT ARRAY_SIZE(cedrus_controls) + +void *cedrus_find_control_data(struct cedrus_ctx *ctx, u32 id) +{ + unsigned int i; + + for (i = 0; ctx->ctrls[i]; i++) + if (ctx->ctrls[i]->id == id) + return ctx->ctrls[i]->p_cur.p; + + return NULL; +} + +u32 cedrus_get_num_of_controls(struct cedrus_ctx *ctx, u32 id) +{ + unsigned int i; + + for (i = 0; ctx->ctrls[i]; i++) + 
if (ctx->ctrls[i]->id == id) + return ctx->ctrls[i]->elems; + + return 0; +} + +static int cedrus_init_ctrls(struct cedrus_dev *dev, struct cedrus_ctx *ctx) +{ + struct v4l2_ctrl_handler *hdl = &ctx->hdl; + struct v4l2_ctrl *ctrl; + unsigned int ctrl_size; + unsigned int i; + + v4l2_ctrl_handler_init(hdl, CEDRUS_CONTROLS_COUNT); + if (hdl->error) { + v4l2_err(&dev->v4l2_dev, + "Failed to initialize control handler: %d\n", + hdl->error); + return hdl->error; + } + + ctrl_size = sizeof(ctrl) * CEDRUS_CONTROLS_COUNT + 1; + + ctx->ctrls = kzalloc(ctrl_size, GFP_KERNEL); + if (!ctx->ctrls) + return -ENOMEM; + + for (i = 0; i < CEDRUS_CONTROLS_COUNT; i++) { + ctrl = v4l2_ctrl_new_custom(hdl, &cedrus_controls[i].cfg, + NULL); + if (hdl->error) { + v4l2_err(&dev->v4l2_dev, + "Failed to create %s control: %d\n", + v4l2_ctrl_get_name(cedrus_controls[i].cfg.id), + hdl->error); + + v4l2_ctrl_handler_free(hdl); + kfree(ctx->ctrls); + ctx->ctrls = NULL; + return hdl->error; + } + + ctx->ctrls[i] = ctrl; + } + + ctx->fh.ctrl_handler = hdl; + v4l2_ctrl_handler_setup(hdl); + + return 0; +} + +static int cedrus_request_validate(struct media_request *req) +{ + struct media_request_object *obj; + struct cedrus_ctx *ctx = NULL; + unsigned int count; + + list_for_each_entry(obj, &req->objects, list) { + struct vb2_buffer *vb; + + if (vb2_request_object_is_buffer(obj)) { + vb = container_of(obj, struct vb2_buffer, req_obj); + ctx = vb2_get_drv_priv(vb->vb2_queue); + + break; + } + } + + if (!ctx) + return -ENOENT; + + count = vb2_request_buffer_cnt(req); + if (!count) { + v4l2_info(&ctx->dev->v4l2_dev, + "No buffer was provided with the request\n"); + return -ENOENT; + } else if (count > 1) { + v4l2_info(&ctx->dev->v4l2_dev, + "More than one buffer was provided with the request\n"); + return -EINVAL; + } + + return vb2_request_validate(req); +} + +static int cedrus_open(struct file *file) +{ + struct cedrus_dev *dev = video_drvdata(file); + struct cedrus_ctx *ctx = NULL; + int ret; + + 
if (mutex_lock_interruptible(&dev->dev_mutex)) + return -ERESTARTSYS; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + mutex_unlock(&dev->dev_mutex); + return -ENOMEM; + } + + v4l2_fh_init(&ctx->fh, video_devdata(file)); + file->private_data = &ctx->fh; + ctx->dev = dev; + + ret = cedrus_init_ctrls(dev, ctx); + if (ret) + goto err_free; + + ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, + &cedrus_queue_init); + if (IS_ERR(ctx->fh.m2m_ctx)) { + ret = PTR_ERR(ctx->fh.m2m_ctx); + goto err_ctrls; + } + ctx->dst_fmt.pixelformat = V4L2_PIX_FMT_NV12_32L32; + cedrus_prepare_format(&ctx->dst_fmt); + ctx->src_fmt.pixelformat = V4L2_PIX_FMT_MPEG2_SLICE; + /* + * TILED_NV12 has more strict requirements, so copy the width and + * height to src_fmt to ensure that is matches the dst_fmt resolution. + */ + ctx->src_fmt.width = ctx->dst_fmt.width; + ctx->src_fmt.height = ctx->dst_fmt.height; + cedrus_prepare_format(&ctx->src_fmt); + + v4l2_fh_add(&ctx->fh); + + mutex_unlock(&dev->dev_mutex); + + return 0; + +err_ctrls: + v4l2_ctrl_handler_free(&ctx->hdl); +err_free: + kfree(ctx); + mutex_unlock(&dev->dev_mutex); + + return ret; +} + +static int cedrus_release(struct file *file) +{ + struct cedrus_dev *dev = video_drvdata(file); + struct cedrus_ctx *ctx = container_of(file->private_data, + struct cedrus_ctx, fh); + + mutex_lock(&dev->dev_mutex); + + v4l2_fh_del(&ctx->fh); + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); + + v4l2_ctrl_handler_free(&ctx->hdl); + kfree(ctx->ctrls); + + v4l2_fh_exit(&ctx->fh); + + kfree(ctx); + + mutex_unlock(&dev->dev_mutex); + + return 0; +} + +static const struct v4l2_file_operations cedrus_fops = { + .owner = THIS_MODULE, + .open = cedrus_open, + .release = cedrus_release, + .poll = v4l2_m2m_fop_poll, + .unlocked_ioctl = video_ioctl2, + .mmap = v4l2_m2m_fop_mmap, +}; + +static const struct video_device cedrus_video_device = { + .name = CEDRUS_NAME, + .vfl_dir = VFL_DIR_M2M, + .fops = &cedrus_fops, + .ioctl_ops = &cedrus_ioctl_ops, + 
.minor = -1, + .release = video_device_release_empty, + .device_caps = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING, +}; + +static const struct v4l2_m2m_ops cedrus_m2m_ops = { + .device_run = cedrus_device_run, +}; + +static const struct media_device_ops cedrus_m2m_media_ops = { + .req_validate = cedrus_request_validate, + .req_queue = v4l2_m2m_request_queue, +}; + +static int cedrus_probe(struct platform_device *pdev) +{ + struct cedrus_dev *dev; + struct video_device *vfd; + int ret; + + dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + platform_set_drvdata(pdev, dev); + + dev->vfd = cedrus_video_device; + dev->dev = &pdev->dev; + dev->pdev = pdev; + + ret = cedrus_hw_probe(dev); + if (ret) { + dev_err(&pdev->dev, "Failed to probe hardware\n"); + return ret; + } + + dev->dec_ops[CEDRUS_CODEC_MPEG2] = &cedrus_dec_ops_mpeg2; + dev->dec_ops[CEDRUS_CODEC_H264] = &cedrus_dec_ops_h264; + dev->dec_ops[CEDRUS_CODEC_H265] = &cedrus_dec_ops_h265; + dev->dec_ops[CEDRUS_CODEC_VP8] = &cedrus_dec_ops_vp8; + + mutex_init(&dev->dev_mutex); + + INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog); + + ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); + if (ret) { + dev_err(&pdev->dev, "Failed to register V4L2 device\n"); + return ret; + } + + vfd = &dev->vfd; + vfd->lock = &dev->dev_mutex; + vfd->v4l2_dev = &dev->v4l2_dev; + + snprintf(vfd->name, sizeof(vfd->name), "%s", cedrus_video_device.name); + video_set_drvdata(vfd, dev); + + dev->m2m_dev = v4l2_m2m_init(&cedrus_m2m_ops); + if (IS_ERR(dev->m2m_dev)) { + v4l2_err(&dev->v4l2_dev, + "Failed to initialize V4L2 M2M device\n"); + ret = PTR_ERR(dev->m2m_dev); + + goto err_v4l2; + } + + dev->mdev.dev = &pdev->dev; + strscpy(dev->mdev.model, CEDRUS_NAME, sizeof(dev->mdev.model)); + strscpy(dev->mdev.bus_info, "platform:" CEDRUS_NAME, + sizeof(dev->mdev.bus_info)); + + media_device_init(&dev->mdev); + dev->mdev.ops = &cedrus_m2m_media_ops; + dev->v4l2_dev.mdev = &dev->mdev; + + ret = 
video_register_device(vfd, VFL_TYPE_VIDEO, 0); + if (ret) { + v4l2_err(&dev->v4l2_dev, "Failed to register video device\n"); + goto err_m2m; + } + + v4l2_info(&dev->v4l2_dev, + "Device registered as /dev/video%d\n", vfd->num); + + ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd, + MEDIA_ENT_F_PROC_VIDEO_DECODER); + if (ret) { + v4l2_err(&dev->v4l2_dev, + "Failed to initialize V4L2 M2M media controller\n"); + goto err_video; + } + + ret = media_device_register(&dev->mdev); + if (ret) { + v4l2_err(&dev->v4l2_dev, "Failed to register media device\n"); + goto err_m2m_mc; + } + + return 0; + +err_m2m_mc: + v4l2_m2m_unregister_media_controller(dev->m2m_dev); +err_video: + video_unregister_device(&dev->vfd); +err_m2m: + v4l2_m2m_release(dev->m2m_dev); +err_v4l2: + v4l2_device_unregister(&dev->v4l2_dev); + + return ret; +} + +static int cedrus_remove(struct platform_device *pdev) +{ + struct cedrus_dev *dev = platform_get_drvdata(pdev); + + cancel_delayed_work_sync(&dev->watchdog_work); + if (media_devnode_is_registered(dev->mdev.devnode)) { + media_device_unregister(&dev->mdev); + v4l2_m2m_unregister_media_controller(dev->m2m_dev); + media_device_cleanup(&dev->mdev); + } + + v4l2_m2m_release(dev->m2m_dev); + video_unregister_device(&dev->vfd); + v4l2_device_unregister(&dev->v4l2_dev); + + cedrus_hw_remove(dev); + + return 0; +} + +static const struct cedrus_variant sun4i_a10_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 320000000, +}; + +static const struct cedrus_variant sun5i_a13_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 320000000, +}; + +static const struct cedrus_variant sun7i_a20_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 320000000, +}; + +static const struct 
cedrus_variant sun8i_a33_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 320000000, +}; + +static const struct cedrus_variant sun8i_h3_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_H265_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 402000000, +}; + +static const struct cedrus_variant sun8i_v3s_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_H264_DEC, + .mod_rate = 297000000, +}; + +static const struct cedrus_variant sun8i_r40_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 297000000, +}; + +static const struct cedrus_variant sun20i_d1_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_H265_DEC, + .mod_rate = 432000000, +}; + +static const struct cedrus_variant sun50i_a64_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_H265_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 402000000, +}; + +static const struct cedrus_variant sun50i_h5_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_H265_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 402000000, +}; + +static const struct cedrus_variant sun50i_h6_cedrus_variant = { + .capabilities = CEDRUS_CAPABILITY_UNTILED | + CEDRUS_CAPABILITY_MPEG2_DEC | + CEDRUS_CAPABILITY_H264_DEC | + CEDRUS_CAPABILITY_H265_DEC | + CEDRUS_CAPABILITY_H265_10_DEC | + CEDRUS_CAPABILITY_VP8_DEC, + .mod_rate = 600000000, +}; + +static const struct of_device_id cedrus_dt_match[] 
= { + { + .compatible = "allwinner,sun4i-a10-video-engine", + .data = &sun4i_a10_cedrus_variant, + }, + { + .compatible = "allwinner,sun5i-a13-video-engine", + .data = &sun5i_a13_cedrus_variant, + }, + { + .compatible = "allwinner,sun7i-a20-video-engine", + .data = &sun7i_a20_cedrus_variant, + }, + { + .compatible = "allwinner,sun8i-a33-video-engine", + .data = &sun8i_a33_cedrus_variant, + }, + { + .compatible = "allwinner,sun8i-h3-video-engine", + .data = &sun8i_h3_cedrus_variant, + }, + { + .compatible = "allwinner,sun8i-v3s-video-engine", + .data = &sun8i_v3s_cedrus_variant, + }, + { + .compatible = "allwinner,sun8i-r40-video-engine", + .data = &sun8i_r40_cedrus_variant, + }, + { + .compatible = "allwinner,sun20i-d1-video-engine", + .data = &sun20i_d1_cedrus_variant, + }, + { + .compatible = "allwinner,sun50i-a64-video-engine", + .data = &sun50i_a64_cedrus_variant, + }, + { + .compatible = "allwinner,sun50i-h5-video-engine", + .data = &sun50i_h5_cedrus_variant, + }, + { + .compatible = "allwinner,sun50i-h6-video-engine", + .data = &sun50i_h6_cedrus_variant, + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, cedrus_dt_match); + +static const struct dev_pm_ops cedrus_dev_pm_ops = { + SET_RUNTIME_PM_OPS(cedrus_hw_suspend, + cedrus_hw_resume, NULL) +}; + +static struct platform_driver cedrus_driver = { + .probe = cedrus_probe, + .remove = cedrus_remove, + .driver = { + .name = CEDRUS_NAME, + .of_match_table = of_match_ptr(cedrus_dt_match), + .pm = &cedrus_dev_pm_ops, + }, +}; +module_platform_driver(cedrus_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Florent Revest <florent.revest@free-electrons.com>"); +MODULE_AUTHOR("Paul Kocialkowski <paul.kocialkowski@bootlin.com>"); +MODULE_AUTHOR("Maxime Ripard <maxime.ripard@bootlin.com>"); +MODULE_DESCRIPTION("Cedrus VPU driver"); diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h new file mode 100644 index 000000000..93a219600 --- /dev/null +++ 
b/drivers/staging/media/sunxi/cedrus/cedrus.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. + * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#ifndef _CEDRUS_H_ +#define _CEDRUS_H_ + +#include <media/v4l2-ctrls.h> +#include <media/v4l2-device.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-v4l2.h> +#include <media/videobuf2-dma-contig.h> + +#include <linux/iopoll.h> +#include <linux/platform_device.h> +#include <linux/workqueue.h> + +#define CEDRUS_NAME "cedrus" + +#define CEDRUS_CAPABILITY_UNTILED BIT(0) +#define CEDRUS_CAPABILITY_H265_DEC BIT(1) +#define CEDRUS_CAPABILITY_H264_DEC BIT(2) +#define CEDRUS_CAPABILITY_MPEG2_DEC BIT(3) +#define CEDRUS_CAPABILITY_VP8_DEC BIT(4) +#define CEDRUS_CAPABILITY_H265_10_DEC BIT(5) + +enum cedrus_codec { + CEDRUS_CODEC_MPEG2, + CEDRUS_CODEC_H264, + CEDRUS_CODEC_H265, + CEDRUS_CODEC_VP8, + CEDRUS_CODEC_LAST, +}; + +enum cedrus_irq_status { + CEDRUS_IRQ_NONE, + CEDRUS_IRQ_ERROR, + CEDRUS_IRQ_OK, +}; + +enum cedrus_h264_pic_type { + CEDRUS_H264_PIC_TYPE_FRAME = 0, + CEDRUS_H264_PIC_TYPE_FIELD, + CEDRUS_H264_PIC_TYPE_MBAFF, +}; + +struct cedrus_control { + struct v4l2_ctrl_config cfg; + enum cedrus_codec codec; +}; + +struct cedrus_h264_run { + const struct v4l2_ctrl_h264_decode_params *decode_params; + const struct v4l2_ctrl_h264_pps *pps; + const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix; + const struct v4l2_ctrl_h264_slice_params *slice_params; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pred_weights *pred_weights; +}; + +struct cedrus_mpeg2_run { + const struct v4l2_ctrl_mpeg2_sequence *sequence; + const struct 
v4l2_ctrl_mpeg2_picture *picture; + const struct v4l2_ctrl_mpeg2_quantisation *quantisation; +}; + +struct cedrus_h265_run { + const struct v4l2_ctrl_hevc_sps *sps; + const struct v4l2_ctrl_hevc_pps *pps; + const struct v4l2_ctrl_hevc_slice_params *slice_params; + const struct v4l2_ctrl_hevc_decode_params *decode_params; + const struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix; + const u32 *entry_points; + u32 entry_points_count; +}; + +struct cedrus_vp8_run { + const struct v4l2_ctrl_vp8_frame *frame_params; +}; + +struct cedrus_run { + struct vb2_v4l2_buffer *src; + struct vb2_v4l2_buffer *dst; + + union { + struct cedrus_h264_run h264; + struct cedrus_mpeg2_run mpeg2; + struct cedrus_h265_run h265; + struct cedrus_vp8_run vp8; + }; +}; + +struct cedrus_buffer { + struct v4l2_m2m_buffer m2m_buf; + + union { + struct { + unsigned int position; + enum cedrus_h264_pic_type pic_type; + } h264; + } codec; +}; + +struct cedrus_ctx { + struct v4l2_fh fh; + struct cedrus_dev *dev; + + struct v4l2_pix_format src_fmt; + struct v4l2_pix_format dst_fmt; + enum cedrus_codec current_codec; + + struct v4l2_ctrl_handler hdl; + struct v4l2_ctrl **ctrls; + + union { + struct { + void *mv_col_buf; + dma_addr_t mv_col_buf_dma; + ssize_t mv_col_buf_field_size; + ssize_t mv_col_buf_size; + void *pic_info_buf; + dma_addr_t pic_info_buf_dma; + ssize_t pic_info_buf_size; + void *neighbor_info_buf; + dma_addr_t neighbor_info_buf_dma; + void *deblk_buf; + dma_addr_t deblk_buf_dma; + ssize_t deblk_buf_size; + void *intra_pred_buf; + dma_addr_t intra_pred_buf_dma; + ssize_t intra_pred_buf_size; + } h264; + struct { + void *mv_col_buf; + dma_addr_t mv_col_buf_addr; + ssize_t mv_col_buf_size; + ssize_t mv_col_buf_unit_size; + void *neighbor_info_buf; + dma_addr_t neighbor_info_buf_addr; + void *entry_points_buf; + dma_addr_t entry_points_buf_addr; + } h265; + struct { + unsigned int last_frame_p_type; + unsigned int last_filter_type; + unsigned int last_sharpness_level; + + u8 
*entropy_probs_buf; + dma_addr_t entropy_probs_buf_dma; + } vp8; + } codec; +}; + +struct cedrus_dec_ops { + void (*irq_clear)(struct cedrus_ctx *ctx); + void (*irq_disable)(struct cedrus_ctx *ctx); + enum cedrus_irq_status (*irq_status)(struct cedrus_ctx *ctx); + int (*setup)(struct cedrus_ctx *ctx, struct cedrus_run *run); + int (*start)(struct cedrus_ctx *ctx); + void (*stop)(struct cedrus_ctx *ctx); + void (*trigger)(struct cedrus_ctx *ctx); +}; + +struct cedrus_variant { + unsigned int capabilities; + unsigned int mod_rate; +}; + +struct cedrus_dev { + struct v4l2_device v4l2_dev; + struct video_device vfd; + struct media_device mdev; + struct media_pad pad[2]; + struct platform_device *pdev; + struct device *dev; + struct v4l2_m2m_dev *m2m_dev; + struct cedrus_dec_ops *dec_ops[CEDRUS_CODEC_LAST]; + + /* Device file mutex */ + struct mutex dev_mutex; + + void __iomem *base; + + struct clk *mod_clk; + struct clk *ahb_clk; + struct clk *ram_clk; + + struct reset_control *rstc; + + unsigned int capabilities; + + struct delayed_work watchdog_work; +}; + +extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2; +extern struct cedrus_dec_ops cedrus_dec_ops_h264; +extern struct cedrus_dec_ops cedrus_dec_ops_h265; +extern struct cedrus_dec_ops cedrus_dec_ops_vp8; + +static inline void cedrus_write(struct cedrus_dev *dev, u32 reg, u32 val) +{ + writel(val, dev->base + reg); +} + +static inline u32 cedrus_read(struct cedrus_dev *dev, u32 reg) +{ + return readl(dev->base + reg); +} + +static inline u32 cedrus_wait_for(struct cedrus_dev *dev, u32 reg, u32 flag) +{ + u32 value; + + return readl_poll_timeout_atomic(dev->base + reg, value, + (value & flag) == 0, 10, 1000); +} + +static inline dma_addr_t cedrus_buf_addr(struct vb2_buffer *buf, + struct v4l2_pix_format *pix_fmt, + unsigned int plane) +{ + dma_addr_t addr = vb2_dma_contig_plane_dma_addr(buf, 0); + + return addr + (pix_fmt ? 
(dma_addr_t)pix_fmt->bytesperline * + pix_fmt->height * plane : 0); +} + +static inline dma_addr_t cedrus_dst_buf_addr(struct cedrus_ctx *ctx, + struct vb2_buffer *buf, + unsigned int plane) +{ + return buf ? cedrus_buf_addr(buf, &ctx->dst_fmt, plane) : 0; +} + +static inline void cedrus_write_ref_buf_addr(struct cedrus_ctx *ctx, + struct vb2_queue *q, + u64 timestamp, + u32 luma_reg, + u32 chroma_reg) +{ + struct cedrus_dev *dev = ctx->dev; + struct vb2_buffer *buf = vb2_find_buffer(q, timestamp); + + cedrus_write(dev, luma_reg, cedrus_dst_buf_addr(ctx, buf, 0)); + cedrus_write(dev, chroma_reg, cedrus_dst_buf_addr(ctx, buf, 1)); +} + +static inline struct cedrus_buffer * +vb2_v4l2_to_cedrus_buffer(const struct vb2_v4l2_buffer *p) +{ + return container_of(p, struct cedrus_buffer, m2m_buf.vb); +} + +static inline struct cedrus_buffer * +vb2_to_cedrus_buffer(const struct vb2_buffer *p) +{ + return vb2_v4l2_to_cedrus_buffer(to_vb2_v4l2_buffer(p)); +} + +void *cedrus_find_control_data(struct cedrus_ctx *ctx, u32 id); +u32 cedrus_get_num_of_controls(struct cedrus_ctx *ctx, u32 id); + +#endif diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c new file mode 100644 index 000000000..e7f7602a5 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. 
+ * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#include <media/v4l2-device.h> +#include <media/v4l2-ioctl.h> +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> + +#include "cedrus.h" +#include "cedrus_dec.h" +#include "cedrus_hw.h" + +void cedrus_device_run(void *priv) +{ + struct cedrus_ctx *ctx = priv; + struct cedrus_dev *dev = ctx->dev; + struct cedrus_run run = {}; + struct media_request *src_req; + int error; + + run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); + + /* Apply request(s) controls if needed. */ + src_req = run.src->vb2_buf.req_obj.req; + + if (src_req) + v4l2_ctrl_request_setup(src_req, &ctx->hdl); + + switch (ctx->src_fmt.pixelformat) { + case V4L2_PIX_FMT_MPEG2_SLICE: + run.mpeg2.sequence = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_MPEG2_SEQUENCE); + run.mpeg2.picture = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_MPEG2_PICTURE); + run.mpeg2.quantisation = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_MPEG2_QUANTISATION); + break; + + case V4L2_PIX_FMT_H264_SLICE: + run.h264.decode_params = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_H264_DECODE_PARAMS); + run.h264.pps = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_H264_PPS); + run.h264.scaling_matrix = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_H264_SCALING_MATRIX); + run.h264.slice_params = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_H264_SLICE_PARAMS); + run.h264.sps = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_H264_SPS); + run.h264.pred_weights = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_H264_PRED_WEIGHTS); + break; + + case V4L2_PIX_FMT_HEVC_SLICE: + run.h265.sps = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_SPS); + run.h265.pps = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_PPS); + run.h265.slice_params = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_SLICE_PARAMS); + 
run.h265.decode_params = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_DECODE_PARAMS); + run.h265.scaling_matrix = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); + run.h265.entry_points = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS); + run.h265.entry_points_count = cedrus_get_num_of_controls(ctx, + V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS); + break; + + case V4L2_PIX_FMT_VP8_FRAME: + run.vp8.frame_params = cedrus_find_control_data(ctx, + V4L2_CID_STATELESS_VP8_FRAME); + break; + + default: + break; + } + + v4l2_m2m_buf_copy_metadata(run.src, run.dst, true); + + cedrus_dst_format_set(dev, &ctx->dst_fmt); + + error = dev->dec_ops[ctx->current_codec]->setup(ctx, &run); + if (error) + v4l2_err(&ctx->dev->v4l2_dev, + "Failed to setup decoding job: %d\n", error); + + /* Complete request(s) controls if needed. */ + + if (src_req) + v4l2_ctrl_request_complete(src_req, &ctx->hdl); + + /* Trigger decoding if setup went well, bail out otherwise. */ + if (!error) { + /* Start the watchdog timer. */ + schedule_delayed_work(&dev->watchdog_work, + msecs_to_jiffies(2000)); + + dev->dec_ops[ctx->current_codec]->trigger(ctx); + } else { + v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, + ctx->fh.m2m_ctx, + VB2_BUF_STATE_ERROR); + } +} diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.h b/drivers/staging/media/sunxi/cedrus/cedrus_dec.h new file mode 100644 index 000000000..d1ae79036 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. 
+ * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#ifndef _CEDRUS_DEC_H_ +#define _CEDRUS_DEC_H_ + +void cedrus_device_run(void *priv); + +#endif diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c new file mode 100644 index 000000000..a8b236cd3 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -0,0 +1,708 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cedrus VPU driver + * + * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com> + * Copyright (c) 2018 Bootlin + */ + +#include <linux/delay.h> +#include <linux/types.h> + +#include <media/videobuf2-dma-contig.h> + +#include "cedrus.h" +#include "cedrus_hw.h" +#include "cedrus_regs.h" + +enum cedrus_h264_sram_off { + CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE = 0x000, + CEDRUS_SRAM_H264_FRAMEBUFFER_LIST = 0x100, + CEDRUS_SRAM_H264_REF_LIST_0 = 0x190, + CEDRUS_SRAM_H264_REF_LIST_1 = 0x199, + CEDRUS_SRAM_H264_SCALING_LIST_8x8_0 = 0x200, + CEDRUS_SRAM_H264_SCALING_LIST_8x8_1 = 0x210, + CEDRUS_SRAM_H264_SCALING_LIST_4x4 = 0x220, +}; + +struct cedrus_h264_sram_ref_pic { + __le32 top_field_order_cnt; + __le32 bottom_field_order_cnt; + __le32 frame_info; + __le32 luma_ptr; + __le32 chroma_ptr; + __le32 mv_col_top_ptr; + __le32 mv_col_bot_ptr; + __le32 reserved; +} __packed; + +#define CEDRUS_H264_FRAME_NUM 18 + +#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (32 * SZ_1K) +#define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K) + +static void cedrus_h264_write_sram(struct cedrus_dev *dev, + enum cedrus_h264_sram_off off, + const void *data, size_t len) +{ + const u32 *buffer = data; + size_t count = DIV_ROUND_UP(len, 4); + + cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2); + + while (count--) + cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++); +} + +static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx, + unsigned int position, + unsigned int field) +{ + dma_addr_t addr = 
ctx->codec.h264.mv_col_buf_dma; + + /* Adjust for the position */ + addr += position * ctx->codec.h264.mv_col_buf_field_size * 2; + + /* Adjust for the field */ + addr += field * ctx->codec.h264.mv_col_buf_field_size; + + return addr; +} + +static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx, + struct cedrus_buffer *buf, + unsigned int top_field_order_cnt, + unsigned int bottom_field_order_cnt, + struct cedrus_h264_sram_ref_pic *pic) +{ + struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf; + unsigned int position = buf->codec.h264.position; + + pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt); + pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt); + pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8); + + pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0)); + pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1)); + pic->mv_col_top_ptr = + cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 0)); + pic->mv_col_bot_ptr = + cpu_to_le32(cedrus_h264_mv_col_buf_addr(ctx, position, 1)); +} + +static void cedrus_write_frame_list(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM]; + const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; + const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; + struct vb2_queue *cap_q; + struct cedrus_buffer *output_buf; + struct cedrus_dev *dev = ctx->dev; + unsigned long used_dpbs = 0; + unsigned int position; + int output = -1; + unsigned int i; + + cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + memset(pic_list, 0, sizeof(pic_list)); + + for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) { + const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i]; + struct cedrus_buffer *cedrus_buf; + struct vb2_buffer *buf; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) + continue; + + buf = vb2_find_buffer(cap_q, dpb->reference_ts); + if (!buf) + continue; + + 
cedrus_buf = vb2_to_cedrus_buffer(buf); + position = cedrus_buf->codec.h264.position; + used_dpbs |= BIT(position); + + if (run->dst->vb2_buf.timestamp == dpb->reference_ts) { + output = position; + continue; + } + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + continue; + + cedrus_fill_ref_pic(ctx, cedrus_buf, + dpb->top_field_order_cnt, + dpb->bottom_field_order_cnt, + &pic_list[position]); + } + + if (output >= 0) + position = output; + else + position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM); + + output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); + output_buf->codec.h264.position = position; + + if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) + output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD; + else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) + output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF; + else + output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME; + + cedrus_fill_ref_pic(ctx, output_buf, + decode->top_field_order_cnt, + decode->bottom_field_order_cnt, + &pic_list[position]); + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST, + pic_list, sizeof(pic_list)); + + cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position); +} + +#define CEDRUS_MAX_REF_IDX 32 + +static void _cedrus_write_ref_list(struct cedrus_ctx *ctx, + struct cedrus_run *run, + const struct v4l2_h264_reference *ref_list, + u8 num_ref, enum cedrus_h264_sram_off sram) +{ + const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; + struct vb2_queue *cap_q; + struct cedrus_dev *dev = ctx->dev; + u8 sram_array[CEDRUS_MAX_REF_IDX]; + unsigned int i; + size_t size; + + cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + memset(sram_array, 0, sizeof(sram_array)); + + for (i = 0; i < num_ref; i++) { + const struct v4l2_h264_dpb_entry *dpb; + const struct cedrus_buffer *cedrus_buf; + unsigned int position; + struct vb2_buffer *buf; + u8 dpb_idx; + + dpb_idx 
= ref_list[i].index; + dpb = &decode->dpb[dpb_idx]; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + continue; + + buf = vb2_find_buffer(cap_q, dpb->reference_ts); + if (!buf) + continue; + + cedrus_buf = vb2_to_cedrus_buffer(buf); + position = cedrus_buf->codec.h264.position; + + sram_array[i] |= position << 1; + if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF) + sram_array[i] |= BIT(0); + } + + size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array)); + cedrus_h264_write_sram(dev, sram, &sram_array, size); +} + +static void cedrus_write_ref_list0(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; + + _cedrus_write_ref_list(ctx, run, + slice->ref_pic_list0, + slice->num_ref_idx_l0_active_minus1 + 1, + CEDRUS_SRAM_H264_REF_LIST_0); +} + +static void cedrus_write_ref_list1(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; + + _cedrus_write_ref_list(ctx, run, + slice->ref_pic_list1, + slice->num_ref_idx_l1_active_minus1 + 1, + CEDRUS_SRAM_H264_REF_LIST_1); +} + +static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_scaling_matrix *scaling = + run->h264.scaling_matrix; + const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; + struct cedrus_dev *dev = ctx->dev; + + if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) + return; + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0, + scaling->scaling_list_8x8[0], + sizeof(scaling->scaling_list_8x8[0])); + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1, + scaling->scaling_list_8x8[1], + sizeof(scaling->scaling_list_8x8[1])); + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4, + scaling->scaling_list_4x4, + sizeof(scaling->scaling_list_4x4)); +} + +static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx, + struct 
cedrus_run *run) +{ + const struct v4l2_ctrl_h264_pred_weights *pred_weight = + run->h264.pred_weights; + struct cedrus_dev *dev = ctx->dev; + int i, j, k; + + cedrus_write(dev, VE_H264_SHS_WP, + ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) | + ((pred_weight->luma_log2_weight_denom & 0x7) << 0)); + + cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, + CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2); + + for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) { + const struct v4l2_h264_weight_factors *factors = + &pred_weight->weight_factors[i]; + + for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) { + u32 val; + + val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) | + (factors->luma_weight[j] & 0x1ff); + cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); + } + + for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) { + for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) { + u32 val; + + val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) | + (factors->chroma_weight[j][k] & 0x1ff); + cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); + } + } + } +} + +/* + * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In + * rare cases frame is not decoded correctly. However, setting offset to 0 and + * skipping appropriate amount of bits with flush bits trigger always works. 
+ */ +static void cedrus_skip_bits(struct cedrus_dev *dev, int num) +{ + int count = 0; + + while (count < num) { + int tmp = min(num - count, 32); + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_FLUSH_BITS | + VE_H264_TRIGGER_TYPE_N_BITS(tmp)); + while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY) + udelay(1); + + count += tmp; + } +} + +static void cedrus_set_params(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; + const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; + const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; + const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; + struct vb2_buffer *src_buf = &run->src->vb2_buf; + struct cedrus_dev *dev = ctx->dev; + dma_addr_t src_buf_addr; + size_t slice_bytes = vb2_get_plane_payload(src_buf, 0); + unsigned int pic_width_in_mbs; + bool mbaff_pic; + u32 reg; + + cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8); + cedrus_write(dev, VE_H264_VLD_OFFSET, 0); + + src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); + cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes); + cedrus_write(dev, VE_H264_VLD_ADDR, + VE_H264_VLD_ADDR_VAL(src_buf_addr) | + VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | + VE_H264_VLD_ADDR_LAST); + + if (ctx->src_fmt.width > 2048) { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_MIXED_RAM | + VE_BUF_CTRL_DBLK_MIXED_RAM); + cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, + ctx->codec.h264.deblk_buf_dma); + cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, + ctx->codec.h264.intra_pred_buf_dma); + } else { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_INT_SRAM | + VE_BUF_CTRL_DBLK_INT_SRAM); + } + + /* + * FIXME: Since the bitstream parsing is done in software, and + * in userspace, this shouldn't be needed anymore. But it + * turns out that removing it breaks the decoding process, + * without any clear indication why. 
+ */ + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_INIT_SWDEC); + + cedrus_skip_bits(dev, slice->header_bit_size); + + if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice)) + cedrus_write_pred_weight_table(ctx, run); + + if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) || + (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) || + (slice->slice_type == V4L2_H264_SLICE_TYPE_B)) + cedrus_write_ref_list0(ctx, run); + + if (slice->slice_type == V4L2_H264_SLICE_TYPE_B) + cedrus_write_ref_list1(ctx, run); + + // picture parameters + reg = 0; + /* + * FIXME: the kernel headers are allowing the default value to + * be passed, but the libva doesn't give us that. + */ + reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10; + reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5; + reg |= (pps->weighted_bipred_idc & 0x3) << 2; + if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) + reg |= VE_H264_PPS_ENTROPY_CODING_MODE; + if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) + reg |= VE_H264_PPS_WEIGHTED_PRED; + if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) + reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED; + if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) + reg |= VE_H264_PPS_TRANSFORM_8X8_MODE; + cedrus_write(dev, VE_H264_PPS, reg); + + // sequence parameters + reg = 0; + reg |= (sps->chroma_format_idc & 0x7) << 19; + reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8; + reg |= sps->pic_height_in_map_units_minus1 & 0xff; + if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) + reg |= VE_H264_SPS_MBS_ONLY; + if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) + reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD; + if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) + reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE; + cedrus_write(dev, VE_H264_SPS, reg); + + mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) && + (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD); + pic_width_in_mbs = sps->pic_width_in_mbs_minus1 
+ 1; + + // slice parameters + reg = 0; + reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24; + reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) * + (mbaff_pic + 1)) & 0xff) << 16; + reg |= decode->nal_ref_idc ? BIT(12) : 0; + reg |= (slice->slice_type & 0xf) << 8; + reg |= slice->cabac_init_idc & 0x3; + if (ctx->fh.m2m_ctx->new_frame) + reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC; + if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) + reg |= VE_H264_SHS_FIELD_PIC; + if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) + reg |= VE_H264_SHS_BOTTOM_FIELD; + if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED) + reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED; + cedrus_write(dev, VE_H264_SHS, reg); + + reg = 0; + reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD; + reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24; + reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16; + reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8; + reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4; + reg |= slice->slice_beta_offset_div2 & 0xf; + cedrus_write(dev, VE_H264_SHS2, reg); + + reg = 0; + reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16; + reg |= (pps->chroma_qp_index_offset & 0x3f) << 8; + reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f; + if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) + reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT; + cedrus_write(dev, VE_H264_SHS_QP, reg); + + // clear status flags + cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS)); + + // enable int + cedrus_write(dev, VE_H264_CTRL, + VE_H264_CTRL_SLICE_DECODE_INT | + VE_H264_CTRL_DECODE_ERR_INT | + VE_H264_CTRL_VLD_DATA_REQ_INT); +} + +static enum cedrus_irq_status +cedrus_h264_irq_status(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_H264_STATUS); + + if (reg & (VE_H264_STATUS_DECODE_ERR_INT | + VE_H264_STATUS_VLD_DATA_REQ_INT)) + return 
CEDRUS_IRQ_ERROR; + + if (reg & VE_H264_CTRL_SLICE_DECODE_INT) + return CEDRUS_IRQ_OK; + + return CEDRUS_IRQ_NONE; +} + +static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_H264_STATUS, + VE_H264_STATUS_INT_MASK); +} + +static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_H264_CTRL); + + cedrus_write(dev, VE_H264_CTRL, + reg & ~VE_H264_CTRL_INT_MASK); +} + +static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_engine_enable(ctx, CEDRUS_CODEC_H264); + + cedrus_write(dev, VE_H264_SDROT_CTRL, 0); + cedrus_write(dev, VE_H264_EXTRA_BUFFER1, + ctx->codec.h264.pic_info_buf_dma); + cedrus_write(dev, VE_H264_EXTRA_BUFFER2, + ctx->codec.h264.neighbor_info_buf_dma); + + cedrus_write_scaling_lists(ctx, run); + cedrus_write_frame_list(ctx, run); + + cedrus_set_params(ctx, run); + + return 0; +} + +static int cedrus_h264_start(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + unsigned int pic_info_size; + unsigned int field_size; + unsigned int mv_col_size; + int ret; + + /* + * NOTE: All buffers allocated here are only used by HW, so we + * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them. + */ + + /* Formula for picture buffer size is taken from CedarX source. */ + + if (ctx->src_fmt.width > 2048) + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; + else + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; + + /* + * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, + * there is no need to multiply by 2. 
+ */ + pic_info_size += ctx->src_fmt.height * 2 * 64; + + if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE) + pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE; + + ctx->codec.h264.pic_info_buf_size = pic_info_size; + ctx->codec.h264.pic_info_buf = + dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, + &ctx->codec.h264.pic_info_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.pic_info_buf) + return -ENOMEM; + + /* + * That buffer is supposed to be 16kiB in size, and be aligned + * on 16kiB as well. However, dma_alloc_attrs provides the + * guarantee that we'll have a DMA address aligned on the + * smallest page order that is greater to the requested size, + * so we don't have to overallocate. + */ + ctx->codec.h264.neighbor_info_buf = + dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, + &ctx->codec.h264.neighbor_info_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.neighbor_info_buf) { + ret = -ENOMEM; + goto err_pic_buf; + } + + field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) * + DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16; + + /* + * FIXME: This is actually conditional to + * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we + * might have to rework this if memory efficiency ever is + * something we need to work on. + */ + field_size = field_size * 2; + + /* + * FIXME: This is actually conditional to + * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might + * have to rework this if memory efficiency ever is something + * we need to work on. 
+ */ + field_size = field_size * 2; + ctx->codec.h264.mv_col_buf_field_size = field_size; + + mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM; + ctx->codec.h264.mv_col_buf_size = mv_col_size; + ctx->codec.h264.mv_col_buf = + dma_alloc_attrs(dev->dev, + ctx->codec.h264.mv_col_buf_size, + &ctx->codec.h264.mv_col_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.mv_col_buf) { + ret = -ENOMEM; + goto err_neighbor_buf; + } + + if (ctx->src_fmt.width > 2048) { + /* + * Formulas for deblock and intra prediction buffer sizes + * are taken from CedarX source. + */ + + ctx->codec.h264.deblk_buf_size = + ALIGN(ctx->src_fmt.width, 32) * 12; + ctx->codec.h264.deblk_buf = + dma_alloc_attrs(dev->dev, + ctx->codec.h264.deblk_buf_size, + &ctx->codec.h264.deblk_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.deblk_buf) { + ret = -ENOMEM; + goto err_mv_col_buf; + } + + /* + * NOTE: Multiplying by two deviates from CedarX logic, but it + * is for some unknown reason needed for H264 4K decoding on H6. 
+ */ + ctx->codec.h264.intra_pred_buf_size = + ALIGN(ctx->src_fmt.width, 64) * 5 * 2; + ctx->codec.h264.intra_pred_buf = + dma_alloc_attrs(dev->dev, + ctx->codec.h264.intra_pred_buf_size, + &ctx->codec.h264.intra_pred_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.intra_pred_buf) { + ret = -ENOMEM; + goto err_deblk_buf; + } + } + + return 0; + +err_deblk_buf: + dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, + ctx->codec.h264.deblk_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + +err_mv_col_buf: + dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size, + ctx->codec.h264.mv_col_buf, + ctx->codec.h264.mv_col_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + +err_neighbor_buf: + dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, + ctx->codec.h264.neighbor_info_buf, + ctx->codec.h264.neighbor_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + +err_pic_buf: + dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, + ctx->codec.h264.pic_info_buf, + ctx->codec.h264.pic_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + return ret; +} + +static void cedrus_h264_stop(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + dma_free_attrs(dev->dev, ctx->codec.h264.mv_col_buf_size, + ctx->codec.h264.mv_col_buf, + ctx->codec.h264.mv_col_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, + ctx->codec.h264.neighbor_info_buf, + ctx->codec.h264.neighbor_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, + ctx->codec.h264.pic_info_buf, + ctx->codec.h264.pic_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + if (ctx->codec.h264.deblk_buf_size) + dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, + ctx->codec.h264.deblk_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + if (ctx->codec.h264.intra_pred_buf_size) + dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size, + 
ctx->codec.h264.intra_pred_buf, + ctx->codec.h264.intra_pred_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); +} + +static void cedrus_h264_trigger(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE); +} + +struct cedrus_dec_ops cedrus_dec_ops_h264 = { + .irq_clear = cedrus_h264_irq_clear, + .irq_disable = cedrus_h264_irq_disable, + .irq_status = cedrus_h264_irq_status, + .setup = cedrus_h264_setup, + .start = cedrus_h264_start, + .stop = cedrus_h264_stop, + .trigger = cedrus_h264_trigger, +}; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c new file mode 100644 index 000000000..625f77a8c --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cedrus VPU driver + * + * Copyright (C) 2013 Jens Kuske <jenskuske@gmail.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + */ + +#include <linux/delay.h> +#include <linux/types.h> + +#include <media/videobuf2-dma-contig.h> + +#include "cedrus.h" +#include "cedrus_hw.h" +#include "cedrus_regs.h" + +/* + * These are the sizes for side buffers required by the hardware for storing + * internal decoding metadata. They match the values used by the early BSP + * implementations, that were initially exposed in libvdpau-sunxi. + * Subsequent BSP implementations seem to double the neighbor info buffer size + * for the H6 SoC, which may be related to 10 bit H265 support. 
+ */ +#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE (794 * SZ_1K) +#define CEDRUS_H265_ENTRY_POINTS_BUF_SIZE (4 * SZ_1K) +#define CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE 160 + +struct cedrus_h265_sram_frame_info { + __le32 top_pic_order_cnt; + __le32 bottom_pic_order_cnt; + __le32 top_mv_col_buf_addr; + __le32 bottom_mv_col_buf_addr; + __le32 luma_addr; + __le32 chroma_addr; +} __packed; + +struct cedrus_h265_sram_pred_weight { + __s8 delta_weight; + __s8 offset; +} __packed; + +static enum cedrus_irq_status cedrus_h265_irq_status(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg; + + reg = cedrus_read(dev, VE_DEC_H265_STATUS); + reg &= VE_DEC_H265_STATUS_CHECK_MASK; + + if (reg & VE_DEC_H265_STATUS_CHECK_ERROR || + !(reg & VE_DEC_H265_STATUS_SUCCESS)) + return CEDRUS_IRQ_ERROR; + + return CEDRUS_IRQ_OK; +} + +static void cedrus_h265_irq_clear(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_DEC_H265_STATUS, VE_DEC_H265_STATUS_CHECK_MASK); +} + +static void cedrus_h265_irq_disable(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_DEC_H265_CTRL); + + reg &= ~VE_DEC_H265_CTRL_IRQ_MASK; + + cedrus_write(dev, VE_DEC_H265_CTRL, reg); +} + +static void cedrus_h265_sram_write_offset(struct cedrus_dev *dev, u32 offset) +{ + cedrus_write(dev, VE_DEC_H265_SRAM_OFFSET, offset); +} + +static void cedrus_h265_sram_write_data(struct cedrus_dev *dev, void *data, + unsigned int size) +{ + u32 *word = data; + + while (size >= sizeof(u32)) { + cedrus_write(dev, VE_DEC_H265_SRAM_DATA, *word++); + size -= sizeof(u32); + } +} + +static inline dma_addr_t +cedrus_h265_frame_info_mv_col_buf_addr(struct cedrus_ctx *ctx, + unsigned int index, unsigned int field) +{ + return ctx->codec.h265.mv_col_buf_addr + index * + ctx->codec.h265.mv_col_buf_unit_size + + field * ctx->codec.h265.mv_col_buf_unit_size / 2; +} + +static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx, + 
unsigned int index, + bool field_pic, + u32 pic_order_cnt[], + struct vb2_buffer *buf) +{ /* Build the frame info record for SRAM slot index from buf and write it out; without field_pic the bottom fields repeat the top ones. */ + struct cedrus_dev *dev = ctx->dev; + dma_addr_t dst_luma_addr = cedrus_dst_buf_addr(ctx, buf, 0); + dma_addr_t dst_chroma_addr = cedrus_dst_buf_addr(ctx, buf, 1); + dma_addr_t mv_col_buf_addr[2] = { + cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, 0), + cedrus_h265_frame_info_mv_col_buf_addr(ctx, buf->index, + field_pic ? 1 : 0) + }; + u32 offset = VE_DEC_H265_SRAM_OFFSET_FRAME_INFO + + VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT * index; + struct cedrus_h265_sram_frame_info frame_info = { + .top_pic_order_cnt = cpu_to_le32(pic_order_cnt[0]), + .bottom_pic_order_cnt = cpu_to_le32(field_pic ? + pic_order_cnt[1] : + pic_order_cnt[0]), + .top_mv_col_buf_addr = + cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])), + .bottom_mv_col_buf_addr = cpu_to_le32(field_pic ? + VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[1]) : + VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])), + .luma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_luma_addr)), + .chroma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_chroma_addr)), + }; + + cedrus_h265_sram_write_offset(dev, offset); + cedrus_h265_sram_write_data(dev, &frame_info, sizeof(frame_info)); +} + /* Write a frame info record for every active DPB entry, using the entry position as SRAM slot; entries whose capture buffer is not found by timestamp are skipped. */ +static void cedrus_h265_frame_info_write_dpb(struct cedrus_ctx *ctx, + const struct v4l2_hevc_dpb_entry *dpb, + u8 num_active_dpb_entries) +{ + struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE); + unsigned int i; + + for (i = 0; i < num_active_dpb_entries; i++) { + struct vb2_buffer *buf = vb2_find_buffer(vq, dpb[i].timestamp); + u32 pic_order_cnt[2] = { + dpb[i].pic_order_cnt_val, + dpb[i].pic_order_cnt_val + }; + + if (!buf) + continue; + + cedrus_h265_frame_info_write_single(ctx, i, dpb[i].field_pic, + pic_order_cnt, + buf); + } +} + +static void cedrus_h265_ref_pic_list_write(struct cedrus_dev *dev, + const struct v4l2_hevc_dpb_entry *dpb, + const u8 list[], + u8
num_ref_idx_active, + u32 sram_offset) +{ + unsigned int i; + u32 word = 0; + + cedrus_h265_sram_write_offset(dev, sram_offset); + + for (i = 0; i < num_ref_idx_active; i++) { + unsigned int shift = (i % 4) * 8; + unsigned int index = list[i]; + u8 value = list[i]; + /* NOTE(review): list[i] indexes dpb[] with no bounds check in this function - confirm it is validated before this point. */ + if (dpb[index].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE) + value |= VE_DEC_H265_SRAM_REF_PIC_LIST_LT_REF; + + /* Each SRAM word gathers up to 4 references. */ + word |= value << shift; + + /* Write the word to SRAM and clear it for the next batch. */ + if ((i % 4) == 3 || i == (num_ref_idx_active - 1)) { + cedrus_h265_sram_write_data(dev, &word, sizeof(word)); + word = 0; + } + } +} + /* Write the luma, then the chroma prediction weights to SRAM: luma packs two references per 32-bit word, chroma uses one word per reference (two components). */ +static void cedrus_h265_pred_weight_write(struct cedrus_dev *dev, + const s8 delta_luma_weight[], + const s8 luma_offset[], + const s8 delta_chroma_weight[][2], + const s8 chroma_offset[][2], + u8 num_ref_idx_active, + u32 sram_luma_offset, + u32 sram_chroma_offset) +{ + struct cedrus_h265_sram_pred_weight pred_weight[2] = { { 0 } }; + unsigned int i, j; + + cedrus_h265_sram_write_offset(dev, sram_luma_offset); + + for (i = 0; i < num_ref_idx_active; i++) { + unsigned int index = i % 2; + /* NOTE(review): with an odd count above one, the last word still carries the second entry of the previous pair - presumably ignored by the hardware, confirm. */ + pred_weight[index].delta_weight = delta_luma_weight[i]; + pred_weight[index].offset = luma_offset[i]; + + if (index == 1 || i == (num_ref_idx_active - 1)) + cedrus_h265_sram_write_data(dev, (u32 *)&pred_weight, + sizeof(pred_weight)); + } + + cedrus_h265_sram_write_offset(dev, sram_chroma_offset); + + for (i = 0; i < num_ref_idx_active; i++) { + for (j = 0; j < 2; j++) { + pred_weight[j].delta_weight = delta_chroma_weight[i][j]; + pred_weight[j].offset = chroma_offset[i][j]; + } + + cedrus_h265_sram_write_data(dev, &pred_weight, + sizeof(pred_weight)); + } +} + /* Flush num bits through the hardware bitstream reader, at most 32 bits per trigger. */ +static void cedrus_h265_skip_bits(struct cedrus_dev *dev, int num) +{ + int count = 0; + + while (count < num) { + int tmp = min(num - count, 32); + + cedrus_write(dev, VE_DEC_H265_TRIGGER, + VE_DEC_H265_TRIGGER_FLUSH_BITS | + VE_DEC_H265_TRIGGER_TYPE_N_BITS(tmp)); +
+ if (cedrus_wait_for(dev, VE_DEC_H265_STATUS, VE_DEC_H265_STATUS_VLD_BUSY)) + dev_err_ratelimited(dev->dev, "timed out waiting to skip bits\n"); + + count += tmp; + } +} + /* Trigger a show-bits request for num bits, wait on the VLD busy status, then return the bits-read register. */ +static u32 cedrus_h265_show_bits(struct cedrus_dev *dev, int num) +{ + cedrus_write(dev, VE_DEC_H265_TRIGGER, + VE_DEC_H265_TRIGGER_SHOW_BITS | + VE_DEC_H265_TRIGGER_TYPE_N_BITS(num)); + + cedrus_wait_for(dev, VE_DEC_H265_STATUS, + VE_DEC_H265_STATUS_VLD_BUSY); + /* NOTE(review): the cedrus_wait_for() result is ignored here, unlike in cedrus_h265_skip_bits() - confirm a timeout cannot hand back stale bits. */ + return cedrus_read(dev, VE_DEC_H265_BITS_READ); +} + /* Program the scaling matrix of the run: DC coefficients go to two registers, then the 8x8, 32x32, 16x16 and 4x4 lists are streamed to SRAM in that order. */ +static void cedrus_h265_write_scaling_list(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_hevc_scaling_matrix *scaling; + struct cedrus_dev *dev = ctx->dev; + u32 i, j, k, val; + + scaling = run->h265.scaling_matrix; + /* NOTE(review): these coefficients are shifted without the (u32) cast used for the lists below; presumably they are u8, so a value of 128 or more shifted by 24 reaches the int sign bit - confirm. */ + cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF0, + (scaling->scaling_list_dc_coef_32x32[1] << 24) | + (scaling->scaling_list_dc_coef_32x32[0] << 16) | + (scaling->scaling_list_dc_coef_16x16[1] << 8) | + (scaling->scaling_list_dc_coef_16x16[0] << 0)); + + cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF1, + (scaling->scaling_list_dc_coef_16x16[5] << 24) | + (scaling->scaling_list_dc_coef_16x16[4] << 16) | + (scaling->scaling_list_dc_coef_16x16[3] << 8) | + (scaling->scaling_list_dc_coef_16x16[2] << 0)); + + cedrus_h265_sram_write_offset(dev, VE_DEC_H265_SRAM_OFFSET_SCALING_LISTS); + /* Each SRAM word packs four coefficients that sit 8 entries apart in the source list, lowest index in the low byte. */ + for (i = 0; i < 6; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k += 4) { + val = ((u32)scaling->scaling_list_8x8[i][j + (k + 3) * 8] << 24) | + ((u32)scaling->scaling_list_8x8[i][j + (k + 2) * 8] << 16) | + ((u32)scaling->scaling_list_8x8[i][j + (k + 1) * 8] << 8) | + scaling->scaling_list_8x8[i][j + k * 8]; + cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val); + } + + for (i = 0; i < 2; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k += 4) { + val = ((u32)scaling->scaling_list_32x32[i][j + (k + 3) * 8] << 24) | + ((u32)scaling->scaling_list_32x32[i][j + (k + 2) * 8] << 16) | + ((u32)scaling->scaling_list_32x32[i][j + (k + 1) * 8] << 8) |
+ scaling->scaling_list_32x32[i][j + k * 8]; + cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val); + } + + for (i = 0; i < 6; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k += 4) { + val = ((u32)scaling->scaling_list_16x16[i][j + (k + 3) * 8] << 24) | + ((u32)scaling->scaling_list_16x16[i][j + (k + 2) * 8] << 16) | + ((u32)scaling->scaling_list_16x16[i][j + (k + 1) * 8] << 8) | + scaling->scaling_list_16x16[i][j + k * 8]; + cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val); + } + /* The 4x4 lists pack entries that sit 4 apart, one word per j. */ + for (i = 0; i < 6; i++) + for (j = 0; j < 4; j++) { + val = ((u32)scaling->scaling_list_4x4[i][j + 12] << 24) | + ((u32)scaling->scaling_list_4x4[i][j + 8] << 16) | + ((u32)scaling->scaling_list_4x4[i][j + 4] << 8) | + scaling->scaling_list_4x4[i][j]; + cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val); + } +} + /* Return 1 when an active reference in list 0 (or in list 1, for B slices) has a picture order count greater than the current one, 0 otherwise. NOTE(review): that reads as the opposite of the name, and the caller sets SLICE_NOT_LOW_DELAY when this returns 0 - confirm the intended polarity. */ +static int cedrus_h265_is_low_delay(struct cedrus_run *run) +{ + const struct v4l2_ctrl_hevc_slice_params *slice_params; + const struct v4l2_hevc_dpb_entry *dpb; + s32 poc; + int i; + + slice_params = run->h265.slice_params; + poc = run->h265.decode_params->pic_order_cnt_val; + dpb = run->h265.decode_params->dpb; + + for (i = 0; i < slice_params->num_ref_idx_l0_active_minus1 + 1; i++) + if (dpb[slice_params->ref_idx_l0[i]].pic_order_cnt_val > poc) + return 1; + /* List 1 is only looked at for B slices. */ + if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_B) + return 0; + + for (i = 0; i < slice_params->num_ref_idx_l1_active_minus1 + 1; i++) + if (dpb[slice_params->ref_idx_l1[i]].pic_order_cnt_val > poc) + return 1; + + return 0; +} + /* Program the bounds of the tile containing CTB (ctb_addr_x, ctb_addr_y) and fill the entry points buffer for the slice. */ +static void cedrus_h265_write_tiles(struct cedrus_ctx *ctx, + struct cedrus_run *run, + unsigned int ctb_addr_x, + unsigned int ctb_addr_y) +{ + const struct v4l2_ctrl_hevc_slice_params *slice_params; + const struct v4l2_ctrl_hevc_pps *pps; + struct cedrus_dev *dev = ctx->dev; + const u32 *entry_points; + u32 *entry_points_buf; + int i, x, tx, y, ty; + + pps = run->h265.pps; + slice_params = run->h265.slice_params; + entry_points = run->h265.entry_points; + entry_points_buf =
ctx->codec.h265.entry_points_buf; + /* Find the tile column containing ctb_addr_x; x becomes the first CTB column of that tile. */ + for (x = 0, tx = 0; tx < pps->num_tile_columns_minus1 + 1; tx++) { + if (x + pps->column_width_minus1[tx] + 1 > ctb_addr_x) + break; + + x += pps->column_width_minus1[tx] + 1; + } + /* Same for the tile row containing ctb_addr_y. */ + for (y = 0, ty = 0; ty < pps->num_tile_rows_minus1 + 1; ty++) { + if (y + pps->row_height_minus1[ty] + 1 > ctb_addr_y) + break; + + y += pps->row_height_minus1[ty] + 1; + } + /* NOTE(review): if a loop above ends without break, tx or ty is one past the last tile and is still used as an index below. */ + cedrus_write(dev, VE_DEC_H265_TILE_START_CTB, (y << 16) | (x << 0)); + cedrus_write(dev, VE_DEC_H265_TILE_END_CTB, + ((y + pps->row_height_minus1[ty]) << 16) | + ((x + pps->column_width_minus1[tx]) << 0)); + + if (pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) { /* Entropy coding sync: the offsets are copied unchanged, one word each. */ + for (i = 0; i < slice_params->num_entry_point_offsets; i++) + entry_points_buf[i] = entry_points[i]; + } else { /* Otherwise each entry point gets four words: offset, zero, tile start CTB, tile end CTB, with tiles advancing in raster order. NOTE(review): no bound on i against the entry points buffer size is visible here. */ + for (i = 0; i < slice_params->num_entry_point_offsets; i++) { + if (tx + 1 >= pps->num_tile_columns_minus1 + 1) { + x = 0; + tx = 0; + y += pps->row_height_minus1[ty++] + 1; + } else { + x += pps->column_width_minus1[tx++] + 1; + } + + entry_points_buf[i * 4 + 0] = entry_points[i]; + entry_points_buf[i * 4 + 1] = 0x0; + entry_points_buf[i * 4 + 2] = (y << 16) | (x << 0); + entry_points_buf[i * 4 + 3] = + ((y + pps->row_height_minus1[ty]) << 16) | + ((x + pps->column_width_minus1[tx]) << 0); + } + } +} + /* Program the decoder registers and SRAM tables for one HEVC slice from the controls attached to run. Returns 0 on success or a negative errno. */ +static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) +{ + struct cedrus_dev *dev = ctx->dev; + const struct v4l2_ctrl_hevc_sps *sps; + const struct v4l2_ctrl_hevc_pps *pps; + const struct v4l2_ctrl_hevc_slice_params *slice_params; + const struct v4l2_ctrl_hevc_decode_params *decode_params; + const struct v4l2_hevc_pred_weight_table *pred_weight_table; + unsigned int width_in_ctb_luma, ctb_size_luma; + unsigned int log2_max_luma_coding_block_size; + unsigned int ctb_addr_x, ctb_addr_y; + dma_addr_t src_buf_addr; + dma_addr_t src_buf_end_addr; + u32 chroma_log2_weight_denom; + u32 num_entry_point_offsets; + u32 output_pic_list_index; + u32 pic_order_cnt[2]; + u8
padding; + int count; + u32 reg; + + sps = run->h265.sps; + pps = run->h265.pps; + slice_params = run->h265.slice_params; + decode_params = run->h265.decode_params; + pred_weight_table = &slice_params->pred_weight_table; + num_entry_point_offsets = slice_params->num_entry_point_offsets; + + /* + * If entry point offsets are present, we must have received + * exactly that many of them. + */ + if (num_entry_point_offsets && + num_entry_point_offsets != run->h265.entry_points_count) + return -ERANGE; + + log2_max_luma_coding_block_size = + sps->log2_min_luma_coding_block_size_minus3 + 3 + + sps->log2_diff_max_min_luma_coding_block_size; + ctb_size_luma = 1UL << log2_max_luma_coding_block_size; + width_in_ctb_luma = + DIV_ROUND_UP(sps->pic_width_in_luma_samples, ctb_size_luma); + + /* MV column buffer size and allocation, done once per streaming session (the size is reset in start and stop). */ + if (!ctx->codec.h265.mv_col_buf_size) { + unsigned int num_buffers = + run->dst->vb2_buf.vb2_queue->num_buffers; + + /* + * Each CTB requires a MV col buffer with a specific unit size. + * Since the address is given with missing lsb bits, 1 KiB is + * added to each buffer to ensure proper alignment. + */ + ctx->codec.h265.mv_col_buf_unit_size = + DIV_ROUND_UP(ctx->src_fmt.width, ctb_size_luma) * + DIV_ROUND_UP(ctx->src_fmt.height, ctb_size_luma) * + CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE + SZ_1K; + + ctx->codec.h265.mv_col_buf_size = num_buffers * + ctx->codec.h265.mv_col_buf_unit_size; + + /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ + ctx->codec.h265.mv_col_buf = + dma_alloc_attrs(dev->dev, + ctx->codec.h265.mv_col_buf_size, + &ctx->codec.h265.mv_col_buf_addr, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h265.mv_col_buf) { + ctx->codec.h265.mv_col_buf_size = 0; + return -ENOMEM; + } + } + + /* Activate H265 engine. */ + cedrus_engine_enable(ctx, CEDRUS_CODEC_H265); + + /* Source offset and length in bits. 
*/ + + cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0); + + reg = slice_params->bit_size; + cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg); + + /* Source beginning and end addresses. */ + + src_buf_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0); + + reg = VE_DEC_H265_BITS_ADDR_BASE(src_buf_addr); + reg |= VE_DEC_H265_BITS_ADDR_VALID_SLICE_DATA; + reg |= VE_DEC_H265_BITS_ADDR_LAST_SLICE_DATA; + reg |= VE_DEC_H265_BITS_ADDR_FIRST_SLICE_DATA; + + cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg); + + src_buf_end_addr = src_buf_addr + + DIV_ROUND_UP(slice_params->bit_size, 8); + + reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr); + cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg); + + /* Coding tree block address of the slice segment, as column and row. */ + ctb_addr_x = slice_params->slice_segment_addr % width_in_ctb_luma; + ctb_addr_y = slice_params->slice_segment_addr / width_in_ctb_luma; + reg = VE_DEC_H265_DEC_CTB_ADDR_X(ctb_addr_x); + reg |= VE_DEC_H265_DEC_CTB_ADDR_Y(ctb_addr_y); + cedrus_write(dev, VE_DEC_H265_DEC_CTB_ADDR, reg); + /* Tile bounds and entry points, or a zeroed tile window when neither tiles nor entropy coding sync are enabled. */ + if ((pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) || + (pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) { + cedrus_h265_write_tiles(ctx, run, ctb_addr_x, ctb_addr_y); + } else { + cedrus_write(dev, VE_DEC_H265_TILE_START_CTB, 0); + cedrus_write(dev, VE_DEC_H265_TILE_END_CTB, 0); + } + + /* Clear the number of correctly-decoded coding tree blocks. */ + if (ctx->fh.m2m_ctx->new_frame) + cedrus_write(dev, VE_DEC_H265_DEC_CTB_NUM, 0); + + /* Initialize bitstream access. */ + cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_INIT_SWDEC); + + /* + * Cedrus expects the bitstream pointer to be at the end of the slice header + * rather than at the start of the slice data. Padding is 8 bits at most (one bit set to 1 and + * at most seven bits set to 0), so we only have to inspect the one byte before the slice data. 
+ */ + /* The byte before the slice data is needed, so an offset of zero cannot be handled. */ + if (slice_params->data_byte_offset == 0) + return -EOPNOTSUPP; + + cedrus_h265_skip_bits(dev, (slice_params->data_byte_offset - 1) * 8); + + padding = cedrus_h265_show_bits(dev, 8); + + /* At least one bit must be set in that byte. */ + if (padding == 0) + return -EINVAL; + /* Find the lowest set bit; the bits below it are the zero padding. */ + for (count = 0; count < 8; count++) + if (padding & (1 << count)) + break; + + /* Include the one bit. */ + count++; + + cedrus_h265_skip_bits(dev, 8 - count); + + /* Bitstream parameters. */ + + reg = VE_DEC_H265_DEC_NAL_HDR_NAL_UNIT_TYPE(slice_params->nal_unit_type) | + VE_DEC_H265_DEC_NAL_HDR_NUH_TEMPORAL_ID_PLUS1(slice_params->nuh_temporal_id_plus1); + + cedrus_write(dev, VE_DEC_H265_DEC_NAL_HDR, reg); + + /* SPS. */ + + reg = VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA(sps->max_transform_hierarchy_depth_intra) | + VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTER(sps->max_transform_hierarchy_depth_inter) | + VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_TRANSFORM_BLOCK_SIZE(sps->log2_diff_max_min_luma_transform_block_size) | + VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_TRANSFORM_BLOCK_SIZE_MINUS2(sps->log2_min_luma_transform_block_size_minus2) | + VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_luma_coding_block_size) | + VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_luma_coding_block_size_minus3) | + VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_CHROMA_MINUS8(sps->bit_depth_chroma_minus8) | + VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_LUMA_MINUS8(sps->bit_depth_luma_minus8) | + VE_DEC_H265_DEC_SPS_HDR_CHROMA_FORMAT_IDC(sps->chroma_format_idc); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_STRONG_INTRA_SMOOTHING_ENABLE, + V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED, + sps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SPS_TEMPORAL_MVP_ENABLED, + V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED, + sps->flags); + + reg |= 
VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SAMPLE_ADAPTIVE_OFFSET_ENABLED, + V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET, + sps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_AMP_ENABLED, + V4L2_HEVC_SPS_FLAG_AMP_ENABLED, sps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SEPARATE_COLOUR_PLANE, + V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE, + sps->flags); + + cedrus_write(dev, VE_DEC_H265_DEC_SPS_HDR, reg); + /* PCM parameters, also taken from the SPS. */ + reg = VE_DEC_H265_DEC_PCM_CTRL_LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_pcm_luma_coding_block_size) | + VE_DEC_H265_DEC_PCM_CTRL_LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_pcm_luma_coding_block_size_minus3) | + VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_CHROMA_MINUS1(sps->pcm_sample_bit_depth_chroma_minus1) | + VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_LUMA_MINUS1(sps->pcm_sample_bit_depth_luma_minus1); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_ENABLED, + V4L2_HEVC_SPS_FLAG_PCM_ENABLED, sps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_LOOP_FILTER_DISABLED, + V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED, + sps->flags); + + cedrus_write(dev, VE_DEC_H265_DEC_PCM_CTRL, reg); + + /* PPS. 
*/ + + reg = VE_DEC_H265_DEC_PPS_CTRL0_PPS_CR_QP_OFFSET(pps->pps_cr_qp_offset) | + VE_DEC_H265_DEC_PPS_CTRL0_PPS_CB_QP_OFFSET(pps->pps_cb_qp_offset) | + VE_DEC_H265_DEC_PPS_CTRL0_INIT_QP_MINUS26(pps->init_qp_minus26) | + VE_DEC_H265_DEC_PPS_CTRL0_DIFF_CU_QP_DELTA_DEPTH(pps->diff_cu_qp_delta_depth); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CU_QP_DELTA_ENABLED, + V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_TRANSFORM_SKIP_ENABLED, + V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CONSTRAINED_INTRA_PRED, + V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_SIGN_DATA_HIDING_ENABLED, + V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED, + pps->flags); + + cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL0, reg); + + reg = VE_DEC_H265_DEC_PPS_CTRL1_LOG2_PARALLEL_MERGE_LEVEL_MINUS2(pps->log2_parallel_merge_level_minus2); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED, + V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED, + V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_ENTROPY_CODING_SYNC_ENABLED, + V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TILES_ENABLED, + V4L2_HEVC_PPS_FLAG_TILES_ENABLED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TRANSQUANT_BYPASS_ENABLED, + V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED, + pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_BIPRED, + V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED, pps->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_PRED, + 
V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED, pps->flags); + + cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL1, reg); + + /* Slice Parameters. */ + + reg = VE_DEC_H265_DEC_SLICE_HDR_INFO0_PICTURE_TYPE(slice_params->pic_struct) | + VE_DEC_H265_DEC_SLICE_HDR_INFO0_FIVE_MINUS_MAX_NUM_MERGE_CAND(slice_params->five_minus_max_num_merge_cand) | + VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L1_ACTIVE_MINUS1(slice_params->num_ref_idx_l1_active_minus1) | + VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L0_ACTIVE_MINUS1(slice_params->num_ref_idx_l0_active_minus1) | + VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLLOCATED_REF_IDX(slice_params->collocated_ref_idx) | + VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLOUR_PLANE_ID(slice_params->colour_plane_id) | + VE_DEC_H265_DEC_SLICE_HDR_INFO0_SLICE_TYPE(slice_params->slice_type); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_COLLOCATED_FROM_L0, + V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0, + slice_params->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_CABAC_INIT, + V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT, + slice_params->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_MVD_L1_ZERO, + V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO, + slice_params->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_CHROMA, + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA, + slice_params->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_LUMA, + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA, + slice_params->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_TEMPORAL_MVP_ENABLE, + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED, + slice_params->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_DEPENDENT_SLICE_SEGMENT, + V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT, + slice_params->flags); + /* The first slice submitted for a new frame is flagged as the first slice segment in the picture. */ + if (ctx->fh.m2m_ctx->new_frame) + reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_FIRST_SLICE_SEGMENT_IN_PIC; + + 
cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO0, reg); + + reg = VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(slice_params->slice_tc_offset_div2) | + VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(slice_params->slice_beta_offset_div2) | + VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(slice_params->slice_cr_qp_offset) | + VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(slice_params->slice_cb_qp_offset) | + VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(slice_params->slice_qp_delta); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED, + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED, + slice_params->flags); + + reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED, + V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED, + slice_params->flags); + /* NOTE(review): the flag is set when cedrus_h265_is_low_delay() returns 0, i.e. when no active reference has a larger POC - confirm the intended polarity. */ + if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I && !cedrus_h265_is_low_delay(run)) + reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY; + + cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO1, reg); + + chroma_log2_weight_denom = pred_weight_table->luma_log2_weight_denom + + pred_weight_table->delta_chroma_log2_weight_denom; + reg = VE_DEC_H265_DEC_SLICE_HDR_INFO2_NUM_ENTRY_POINT_OFFSETS(num_entry_point_offsets) | + VE_DEC_H265_DEC_SLICE_HDR_INFO2_CHROMA_LOG2_WEIGHT_DENOM(chroma_log2_weight_denom) | + VE_DEC_H265_DEC_SLICE_HDR_INFO2_LUMA_LOG2_WEIGHT_DENOM(pred_weight_table->luma_log2_weight_denom); + + cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO2, reg); + /* Entry points buffer address, given without its 8 low bits. */ + cedrus_write(dev, VE_DEC_H265_ENTRY_POINT_OFFSET_ADDR, + ctx->codec.h265.entry_points_buf_addr >> 8); + + /* Decoded picture size. */ + + reg = VE_DEC_H265_DEC_PIC_SIZE_WIDTH(ctx->src_fmt.width) | + VE_DEC_H265_DEC_PIC_SIZE_HEIGHT(ctx->src_fmt.height); + + cedrus_write(dev, VE_DEC_H265_DEC_PIC_SIZE, reg); + + /* Scaling list. 
*/ + + if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) { + cedrus_h265_write_scaling_list(ctx, run); + reg = VE_DEC_H265_SCALING_LIST_CTRL0_FLAG_ENABLED; + } else { + reg = VE_DEC_H265_SCALING_LIST_CTRL0_DEFAULT; + } + cedrus_write(dev, VE_DEC_H265_SCALING_LIST_CTRL0, reg); + + /* Neighbor information address. */ + reg = VE_DEC_H265_NEIGHBOR_INFO_ADDR_BASE(ctx->codec.h265.neighbor_info_buf_addr); + cedrus_write(dev, VE_DEC_H265_NEIGHBOR_INFO_ADDR, reg); + + /* Write the decoded picture buffer entries to the picture list. */ + cedrus_h265_frame_info_write_dpb(ctx, decode_params->dpb, + decode_params->num_active_dpb_entries); + + /* Output frame: it takes frame info slot V4L2_HEVC_DPB_ENTRIES_NUM_MAX, past the slots used for DPB entries. */ + + output_pic_list_index = V4L2_HEVC_DPB_ENTRIES_NUM_MAX; + pic_order_cnt[0] = slice_params->slice_pic_order_cnt; + pic_order_cnt[1] = slice_params->slice_pic_order_cnt; + + cedrus_h265_frame_info_write_single(ctx, output_pic_list_index, + slice_params->pic_struct != 0, + pic_order_cnt, + &run->dst->vb2_buf); + + cedrus_write(dev, VE_DEC_H265_OUTPUT_FRAME_IDX, output_pic_list_index); + + /* Reference picture list 0 (for P/B frames). */ + if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) { + cedrus_h265_ref_pic_list_write(dev, decode_params->dpb, + slice_params->ref_idx_l0, + slice_params->num_ref_idx_l0_active_minus1 + 1, + VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST0); + + if ((pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) || + (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED)) + cedrus_h265_pred_weight_write(dev, + pred_weight_table->delta_luma_weight_l0, + pred_weight_table->luma_offset_l0, + pred_weight_table->delta_chroma_weight_l0, + pred_weight_table->chroma_offset_l0, + slice_params->num_ref_idx_l0_active_minus1 + 1, + VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L0, + VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L0); + } + + /* Reference picture list 1 (for B frames). 
*/ + if (slice_params->slice_type == V4L2_HEVC_SLICE_TYPE_B) { + cedrus_h265_ref_pic_list_write(dev, decode_params->dpb, + slice_params->ref_idx_l1, + slice_params->num_ref_idx_l1_active_minus1 + 1, + VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST1); + + if (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED) + cedrus_h265_pred_weight_write(dev, + pred_weight_table->delta_luma_weight_l1, + pred_weight_table->luma_offset_l1, + pred_weight_table->delta_chroma_weight_l1, + pred_weight_table->chroma_offset_l1, + slice_params->num_ref_idx_l1_active_minus1 + 1, + VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L1, + VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L1); + } + + /* Enable the decoder interrupts. */ + cedrus_write(dev, VE_DEC_H265_CTRL, VE_DEC_H265_CTRL_IRQ_MASK); + + return 0; +} + /* Allocate the neighbor info and entry points buffers of the context; the MV column buffer is allocated later, at setup time. Returns 0 or -ENOMEM. */ +static int cedrus_h265_start(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + /* The buffer size is calculated at setup time. */ + ctx->codec.h265.mv_col_buf_size = 0; + + /* Buffer is never accessed by CPU, so we can skip kernel mapping. 
*/ + ctx->codec.h265.neighbor_info_buf = + dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, + &ctx->codec.h265.neighbor_info_buf_addr, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h265.neighbor_info_buf) + return -ENOMEM; + /* The entry points buffer is filled by the CPU in cedrus_h265_write_tiles(), so it is allocated with a kernel mapping. */ + ctx->codec.h265.entry_points_buf = + dma_alloc_coherent(dev->dev, CEDRUS_H265_ENTRY_POINTS_BUF_SIZE, + &ctx->codec.h265.entry_points_buf_addr, + GFP_KERNEL); + if (!ctx->codec.h265.entry_points_buf) { /* Undo the first allocation before failing. */ + dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, + ctx->codec.h265.neighbor_info_buf, + ctx->codec.h265.neighbor_info_buf_addr, + DMA_ATTR_NO_KERNEL_MAPPING); + return -ENOMEM; + } + + return 0; +} + /* Free the buffers allocated by cedrus_h265_start() and, when one was allocated at setup time, the MV column buffer. */ +static void cedrus_h265_stop(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + if (ctx->codec.h265.mv_col_buf_size > 0) { + dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size, + ctx->codec.h265.mv_col_buf, + ctx->codec.h265.mv_col_buf_addr, + DMA_ATTR_NO_KERNEL_MAPPING); + + ctx->codec.h265.mv_col_buf_size = 0; + } + + dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE, + ctx->codec.h265.neighbor_info_buf, + ctx->codec.h265.neighbor_info_buf_addr, + DMA_ATTR_NO_KERNEL_MAPPING); + dma_free_coherent(dev->dev, CEDRUS_H265_ENTRY_POINTS_BUF_SIZE, + ctx->codec.h265.entry_points_buf, + ctx->codec.h265.entry_points_buf_addr); +} + /* Request decoding of the programmed slice by writing the slice-decode trigger. */ +static void cedrus_h265_trigger(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_DEC_SLICE); +} + /* H265 callbacks exposed through the cedrus_dec_ops table. */ +struct cedrus_dec_ops cedrus_dec_ops_h265 = { + .irq_clear = cedrus_h265_irq_clear, + .irq_disable = cedrus_h265_irq_disable, + .irq_status = cedrus_h265_irq_status, + .setup = cedrus_h265_setup, + .start = cedrus_h265_start, + .stop = cedrus_h265_stop, + .trigger = cedrus_h265_trigger, +}; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c new file mode 100644 index 000000000..fe5fbf6cf --- /dev/null +++
b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. + * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#include <linux/platform_device.h> +#include <linux/of_reserved_mem.h> +#include <linux/of_device.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/clk.h> +#include <linux/pm_runtime.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/soc/sunxi/sunxi_sram.h> + +#include <media/videobuf2-core.h> +#include <media/v4l2-mem2mem.h> + +#include "cedrus.h" +#include "cedrus_hw.h" +#include "cedrus_regs.h" + /* Write VE_MODE with the decoding mode for codec plus the DDR and picture width bits. Returns 0, or -EINVAL for a codec not handled here. */ +int cedrus_engine_enable(struct cedrus_ctx *ctx, enum cedrus_codec codec) +{ + u32 reg = 0; + + /* + * FIXME: This is only valid with 32-bit DDR, we should test + * it on the A13/A33. + */ + reg |= VE_MODE_REC_WR_MODE_2MB; + reg |= VE_MODE_DDR_MODE_BW_128; + + switch (codec) { + case CEDRUS_CODEC_MPEG2: + reg |= VE_MODE_DEC_MPEG; + break; + + /* H.264 and VP8 both use the same decoding mode bit. 
*/ + case CEDRUS_CODEC_H264: + case CEDRUS_CODEC_VP8: + reg |= VE_MODE_DEC_H264; + break; + + case CEDRUS_CODEC_H265: + reg |= VE_MODE_DEC_H265; + break; + + default: + return -EINVAL; + } + /* A width of exactly 4096 sets both picture width bits. */ + if (ctx->src_fmt.width == 4096) + reg |= VE_MODE_PIC_WIDTH_IS_4096; + if (ctx->src_fmt.width > 2048) + reg |= VE_MODE_PIC_WIDTH_MORE_2048; + + cedrus_write(ctx->dev, VE_MODE, reg); + + return 0; +} + /* Write the disabled mode value to VE_MODE. */ +void cedrus_engine_disable(struct cedrus_dev *dev) +{ + cedrus_write(dev, VE_MODE, VE_MODE_DISABLED); +} + /* Program the output format registers for fmt: linear NV12 also gets a chroma buffer length and line strides, any other pixel format is set up as 32x32-tiled NV12. */ +void cedrus_dst_format_set(struct cedrus_dev *dev, + struct v4l2_pix_format *fmt) +{ + unsigned int width = fmt->width; + unsigned int height = fmt->height; + u32 chroma_size; + u32 reg; + + switch (fmt->pixelformat) { + case V4L2_PIX_FMT_NV12: + chroma_size = ALIGN(width, 16) * ALIGN(height, 16) / 2; + + reg = VE_PRIMARY_OUT_FMT_NV12; + cedrus_write(dev, VE_PRIMARY_OUT_FMT, reg); + /* The chroma buffer length register takes half of chroma_size. */ + reg = chroma_size / 2; + cedrus_write(dev, VE_PRIMARY_CHROMA_BUF_LEN, reg); + + reg = VE_PRIMARY_FB_LINE_STRIDE_LUMA(ALIGN(width, 16)) | + VE_PRIMARY_FB_LINE_STRIDE_CHROMA(ALIGN(width, 16) / 2); + cedrus_write(dev, VE_PRIMARY_FB_LINE_STRIDE, reg); + + break; + case V4L2_PIX_FMT_NV12_32L32: + default: + reg = VE_PRIMARY_OUT_FMT_TILED_32_NV12; + cedrus_write(dev, VE_PRIMARY_OUT_FMT, reg); + /* NOTE(review): a secondary output format constant is written to the chroma buffer length register here - confirm this is intended. */ + reg = VE_SECONDARY_OUT_FMT_TILED_32_NV12; + cedrus_write(dev, VE_CHROMA_BUF_LEN, reg); + + break; + } +} + /* Decoder interrupt handler: reads the codec status, masks and clears the interrupt, then completes the current job as done or failed. */ +static irqreturn_t cedrus_irq(int irq, void *data) +{ + struct cedrus_dev *dev = data; + struct cedrus_ctx *ctx; + enum vb2_buffer_state state; + enum cedrus_irq_status status; + + /* + * If cancel_delayed_work returns false it means watchdog already + * executed and finished the job.
+ */ + if (!cancel_delayed_work(&dev->watchdog_work)) + return IRQ_HANDLED; + + ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev); + if (!ctx) { + v4l2_err(&dev->v4l2_dev, + "Instance released before the end of transaction\n"); + return IRQ_NONE; + } + /* Ask the codec in use whether this interrupt is for us and how decoding ended. */ + status = dev->dec_ops[ctx->current_codec]->irq_status(ctx); + if (status == CEDRUS_IRQ_NONE) + return IRQ_NONE; + /* Mask and clear the interrupt before completing the job. */ + dev->dec_ops[ctx->current_codec]->irq_disable(ctx); + dev->dec_ops[ctx->current_codec]->irq_clear(ctx); + + if (status == CEDRUS_IRQ_ERROR) + state = VB2_BUF_STATE_ERROR; + else + state = VB2_BUF_STATE_DONE; + + v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx, + state); + + return IRQ_HANDLED; +} + /* Watchdog work: if a context is still current when it runs, log a timeout, reset the hardware and finish the job with an error state. */ +void cedrus_watchdog(struct work_struct *work) +{ + struct cedrus_dev *dev; + struct cedrus_ctx *ctx; + + dev = container_of(to_delayed_work(work), + struct cedrus_dev, watchdog_work); + + ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev); + if (!ctx) + return; + + v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n"); + reset_control_reset(dev->rstc); + v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx, + VB2_BUF_STATE_ERROR); +} + /* Suspend: disable the RAM, MOD and AHB clocks, then assert the reset line. Always returns 0. */ +int cedrus_hw_suspend(struct device *device) +{ + struct cedrus_dev *dev = dev_get_drvdata(device); + + clk_disable_unprepare(dev->ram_clk); + clk_disable_unprepare(dev->mod_clk); + clk_disable_unprepare(dev->ahb_clk); + + reset_control_assert(dev->rstc); + + return 0; +} + /* Resume: reset the block, then enable the AHB, MOD and RAM clocks in that order, unwinding in reverse on failure. Returns 0 or the error of the failing step. */ +int cedrus_hw_resume(struct device *device) +{ + struct cedrus_dev *dev = dev_get_drvdata(device); + int ret; + + ret = reset_control_reset(dev->rstc); + if (ret) { + dev_err(dev->dev, "Failed to apply reset\n"); + + return ret; + } + + ret = clk_prepare_enable(dev->ahb_clk); + if (ret) { + dev_err(dev->dev, "Failed to enable AHB clock\n"); + + goto err_rst; + } + + ret = clk_prepare_enable(dev->mod_clk); + if (ret) { + dev_err(dev->dev, "Failed to enable MOD clock\n"); + + goto err_ahb_clk; + } + + ret = clk_prepare_enable(dev->ram_clk); + if (ret) { + dev_err(dev->dev, "Failed
to enable RAM clock\n"); + + goto err_mod_clk; + } + + return 0; + +err_mod_clk: + clk_disable_unprepare(dev->mod_clk); +err_ahb_clk: + clk_disable_unprepare(dev->ahb_clk); +err_rst: + reset_control_assert(dev->rstc); + + return ret; +} + +int cedrus_hw_probe(struct cedrus_dev *dev) +{ + const struct cedrus_variant *variant; + int irq_dec; + int ret; + + variant = of_device_get_match_data(dev->dev); + if (!variant) + return -EINVAL; + + dev->capabilities = variant->capabilities; + + irq_dec = platform_get_irq(dev->pdev, 0); + if (irq_dec <= 0) + return irq_dec; + ret = devm_request_irq(dev->dev, irq_dec, cedrus_irq, + 0, dev_name(dev->dev), dev); + if (ret) { + dev_err(dev->dev, "Failed to request IRQ\n"); + + return ret; + } + + ret = of_reserved_mem_device_init(dev->dev); + if (ret && ret != -ENODEV) { + dev_err(dev->dev, "Failed to reserve memory\n"); + + return ret; + } + + ret = sunxi_sram_claim(dev->dev); + if (ret) { + dev_err(dev->dev, "Failed to claim SRAM\n"); + + goto err_mem; + } + + dev->ahb_clk = devm_clk_get(dev->dev, "ahb"); + if (IS_ERR(dev->ahb_clk)) { + dev_err(dev->dev, "Failed to get AHB clock\n"); + + ret = PTR_ERR(dev->ahb_clk); + goto err_sram; + } + + dev->mod_clk = devm_clk_get(dev->dev, "mod"); + if (IS_ERR(dev->mod_clk)) { + dev_err(dev->dev, "Failed to get MOD clock\n"); + + ret = PTR_ERR(dev->mod_clk); + goto err_sram; + } + + dev->ram_clk = devm_clk_get(dev->dev, "ram"); + if (IS_ERR(dev->ram_clk)) { + dev_err(dev->dev, "Failed to get RAM clock\n"); + + ret = PTR_ERR(dev->ram_clk); + goto err_sram; + } + + dev->rstc = devm_reset_control_get(dev->dev, NULL); + if (IS_ERR(dev->rstc)) { + dev_err(dev->dev, "Failed to get reset control\n"); + + ret = PTR_ERR(dev->rstc); + goto err_sram; + } + + dev->base = devm_platform_ioremap_resource(dev->pdev, 0); + if (IS_ERR(dev->base)) { + dev_err(dev->dev, "Failed to map registers\n"); + + ret = PTR_ERR(dev->base); + goto err_sram; + } + + ret = clk_set_rate(dev->mod_clk, variant->mod_rate); + if 
(ret) { + dev_err(dev->dev, "Failed to set clock rate\n"); + + goto err_sram; + } + + pm_runtime_enable(dev->dev); + if (!pm_runtime_enabled(dev->dev)) { + ret = cedrus_hw_resume(dev->dev); + if (ret) + goto err_pm; + } + + return 0; + +err_pm: + pm_runtime_disable(dev->dev); +err_sram: + sunxi_sram_release(dev->dev); +err_mem: + of_reserved_mem_device_release(dev->dev); + + return ret; +} + +void cedrus_hw_remove(struct cedrus_dev *dev) +{ + pm_runtime_disable(dev->dev); + if (!pm_runtime_status_suspended(dev->dev)) + cedrus_hw_suspend(dev->dev); + + sunxi_sram_release(dev->dev); + + of_reserved_mem_device_release(dev->dev); +} diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h new file mode 100644 index 000000000..7c92f00e3 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. 
+ * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#ifndef _CEDRUS_HW_H_ +#define _CEDRUS_HW_H_ + +int cedrus_engine_enable(struct cedrus_ctx *ctx, enum cedrus_codec codec); +void cedrus_engine_disable(struct cedrus_dev *dev); + +void cedrus_dst_format_set(struct cedrus_dev *dev, + struct v4l2_pix_format *fmt); + +int cedrus_hw_suspend(struct device *device); +int cedrus_hw_resume(struct device *device); + +int cedrus_hw_probe(struct cedrus_dev *dev); +void cedrus_hw_remove(struct cedrus_dev *dev); + +void cedrus_watchdog(struct work_struct *work); + +#endif diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c new file mode 100644 index 000000000..c1128d2cd --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + */ + +#include <media/videobuf2-dma-contig.h> + +#include "cedrus.h" +#include "cedrus_hw.h" +#include "cedrus_regs.h" + +static enum cedrus_irq_status cedrus_mpeg2_irq_status(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg; + + reg = cedrus_read(dev, VE_DEC_MPEG_STATUS); + reg &= VE_DEC_MPEG_STATUS_CHECK_MASK; + + if (!reg) + return CEDRUS_IRQ_NONE; + + if (reg & VE_DEC_MPEG_STATUS_CHECK_ERROR || + !(reg & VE_DEC_MPEG_STATUS_SUCCESS)) + return CEDRUS_IRQ_ERROR; + + return CEDRUS_IRQ_OK; +} + +static void cedrus_mpeg2_irq_clear(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_DEC_MPEG_STATUS, VE_DEC_MPEG_STATUS_CHECK_MASK); +} + +static void cedrus_mpeg2_irq_disable(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_DEC_MPEG_CTRL); + + reg &= ~VE_DEC_MPEG_CTRL_IRQ_MASK; + 
+ cedrus_write(dev, VE_DEC_MPEG_CTRL, reg); +} + +static int cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) +{ + const struct v4l2_ctrl_mpeg2_sequence *seq; + const struct v4l2_ctrl_mpeg2_picture *pic; + const struct v4l2_ctrl_mpeg2_quantisation *quantisation; + dma_addr_t src_buf_addr, dst_luma_addr, dst_chroma_addr; + struct cedrus_dev *dev = ctx->dev; + struct vb2_queue *vq; + const u8 *matrix; + unsigned int i; + u32 reg; + + seq = run->mpeg2.sequence; + pic = run->mpeg2.picture; + + quantisation = run->mpeg2.quantisation; + + /* Activate MPEG engine. */ + cedrus_engine_enable(ctx, CEDRUS_CODEC_MPEG2); + + /* Set intra quantisation matrix. */ + matrix = quantisation->intra_quantiser_matrix; + for (i = 0; i < 64; i++) { + reg = VE_DEC_MPEG_IQMINPUT_WEIGHT(i, matrix[i]); + reg |= VE_DEC_MPEG_IQMINPUT_FLAG_INTRA; + + cedrus_write(dev, VE_DEC_MPEG_IQMINPUT, reg); + } + + /* Set non-intra quantisation matrix. */ + matrix = quantisation->non_intra_quantiser_matrix; + for (i = 0; i < 64; i++) { + reg = VE_DEC_MPEG_IQMINPUT_WEIGHT(i, matrix[i]); + reg |= VE_DEC_MPEG_IQMINPUT_FLAG_NON_INTRA; + + cedrus_write(dev, VE_DEC_MPEG_IQMINPUT, reg); + } + + /* Set MPEG picture header. 
*/ + + reg = VE_DEC_MPEG_MP12HDR_SLICE_TYPE(pic->picture_coding_type); + reg |= VE_DEC_MPEG_MP12HDR_F_CODE(0, 0, pic->f_code[0][0]); + reg |= VE_DEC_MPEG_MP12HDR_F_CODE(0, 1, pic->f_code[0][1]); + reg |= VE_DEC_MPEG_MP12HDR_F_CODE(1, 0, pic->f_code[1][0]); + reg |= VE_DEC_MPEG_MP12HDR_F_CODE(1, 1, pic->f_code[1][1]); + reg |= VE_DEC_MPEG_MP12HDR_INTRA_DC_PRECISION(pic->intra_dc_precision); + reg |= VE_DEC_MPEG_MP12HDR_INTRA_PICTURE_STRUCTURE(pic->picture_structure); + reg |= VE_DEC_MPEG_MP12HDR_TOP_FIELD_FIRST(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST); + reg |= VE_DEC_MPEG_MP12HDR_FRAME_PRED_FRAME_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT); + reg |= VE_DEC_MPEG_MP12HDR_CONCEALMENT_MOTION_VECTORS(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV); + reg |= VE_DEC_MPEG_MP12HDR_Q_SCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE); + reg |= VE_DEC_MPEG_MP12HDR_INTRA_VLC_FORMAT(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC); + reg |= VE_DEC_MPEG_MP12HDR_ALTERNATE_SCAN(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN); + reg |= VE_DEC_MPEG_MP12HDR_FULL_PEL_FORWARD_VECTOR(0); + reg |= VE_DEC_MPEG_MP12HDR_FULL_PEL_BACKWARD_VECTOR(0); + + cedrus_write(dev, VE_DEC_MPEG_MP12HDR, reg); + + /* Set frame dimensions. */ + + reg = VE_DEC_MPEG_PICCODEDSIZE_WIDTH(seq->horizontal_size); + reg |= VE_DEC_MPEG_PICCODEDSIZE_HEIGHT(seq->vertical_size); + + cedrus_write(dev, VE_DEC_MPEG_PICCODEDSIZE, reg); + + reg = VE_DEC_MPEG_PICBOUNDSIZE_WIDTH(ctx->src_fmt.width); + reg |= VE_DEC_MPEG_PICBOUNDSIZE_HEIGHT(ctx->src_fmt.height); + + cedrus_write(dev, VE_DEC_MPEG_PICBOUNDSIZE, reg); + + /* Forward and backward prediction reference buffers. 
*/ + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + cedrus_write_ref_buf_addr(ctx, vq, pic->forward_ref_ts, + VE_DEC_MPEG_FWD_REF_LUMA_ADDR, + VE_DEC_MPEG_FWD_REF_CHROMA_ADDR); + cedrus_write_ref_buf_addr(ctx, vq, pic->backward_ref_ts, + VE_DEC_MPEG_BWD_REF_LUMA_ADDR, + VE_DEC_MPEG_BWD_REF_CHROMA_ADDR); + + /* Destination luma and chroma buffers. */ + + dst_luma_addr = cedrus_dst_buf_addr(ctx, &run->dst->vb2_buf, 0); + dst_chroma_addr = cedrus_dst_buf_addr(ctx, &run->dst->vb2_buf, 1); + + cedrus_write(dev, VE_DEC_MPEG_REC_LUMA, dst_luma_addr); + cedrus_write(dev, VE_DEC_MPEG_REC_CHROMA, dst_chroma_addr); + + /* Source offset and length in bits. */ + + cedrus_write(dev, VE_DEC_MPEG_VLD_OFFSET, 0); + + reg = vb2_get_plane_payload(&run->src->vb2_buf, 0) * 8; + cedrus_write(dev, VE_DEC_MPEG_VLD_LEN, reg); + + /* Source beginning and end addresses. */ + + src_buf_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0); + + reg = VE_DEC_MPEG_VLD_ADDR_BASE(src_buf_addr); + reg |= VE_DEC_MPEG_VLD_ADDR_VALID_PIC_DATA; + reg |= VE_DEC_MPEG_VLD_ADDR_LAST_PIC_DATA; + reg |= VE_DEC_MPEG_VLD_ADDR_FIRST_PIC_DATA; + + cedrus_write(dev, VE_DEC_MPEG_VLD_ADDR, reg); + + reg = src_buf_addr + vb2_get_plane_payload(&run->src->vb2_buf, 0); + cedrus_write(dev, VE_DEC_MPEG_VLD_END_ADDR, reg); + + /* Macroblock address: start at the beginning. */ + reg = VE_DEC_MPEG_MBADDR_Y(0) | VE_DEC_MPEG_MBADDR_X(0); + cedrus_write(dev, VE_DEC_MPEG_MBADDR, reg); + + /* Clear previous errors. */ + cedrus_write(dev, VE_DEC_MPEG_ERROR, 0); + + /* Clear correct macroblocks register. */ + cedrus_write(dev, VE_DEC_MPEG_CRTMBADDR, 0); + + /* Enable appropriate interruptions and components. 
*/ + + reg = VE_DEC_MPEG_CTRL_IRQ_MASK | VE_DEC_MPEG_CTRL_MC_NO_WRITEBACK | + VE_DEC_MPEG_CTRL_MC_CACHE_EN; + + cedrus_write(dev, VE_DEC_MPEG_CTRL, reg); + + return 0; +} + +static void cedrus_mpeg2_trigger(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg; + + /* Trigger MPEG engine. */ + reg = VE_DEC_MPEG_TRIGGER_HW_MPEG_VLD | VE_DEC_MPEG_TRIGGER_MPEG2 | + VE_DEC_MPEG_TRIGGER_MB_BOUNDARY; + + cedrus_write(dev, VE_DEC_MPEG_TRIGGER, reg); +} + +struct cedrus_dec_ops cedrus_dec_ops_mpeg2 = { + .irq_clear = cedrus_mpeg2_irq_clear, + .irq_disable = cedrus_mpeg2_irq_disable, + .irq_status = cedrus_mpeg2_irq_status, + .setup = cedrus_mpeg2_setup, + .trigger = cedrus_mpeg2_trigger, +}; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h new file mode 100644 index 000000000..655c05b38 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h @@ -0,0 +1,701 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cedrus VPU driver + * + * Copyright (c) 2013-2016 Jens Kuske <jenskuske@gmail.com> + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + */ + +#ifndef _CEDRUS_REGS_H_ +#define _CEDRUS_REGS_H_ + +#define SHIFT_AND_MASK_BITS(v, h, l) \ + (((unsigned long)(v) << (l)) & GENMASK(h, l)) + +/* + * Common acronyms and contractions used in register descriptions: + * * VLD : Variable-Length Decoder + * * IQ: Inverse Quantization + * * IDCT: Inverse Discrete Cosine Transform + * * MC: Motion Compensation + * * STCD: Start Code Detect + * * SDRT: Scale Down and Rotate + * * WB: Writeback + * * BITS/BS: Bitstream + * * MB: Macroblock + * * CTU: Coding Tree Unit + * * CTB: Coding Tree Block + * * IDX: Index + */ + +#define VE_ENGINE_DEC_MPEG 0x100 +#define VE_ENGINE_DEC_H264 0x200 +#define VE_ENGINE_DEC_H265 0x500 + +#define VE_MODE 0x00 + +#define VE_MODE_PIC_WIDTH_IS_4096 BIT(22) +#define 
VE_MODE_PIC_WIDTH_MORE_2048	BIT(21)
+#define VE_MODE_REC_WR_MODE_2MB	(0x01 << 20)
+#define VE_MODE_REC_WR_MODE_1MB	(0x00 << 20)
+#define VE_MODE_DDR_MODE_BW_128	(0x03 << 16)
+#define VE_MODE_DDR_MODE_BW_256	(0x02 << 16)
+/* Values of the 3-bit field at bits 2:0 of VE_MODE. */
+#define VE_MODE_DISABLED	(0x07 << 0)
+#define VE_MODE_DEC_H265	(0x04 << 0)
+#define VE_MODE_DEC_H264	(0x01 << 0)
+#define VE_MODE_DEC_MPEG	(0x00 << 0)
+
+#define VE_BUF_CTRL	0x50
+
+#define VE_BUF_CTRL_INTRAPRED_EXT_RAM	(0x02 << 2)
+#define VE_BUF_CTRL_INTRAPRED_MIXED_RAM	(0x01 << 2)
+#define VE_BUF_CTRL_INTRAPRED_INT_SRAM	(0x00 << 2)
+#define VE_BUF_CTRL_DBLK_EXT_RAM	(0x02 << 0)
+#define VE_BUF_CTRL_DBLK_MIXED_RAM	(0x01 << 0)
+#define VE_BUF_CTRL_DBLK_INT_SRAM	(0x00 << 0)
+
+#define VE_DBLK_DRAM_BUF_ADDR	0x54
+#define VE_INTRAPRED_DRAM_BUF_ADDR	0x58
+#define VE_PRIMARY_CHROMA_BUF_LEN	0xc4
+#define VE_PRIMARY_FB_LINE_STRIDE	0xc8
+
+#define VE_PRIMARY_FB_LINE_STRIDE_CHROMA(s)	SHIFT_AND_MASK_BITS(s, 31, 16)
+#define VE_PRIMARY_FB_LINE_STRIDE_LUMA(s)	SHIFT_AND_MASK_BITS(s, 15, 0)
+
+#define VE_CHROMA_BUF_LEN	0xe8
+
+/*
+ * Two-bit field at bits 31:30. The constants are unsigned because shifting
+ * the signed values 0x02 and 0x03 left by 30 does not fit in an int, which
+ * is undefined behaviour in C.
+ */
+#define VE_SECONDARY_OUT_FMT_TILED_32_NV12	(0x00U << 30)
+#define VE_SECONDARY_OUT_FMT_EXT	(0x01U << 30)
+#define VE_SECONDARY_OUT_FMT_YU12	(0x02U << 30)
+#define VE_SECONDARY_OUT_FMT_YV12	(0x03U << 30)
+#define VE_CHROMA_BUF_LEN_SDRT(l)	SHIFT_AND_MASK_BITS(l, 27, 0)
+
+#define VE_PRIMARY_OUT_FMT	0xec
+
+/* Format codes placed at bits 7:4 (primary) and 3:0 (secondary, extended). */
+#define VE_PRIMARY_OUT_FMT_TILED_32_NV12	(0x00 << 4)
+#define VE_PRIMARY_OUT_FMT_TILED_128_NV12	(0x01 << 4)
+#define VE_PRIMARY_OUT_FMT_YU12	(0x02 << 4)
+#define VE_PRIMARY_OUT_FMT_YV12	(0x03 << 4)
+#define VE_PRIMARY_OUT_FMT_NV12	(0x04 << 4)
+#define VE_PRIMARY_OUT_FMT_NV21	(0x05 << 4)
+#define VE_SECONDARY_OUT_FMT_EXT_TILED_32_NV12	(0x00 << 0)
+#define VE_SECONDARY_OUT_FMT_EXT_TILED_128_NV12	(0x01 << 0)
+#define VE_SECONDARY_OUT_FMT_EXT_YU12	(0x02 << 0)
+#define VE_SECONDARY_OUT_FMT_EXT_YV12	(0x03 << 0)
+#define VE_SECONDARY_OUT_FMT_EXT_NV12	(0x04 << 0)
+#define VE_SECONDARY_OUT_FMT_EXT_NV21	(0x05 << 0)
+
+#define VE_VERSION	0xf0
+
+#define VE_VERSION_SHIFT	16
+
+/* MPEG engine registers: offsets are relative to VE_ENGINE_DEC_MPEG. */
+#define VE_DEC_MPEG_MP12HDR	(VE_ENGINE_DEC_MPEG + 0x00)
+
+#define VE_DEC_MPEG_MP12HDR_SLICE_TYPE(t)	SHIFT_AND_MASK_BITS(t, 30, 28)
+/* Shift is 24, 20, 16, 12 for (x, y) = (0,0), (0,1), (1,0), (1,1). */
+#define VE_DEC_MPEG_MP12HDR_F_CODE_SHIFT(x, y)	(24 - 4 * (y) - 8 * (x))
+#define VE_DEC_MPEG_MP12HDR_F_CODE(__x, __y, __v) \
+	(((unsigned long)(__v) & GENMASK(3, 0)) << VE_DEC_MPEG_MP12HDR_F_CODE_SHIFT(__x, __y))
+
+#define VE_DEC_MPEG_MP12HDR_INTRA_DC_PRECISION(p) \
+	SHIFT_AND_MASK_BITS(p, 11, 10)
+#define VE_DEC_MPEG_MP12HDR_INTRA_PICTURE_STRUCTURE(s) \
+	SHIFT_AND_MASK_BITS(s, 9, 8)
+/* The single-bit helpers below set their bit for any non-zero argument. */
+#define VE_DEC_MPEG_MP12HDR_TOP_FIELD_FIRST(v) \
+	((v) ? BIT(7) : 0)
+#define VE_DEC_MPEG_MP12HDR_FRAME_PRED_FRAME_DCT(v) \
+	((v) ? BIT(6) : 0)
+#define VE_DEC_MPEG_MP12HDR_CONCEALMENT_MOTION_VECTORS(v) \
+	((v) ? BIT(5) : 0)
+#define VE_DEC_MPEG_MP12HDR_Q_SCALE_TYPE(v) \
+	((v) ? BIT(4) : 0)
+#define VE_DEC_MPEG_MP12HDR_INTRA_VLC_FORMAT(v) \
+	((v) ? BIT(3) : 0)
+#define VE_DEC_MPEG_MP12HDR_ALTERNATE_SCAN(v) \
+	((v) ? BIT(2) : 0)
+#define VE_DEC_MPEG_MP12HDR_FULL_PEL_FORWARD_VECTOR(v) \
+	((v) ? BIT(1) : 0)
+#define VE_DEC_MPEG_MP12HDR_FULL_PEL_BACKWARD_VECTOR(v) \
+	((v) ? BIT(0) : 0)
+
+#define VE_DEC_MPEG_PICCODEDSIZE	(VE_ENGINE_DEC_MPEG + 0x08)
+
+/* Sizes are converted to units of 16, rounded up, before being packed. */
+#define VE_DEC_MPEG_PICCODEDSIZE_WIDTH(w) \
+	SHIFT_AND_MASK_BITS(DIV_ROUND_UP(w, 16), 15, 8)
+#define VE_DEC_MPEG_PICCODEDSIZE_HEIGHT(h) \
+	SHIFT_AND_MASK_BITS(DIV_ROUND_UP(h, 16), 7, 0)
+
+#define VE_DEC_MPEG_PICBOUNDSIZE	(VE_ENGINE_DEC_MPEG + 0x0c)
+
+#define VE_DEC_MPEG_PICBOUNDSIZE_WIDTH(w)	SHIFT_AND_MASK_BITS(w, 27, 16)
+#define VE_DEC_MPEG_PICBOUNDSIZE_HEIGHT(h)	SHIFT_AND_MASK_BITS(h, 11, 0)
+
+#define VE_DEC_MPEG_MBADDR	(VE_ENGINE_DEC_MPEG + 0x10)
+
+#define VE_DEC_MPEG_MBADDR_X(w)	SHIFT_AND_MASK_BITS(w, 15, 8)
+#define VE_DEC_MPEG_MBADDR_Y(h)	SHIFT_AND_MASK_BITS(h, 7, 0)
+
+#define VE_DEC_MPEG_CTRL	(VE_ENGINE_DEC_MPEG + 0x14)
+
+#define VE_DEC_MPEG_CTRL_MC_CACHE_EN	BIT(31)
+#define VE_DEC_MPEG_CTRL_SW_VLD	BIT(27)
+#define VE_DEC_MPEG_CTRL_SW_IQ_IS	BIT(17)
+#define VE_DEC_MPEG_CTRL_QP_AC_DC_OUT_EN	BIT(14)
+#define VE_DEC_MPEG_CTRL_ROTATE_SCALE_OUT_EN	BIT(8)
+#define VE_DEC_MPEG_CTRL_MC_NO_WRITEBACK	BIT(7)
+#define VE_DEC_MPEG_CTRL_ROTATE_IRQ_EN	BIT(6)
+#define VE_DEC_MPEG_CTRL_VLD_DATA_REQ_IRQ_EN	BIT(5)
+#define VE_DEC_MPEG_CTRL_ERROR_IRQ_EN	BIT(4)
+#define VE_DEC_MPEG_CTRL_FINISH_IRQ_EN	BIT(3)
+/* Finish, error and VLD-data-request interrupt enables combined. */
+#define VE_DEC_MPEG_CTRL_IRQ_MASK \
+	(VE_DEC_MPEG_CTRL_FINISH_IRQ_EN | VE_DEC_MPEG_CTRL_ERROR_IRQ_EN | \
+	 VE_DEC_MPEG_CTRL_VLD_DATA_REQ_IRQ_EN)
+
+#define VE_DEC_MPEG_TRIGGER	(VE_ENGINE_DEC_MPEG + 0x18)
+
+#define VE_DEC_MPEG_TRIGGER_MB_BOUNDARY	BIT(31)
+
+#define VE_DEC_MPEG_TRIGGER_CHROMA_FMT_420	(0x00 << 27)
+#define VE_DEC_MPEG_TRIGGER_CHROMA_FMT_411	(0x01 << 27)
+#define VE_DEC_MPEG_TRIGGER_CHROMA_FMT_422	(0x02 << 27)
+#define VE_DEC_MPEG_TRIGGER_CHROMA_FMT_444	(0x03 << 27)
+#define VE_DEC_MPEG_TRIGGER_CHROMA_FMT_422T	(0x04 << 27)
+
+#define VE_DEC_MPEG_TRIGGER_MPEG1	(0x01 << 24)
+#define VE_DEC_MPEG_TRIGGER_MPEG2	(0x02 << 24)
+#define VE_DEC_MPEG_TRIGGER_JPEG	(0x03 << 24)
+#define VE_DEC_MPEG_TRIGGER_MPEG4	(0x04 << 24)
+#define VE_DEC_MPEG_TRIGGER_VP62	(0x05 << 24)
+
+#define VE_DEC_MPEG_TRIGGER_VP62_AC_GET_BITS	BIT(7)
+
+#define VE_DEC_MPEG_TRIGGER_STCD_VC1	(0x02 << 4)
+#define VE_DEC_MPEG_TRIGGER_STCD_MPEG2	(0x01 << 4)
+#define VE_DEC_MPEG_TRIGGER_STCD_AVC	(0x00 << 4)
+
+/* Values of the field at bits 3:0 of the trigger register. */
+#define VE_DEC_MPEG_TRIGGER_HW_MPEG_VLD	(0x0f << 0)
+#define VE_DEC_MPEG_TRIGGER_HW_JPEG_VLD	(0x0e << 0)
+#define VE_DEC_MPEG_TRIGGER_HW_MB	(0x0d << 0)
+#define VE_DEC_MPEG_TRIGGER_HW_ROTATE	(0x0c << 0)
+#define VE_DEC_MPEG_TRIGGER_HW_VP6_VLD	(0x0b << 0)
+#define VE_DEC_MPEG_TRIGGER_HW_MAF	(0x0a << 0)
+#define VE_DEC_MPEG_TRIGGER_HW_STCD_END	(0x09 << 0)
+#define VE_DEC_MPEG_TRIGGER_HW_STCD_BEGIN	(0x08 << 0)
+#define VE_DEC_MPEG_TRIGGER_SW_MC	(0x07 << 0)
+#define VE_DEC_MPEG_TRIGGER_SW_IQ	(0x06 << 0)
+#define VE_DEC_MPEG_TRIGGER_SW_IDCT	(0x05 << 0)
+#define VE_DEC_MPEG_TRIGGER_SW_SCALE	(0x04 << 0)
+#define VE_DEC_MPEG_TRIGGER_SW_VP6	(0x03 << 0)
+#define VE_DEC_MPEG_TRIGGER_SW_VP62_AC_GET_BITS	(0x02 << 0)
+
+#define VE_DEC_MPEG_STATUS	(VE_ENGINE_DEC_MPEG + 0x1c)
+
+#define VE_DEC_MPEG_STATUS_START_DETECT_BUSY	BIT(27)
+#define VE_DEC_MPEG_STATUS_VP6_BIT	BIT(26)
+#define VE_DEC_MPEG_STATUS_VP6_BIT_BUSY	BIT(25)
+#define VE_DEC_MPEG_STATUS_MAF_BUSY	BIT(23)
+#define VE_DEC_MPEG_STATUS_VP6_MVP_BUSY	BIT(22)
+#define VE_DEC_MPEG_STATUS_JPEG_BIT_END	BIT(21)
+#define VE_DEC_MPEG_STATUS_JPEG_RESTART_ERROR	BIT(20)
+#define VE_DEC_MPEG_STATUS_JPEG_MARKER	BIT(19)
+#define VE_DEC_MPEG_STATUS_ROTATE_BUSY	BIT(18)
+#define VE_DEC_MPEG_STATUS_DEBLOCKING_BUSY	BIT(17)
+#define VE_DEC_MPEG_STATUS_SCALE_DOWN_BUSY	BIT(16)
+#define VE_DEC_MPEG_STATUS_IQIS_BUF_EMPTY	BIT(15)
+#define VE_DEC_MPEG_STATUS_IDCT_BUF_EMPTY	BIT(14)
+#define VE_DEC_MPEG_STATUS_VE_BUSY	BIT(13)
+#define VE_DEC_MPEG_STATUS_MC_BUSY	BIT(12)
+#define VE_DEC_MPEG_STATUS_IDCT_BUSY	BIT(11)
+#define VE_DEC_MPEG_STATUS_IQIS_BUSY	BIT(10)
+#define VE_DEC_MPEG_STATUS_DCAC_BUSY	BIT(9)
+#define VE_DEC_MPEG_STATUS_VLD_BUSY	BIT(8)
+#define VE_DEC_MPEG_STATUS_ROTATE_SUCCESS	BIT(3)
+#define VE_DEC_MPEG_STATUS_VLD_DATA_REQ	BIT(2)
+#define VE_DEC_MPEG_STATUS_ERROR	BIT(1)
+#define VE_DEC_MPEG_STATUS_SUCCESS	BIT(0)
+/* Bits inspected by the interrupt path: success, error, VLD data request. */
+#define VE_DEC_MPEG_STATUS_CHECK_MASK \
+	(VE_DEC_MPEG_STATUS_SUCCESS | VE_DEC_MPEG_STATUS_ERROR | \
+	 VE_DEC_MPEG_STATUS_VLD_DATA_REQ)
+/* Subset of the check mask that is reported as a decode error. */
+#define VE_DEC_MPEG_STATUS_CHECK_ERROR \
+	(VE_DEC_MPEG_STATUS_ERROR | VE_DEC_MPEG_STATUS_VLD_DATA_REQ)
+
+#define VE_DEC_MPEG_VLD_ADDR	(VE_ENGINE_DEC_MPEG + 0x28)
+
+#define VE_DEC_MPEG_VLD_ADDR_FIRST_PIC_DATA	BIT(30)
+#define VE_DEC_MPEG_VLD_ADDR_LAST_PIC_DATA	BIT(29)
+#define VE_DEC_MPEG_VLD_ADDR_VALID_PIC_DATA	BIT(28)
+/*
+ * Keeps address bits 27:4 in place and moves address bits 31:28 down to
+ * bits 3:0; the argument is evaluated once.
+ */
+#define VE_DEC_MPEG_VLD_ADDR_BASE(a) \
+	({ \
+		u32 _tmp = (a); \
+		u32 _lo = _tmp & GENMASK(27, 4); \
+		u32 _hi = (_tmp >> 28) & GENMASK(3, 0); \
+		(_lo | _hi); \
+	})
+
+#define VE_DEC_MPEG_VLD_OFFSET	(VE_ENGINE_DEC_MPEG + 0x2c)
+#define VE_DEC_MPEG_VLD_LEN	(VE_ENGINE_DEC_MPEG + 0x30)
+#define VE_DEC_MPEG_VLD_END_ADDR	(VE_ENGINE_DEC_MPEG + 0x34)
+
+#define VE_DEC_MPEG_REC_LUMA	(VE_ENGINE_DEC_MPEG + 0x48)
+#define VE_DEC_MPEG_REC_CHROMA	(VE_ENGINE_DEC_MPEG + 0x4c)
+#define VE_DEC_MPEG_FWD_REF_LUMA_ADDR	(VE_ENGINE_DEC_MPEG + 0x50)
+#define VE_DEC_MPEG_FWD_REF_CHROMA_ADDR	(VE_ENGINE_DEC_MPEG + 0x54)
+#define VE_DEC_MPEG_BWD_REF_LUMA_ADDR	(VE_ENGINE_DEC_MPEG + 0x58)
+#define VE_DEC_MPEG_BWD_REF_CHROMA_ADDR	(VE_ENGINE_DEC_MPEG + 0x5c)
+
+#define VE_DEC_MPEG_IQMINPUT	(VE_ENGINE_DEC_MPEG + 0x80)
+
+#define VE_DEC_MPEG_IQMINPUT_FLAG_INTRA	(0x01 << 14)
+#define VE_DEC_MPEG_IQMINPUT_FLAG_NON_INTRA	(0x00 << 14)
+/* Index i goes to bits 13:8, weight v to bits 7:0. */
+#define VE_DEC_MPEG_IQMINPUT_WEIGHT(i, v) \
+	(SHIFT_AND_MASK_BITS(i, 13, 8) | SHIFT_AND_MASK_BITS(v, 7, 0))
+
+#define VE_DEC_MPEG_ERROR	(VE_ENGINE_DEC_MPEG + 0xc4)
+#define VE_DEC_MPEG_CRTMBADDR	(VE_ENGINE_DEC_MPEG + 0xc8)
+#define VE_DEC_MPEG_ROT_LUMA	(VE_ENGINE_DEC_MPEG + 0xcc)
+#define VE_DEC_MPEG_ROT_CHROMA	(VE_ENGINE_DEC_MPEG + 0xd0)
+
+/* H.265 engine registers: offsets are relative to VE_ENGINE_DEC_H265. */
+#define VE_DEC_H265_DEC_NAL_HDR	(VE_ENGINE_DEC_H265 + 0x00)
+
+#define VE_DEC_H265_DEC_NAL_HDR_NUH_TEMPORAL_ID_PLUS1(v) \
+	SHIFT_AND_MASK_BITS(v, 8, 6)
+#define VE_DEC_H265_DEC_NAL_HDR_NAL_UNIT_TYPE(v) \
+	SHIFT_AND_MASK_BITS(v, 5, 0)
+
+/* Evaluates to reg_flag when any bit of ctrl_flag is set in flags, else 0. */
+#define VE_DEC_H265_FLAG(reg_flag, ctrl_flag, flags) \
+	(((flags) & (ctrl_flag)) ? reg_flag : 0)
+
+#define VE_DEC_H265_DEC_SPS_HDR	(VE_ENGINE_DEC_H265 + 0x04)
+
+#define VE_DEC_H265_DEC_SPS_HDR_FLAG_STRONG_INTRA_SMOOTHING_ENABLE	BIT(26)
+#define VE_DEC_H265_DEC_SPS_HDR_FLAG_SPS_TEMPORAL_MVP_ENABLED	BIT(25)
+#define VE_DEC_H265_DEC_SPS_HDR_FLAG_SAMPLE_ADAPTIVE_OFFSET_ENABLED	BIT(24)
+#define VE_DEC_H265_DEC_SPS_HDR_FLAG_AMP_ENABLED	BIT(23)
+#define VE_DEC_H265_DEC_SPS_HDR_FLAG_SEPARATE_COLOUR_PLANE	BIT(2)
+
+#define VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA(v) \
+	SHIFT_AND_MASK_BITS(v, 22, 20)
+#define VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTER(v) \
+	SHIFT_AND_MASK_BITS(v, 19, 17)
+#define VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_TRANSFORM_BLOCK_SIZE(v) \
+	SHIFT_AND_MASK_BITS(v, 16, 15)
+#define VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_TRANSFORM_BLOCK_SIZE_MINUS2(v) \
+	SHIFT_AND_MASK_BITS(v, 14, 13)
+#define VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE(v) \
+	SHIFT_AND_MASK_BITS(v, 12, 11)
+#define VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_LUMA_CODING_BLOCK_SIZE_MINUS3(v) \
+	SHIFT_AND_MASK_BITS(v, 10, 9)
+#define VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_CHROMA_MINUS8(v) \
+	SHIFT_AND_MASK_BITS(v, 8, 6)
+#define VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_LUMA_MINUS8(v) \
+	SHIFT_AND_MASK_BITS(v, 5, 3)
+#define VE_DEC_H265_DEC_SPS_HDR_CHROMA_FORMAT_IDC(v) \
+	SHIFT_AND_MASK_BITS(v, 1, 0)
+
+#define VE_DEC_H265_DEC_PIC_SIZE	(VE_ENGINE_DEC_H265 + 0x08)
+
+#define VE_DEC_H265_DEC_PIC_SIZE_WIDTH(w)	(((w) << 0) & GENMASK(13, 0))
+#define VE_DEC_H265_DEC_PIC_SIZE_HEIGHT(h)	(((h) << 16) & GENMASK(29, 16))
+
+#define VE_DEC_H265_DEC_PCM_CTRL	(VE_ENGINE_DEC_H265 + 0x0c)
+
+#define VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_ENABLED	BIT(15)
+#define VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_LOOP_FILTER_DISABLED	BIT(14)
+
+#define VE_DEC_H265_DEC_PCM_CTRL_LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE(v) \
+	SHIFT_AND_MASK_BITS(v, 11, 10)
+#define VE_DEC_H265_DEC_PCM_CTRL_LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE_MINUS3(v) \
+	SHIFT_AND_MASK_BITS(v, 9, 8)
+#define VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_CHROMA_MINUS1(v) \
+	SHIFT_AND_MASK_BITS(v, 7, 4)
+#define VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_LUMA_MINUS1(v) \
+	SHIFT_AND_MASK_BITS(v, 3, 0)
+
+#define VE_DEC_H265_DEC_PPS_CTRL0	(VE_ENGINE_DEC_H265 + 0x10)
+
+#define VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CU_QP_DELTA_ENABLED	BIT(3)
+#define VE_DEC_H265_DEC_PPS_CTRL0_FLAG_TRANSFORM_SKIP_ENABLED	BIT(2)
+#define VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CONSTRAINED_INTRA_PRED	BIT(1)
+#define VE_DEC_H265_DEC_PPS_CTRL0_FLAG_SIGN_DATA_HIDING_ENABLED	BIT(0)
+
+#define VE_DEC_H265_DEC_PPS_CTRL0_PPS_CR_QP_OFFSET(v) \
+	SHIFT_AND_MASK_BITS(v, 29, 24)
+#define VE_DEC_H265_DEC_PPS_CTRL0_PPS_CB_QP_OFFSET(v) \
+	SHIFT_AND_MASK_BITS(v, 21, 16)
+#define VE_DEC_H265_DEC_PPS_CTRL0_INIT_QP_MINUS26(v) \
+	SHIFT_AND_MASK_BITS(v, 14, 8)
+#define VE_DEC_H265_DEC_PPS_CTRL0_DIFF_CU_QP_DELTA_DEPTH(v) \
+	SHIFT_AND_MASK_BITS(v, 5, 4)
+
+#define VE_DEC_H265_DEC_PPS_CTRL1	(VE_ENGINE_DEC_H265 + 0x14)
+
+#define VE_DEC_H265_DEC_PPS_CTRL1_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED	BIT(6)
+#define VE_DEC_H265_DEC_PPS_CTRL1_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	BIT(5)
+#define VE_DEC_H265_DEC_PPS_CTRL1_FLAG_ENTROPY_CODING_SYNC_ENABLED	BIT(4)
+#define VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TILES_ENABLED	BIT(3)
+#define VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TRANSQUANT_BYPASS_ENABLED	BIT(2)
+#define VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_BIPRED	BIT(1)
+#define VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_PRED	BIT(0)
+
+#define VE_DEC_H265_DEC_PPS_CTRL1_LOG2_PARALLEL_MERGE_LEVEL_MINUS2(v) \
+	SHIFT_AND_MASK_BITS(v, 10, 8)
+
+#define VE_DEC_H265_SCALING_LIST_CTRL0	(VE_ENGINE_DEC_H265 + 0x18)
+
+#define VE_DEC_H265_SCALING_LIST_CTRL0_FLAG_ENABLED	BIT(31)
+
+#define VE_DEC_H265_SCALING_LIST_CTRL0_SRAM	(0 << 30)
+#define VE_DEC_H265_SCALING_LIST_CTRL0_DEFAULT	(1 << 30)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0	(VE_ENGINE_DEC_H265 + 0x20)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_COLLOCATED_FROM_L0	BIT(11)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_CABAC_INIT	BIT(10)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_MVD_L1_ZERO	BIT(9)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_CHROMA	BIT(8)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_LUMA	BIT(7)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_TEMPORAL_MVP_ENABLE	BIT(6)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_DEPENDENT_SLICE_SEGMENT	BIT(1)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_FIRST_SLICE_SEGMENT_IN_PIC	BIT(0)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_PICTURE_TYPE(v) \
+	SHIFT_AND_MASK_BITS(v, 29, 28)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_FIVE_MINUS_MAX_NUM_MERGE_CAND(v) \
+	SHIFT_AND_MASK_BITS(v, 26, 24)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L1_ACTIVE_MINUS1(v) \
+	SHIFT_AND_MASK_BITS(v, 23, 20)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L0_ACTIVE_MINUS1(v) \
+	SHIFT_AND_MASK_BITS(v, 19, 16)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLLOCATED_REF_IDX(v) \
+	SHIFT_AND_MASK_BITS(v, 15, 12)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLOUR_PLANE_ID(v) \
+	SHIFT_AND_MASK_BITS(v, 5, 4)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO0_SLICE_TYPE(v) \
+	SHIFT_AND_MASK_BITS(v, 3, 2)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1	(VE_ENGINE_DEC_H265 + 0x24)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED	BIT(23)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED	BIT(22)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY	BIT(21)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(v) \
+	SHIFT_AND_MASK_BITS(v, 31, 28)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(v) \
+	SHIFT_AND_MASK_BITS(v, 27, 24)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(v) \
+	SHIFT_AND_MASK_BITS(v, 20, 16)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(v) \
+	SHIFT_AND_MASK_BITS(v, 12, 8)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(v) \
+	SHIFT_AND_MASK_BITS(v, 6, 0)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO2	(VE_ENGINE_DEC_H265 + 0x28)
+
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO2_NUM_ENTRY_POINT_OFFSETS(v) \
+	SHIFT_AND_MASK_BITS(v, 21, 8)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO2_CHROMA_LOG2_WEIGHT_DENOM(v) \
+	SHIFT_AND_MASK_BITS(v, 6, 4)
+#define VE_DEC_H265_DEC_SLICE_HDR_INFO2_LUMA_LOG2_WEIGHT_DENOM(v) \
+	SHIFT_AND_MASK_BITS(v, 2, 0)
+
+#define VE_DEC_H265_DEC_CTB_ADDR	(VE_ENGINE_DEC_H265 + 0x2c)
+
+#define VE_DEC_H265_DEC_CTB_ADDR_Y(y)	SHIFT_AND_MASK_BITS(y, 25, 16)
+#define VE_DEC_H265_DEC_CTB_ADDR_X(x)	SHIFT_AND_MASK_BITS(x, 9, 0)
+
+#define VE_DEC_H265_CTRL	(VE_ENGINE_DEC_H265 + 0x30)
+
+#define VE_DEC_H265_CTRL_DDR_CONSISTENCY_EN	BIT(31)
+#define VE_DEC_H265_CTRL_STCD_EN	BIT(25)
+#define VE_DEC_H265_CTRL_EPTB_DEC_BYPASS_EN	BIT(24)
+#define VE_DEC_H265_CTRL_TQ_BYPASS_EN	BIT(12)
+#define VE_DEC_H265_CTRL_VLD_BYPASS_EN	BIT(11)
+#define VE_DEC_H265_CTRL_NCRI_CACHE_DISABLE	BIT(10)
+#define VE_DEC_H265_CTRL_ROTATE_SCALE_OUT_EN	BIT(9)
+#define VE_DEC_H265_CTRL_MC_NO_WRITEBACK	BIT(8)
+#define VE_DEC_H265_CTRL_VLD_DATA_REQ_IRQ_EN	BIT(2)
+#define VE_DEC_H265_CTRL_ERROR_IRQ_EN	BIT(1)
+#define VE_DEC_H265_CTRL_FINISH_IRQ_EN	BIT(0)
+/* Finish, error and VLD-data-request interrupt enables combined. */
+#define VE_DEC_H265_CTRL_IRQ_MASK \
+	(VE_DEC_H265_CTRL_FINISH_IRQ_EN | VE_DEC_H265_CTRL_ERROR_IRQ_EN | \
+	 VE_DEC_H265_CTRL_VLD_DATA_REQ_IRQ_EN)
+
+#define VE_DEC_H265_TRIGGER	(VE_ENGINE_DEC_H265 + 0x34)
+
+/* Low six bits of x, placed at bits 13:8. */
+#define VE_DEC_H265_TRIGGER_TYPE_N_BITS(x)	(((x) & 0x3f) << 8)
+#define VE_DEC_H265_TRIGGER_STCD_VC1	(0x02 << 4)
+#define VE_DEC_H265_TRIGGER_STCD_AVS	(0x01 << 4)
+#define VE_DEC_H265_TRIGGER_STCD_HEVC	(0x00 << 4)
+#define VE_DEC_H265_TRIGGER_DEC_SLICE	(0x08 << 0)
+#define VE_DEC_H265_TRIGGER_INIT_SWDEC	(0x07 << 0)
+#define VE_DEC_H265_TRIGGER_BYTE_ALIGN	(0x06 << 0)
+#define VE_DEC_H265_TRIGGER_GET_VLCUE	(0x05 << 0)
+#define VE_DEC_H265_TRIGGER_GET_VLCSE	(0x04 << 0)
+#define VE_DEC_H265_TRIGGER_FLUSH_BITS	(0x03 << 0)
+#define VE_DEC_H265_TRIGGER_GET_BITS	(0x02 << 0)
+#define VE_DEC_H265_TRIGGER_SHOW_BITS	(0x01 << 0)
+
+#define VE_DEC_H265_STATUS	(VE_ENGINE_DEC_H265 + 0x38)
+
+#define VE_DEC_H265_STATUS_STCD	BIT(24)
+#define VE_DEC_H265_STATUS_STCD_BUSY	BIT(21)
+#define VE_DEC_H265_STATUS_WB_BUSY	BIT(20)
+#define VE_DEC_H265_STATUS_BS_DMA_BUSY	BIT(19)
+#define VE_DEC_H265_STATUS_IT_BUSY	BIT(18)
+#define VE_DEC_H265_STATUS_INTER_BUSY	BIT(17)
+#define VE_DEC_H265_STATUS_MORE_DATA	BIT(16)
+#define VE_DEC_H265_STATUS_DBLK_BUSY	BIT(15)
+#define VE_DEC_H265_STATUS_IREC_BUSY	BIT(14)
+#define VE_DEC_H265_STATUS_INTRA_BUSY	BIT(13)
+#define VE_DEC_H265_STATUS_MCRI_BUSY	BIT(12)
+#define VE_DEC_H265_STATUS_IQIT_BUSY	BIT(11)
+#define VE_DEC_H265_STATUS_MVP_BUSY	BIT(10)
+#define VE_DEC_H265_STATUS_IS_BUSY	BIT(9)
+#define VE_DEC_H265_STATUS_VLD_BUSY	BIT(8)
+#define VE_DEC_H265_STATUS_OVER_TIME	BIT(3)
+#define VE_DEC_H265_STATUS_VLD_DATA_REQ	BIT(2)
+#define VE_DEC_H265_STATUS_ERROR	BIT(1)
+#define VE_DEC_H265_STATUS_SUCCESS	BIT(0)
+#define VE_DEC_H265_STATUS_STCD_TYPE_MASK	GENMASK(23, 22)
+/* Same composition as the MPEG check masks, on the H.265 status bits. */
+#define VE_DEC_H265_STATUS_CHECK_MASK \
+	(VE_DEC_H265_STATUS_SUCCESS | VE_DEC_H265_STATUS_ERROR | \
+	 VE_DEC_H265_STATUS_VLD_DATA_REQ)
+#define VE_DEC_H265_STATUS_CHECK_ERROR \
+	(VE_DEC_H265_STATUS_ERROR | VE_DEC_H265_STATUS_VLD_DATA_REQ)
+
+#define VE_DEC_H265_DEC_CTB_NUM	(VE_ENGINE_DEC_H265 + 0x3c)
+
+#define VE_DEC_H265_BITS_ADDR	(VE_ENGINE_DEC_H265 + 0x40)
+
+#define VE_DEC_H265_BITS_ADDR_FIRST_SLICE_DATA	BIT(30)
+#define VE_DEC_H265_BITS_ADDR_LAST_SLICE_DATA	BIT(29)
+#define VE_DEC_H265_BITS_ADDR_VALID_SLICE_DATA	BIT(28)
+/* Address shifted right by 8, limited to 28 bits. */
+#define VE_DEC_H265_BITS_ADDR_BASE(a)	(((a) >> 8) & GENMASK(27, 0))
+
+#define VE_DEC_H265_BITS_OFFSET	(VE_ENGINE_DEC_H265 + 0x44)
+#define VE_DEC_H265_BITS_LEN	(VE_ENGINE_DEC_H265 + 0x48)
+
+#define VE_DEC_H265_BITS_END_ADDR	(VE_ENGINE_DEC_H265 + 0x4c) /* H.265 offsets here are relative to VE_ENGINE_DEC_H265 */
+/* The *_ADDR_BASE() helpers in this part shift the address right by 8. */
+#define VE_DEC_H265_BITS_END_ADDR_BASE(a)	((a) >> 8)
+
+#define VE_DEC_H265_SDRT_CTRL	(VE_ENGINE_DEC_H265 + 0x50)
+#define VE_DEC_H265_SDRT_LUMA_ADDR	(VE_ENGINE_DEC_H265 + 0x54)
+#define VE_DEC_H265_SDRT_CHROMA_ADDR	(VE_ENGINE_DEC_H265 + 0x58)
+
+#define VE_DEC_H265_OUTPUT_FRAME_IDX	(VE_ENGINE_DEC_H265 + 0x5c)
+
+#define VE_DEC_H265_NEIGHBOR_INFO_ADDR	(VE_ENGINE_DEC_H265 + 0x60)
+
+#define VE_DEC_H265_NEIGHBOR_INFO_ADDR_BASE(a)	((a) >> 8)
+
+#define VE_DEC_H265_ENTRY_POINT_OFFSET_ADDR	(VE_ENGINE_DEC_H265 + 0x64)
+#define VE_DEC_H265_TILE_START_CTB	(VE_ENGINE_DEC_H265 + 0x68)
+#define VE_DEC_H265_TILE_END_CTB	(VE_ENGINE_DEC_H265 + 0x6c)
+#define VE_DEC_H265_SCALING_LIST_DC_COEF0	(VE_ENGINE_DEC_H265 + 0x78)
+#define VE_DEC_H265_SCALING_LIST_DC_COEF1	(VE_ENGINE_DEC_H265 + 0x7c)
+
+#define VE_DEC_H265_LOW_ADDR	(VE_ENGINE_DEC_H265 + 0x80)
+/* Three 8-bit fields of VE_DEC_H265_LOW_ADDR: bits 31:24, 23:16 and 7:0. */
+#define VE_DEC_H265_LOW_ADDR_PRIMARY_CHROMA(a) \
+	SHIFT_AND_MASK_BITS(a, 31, 24)
+#define VE_DEC_H265_LOW_ADDR_SECONDARY_CHROMA(a) \
+	SHIFT_AND_MASK_BITS(a, 23, 16)
+#define VE_DEC_H265_LOW_ADDR_ENTRY_POINTS_BUF(a) \
+	SHIFT_AND_MASK_BITS(a, 7, 0)
+
+#define VE_DEC_H265_BITS_READ	(VE_ENGINE_DEC_H265 + 0xdc)
+
+#define VE_DEC_H265_SRAM_OFFSET	(VE_ENGINE_DEC_H265 + 0xe0)
+/* Plain offsets (no engine base added); presumably values written to VE_DEC_H265_SRAM_OFFSET - confirm against the H.265 code. */
+#define VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L0	0x00
+#define VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L0	0x20
+#define VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L1	0x60
+#define VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L1	0x80
+#define VE_DEC_H265_SRAM_OFFSET_FRAME_INFO	0x400
+#define VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT	0x20
+#define VE_DEC_H265_SRAM_OFFSET_SCALING_LISTS	0x800
+#define VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST0	0xc00
+#define VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST1	0xc10
+
+#define VE_DEC_H265_SRAM_DATA	(VE_ENGINE_DEC_H265 + 0xe4)
+
+#define VE_DEC_H265_SRAM_DATA_ADDR_BASE(a)	((a) >> 8)
+#define VE_DEC_H265_SRAM_REF_PIC_LIST_LT_REF	BIT(7)
+/* H.264 registers are given as absolute offsets; 0x200 equals VE_ENGINE_DEC_H264. */
+#define VE_H264_SPS	0x200
+#define VE_H264_SPS_MBS_ONLY	BIT(18)
+#define VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD	BIT(17)
+#define VE_H264_SPS_DIRECT_8X8_INFERENCE	BIT(16)
+
+#define VE_H264_PPS	0x204
+#define VE_H264_PPS_ENTROPY_CODING_MODE	BIT(15)
+#define VE_H264_PPS_WEIGHTED_PRED	BIT(4)
+#define VE_H264_PPS_CONSTRAINED_INTRA_PRED	BIT(1)
+#define VE_H264_PPS_TRANSFORM_8X8_MODE	BIT(0)
+
+#define VE_H264_SHS	0x208
+#define VE_H264_SHS_FIRST_SLICE_IN_PIC	BIT(5)
+#define VE_H264_SHS_FIELD_PIC	BIT(4)
+#define VE_H264_SHS_BOTTOM_FIELD	BIT(3)
+#define VE_H264_SHS_DIRECT_SPATIAL_MV_PRED	BIT(2)
+
+#define VE_H264_SHS2	0x20c
+#define VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD	BIT(12)
+
+#define VE_H264_SHS_WP	0x210
+
+#define VE_H264_SHS_QP	0x21c
+#define VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT	BIT(24)
+
+#define VE_H264_CTRL	0x220
+#define VE_H264_CTRL_VP8	BIT(29)
+#define VE_H264_CTRL_VLD_DATA_REQ_INT	BIT(2)
+#define VE_H264_CTRL_DECODE_ERR_INT	BIT(1)
+#define VE_H264_CTRL_SLICE_DECODE_INT	BIT(0)
+/* OR of the three *_INT bits (bits 2:0) of VE_H264_CTRL. */
+#define VE_H264_CTRL_INT_MASK	(VE_H264_CTRL_VLD_DATA_REQ_INT | \
+	VE_H264_CTRL_DECODE_ERR_INT | \
+	VE_H264_CTRL_SLICE_DECODE_INT)
+
+#define VE_H264_TRIGGER_TYPE	0x224
+#define VE_H264_TRIGGER_TYPE_PROBABILITY(x)	SHIFT_AND_MASK_BITS(x, 31, 24)
+#define VE_H264_TRIGGER_TYPE_BIN_LENS(x)	SHIFT_AND_MASK_BITS((x) - 1, 18, 16) /* stores x - 1 in bits 18:16 */
+#define VE_H264_TRIGGER_TYPE_N_BITS(x)	(((x) & 0x3f) << 8) /* low six bits of x at bits 13:8 */
+#define VE_H264_TRIGGER_TYPE_VP8_GET_BITS	(15 << 0)
+#define VE_H264_TRIGGER_TYPE_VP8_UPDATE_COEF	(14 << 0)
+#define VE_H264_TRIGGER_TYPE_VP8_SLICE_DECODE	(10 << 0)
+#define VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE	(8 << 0)
+#define VE_H264_TRIGGER_TYPE_INIT_SWDEC	(7 << 0)
+#define VE_H264_TRIGGER_TYPE_FLUSH_BITS	(3 << 0)
+/* The three *_INT status bits are defined as the same bits as in VE_H264_CTRL. */
+#define VE_H264_STATUS	0x228
+#define VE_H264_STATUS_VLD_DATA_REQ_INT	VE_H264_CTRL_VLD_DATA_REQ_INT
+#define VE_H264_STATUS_DECODE_ERR_INT	VE_H264_CTRL_DECODE_ERR_INT
+#define VE_H264_STATUS_SLICE_DECODE_INT	VE_H264_CTRL_SLICE_DECODE_INT
+#define VE_H264_STATUS_VLD_BUSY	BIT(8)
+#define VE_H264_STATUS_VP8_UPPROB_BUSY	BIT(17)
+
+#define VE_H264_STATUS_INT_MASK VE_H264_CTRL_INT_MASK + +#define VE_H264_CUR_MB_NUM 0x22c + +#define VE_H264_VLD_ADDR 0x230 +#define VE_H264_VLD_ADDR_FIRST BIT(30) +#define VE_H264_VLD_ADDR_LAST BIT(29) +#define VE_H264_VLD_ADDR_VALID BIT(28) +#define VE_H264_VLD_ADDR_VAL(x) (((x) & 0x0ffffff0) | ((x) >> 28)) + +#define VE_H264_VLD_OFFSET 0x234 +#define VE_H264_VLD_LEN 0x238 +#define VE_H264_VLD_END 0x23c +#define VE_H264_SDROT_CTRL 0x240 +#define VE_H264_OUTPUT_FRAME_IDX 0x24c +#define VE_H264_EXTRA_BUFFER1 0x250 +#define VE_H264_EXTRA_BUFFER2 0x254 +#define VE_H264_MB_ADDR 0x260 +#define VE_H264_ERROR_CASE 0x2b8 +#define VE_H264_BASIC_BITS 0x2dc +#define VE_AVC_SRAM_PORT_OFFSET 0x2e0 +#define VE_AVC_SRAM_PORT_DATA 0x2e4 + +#define VE_VP8_PPS 0x214 +#define VE_VP8_PPS_PIC_TYPE_P_FRAME BIT(31) +#define VE_VP8_PPS_LAST_SHARPNESS_LEVEL(v) SHIFT_AND_MASK_BITS(v, 30, 28) +#define VE_VP8_PPS_LAST_PIC_TYPE_P_FRAME BIT(27) +#define VE_VP8_PPS_ALTREF_SIGN_BIAS BIT(26) +#define VE_VP8_PPS_GOLDEN_SIGN_BIAS BIT(25) +#define VE_VP8_PPS_RELOAD_ENTROPY_PROBS BIT(24) +#define VE_VP8_PPS_REFRESH_ENTROPY_PROBS BIT(23) +#define VE_VP8_PPS_MB_NO_COEFF_SKIP BIT(22) +#define VE_VP8_PPS_TOKEN_PARTITION(v) SHIFT_AND_MASK_BITS(v, 21, 20) +#define VE_VP8_PPS_MODE_REF_LF_DELTA_UPDATE BIT(19) +#define VE_VP8_PPS_MODE_REF_LF_DELTA_ENABLE BIT(18) +#define VE_VP8_PPS_LOOP_FILTER_LEVEL(v) SHIFT_AND_MASK_BITS(v, 17, 12) +#define VE_VP8_PPS_LOOP_FILTER_SIMPLE BIT(11) +#define VE_VP8_PPS_SHARPNESS_LEVEL(v) SHIFT_AND_MASK_BITS(v, 10, 8) +#define VE_VP8_PPS_LAST_LOOP_FILTER_SIMPLE BIT(7) +#define VE_VP8_PPS_SEGMENTATION_ENABLE BIT(6) +#define VE_VP8_PPS_MB_SEGMENT_ABS_DELTA BIT(5) +#define VE_VP8_PPS_UPDATE_MB_SEGMENTATION_MAP BIT(4) +#define VE_VP8_PPS_FULL_PIXEL BIT(3) +#define VE_VP8_PPS_BILINEAR_MC_FILTER BIT(2) +#define VE_VP8_PPS_FILTER_TYPE_SIMPLE BIT(1) +#define VE_VP8_PPS_LPF_DISABLE BIT(0) + +#define VE_VP8_QP_INDEX_DELTA 0x218 +#define VE_VP8_QP_INDEX_DELTA_UVAC(v) SHIFT_AND_MASK_BITS(v, 
31, 27) +#define VE_VP8_QP_INDEX_DELTA_UVDC(v) SHIFT_AND_MASK_BITS(v, 26, 22) +#define VE_VP8_QP_INDEX_DELTA_Y2AC(v) SHIFT_AND_MASK_BITS(v, 21, 17) +#define VE_VP8_QP_INDEX_DELTA_Y2DC(v) SHIFT_AND_MASK_BITS(v, 16, 12) +#define VE_VP8_QP_INDEX_DELTA_Y1DC(v) SHIFT_AND_MASK_BITS(v, 11, 7) +#define VE_VP8_QP_INDEX_DELTA_BASE_QINDEX(v) SHIFT_AND_MASK_BITS(v, 6, 0) + +#define VE_VP8_PART_SIZE_OFFSET 0x21c +#define VE_VP8_ENTROPY_PROBS_ADDR 0x250 +#define VE_VP8_FIRST_DATA_PART_LEN 0x254 + +#define VE_VP8_FSIZE 0x258 +#define VE_VP8_FSIZE_WIDTH(w) \ + SHIFT_AND_MASK_BITS(DIV_ROUND_UP(w, 16), 15, 8) +#define VE_VP8_FSIZE_HEIGHT(h) \ + SHIFT_AND_MASK_BITS(DIV_ROUND_UP(h, 16), 7, 0) + +#define VE_VP8_PICSIZE 0x25c +#define VE_VP8_PICSIZE_WIDTH(w) SHIFT_AND_MASK_BITS(w, 27, 16) +#define VE_VP8_PICSIZE_HEIGHT(h) SHIFT_AND_MASK_BITS(h, 11, 0) + +#define VE_VP8_REC_LUMA 0x2ac +#define VE_VP8_FWD_LUMA 0x2b0 +#define VE_VP8_BWD_LUMA 0x2b4 +#define VE_VP8_REC_CHROMA 0x2d0 +#define VE_VP8_FWD_CHROMA 0x2d4 +#define VE_VP8_BWD_CHROMA 0x2d8 +#define VE_VP8_ALT_LUMA 0x2e8 +#define VE_VP8_ALT_CHROMA 0x2ec + +#define VE_VP8_SEGMENT_FEAT_MB_LV0 0x2f0 +#define VE_VP8_SEGMENT_FEAT_MB_LV1 0x2f4 + +#define VE_VP8_SEGMENT3(v) SHIFT_AND_MASK_BITS(v, 31, 24) +#define VE_VP8_SEGMENT2(v) SHIFT_AND_MASK_BITS(v, 23, 16) +#define VE_VP8_SEGMENT1(v) SHIFT_AND_MASK_BITS(v, 15, 8) +#define VE_VP8_SEGMENT0(v) SHIFT_AND_MASK_BITS(v, 7, 0) + +#define VE_VP8_REF_LF_DELTA 0x2f8 +#define VE_VP8_MODE_LF_DELTA 0x2fc + +#define VE_VP8_LF_DELTA3(v) SHIFT_AND_MASK_BITS(v, 30, 24) +#define VE_VP8_LF_DELTA2(v) SHIFT_AND_MASK_BITS(v, 22, 16) +#define VE_VP8_LF_DELTA1(v) SHIFT_AND_MASK_BITS(v, 14, 8) +#define VE_VP8_LF_DELTA0(v) SHIFT_AND_MASK_BITS(v, 6, 0) + +#define VE_ISP_INPUT_SIZE 0xa00 +#define VE_ISP_INPUT_STRIDE 0xa04 +#define VE_ISP_CTRL 0xa08 +#define VE_ISP_INPUT_LUMA 0xa78 +#define VE_ISP_INPUT_CHROMA 0xa7c + +#define VE_AVC_PARAM 0xb04 +#define VE_AVC_QP 0xb08 +#define VE_AVC_MOTION_EST 0xb10 +#define 
VE_AVC_CTRL 0xb14 +#define VE_AVC_TRIGGER 0xb18 +#define VE_AVC_STATUS 0xb1c +#define VE_AVC_BASIC_BITS 0xb20 +#define VE_AVC_UNK_BUF 0xb60 +#define VE_AVC_VLE_ADDR 0xb80 +#define VE_AVC_VLE_END 0xb84 +#define VE_AVC_VLE_OFFSET 0xb88 +#define VE_AVC_VLE_MAX 0xb8c +#define VE_AVC_VLE_LENGTH 0xb90 +#define VE_AVC_REF_LUMA 0xba0 +#define VE_AVC_REF_CHROMA 0xba4 +#define VE_AVC_REC_LUMA 0xbb0 +#define VE_AVC_REC_CHROMA 0xbb4 +#define VE_AVC_REF_SLUMA 0xbb8 +#define VE_AVC_REC_SLUMA 0xbbc +#define VE_AVC_MB_INFO 0xbc0 + +#endif diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c new file mode 100644 index 000000000..66714609b --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. 
+ * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#include <linux/pm_runtime.h> + +#include <media/videobuf2-dma-contig.h> +#include <media/v4l2-device.h> +#include <media/v4l2-ioctl.h> +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> + +#include "cedrus.h" +#include "cedrus_video.h" +#include "cedrus_dec.h" +#include "cedrus_hw.h" + +#define CEDRUS_DECODE_SRC BIT(0) +#define CEDRUS_DECODE_DST BIT(1) + +#define CEDRUS_MIN_WIDTH 16U +#define CEDRUS_MIN_HEIGHT 16U +#define CEDRUS_MAX_WIDTH 4096U +#define CEDRUS_MAX_HEIGHT 2304U + +static struct cedrus_format cedrus_formats[] = { + { + .pixelformat = V4L2_PIX_FMT_MPEG2_SLICE, + .directions = CEDRUS_DECODE_SRC, + .capabilities = CEDRUS_CAPABILITY_MPEG2_DEC, + }, + { + .pixelformat = V4L2_PIX_FMT_H264_SLICE, + .directions = CEDRUS_DECODE_SRC, + .capabilities = CEDRUS_CAPABILITY_H264_DEC, + }, + { + .pixelformat = V4L2_PIX_FMT_HEVC_SLICE, + .directions = CEDRUS_DECODE_SRC, + .capabilities = CEDRUS_CAPABILITY_H265_DEC, + }, + { + .pixelformat = V4L2_PIX_FMT_VP8_FRAME, + .directions = CEDRUS_DECODE_SRC, + .capabilities = CEDRUS_CAPABILITY_VP8_DEC, + }, + { + .pixelformat = V4L2_PIX_FMT_NV12_32L32, + .directions = CEDRUS_DECODE_DST, + }, + { + .pixelformat = V4L2_PIX_FMT_NV12, + .directions = CEDRUS_DECODE_DST, + .capabilities = CEDRUS_CAPABILITY_UNTILED, + }, +}; + +#define CEDRUS_FORMATS_COUNT ARRAY_SIZE(cedrus_formats) + +static inline struct cedrus_ctx *cedrus_file2ctx(struct file *file) +{ + return container_of(file->private_data, struct cedrus_ctx, fh); +} + +static struct cedrus_format *cedrus_find_format(u32 pixelformat, u32 directions, + unsigned int capabilities) +{ + struct cedrus_format *first_valid_fmt = NULL; + struct cedrus_format *fmt; + unsigned int i; + + for (i = 0; i < CEDRUS_FORMATS_COUNT; i++) { + fmt = &cedrus_formats[i]; + + if ((fmt->capabilities & capabilities) != fmt->capabilities || + !(fmt->directions & directions)) + continue; + + if 
(fmt->pixelformat == pixelformat) + break; + + if (!first_valid_fmt) + first_valid_fmt = fmt; + } + + if (i == CEDRUS_FORMATS_COUNT) + return first_valid_fmt; + + return &cedrus_formats[i]; +} + +void cedrus_prepare_format(struct v4l2_pix_format *pix_fmt) +{ + unsigned int width = pix_fmt->width; + unsigned int height = pix_fmt->height; + unsigned int sizeimage = pix_fmt->sizeimage; + unsigned int bytesperline = pix_fmt->bytesperline; + + pix_fmt->field = V4L2_FIELD_NONE; + + /* Limit to hardware min/max. */ + width = clamp(width, CEDRUS_MIN_WIDTH, CEDRUS_MAX_WIDTH); + height = clamp(height, CEDRUS_MIN_HEIGHT, CEDRUS_MAX_HEIGHT); + + switch (pix_fmt->pixelformat) { + case V4L2_PIX_FMT_MPEG2_SLICE: + case V4L2_PIX_FMT_H264_SLICE: + case V4L2_PIX_FMT_HEVC_SLICE: + case V4L2_PIX_FMT_VP8_FRAME: + /* Zero bytes per line for encoded source. */ + bytesperline = 0; + /* Choose some minimum size since this can't be 0 */ + sizeimage = max_t(u32, SZ_1K, sizeimage); + break; + + case V4L2_PIX_FMT_NV12_32L32: + /* 32-aligned stride. */ + bytesperline = ALIGN(width, 32); + + /* 32-aligned height. */ + height = ALIGN(height, 32); + + /* Luma plane size. */ + sizeimage = bytesperline * height; + + /* Chroma plane size. */ + sizeimage += bytesperline * ALIGN(height, 64) / 2; + + break; + + case V4L2_PIX_FMT_NV12: + /* 16-aligned stride. */ + bytesperline = ALIGN(width, 16); + + /* 16-aligned height. */ + height = ALIGN(height, 16); + + /* Luma plane size. */ + sizeimage = bytesperline * height; + + /* Chroma plane size. 
*/ + sizeimage += bytesperline * height / 2; + + break; + } + + pix_fmt->width = width; + pix_fmt->height = height; + + pix_fmt->bytesperline = bytesperline; + pix_fmt->sizeimage = sizeimage; +} + +static int cedrus_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + strscpy(cap->driver, CEDRUS_NAME, sizeof(cap->driver)); + strscpy(cap->card, CEDRUS_NAME, sizeof(cap->card)); + snprintf(cap->bus_info, sizeof(cap->bus_info), + "platform:%s", CEDRUS_NAME); + + return 0; +} + +static int cedrus_enum_fmt(struct file *file, struct v4l2_fmtdesc *f, + u32 direction) +{ + struct cedrus_ctx *ctx = cedrus_file2ctx(file); + struct cedrus_dev *dev = ctx->dev; + unsigned int capabilities = dev->capabilities; + struct cedrus_format *fmt; + unsigned int i, index; + + /* Index among formats that match the requested direction. */ + index = 0; + + for (i = 0; i < CEDRUS_FORMATS_COUNT; i++) { + fmt = &cedrus_formats[i]; + + if (fmt->capabilities && (fmt->capabilities & capabilities) != + fmt->capabilities) + continue; + + if (!(cedrus_formats[i].directions & direction)) + continue; + + if (index == f->index) + break; + + index++; + } + + /* Matched format. 
*/ + if (i < CEDRUS_FORMATS_COUNT) { + f->pixelformat = cedrus_formats[i].pixelformat; + + return 0; + } + + return -EINVAL; +} + +static int cedrus_enum_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + return cedrus_enum_fmt(file, f, CEDRUS_DECODE_DST); +} + +static int cedrus_enum_fmt_vid_out(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + return cedrus_enum_fmt(file, f, CEDRUS_DECODE_SRC); +} + +static int cedrus_g_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cedrus_ctx *ctx = cedrus_file2ctx(file); + + f->fmt.pix = ctx->dst_fmt; + return 0; +} + +static int cedrus_g_fmt_vid_out(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cedrus_ctx *ctx = cedrus_file2ctx(file); + + f->fmt.pix = ctx->src_fmt; + return 0; +} + +static int cedrus_try_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cedrus_ctx *ctx = cedrus_file2ctx(file); + struct cedrus_dev *dev = ctx->dev; + struct v4l2_pix_format *pix_fmt = &f->fmt.pix; + struct cedrus_format *fmt = + cedrus_find_format(pix_fmt->pixelformat, CEDRUS_DECODE_DST, + dev->capabilities); + + if (!fmt) + return -EINVAL; + + pix_fmt->pixelformat = fmt->pixelformat; + pix_fmt->width = ctx->src_fmt.width; + pix_fmt->height = ctx->src_fmt.height; + cedrus_prepare_format(pix_fmt); + + return 0; +} + +static int cedrus_try_fmt_vid_out(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cedrus_ctx *ctx = cedrus_file2ctx(file); + struct cedrus_dev *dev = ctx->dev; + struct v4l2_pix_format *pix_fmt = &f->fmt.pix; + struct cedrus_format *fmt = + cedrus_find_format(pix_fmt->pixelformat, CEDRUS_DECODE_SRC, + dev->capabilities); + + if (!fmt) + return -EINVAL; + + pix_fmt->pixelformat = fmt->pixelformat; + cedrus_prepare_format(pix_fmt); + + return 0; +} + +static int cedrus_s_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cedrus_ctx *ctx = cedrus_file2ctx(file); + struct 
vb2_queue *vq; + int ret; + + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); + if (vb2_is_busy(vq)) + return -EBUSY; + + ret = cedrus_try_fmt_vid_cap(file, priv, f); + if (ret) + return ret; + + ctx->dst_fmt = f->fmt.pix; + + return 0; +} + +static int cedrus_s_fmt_vid_out(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct cedrus_ctx *ctx = cedrus_file2ctx(file); + struct vb2_queue *vq; + struct vb2_queue *peer_vq; + int ret; + + ret = cedrus_try_fmt_vid_out(file, priv, f); + if (ret) + return ret; + + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); + /* + * In order to support dynamic resolution change, + * the decoder admits a resolution change, as long + * as the pixelformat remains. Can't be done if streaming. + */ + if (vb2_is_streaming(vq) || (vb2_is_busy(vq) && + f->fmt.pix.pixelformat != ctx->src_fmt.pixelformat)) + return -EBUSY; + /* + * Since format change on the OUTPUT queue will reset + * the CAPTURE queue, we can't allow doing so + * when the CAPTURE queue has buffers allocated. + */ + peer_vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE); + if (vb2_is_busy(peer_vq)) + return -EBUSY; + + ret = cedrus_try_fmt_vid_out(file, priv, f); + if (ret) + return ret; + + ctx->src_fmt = f->fmt.pix; + + switch (ctx->src_fmt.pixelformat) { + case V4L2_PIX_FMT_H264_SLICE: + case V4L2_PIX_FMT_HEVC_SLICE: + vq->subsystem_flags |= + VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF; + break; + default: + vq->subsystem_flags &= + ~VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF; + break; + } + + /* Propagate format information to capture. 
*/ + ctx->dst_fmt.colorspace = f->fmt.pix.colorspace; + ctx->dst_fmt.xfer_func = f->fmt.pix.xfer_func; + ctx->dst_fmt.ycbcr_enc = f->fmt.pix.ycbcr_enc; + ctx->dst_fmt.quantization = f->fmt.pix.quantization; + ctx->dst_fmt.width = ctx->src_fmt.width; + ctx->dst_fmt.height = ctx->src_fmt.height; + cedrus_prepare_format(&ctx->dst_fmt); + + return 0; +} + +const struct v4l2_ioctl_ops cedrus_ioctl_ops = { + .vidioc_querycap = cedrus_querycap, + + .vidioc_enum_fmt_vid_cap = cedrus_enum_fmt_vid_cap, + .vidioc_g_fmt_vid_cap = cedrus_g_fmt_vid_cap, + .vidioc_try_fmt_vid_cap = cedrus_try_fmt_vid_cap, + .vidioc_s_fmt_vid_cap = cedrus_s_fmt_vid_cap, + + .vidioc_enum_fmt_vid_out = cedrus_enum_fmt_vid_out, + .vidioc_g_fmt_vid_out = cedrus_g_fmt_vid_out, + .vidioc_try_fmt_vid_out = cedrus_try_fmt_vid_out, + .vidioc_s_fmt_vid_out = cedrus_s_fmt_vid_out, + + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, + .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, + .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, + .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, + .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, + + .vidioc_streamon = v4l2_m2m_ioctl_streamon, + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, + + .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_stateless_try_decoder_cmd, + .vidioc_decoder_cmd = v4l2_m2m_ioctl_stateless_decoder_cmd, + + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, +}; + +static int cedrus_queue_setup(struct vb2_queue *vq, unsigned int *nbufs, + unsigned int *nplanes, unsigned int sizes[], + struct device *alloc_devs[]) +{ + struct cedrus_ctx *ctx = vb2_get_drv_priv(vq); + struct v4l2_pix_format *pix_fmt; + + if (V4L2_TYPE_IS_OUTPUT(vq->type)) + pix_fmt = &ctx->src_fmt; + else + pix_fmt = &ctx->dst_fmt; + + if (*nplanes) { + if (sizes[0] < pix_fmt->sizeimage) + return -EINVAL; + } else { + sizes[0] = pix_fmt->sizeimage; + *nplanes = 1; + } + + 
return 0; +} + +static void cedrus_queue_cleanup(struct vb2_queue *vq, u32 state) +{ + struct cedrus_ctx *ctx = vb2_get_drv_priv(vq); + struct vb2_v4l2_buffer *vbuf; + + for (;;) { + if (V4L2_TYPE_IS_OUTPUT(vq->type)) + vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); + else + vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); + + if (!vbuf) + return; + + v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req, + &ctx->hdl); + v4l2_m2m_buf_done(vbuf, state); + } +} + +static int cedrus_buf_out_validate(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + + vbuf->field = V4L2_FIELD_NONE; + return 0; +} + +static int cedrus_buf_prepare(struct vb2_buffer *vb) +{ + struct vb2_queue *vq = vb->vb2_queue; + struct cedrus_ctx *ctx = vb2_get_drv_priv(vq); + struct v4l2_pix_format *pix_fmt; + + if (V4L2_TYPE_IS_OUTPUT(vq->type)) + pix_fmt = &ctx->src_fmt; + else + pix_fmt = &ctx->dst_fmt; + + if (vb2_plane_size(vb, 0) < pix_fmt->sizeimage) + return -EINVAL; + + /* + * Buffer's bytesused must be written by driver for CAPTURE buffers. + * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets + * it to buffer length). 
+ */ + if (V4L2_TYPE_IS_CAPTURE(vq->type)) + vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage); + + return 0; +} + +static int cedrus_start_streaming(struct vb2_queue *vq, unsigned int count) +{ + struct cedrus_ctx *ctx = vb2_get_drv_priv(vq); + struct cedrus_dev *dev = ctx->dev; + int ret = 0; + + switch (ctx->src_fmt.pixelformat) { + case V4L2_PIX_FMT_MPEG2_SLICE: + ctx->current_codec = CEDRUS_CODEC_MPEG2; + break; + + case V4L2_PIX_FMT_H264_SLICE: + ctx->current_codec = CEDRUS_CODEC_H264; + break; + + case V4L2_PIX_FMT_HEVC_SLICE: + ctx->current_codec = CEDRUS_CODEC_H265; + break; + + case V4L2_PIX_FMT_VP8_FRAME: + ctx->current_codec = CEDRUS_CODEC_VP8; + break; + + default: + return -EINVAL; + } + + if (V4L2_TYPE_IS_OUTPUT(vq->type)) { + ret = pm_runtime_resume_and_get(dev->dev); + if (ret < 0) + goto err_cleanup; + + if (dev->dec_ops[ctx->current_codec]->start) { + ret = dev->dec_ops[ctx->current_codec]->start(ctx); + if (ret) + goto err_pm; + } + } + + return 0; + +err_pm: + pm_runtime_put(dev->dev); +err_cleanup: + cedrus_queue_cleanup(vq, VB2_BUF_STATE_QUEUED); + + return ret; +} + +static void cedrus_stop_streaming(struct vb2_queue *vq) +{ + struct cedrus_ctx *ctx = vb2_get_drv_priv(vq); + struct cedrus_dev *dev = ctx->dev; + + if (V4L2_TYPE_IS_OUTPUT(vq->type)) { + if (dev->dec_ops[ctx->current_codec]->stop) + dev->dec_ops[ctx->current_codec]->stop(ctx); + + pm_runtime_put(dev->dev); + } + + cedrus_queue_cleanup(vq, VB2_BUF_STATE_ERROR); +} + +static void cedrus_buf_queue(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct cedrus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + + v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); +} + +static void cedrus_buf_request_complete(struct vb2_buffer *vb) +{ + struct cedrus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + + v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl); +} + +static struct vb2_ops cedrus_qops = { + .queue_setup = cedrus_queue_setup, + .buf_prepare = 
cedrus_buf_prepare, + .buf_queue = cedrus_buf_queue, + .buf_out_validate = cedrus_buf_out_validate, + .buf_request_complete = cedrus_buf_request_complete, + .start_streaming = cedrus_start_streaming, + .stop_streaming = cedrus_stop_streaming, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, +}; + +int cedrus_queue_init(void *priv, struct vb2_queue *src_vq, + struct vb2_queue *dst_vq) +{ + struct cedrus_ctx *ctx = priv; + int ret; + + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + src_vq->io_modes = VB2_MMAP | VB2_DMABUF; + src_vq->drv_priv = ctx; + src_vq->buf_struct_size = sizeof(struct cedrus_buffer); + src_vq->ops = &cedrus_qops; + src_vq->mem_ops = &vb2_dma_contig_memops; + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->lock = &ctx->dev->dev_mutex; + src_vq->dev = ctx->dev->dev; + src_vq->supports_requests = true; + src_vq->requires_requests = true; + + ret = vb2_queue_init(src_vq); + if (ret) + return ret; + + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + dst_vq->io_modes = VB2_MMAP | VB2_DMABUF; + dst_vq->drv_priv = ctx; + dst_vq->buf_struct_size = sizeof(struct cedrus_buffer); + dst_vq->ops = &cedrus_qops; + dst_vq->mem_ops = &vb2_dma_contig_memops; + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->lock = &ctx->dev->dev_mutex; + dst_vq->dev = ctx->dev->dev; + + return vb2_queue_init(dst_vq); +} diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.h b/drivers/staging/media/sunxi/cedrus/cedrus_video.h new file mode 100644 index 000000000..05050c0a0 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cedrus VPU driver + * + * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com> + * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com> + * Copyright (C) 2018 Bootlin + * + * Based on the vim2m driver, that is: + * + * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd. 
+ * Pawel Osciak, <pawel@osciak.com> + * Marek Szyprowski, <m.szyprowski@samsung.com> + */ + +#ifndef _CEDRUS_VIDEO_H_ +#define _CEDRUS_VIDEO_H_ + +struct cedrus_format { + u32 pixelformat; + u32 directions; + unsigned int capabilities; +}; + +extern const struct v4l2_ioctl_ops cedrus_ioctl_ops; + +int cedrus_queue_init(void *priv, struct vb2_queue *src_vq, + struct vb2_queue *dst_vq); +void cedrus_prepare_format(struct v4l2_pix_format *pix_fmt); + +#endif diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_vp8.c b/drivers/staging/media/sunxi/cedrus/cedrus_vp8.c new file mode 100644 index 000000000..f7714baae --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_vp8.c @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cedrus VPU driver + * + * Copyright (c) 2019 Jernej Skrabec <jernej.skrabec@siol.net> + */ + +/* + * VP8 in Cedrus shares same engine as H264. + * + * Note that it seems necessary to call bitstream parsing functions, + * to parse frame header, otherwise decoded image is garbage. This is + * contrary to what is driver supposed to do. However, values are not + * really used, so this might be acceptable. It's possible that bitstream + * parsing functions set some internal VPU state, which is later necessary + * for proper decoding. Biggest suspect is "VP8 probs update" trigger. + */ + +#include <linux/delay.h> +#include <linux/types.h> + +#include <media/videobuf2-dma-contig.h> + +#include "cedrus.h" +#include "cedrus_hw.h" +#include "cedrus_regs.h" + +#define CEDRUS_ENTROPY_PROBS_SIZE 0x2400 +#define VP8_PROB_HALF 128 +#define QUANT_DELTA_COUNT 5 + +/* + * This table comes from the concatenation of k_coeff_entropy_update_probs, + * kf_ymode_prob, default_mv_context, etc. It is provided in this form in + * order to avoid computing it every time the driver is initialised, and is + * suitable for direct consumption by the hardware. 
+ */ +static const u8 prob_table_init[] = { + /* k_coeff_entropy_update_probs */ + /* block 0 */ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xB0, 0xF6, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xDF, 0xF1, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF9, 0xFD, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xF4, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEA, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xF6, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEF, 0xFD, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xF8, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFB, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 
0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFD, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFB, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFE, 0xFD, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFA, 0xFF, 0xFE, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* block 1 */ + 0xD9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE1, 0xFC, 0xF1, 0xFD, 0xFF, 0xFF, 0xFE, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEA, 0xFA, 0xF1, 0xFA, 0xFD, 0xFF, 0xFD, 0xFE, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xDF, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEE, 0xFD, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 
0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xF8, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF9, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF7, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFD, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFE, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFA, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 
0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* block 2 */ + 0xBA, 0xFB, 0xFA, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEA, 0xFB, 0xF4, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFB, 0xFB, 0xF3, 0xFD, 0xFE, 0xFF, 0xFE, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFD, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEC, 0xFD, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFB, 0xFD, 0xFD, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* block 3 */ + 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFA, 0xFE, 0xFC, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 
+ 0xF8, 0xFE, 0xF9, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFD, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF6, 0xFD, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFC, 0xFE, 0xFB, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFE, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF8, 0xFE, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFD, 0xFF, 0xFE, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFB, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF5, 0xFB, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFD, 0xFD, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFB, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFC, 0xFD, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF9, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFA, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* kf_y_mode_probs */ + 0x91, 0x9C, 0xA3, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* split_mv_probs */ + 0x6E, 0x6F, 0x96, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* bmode_prob */ + 0x78, 0x5A, 0x4F, 0x85, 0x57, 0x55, 0x50, 0x6F, + 0x97, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* sub_mv_ref_prob */ + 0x93, 0x88, 0x12, 0x00, + 0x6A, 
0x91, 0x01, 0x00, + 0xB3, 0x79, 0x01, 0x00, + 0xDF, 0x01, 0x22, 0x00, + 0xD0, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* mv_counts_to_probs */ + 0x07, 0x01, 0x01, 0x8F, + 0x0E, 0x12, 0x0E, 0x6B, + 0x87, 0x40, 0x39, 0x44, + 0x3C, 0x38, 0x80, 0x41, + 0x9F, 0x86, 0x80, 0x22, + 0xEA, 0xBC, 0x80, 0x1C, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* kf_y_mode_tree */ + 0x84, 0x02, 0x04, 0x06, 0x80, 0x81, 0x82, 0x83, + + /* y_mode_tree */ + 0x80, 0x02, 0x04, 0x06, 0x81, 0x82, 0x83, 0x84, + + /* uv_mode_tree */ + 0x80, 0x02, 0x81, 0x04, 0x82, 0x83, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + + /* small_mv_tree */ + 0x02, 0x08, 0x04, 0x06, 0x80, 0x81, 0x82, 0x83, + 0x0A, 0x0C, 0x84, 0x85, 0x86, 0x87, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* small_mv_tree again */ + 0x02, 0x08, 0x04, 0x06, 0x80, 0x81, 0x82, 0x83, + 0x0A, 0x0C, 0x84, 0x85, 0x86, 0x87, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* split_mv_tree */ + 0x83, 0x02, 0x82, 0x04, 0x80, 0x81, 0x00, 0x00, + + /* b_mode_tree */ + 0x80, 0x02, 0x81, 0x04, 0x82, 0x06, 0x08, 0x0C, + 0x83, 0x0A, 0x85, 0x86, 0x84, 0x0E, 0x87, 0x10, + 0x88, 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* submv_ref_tree */ + 0x8A, 0x02, 0x8B, 0x04, 0x8C, 0x8D, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, + + /* mv_ref_tree */ + 0x87, 0x02, 0x85, 0x04, 0x86, 0x06, 0x88, 0x89, +}; + +/* + * This table is a copy of k_mv_entropy_update_probs from the VP8 + * specification. + * + * FIXME: If any other driver uses it, we can consider moving + * this table so it can be shared. + */ +static const u8 k_mv_entropy_update_probs[2][V4L2_VP8_MV_PROB_CNT] = { + { 237, 246, 253, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 }, + { 231, 243, 245, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 } +}; +/* + * Write a VP8 "get bits" trigger to VE_H264_TRIGGER_TYPE with bits_count + * and probability encoded in the trigger word, wait on VE_H264_STATUS with + * VE_H264_STATUS_VLD_BUSY through cedrus_wait_for(), then return the + * content of VE_H264_BASIC_BITS converted to uint8_t. + */ +static uint8_t read_bits(struct cedrus_dev *dev, unsigned int bits_count, + unsigned int probability) +{ + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_VP8_GET_BITS | + VE_H264_TRIGGER_TYPE_BIN_LENS(bits_count) | + VE_H264_TRIGGER_TYPE_PROBABILITY(probability)); + + cedrus_wait_for(dev, VE_H264_STATUS, VE_H264_STATUS_VLD_BUSY); + + return cedrus_read(dev, VE_H264_BASIC_BITS); +} +/* + * Read a 1-bit flag and, when it is set, a 4-bit field followed by a 1-bit + * field. Every value is discarded. Presumably the two fields are a + * quantizer delta magnitude and its sign - TODO confirm. + */ +static void get_delta_q(struct cedrus_dev *dev) +{ + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 4, VP8_PROB_HALF); + read_bits(dev, 1, VP8_PROB_HALF); + } +} +/* + * Step through the segmentation part of the frame header with read_bits(). + * The first bit is kept in "update". A second flag gates a 1-bit field and + * two groups of four optional entries (7 bits and 1 bit each, then 6 bits + * and 1 bit each). When "update" is set, three optional 8-bit fields + * follow. None of the values read here is stored. + */ +static void process_segmentation_info(struct cedrus_dev *dev) +{ + int update, i; + + update = read_bits(dev, 1, VP8_PROB_HALF); + + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 1, VP8_PROB_HALF); + + for (i = 0; i < 4; i++) + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 7, VP8_PROB_HALF); + read_bits(dev, 1, VP8_PROB_HALF); + } + + for (i = 0; i < 4; i++) + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 6, VP8_PROB_HALF); + read_bits(dev, 1, VP8_PROB_HALF); + } + } + + if (update) + for (i = 0; i < 3; i++) + if (read_bits(dev, 1, VP8_PROB_HALF)) + read_bits(dev, 8, VP8_PROB_HALF); +} +/* + * When the leading 1-bit flag is set, read two groups of four optional + * entries. Each entry is guarded by its own 1-bit flag and consists of a + * 6-bit field and a 1-bit field. None of the values read here is stored. + */ +static void process_ref_lf_delta_info(struct cedrus_dev *dev) +{ + if (read_bits(dev, 1, VP8_PROB_HALF)) { + int i; + + for (i = 0; i < 4; i++) + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 6, VP8_PROB_HALF); + 
read_bits(dev, 1, VP8_PROB_HALF); + } + + for (i = 0; i < 4; i++) + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 6, VP8_PROB_HALF); + read_bits(dev, 1, VP8_PROB_HALF); + } + } +} +/* + * Read the golden and alt-ref refresh flags (1 bit each). For each flag + * that is clear, read a 2-bit field. Then read two more 1-bit fields. + * Only the two refresh flags are used; everything else is discarded. + */ +static void process_ref_frame_info(struct cedrus_dev *dev) +{ + u8 refresh_golden_frame = read_bits(dev, 1, VP8_PROB_HALF); + u8 refresh_alt_ref_frame = read_bits(dev, 1, VP8_PROB_HALF); + + if (!refresh_golden_frame) + read_bits(dev, 2, VP8_PROB_HALF); + + if (!refresh_alt_ref_frame) + read_bits(dev, 2, VP8_PROB_HALF); + + read_bits(dev, 1, VP8_PROB_HALF); + read_bits(dev, 1, VP8_PROB_HALF); +} +/* + * Write VE_H264_STATUS_INT_MASK to VE_H264_STATUS. + * NOTE(review): presumably the status bits are cleared by writing ones - + * confirm against the hardware documentation. + */ +static void cedrus_irq_clear(struct cedrus_dev *dev) +{ + cedrus_write(dev, VE_H264_STATUS, + VE_H264_STATUS_INT_MASK); +} +/* + * Walk the VP8 frame header with read_bits(). Values are discarded; only + * the flags that steer the walk are looked at. Fields that exist only in + * key frames or only in non-key frames are selected with + * V4L2_VP8_FRAME_IS_KEY_FRAME(slice). Partway through, a + * VE_H264_TRIGGER_TYPE_VP8_UPDATE_COEF trigger is written, waited on with + * VE_H264_STATUS_VP8_UPPROB_BUSY, and followed by cedrus_irq_clear(). + * The FIXME in cedrus_vp8_setup() indicates that this walk is kept because + * the parsing operations change internal VPU state. + */ +static void cedrus_read_header(struct cedrus_dev *dev, + const struct v4l2_ctrl_vp8_frame *slice) +{ + int i, j; + + if (V4L2_VP8_FRAME_IS_KEY_FRAME(slice)) { + read_bits(dev, 1, VP8_PROB_HALF); + read_bits(dev, 1, VP8_PROB_HALF); + } + + if (read_bits(dev, 1, VP8_PROB_HALF)) + process_segmentation_info(dev); + + read_bits(dev, 1, VP8_PROB_HALF); + read_bits(dev, 6, VP8_PROB_HALF); + read_bits(dev, 3, VP8_PROB_HALF); + + if (read_bits(dev, 1, VP8_PROB_HALF)) + process_ref_lf_delta_info(dev); + + read_bits(dev, 2, VP8_PROB_HALF); + + /* y_ac_qi */ + read_bits(dev, 7, VP8_PROB_HALF); + + /* Parses y_dc_delta, y2_dc_delta, etc. 
*/ + for (i = 0; i < QUANT_DELTA_COUNT; i++) + get_delta_q(dev); + + if (!V4L2_VP8_FRAME_IS_KEY_FRAME(slice)) + process_ref_frame_info(dev); + + read_bits(dev, 1, VP8_PROB_HALF); + + if (!V4L2_VP8_FRAME_IS_KEY_FRAME(slice)) + read_bits(dev, 1, VP8_PROB_HALF); + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, VE_H264_TRIGGER_TYPE_VP8_UPDATE_COEF); + cedrus_wait_for(dev, VE_H264_STATUS, VE_H264_STATUS_VP8_UPPROB_BUSY); + cedrus_irq_clear(dev); + + if (read_bits(dev, 1, VP8_PROB_HALF)) + read_bits(dev, 8, VP8_PROB_HALF); + + if (!V4L2_VP8_FRAME_IS_KEY_FRAME(slice)) { + read_bits(dev, 8, VP8_PROB_HALF); + read_bits(dev, 8, VP8_PROB_HALF); + read_bits(dev, 8, VP8_PROB_HALF); + + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 8, VP8_PROB_HALF); + read_bits(dev, 8, VP8_PROB_HALF); + read_bits(dev, 8, VP8_PROB_HALF); + read_bits(dev, 8, VP8_PROB_HALF); + } + + if (read_bits(dev, 1, VP8_PROB_HALF)) { + read_bits(dev, 8, VP8_PROB_HALF); + read_bits(dev, 8, VP8_PROB_HALF); + read_bits(dev, 8, VP8_PROB_HALF); + } + + for (i = 0; i < 2; i++) + for (j = 0; j < V4L2_VP8_MV_PROB_CNT; j++) + if (read_bits(dev, 1, k_mv_entropy_update_probs[i][j])) + read_bits(dev, 7, VP8_PROB_HALF); + } +} +/* + * Copy the probabilities carried by the frame control into prob_table at + * fixed offsets: y_mode_probs at 0x1008, uv_mode_probs at 0x1010, + * segment_probs at 0x1018, prob_skip_false, prob_intra, prob_last and + * prob_gf at 0x101c to 0x101f, the two mv_probs rows at 0x1020 and 0x1040, + * and 11 bytes of every coeff_probs[i][j][k] entry at offset + * i * 512 + j * 64 + k * 16. + */ +static void cedrus_vp8_update_probs(const struct v4l2_ctrl_vp8_frame *slice, + u8 *prob_table) +{ + int i, j, k; + + memcpy(&prob_table[0x1008], slice->entropy.y_mode_probs, + sizeof(slice->entropy.y_mode_probs)); + memcpy(&prob_table[0x1010], slice->entropy.uv_mode_probs, + sizeof(slice->entropy.uv_mode_probs)); + + memcpy(&prob_table[0x1018], slice->segment.segment_probs, + sizeof(slice->segment.segment_probs)); + + prob_table[0x101c] = slice->prob_skip_false; + prob_table[0x101d] = slice->prob_intra; + prob_table[0x101e] = slice->prob_last; + prob_table[0x101f] = slice->prob_gf; + + memcpy(&prob_table[0x1020], slice->entropy.mv_probs[0], + V4L2_VP8_MV_PROB_CNT); + memcpy(&prob_table[0x1040], slice->entropy.mv_probs[1], + V4L2_VP8_MV_PROB_CNT); + + for (i = 0; i < 4; ++i) + for (j = 0; j 
< 8; ++j) + for (k = 0; k < 3; ++k) + memcpy(&prob_table[i * 512 + j * 64 + k * 16], + slice->entropy.coeff_probs[i][j][k], 11); +} +/* + * Read VE_H264_STATUS and classify it: CEDRUS_IRQ_ERROR when a decode-error + * or VLD-data-request bit is set, CEDRUS_IRQ_OK when the slice-decode bit + * is set, CEDRUS_IRQ_NONE otherwise. + * NOTE(review): the slice-decode test applies a VE_H264_CTRL_* mask to a + * value read from the STATUS register, while the error test uses + * VE_H264_STATUS_* masks - confirm both names refer to the same bit. + */ +static enum cedrus_irq_status +cedrus_vp8_irq_status(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_H264_STATUS); + + if (reg & (VE_H264_STATUS_DECODE_ERR_INT | + VE_H264_STATUS_VLD_DATA_REQ_INT)) + return CEDRUS_IRQ_ERROR; + + if (reg & VE_H264_CTRL_SLICE_DECODE_INT) + return CEDRUS_IRQ_OK; + + return CEDRUS_IRQ_NONE; +} +/* Decoder-ops wrapper: forwards to cedrus_irq_clear() with ctx->dev. */ +static void cedrus_vp8_irq_clear(struct cedrus_ctx *ctx) +{ + cedrus_irq_clear(ctx->dev); +} +/* + * Read VE_H264_CTRL and write it back with the bits of + * VE_H264_CTRL_INT_MASK cleared, leaving the other bits as read. + */ +static void cedrus_vp8_irq_disable(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_H264_CTRL); + + cedrus_write(dev, VE_H264_CTRL, + reg & ~VE_H264_CTRL_INT_MASK); +} +/* + * Program the VPU for the VP8 frame described by run->vp8.frame_params. + * In order: enable the engine for CEDRUS_CODEC_VP8, refresh the entropy + * probability buffer, program the bitstream lengths, offset and addresses, + * build and write the VE_VP8_PPS flag word, walk the frame header with + * cedrus_read_header(), zero the registers noted as changed by the + * hardware, write the quantizer, frame size, segment and loop-filter delta + * registers, write the destination and reference buffer addresses, and + * enable the decode-error and slice-decode interrupts in VE_H264_CTRL. + * When slice->lf.level is non-zero, the filter type, frame type and + * sharpness level of this frame are saved in ctx->codec.vp8 for the next + * call. Always returns 0. + * NOTE(review): VE_H264_VLD_LEN is computed from vb2_plane_size() while + * VE_H264_VLD_END uses vb2_get_plane_payload() - confirm this difference + * is intended. + * NOTE(review): the switch on slice->version has no default case, so any + * value other than 1, 2 or 3 adds no version-specific bits - confirm. + */ +static int cedrus_vp8_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) +{ + const struct v4l2_ctrl_vp8_frame *slice = run->vp8.frame_params; + struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q; + struct vb2_buffer *src_buf = &run->src->vb2_buf; + struct cedrus_dev *dev = ctx->dev; + dma_addr_t luma_addr, chroma_addr; + dma_addr_t src_buf_addr; + int header_size; + u32 reg; + + cedrus_engine_enable(ctx, CEDRUS_CODEC_VP8); + + cedrus_write(dev, VE_H264_CTRL, VE_H264_CTRL_VP8); + + cedrus_vp8_update_probs(slice, ctx->codec.vp8.entropy_probs_buf); + + reg = slice->first_part_size * 8; + cedrus_write(dev, VE_VP8_FIRST_DATA_PART_LEN, reg); + + header_size = V4L2_VP8_FRAME_IS_KEY_FRAME(slice) ? 10 : 3; + + reg = slice->first_part_size + header_size; + cedrus_write(dev, VE_VP8_PART_SIZE_OFFSET, reg); + + reg = vb2_plane_size(src_buf, 0) * 8; + cedrus_write(dev, VE_H264_VLD_LEN, reg); + + /* + * FIXME: There is a problem if frame header is skipped (adding + * first_part_header_bits to offset). It seems that functions + * for parsing bitstreams change internal state of VPU in some + * way that can't be otherwise set. 
Maybe this can be bypassed + * by somehow fixing probability table buffer? + */ + reg = header_size * 8; + cedrus_write(dev, VE_H264_VLD_OFFSET, reg); + + src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); + cedrus_write(dev, VE_H264_VLD_END, + src_buf_addr + vb2_get_plane_payload(src_buf, 0)); + cedrus_write(dev, VE_H264_VLD_ADDR, + VE_H264_VLD_ADDR_VAL(src_buf_addr) | + VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | + VE_H264_VLD_ADDR_LAST); + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_INIT_SWDEC); + + cedrus_write(dev, VE_VP8_ENTROPY_PROBS_ADDR, + ctx->codec.vp8.entropy_probs_buf_dma); + + reg = 0; + switch (slice->version) { + case 1: + reg |= VE_VP8_PPS_FILTER_TYPE_SIMPLE; + reg |= VE_VP8_PPS_BILINEAR_MC_FILTER; + break; + case 2: + reg |= VE_VP8_PPS_LPF_DISABLE; + reg |= VE_VP8_PPS_BILINEAR_MC_FILTER; + break; + case 3: + reg |= VE_VP8_PPS_LPF_DISABLE; + reg |= VE_VP8_PPS_FULL_PIXEL; + break; + } + if (slice->segment.flags & V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP) + reg |= VE_VP8_PPS_UPDATE_MB_SEGMENTATION_MAP; + if (!(slice->segment.flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE)) + reg |= VE_VP8_PPS_MB_SEGMENT_ABS_DELTA; + if (slice->segment.flags & V4L2_VP8_SEGMENT_FLAG_ENABLED) + reg |= VE_VP8_PPS_SEGMENTATION_ENABLE; + if (ctx->codec.vp8.last_filter_type) + reg |= VE_VP8_PPS_LAST_LOOP_FILTER_SIMPLE; + reg |= VE_VP8_PPS_SHARPNESS_LEVEL(slice->lf.sharpness_level); + if (slice->lf.flags & V4L2_VP8_LF_FILTER_TYPE_SIMPLE) + reg |= VE_VP8_PPS_LOOP_FILTER_SIMPLE; + reg |= VE_VP8_PPS_LOOP_FILTER_LEVEL(slice->lf.level); + if (slice->lf.flags & V4L2_VP8_LF_ADJ_ENABLE) + reg |= VE_VP8_PPS_MODE_REF_LF_DELTA_ENABLE; + if (slice->lf.flags & V4L2_VP8_LF_DELTA_UPDATE) + reg |= VE_VP8_PPS_MODE_REF_LF_DELTA_UPDATE; + reg |= VE_VP8_PPS_TOKEN_PARTITION(ilog2(slice->num_dct_parts)); + if (slice->flags & V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF) + reg |= VE_VP8_PPS_MB_NO_COEFF_SKIP; + reg |= VE_VP8_PPS_RELOAD_ENTROPY_PROBS; + if (slice->flags & 
V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN) + reg |= VE_VP8_PPS_GOLDEN_SIGN_BIAS; + if (slice->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT) + reg |= VE_VP8_PPS_ALTREF_SIGN_BIAS; + if (ctx->codec.vp8.last_frame_p_type) + reg |= VE_VP8_PPS_LAST_PIC_TYPE_P_FRAME; + reg |= VE_VP8_PPS_LAST_SHARPNESS_LEVEL(ctx->codec.vp8.last_sharpness_level); + if (!(slice->flags & V4L2_VP8_FRAME_FLAG_KEY_FRAME)) + reg |= VE_VP8_PPS_PIC_TYPE_P_FRAME; + cedrus_write(dev, VE_VP8_PPS, reg); + + cedrus_read_header(dev, slice); + + /* reset registers changed by HW */ + cedrus_write(dev, VE_H264_CUR_MB_NUM, 0); + cedrus_write(dev, VE_H264_MB_ADDR, 0); + cedrus_write(dev, VE_H264_ERROR_CASE, 0); + + reg = 0; + reg |= VE_VP8_QP_INDEX_DELTA_UVAC(slice->quant.uv_ac_delta); + reg |= VE_VP8_QP_INDEX_DELTA_UVDC(slice->quant.uv_dc_delta); + reg |= VE_VP8_QP_INDEX_DELTA_Y2AC(slice->quant.y2_ac_delta); + reg |= VE_VP8_QP_INDEX_DELTA_Y2DC(slice->quant.y2_dc_delta); + reg |= VE_VP8_QP_INDEX_DELTA_Y1DC(slice->quant.y_dc_delta); + reg |= VE_VP8_QP_INDEX_DELTA_BASE_QINDEX(slice->quant.y_ac_qi); + cedrus_write(dev, VE_VP8_QP_INDEX_DELTA, reg); + + reg = 0; + reg |= VE_VP8_FSIZE_WIDTH(slice->width); + reg |= VE_VP8_FSIZE_HEIGHT(slice->height); + cedrus_write(dev, VE_VP8_FSIZE, reg); + + reg = 0; + reg |= VE_VP8_PICSIZE_WIDTH(slice->width); + reg |= VE_VP8_PICSIZE_HEIGHT(slice->height); + cedrus_write(dev, VE_VP8_PICSIZE, reg); + + reg = 0; + reg |= VE_VP8_SEGMENT3(slice->segment.quant_update[3]); + reg |= VE_VP8_SEGMENT2(slice->segment.quant_update[2]); + reg |= VE_VP8_SEGMENT1(slice->segment.quant_update[1]); + reg |= VE_VP8_SEGMENT0(slice->segment.quant_update[0]); + cedrus_write(dev, VE_VP8_SEGMENT_FEAT_MB_LV0, reg); + + reg = 0; + reg |= VE_VP8_SEGMENT3(slice->segment.lf_update[3]); + reg |= VE_VP8_SEGMENT2(slice->segment.lf_update[2]); + reg |= VE_VP8_SEGMENT1(slice->segment.lf_update[1]); + reg |= VE_VP8_SEGMENT0(slice->segment.lf_update[0]); + cedrus_write(dev, VE_VP8_SEGMENT_FEAT_MB_LV1, reg); + + reg = 0; + 
reg |= VE_VP8_LF_DELTA3(slice->lf.ref_frm_delta[3]); + reg |= VE_VP8_LF_DELTA2(slice->lf.ref_frm_delta[2]); + reg |= VE_VP8_LF_DELTA1(slice->lf.ref_frm_delta[1]); + reg |= VE_VP8_LF_DELTA0(slice->lf.ref_frm_delta[0]); + cedrus_write(dev, VE_VP8_REF_LF_DELTA, reg); + + reg = 0; + reg |= VE_VP8_LF_DELTA3(slice->lf.mb_mode_delta[3]); + reg |= VE_VP8_LF_DELTA2(slice->lf.mb_mode_delta[2]); + reg |= VE_VP8_LF_DELTA1(slice->lf.mb_mode_delta[1]); + reg |= VE_VP8_LF_DELTA0(slice->lf.mb_mode_delta[0]); + cedrus_write(dev, VE_VP8_MODE_LF_DELTA, reg); + + luma_addr = cedrus_dst_buf_addr(ctx, &run->dst->vb2_buf, 0); + chroma_addr = cedrus_dst_buf_addr(ctx, &run->dst->vb2_buf, 1); + cedrus_write(dev, VE_VP8_REC_LUMA, luma_addr); + cedrus_write(dev, VE_VP8_REC_CHROMA, chroma_addr); + + cedrus_write_ref_buf_addr(ctx, cap_q, slice->last_frame_ts, + VE_VP8_FWD_LUMA, VE_VP8_FWD_CHROMA); + cedrus_write_ref_buf_addr(ctx, cap_q, slice->golden_frame_ts, + VE_VP8_BWD_LUMA, VE_VP8_BWD_CHROMA); + cedrus_write_ref_buf_addr(ctx, cap_q, slice->alt_frame_ts, + VE_VP8_ALT_LUMA, VE_VP8_ALT_CHROMA); + + cedrus_write(dev, VE_H264_CTRL, VE_H264_CTRL_VP8 | + VE_H264_CTRL_DECODE_ERR_INT | + VE_H264_CTRL_SLICE_DECODE_INT); + + if (slice->lf.level) { + ctx->codec.vp8.last_filter_type = + !!(slice->lf.flags & V4L2_VP8_LF_FILTER_TYPE_SIMPLE); + ctx->codec.vp8.last_frame_p_type = + !V4L2_VP8_FRAME_IS_KEY_FRAME(slice); + ctx->codec.vp8.last_sharpness_level = + slice->lf.sharpness_level; + } + + return 0; +} +/* + * Allocate the coherent DMA buffer of CEDRUS_ENTROPY_PROBS_SIZE bytes that + * holds the entropy probabilities and copy prob_table_init into it at + * index 2048. Returns 0 on success or -ENOMEM when the allocation fails. + */ +static int cedrus_vp8_start(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + ctx->codec.vp8.entropy_probs_buf = + dma_alloc_coherent(dev->dev, CEDRUS_ENTROPY_PROBS_SIZE, + &ctx->codec.vp8.entropy_probs_buf_dma, + GFP_KERNEL); + if (!ctx->codec.vp8.entropy_probs_buf) + return -ENOMEM; + + /* + * This offset has been discovered by reverse engineering, we don’t know + * what it actually means. 
+ */ + memcpy(&ctx->codec.vp8.entropy_probs_buf[2048], + prob_table_init, sizeof(prob_table_init)); + + return 0; +} +/* + * Disable the engine, then free the entropy probability buffer that + * cedrus_vp8_start() allocated. + */ +static void cedrus_vp8_stop(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_engine_disable(dev); + + dma_free_coherent(dev->dev, CEDRUS_ENTROPY_PROBS_SIZE, + ctx->codec.vp8.entropy_probs_buf, + ctx->codec.vp8.entropy_probs_buf_dma); +} +/* Write the VP8 slice-decode trigger to VE_H264_TRIGGER_TYPE. */ +static void cedrus_vp8_trigger(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_VP8_SLICE_DECODE); +} +/* Decoder operations table wiring the VP8 callbacks defined in this file. */ +struct cedrus_dec_ops cedrus_dec_ops_vp8 = { + .irq_clear = cedrus_vp8_irq_clear, + .irq_disable = cedrus_vp8_irq_disable, + .irq_status = cedrus_vp8_irq_status, + .setup = cedrus_vp8_setup, + .start = cedrus_vp8_start, + .stop = cedrus_vp8_stop, + .trigger = cedrus_vp8_trigger, +}; |