diff options
Diffstat (limited to 'drivers/media/platform/verisilicon')
36 files changed, 12485 insertions, 0 deletions
diff --git a/drivers/media/platform/verisilicon/Kconfig b/drivers/media/platform/verisilicon/Kconfig new file mode 100644 index 000000000..e65b836b9 --- /dev/null +++ b/drivers/media/platform/verisilicon/Kconfig @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: GPL-2.0-only + +comment "Verisilicon media platform drivers" + +config VIDEO_HANTRO + tristate "Hantro VPU driver" + depends on ARCH_MXC || ARCH_ROCKCHIP || ARCH_AT91 || ARCH_SUNXI || COMPILE_TEST + depends on V4L_MEM2MEM_DRIVERS + depends on VIDEO_DEV + select MEDIA_CONTROLLER + select MEDIA_CONTROLLER_REQUEST_API + select VIDEOBUF2_DMA_CONTIG + select VIDEOBUF2_VMALLOC + select V4L2_MEM2MEM_DEV + select V4L2_H264 + select V4L2_VP9 + help + Support for the Hantro IP based Video Processing Units present on + Rockchip and NXP i.MX8M SoCs, which accelerate video and image + encoding and decoding. + To compile this driver as a module, choose M here: the module + will be called hantro-vpu. + +config VIDEO_HANTRO_IMX8M + bool "Hantro VPU i.MX8M support" + depends on VIDEO_HANTRO + depends on ARCH_MXC || COMPILE_TEST + default y + help + Enable support for i.MX8M SoCs. + +config VIDEO_HANTRO_SAMA5D4 + bool "Hantro VDEC SAMA5D4 support" + depends on VIDEO_HANTRO + depends on ARCH_AT91 || COMPILE_TEST + default y + help + Enable support for Microchip SAMA5D4 SoCs. + +config VIDEO_HANTRO_ROCKCHIP + bool "Hantro VPU Rockchip support" + depends on VIDEO_HANTRO + depends on ARCH_ROCKCHIP || COMPILE_TEST + default y + help + Enable support for RK3288, RK3328, and RK3399 SoCs. + +config VIDEO_HANTRO_SUNXI + bool "Hantro VPU Allwinner support" + depends on VIDEO_HANTRO + depends on ARCH_SUNXI || COMPILE_TEST + default y + help + Enable support for H6 SoC. diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile new file mode 100644 index 000000000..ebd5ede7b --- /dev/null +++ b/drivers/media/platform/verisilicon/Makefile @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_VIDEO_HANTRO) += hantro-vpu.o + +hantro-vpu-y += \ + hantro_drv.o \ + hantro_v4l2.o \ + hantro_postproc.o \ + hantro_h1_jpeg_enc.o \ + hantro_g1.o \ + hantro_g1_h264_dec.o \ + hantro_g1_mpeg2_dec.o \ + hantro_g1_vp8_dec.o \ + hantro_g2.o \ + hantro_g2_hevc_dec.o \ + hantro_g2_vp9_dec.o \ + rockchip_vpu2_hw_jpeg_enc.o \ + rockchip_vpu2_hw_h264_dec.o \ + rockchip_vpu2_hw_mpeg2_dec.o \ + rockchip_vpu2_hw_vp8_dec.o \ + hantro_jpeg.o \ + hantro_h264.o \ + hantro_hevc.o \ + hantro_mpeg2.o \ + hantro_vp8.o \ + hantro_vp9.o + +hantro-vpu-$(CONFIG_VIDEO_HANTRO_IMX8M) += \ + imx8m_vpu_hw.o + +hantro-vpu-$(CONFIG_VIDEO_HANTRO_SAMA5D4) += \ + sama5d4_vdec_hw.o + +hantro-vpu-$(CONFIG_VIDEO_HANTRO_ROCKCHIP) += \ + rockchip_vpu_hw.o + +hantro-vpu-$(CONFIG_VIDEO_HANTRO_SUNXI) += \ + sunxi_vpu_hw.o diff --git a/drivers/media/platform/verisilicon/hantro.h b/drivers/media/platform/verisilicon/hantro.h new file mode 100644 index 000000000..2989ebc63 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro.h @@ -0,0 +1,485 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hantro VPU codec driver + * + * Copyright 2018 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + * + * Based on s5p-mfc driver by Samsung Electronics Co., Ltd. + * Copyright (C) 2011 Samsung Electronics Co., Ltd. + */ + +#ifndef HANTRO_H_ +#define HANTRO_H_ + +#include <linux/platform_device.h> +#include <linux/videodev2.h> +#include <linux/wait.h> +#include <linux/clk.h> +#include <linux/reset.h> + +#include <media/v4l2-ctrls.h> +#include <media/v4l2-device.h> +#include <media/v4l2-ioctl.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-core.h> +#include <media/videobuf2-dma-contig.h> + +#include "hantro_hw.h" + +struct hantro_ctx; +struct hantro_codec_ops; +struct hantro_postproc_ops; + +#define HANTRO_JPEG_ENCODER BIT(0) +#define HANTRO_ENCODERS 0x0000ffff +#define HANTRO_MPEG2_DECODER BIT(16) +#define HANTRO_VP8_DECODER BIT(17) +#define HANTRO_H264_DECODER BIT(18) +#define HANTRO_HEVC_DECODER BIT(19) +#define HANTRO_VP9_DECODER BIT(20) +#define HANTRO_DECODERS 0xffff0000 + +/** + * struct hantro_irq - irq handler and name + * + * @name: irq name for device tree lookup + * @handler: interrupt handler + */ +struct hantro_irq { + const char *name; + irqreturn_t (*handler)(int irq, void *priv); +}; + +/** + * struct hantro_variant - information about VPU hardware variant + * + * @enc_offset: Offset from VPU base to encoder registers. + * @dec_offset: Offset from VPU base to decoder registers. + * @enc_fmts: Encoder formats. + * @num_enc_fmts: Number of encoder formats. + * @dec_fmts: Decoder formats. + * @num_dec_fmts: Number of decoder formats. + * @postproc_fmts: Post-processor formats. + * @num_postproc_fmts: Number of post-processor formats. + * @postproc_ops: Post-processor ops. + * @codec: Supported codecs + * @codec_ops: Codec ops. + * @init: Initialize hardware, optional. + * @runtime_resume: reenable hardware after power gating, optional. + * @irqs: array of irq names and interrupt handlers + * @num_irqs: number of irqs in the array + * @clk_names: array of clock names + * @num_clocks: number of clocks in the array + * @reg_names: array of register range names + * @num_regs: number of register range names in the array + * @double_buffer: core needs double buffering + * @legacy_regs: core uses legacy register set + * @late_postproc: postproc must be set up at the end of the job + */ +struct hantro_variant { + unsigned int enc_offset; + unsigned int dec_offset; + const struct hantro_fmt *enc_fmts; + unsigned int num_enc_fmts; + const struct hantro_fmt *dec_fmts; + unsigned int num_dec_fmts; + const struct hantro_fmt *postproc_fmts; + unsigned int num_postproc_fmts; + const struct hantro_postproc_ops *postproc_ops; + unsigned int codec; + const struct hantro_codec_ops *codec_ops; + int (*init)(struct hantro_dev *vpu); + int (*runtime_resume)(struct hantro_dev *vpu); + const struct hantro_irq *irqs; + int num_irqs; + const char * const *clk_names; + int num_clocks; + const char * const *reg_names; + int num_regs; + unsigned int double_buffer : 1; + unsigned int legacy_regs : 1; + unsigned int late_postproc : 1; +}; + +/** + * enum hantro_codec_mode - codec operating mode. + * @HANTRO_MODE_NONE: No operating mode. Used for RAW video formats. + * @HANTRO_MODE_JPEG_ENC: JPEG encoder. + * @HANTRO_MODE_H264_DEC: H264 decoder. + * @HANTRO_MODE_MPEG2_DEC: MPEG-2 decoder. + * @HANTRO_MODE_VP8_DEC: VP8 decoder. + * @HANTRO_MODE_HEVC_DEC: HEVC decoder. + * @HANTRO_MODE_VP9_DEC: VP9 decoder. + */ +enum hantro_codec_mode { + HANTRO_MODE_NONE = -1, + HANTRO_MODE_JPEG_ENC, + HANTRO_MODE_H264_DEC, + HANTRO_MODE_MPEG2_DEC, + HANTRO_MODE_VP8_DEC, + HANTRO_MODE_HEVC_DEC, + HANTRO_MODE_VP9_DEC, +}; + +/* + * struct hantro_ctrl - helper type to declare supported controls + * @codec: codec id this control belong to (HANTRO_JPEG_ENCODER, etc.) + * @cfg: control configuration + */ +struct hantro_ctrl { + unsigned int codec; + struct v4l2_ctrl_config cfg; +}; + +/* + * struct hantro_func - Hantro VPU functionality + * + * @id: processing functionality ID (can be + * %MEDIA_ENT_F_PROC_VIDEO_ENCODER or + * %MEDIA_ENT_F_PROC_VIDEO_DECODER) + * @vdev: &struct video_device that exposes the encoder or + * decoder functionality + * @source_pad: &struct media_pad with the source pad. + * @sink: &struct media_entity pointer with the sink entity + * @sink_pad: &struct media_pad with the sink pad. + * @proc: &struct media_entity pointer with the M2M device itself. + * @proc_pads: &struct media_pad with the @proc pads. + * @intf_devnode: &struct media_intf devnode pointer with the interface + * with controls the M2M device. + * + * Contains everything needed to attach the video device to the media device. + */ +struct hantro_func { + unsigned int id; + struct video_device vdev; + struct media_pad source_pad; + struct media_entity sink; + struct media_pad sink_pad; + struct media_entity proc; + struct media_pad proc_pads[2]; + struct media_intf_devnode *intf_devnode; +}; + +static inline struct hantro_func * +hantro_vdev_to_func(struct video_device *vdev) +{ + return container_of(vdev, struct hantro_func, vdev); +} + +/** + * struct hantro_dev - driver data + * @v4l2_dev: V4L2 device to register video devices for. + * @m2m_dev: mem2mem device associated to this device. + * @mdev: media device associated to this device. + * @encoder: encoder functionality. + * @decoder: decoder functionality. + * @pdev: Pointer to VPU platform device. + * @dev: Pointer to device for convenient logging using + * dev_ macros. + * @clocks: Array of clock handles. + * @resets: Array of reset handles. + * @reg_bases: Mapped addresses of VPU registers. + * @enc_base: Mapped address of VPU encoder register for convenience. + * @dec_base: Mapped address of VPU decoder register for convenience. + * @ctrl_base: Mapped address of VPU control block. + * @vpu_mutex: Mutex to synchronize V4L2 calls. + * @irqlock: Spinlock to synchronize access to data structures + * shared with interrupt handlers. + * @variant: Hardware variant-specific parameters. + * @watchdog_work: Delayed work for hardware timeout handling. + */ +struct hantro_dev { + struct v4l2_device v4l2_dev; + struct v4l2_m2m_dev *m2m_dev; + struct media_device mdev; + struct hantro_func *encoder; + struct hantro_func *decoder; + struct platform_device *pdev; + struct device *dev; + struct clk_bulk_data *clocks; + struct reset_control *resets; + void __iomem **reg_bases; + void __iomem *enc_base; + void __iomem *dec_base; + void __iomem *ctrl_base; + + struct mutex vpu_mutex; /* video_device lock */ + spinlock_t irqlock; + const struct hantro_variant *variant; + struct delayed_work watchdog_work; +}; + +/** + * struct hantro_ctx - Context (instance) private data. + * + * @dev: VPU driver data to which the context belongs. + * @fh: V4L2 file handler. + * @is_encoder: Decoder or encoder context? + * + * @sequence_cap: Sequence counter for capture queue + * @sequence_out: Sequence counter for output queue + * + * @vpu_src_fmt: Descriptor of active source format. + * @src_fmt: V4L2 pixel format of active source format. + * @vpu_dst_fmt: Descriptor of active destination format. + * @dst_fmt: V4L2 pixel format of active destination format. + * + * @ctrl_handler: Control handler used to register controls. + * @jpeg_quality: User-specified JPEG compression quality. + * @bit_depth: Bit depth of current frame + * + * @codec_ops: Set of operations related to codec mode. + * @postproc: Post-processing context. + * @h264_dec: H.264-decoding context. + * @jpeg_enc: JPEG-encoding context. + * @mpeg2_dec: MPEG-2-decoding context. + * @vp8_dec: VP8-decoding context. + * @hevc_dec: HEVC-decoding context. + * @vp9_dec: VP9-decoding context. + */ +struct hantro_ctx { + struct hantro_dev *dev; + struct v4l2_fh fh; + bool is_encoder; + + u32 sequence_cap; + u32 sequence_out; + + const struct hantro_fmt *vpu_src_fmt; + struct v4l2_pix_format_mplane src_fmt; + const struct hantro_fmt *vpu_dst_fmt; + struct v4l2_pix_format_mplane dst_fmt; + + struct v4l2_ctrl_handler ctrl_handler; + int jpeg_quality; + int bit_depth; + + const struct hantro_codec_ops *codec_ops; + struct hantro_postproc_ctx postproc; + + /* Specific for particular codec modes. */ + union { + struct hantro_h264_dec_hw_ctx h264_dec; + struct hantro_mpeg2_dec_hw_ctx mpeg2_dec; + struct hantro_vp8_dec_hw_ctx vp8_dec; + struct hantro_hevc_dec_hw_ctx hevc_dec; + struct hantro_vp9_dec_hw_ctx vp9_dec; + }; +}; + +/** + * struct hantro_fmt - information about supported video formats. + * @name: Human readable name of the format. + * @fourcc: FourCC code of the format. See V4L2_PIX_FMT_*. + * @codec_mode: Codec mode related to this format. See + * enum hantro_codec_mode. + * @header_size: Optional header size. Currently used by JPEG encoder. + * @max_depth: Maximum depth, for bitstream formats + * @enc_fmt: Format identifier for encoder registers. + * @frmsize: Supported range of frame sizes (only for bitstream formats). + * @postprocessed: Indicates if this format needs the post-processor. + * @match_depth: Indicates if format bit depth must match video bit depth + */ +struct hantro_fmt { + char *name; + u32 fourcc; + enum hantro_codec_mode codec_mode; + int header_size; + int max_depth; + enum hantro_enc_fmt enc_fmt; + struct v4l2_frmsize_stepwise frmsize; + bool postprocessed; + bool match_depth; +}; + +struct hantro_reg { + u32 base; + u32 shift; + u32 mask; +}; + +struct hantro_postproc_regs { + struct hantro_reg pipeline_en; + struct hantro_reg max_burst; + struct hantro_reg clk_gate; + struct hantro_reg out_swap32; + struct hantro_reg out_endian; + struct hantro_reg out_luma_base; + struct hantro_reg input_width; + struct hantro_reg input_height; + struct hantro_reg output_width; + struct hantro_reg output_height; + struct hantro_reg input_fmt; + struct hantro_reg output_fmt; + struct hantro_reg orig_width; + struct hantro_reg display_width; +}; + +struct hantro_vp9_decoded_buffer_info { + /* Info needed when the decoded frame serves as a reference frame. */ + unsigned short width; + unsigned short height; + u32 bit_depth : 4; +}; + +struct hantro_decoded_buffer { + /* Must be the first field in this struct. */ + struct v4l2_m2m_buffer base; + + union { + struct hantro_vp9_decoded_buffer_info vp9; + }; +}; + +/* Logging helpers */ + +/** + * DOC: hantro_debug: Module parameter to control level of debugging messages. + * + * Level of debugging messages can be controlled by bits of + * module parameter called "debug". Meaning of particular + * bits is as follows: + * + * bit 0 - global information: mode, size, init, release + * bit 1 - each run start/result information + * bit 2 - contents of small controls from userspace + * bit 3 - contents of big controls from userspace + * bit 4 - detail fmt, ctrl, buffer q/dq information + * bit 5 - detail function enter/leave trace information + * bit 6 - register write/read information + */ +extern int hantro_debug; + +#define vpu_debug(level, fmt, args...) \ + do { \ + if (hantro_debug & BIT(level)) \ + pr_info("%s:%d: " fmt, \ + __func__, __LINE__, ##args); \ + } while (0) + +#define vpu_err(fmt, args...) \ + pr_err("%s:%d: " fmt, __func__, __LINE__, ##args) + +/* Structure access helpers. */ +static inline struct hantro_ctx *fh_to_ctx(struct v4l2_fh *fh) +{ + return container_of(fh, struct hantro_ctx, fh); +} + +/* Register accessors. */ +static inline void vepu_write_relaxed(struct hantro_dev *vpu, + u32 val, u32 reg) +{ + vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val); + writel_relaxed(val, vpu->enc_base + reg); +} + +static inline void vepu_write(struct hantro_dev *vpu, u32 val, u32 reg) +{ + vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val); + writel(val, vpu->enc_base + reg); +} + +static inline u32 vepu_read(struct hantro_dev *vpu, u32 reg) +{ + u32 val = readl(vpu->enc_base + reg); + + vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val); + return val; +} + +static inline void vdpu_write_relaxed(struct hantro_dev *vpu, + u32 val, u32 reg) +{ + vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val); + writel_relaxed(val, vpu->dec_base + reg); +} + +static inline void vdpu_write(struct hantro_dev *vpu, u32 val, u32 reg) +{ + vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val); + writel(val, vpu->dec_base + reg); +} + +static inline void hantro_write_addr(struct hantro_dev *vpu, + unsigned long offset, + dma_addr_t addr) +{ + vdpu_write(vpu, addr & 0xffffffff, offset); +} + +static inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg) +{ + u32 val = readl(vpu->dec_base + reg); + + vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val); + return val; +} + +static inline u32 vdpu_read_mask(struct hantro_dev *vpu, + const struct hantro_reg *reg, + u32 val) +{ + u32 v; + + v = vdpu_read(vpu, reg->base); + v &= ~(reg->mask << reg->shift); + v |= ((val & reg->mask) << reg->shift); + return v; +} + +static inline void hantro_reg_write(struct hantro_dev *vpu, + const struct hantro_reg *reg, + u32 val) +{ + vdpu_write_relaxed(vpu, vdpu_read_mask(vpu, reg, val), reg->base); +} + +static inline void hantro_reg_write_s(struct hantro_dev *vpu, + const struct hantro_reg *reg, + u32 val) +{ + vdpu_write(vpu, vdpu_read_mask(vpu, reg, val), reg->base); +} + +void *hantro_get_ctrl(struct hantro_ctx *ctx, u32 id); +dma_addr_t hantro_get_ref(struct hantro_ctx *ctx, u64 ts); + +static inline struct vb2_v4l2_buffer * +hantro_get_src_buf(struct hantro_ctx *ctx) +{ + return v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); +} + +static inline struct vb2_v4l2_buffer * +hantro_get_dst_buf(struct hantro_ctx *ctx) +{ + return v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); +} + +bool hantro_needs_postproc(const struct hantro_ctx *ctx, + const struct hantro_fmt *fmt); + +static inline dma_addr_t +hantro_get_dec_buf_addr(struct hantro_ctx *ctx, struct vb2_buffer *vb) +{ + if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) + return ctx->postproc.dec_q[vb->index].dma; + return vb2_dma_contig_plane_dma_addr(vb, 0); +} + +static inline struct hantro_decoded_buffer * +vb2_to_hantro_decoded_buf(struct vb2_buffer *buf) +{ + return container_of(buf, struct hantro_decoded_buffer, base.vb.vb2_buf); +} + +void hantro_postproc_disable(struct hantro_ctx *ctx); +void hantro_postproc_enable(struct hantro_ctx *ctx); +void hantro_postproc_free(struct hantro_ctx *ctx); +int hantro_postproc_alloc(struct hantro_ctx *ctx); +int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx, + struct v4l2_frmsizeenum *fsize); + +#endif /* HANTRO_H_ */ diff --git a/drivers/media/platform/verisilicon/hantro_drv.c b/drivers/media/platform/verisilicon/hantro_drv.c new file mode 100644 index 000000000..69a2442f3 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_drv.c @@ -0,0 +1,1153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Collabora, Ltd. + * Copyright 2018 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + * + * Based on s5p-mfc driver by Samsung Electronics Co., Ltd. + * Copyright (C) 2011 Samsung Electronics Co., Ltd. + */ + +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/videodev2.h> +#include <linux/workqueue.h> +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> +#include <media/videobuf2-core.h> +#include <media/videobuf2-vmalloc.h> + +#include "hantro_v4l2.h" +#include "hantro.h" +#include "hantro_hw.h" + +#define DRIVER_NAME "hantro-vpu" + +int hantro_debug; +module_param_named(debug, hantro_debug, int, 0644); +MODULE_PARM_DESC(debug, + "Debug level - higher value produces more verbose messages"); + +void *hantro_get_ctrl(struct hantro_ctx *ctx, u32 id) +{ + struct v4l2_ctrl *ctrl; + + ctrl = v4l2_ctrl_find(&ctx->ctrl_handler, id); + return ctrl ? ctrl->p_cur.p : NULL; +} + +dma_addr_t hantro_get_ref(struct hantro_ctx *ctx, u64 ts) +{ + struct vb2_queue *q = v4l2_m2m_get_dst_vq(ctx->fh.m2m_ctx); + struct vb2_buffer *buf; + + buf = vb2_find_buffer(q, ts); + if (!buf) + return 0; + return hantro_get_dec_buf_addr(ctx, buf); +} + +static const struct v4l2_event hantro_eos_event = { + .type = V4L2_EVENT_EOS +}; + +static void hantro_job_finish_no_pm(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + enum vb2_buffer_state result) +{ + struct vb2_v4l2_buffer *src, *dst; + + src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); + + if (WARN_ON(!src)) + return; + if (WARN_ON(!dst)) + return; + + src->sequence = ctx->sequence_out++; + dst->sequence = ctx->sequence_cap++; + + if (v4l2_m2m_is_last_draining_src_buf(ctx->fh.m2m_ctx, src)) { + dst->flags |= V4L2_BUF_FLAG_LAST; + v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event); + v4l2_m2m_mark_stopped(ctx->fh.m2m_ctx); + } + + v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx, + result); +} + +static void hantro_job_finish(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + enum vb2_buffer_state result) +{ + pm_runtime_mark_last_busy(vpu->dev); + pm_runtime_put_autosuspend(vpu->dev); + + clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks); + + hantro_job_finish_no_pm(vpu, ctx, result); +} + +void hantro_irq_done(struct hantro_dev *vpu, + enum vb2_buffer_state result) +{ + struct hantro_ctx *ctx = + v4l2_m2m_get_curr_priv(vpu->m2m_dev); + + /* + * If cancel_delayed_work returns false + * the timeout expired. The watchdog is running, + * and will take care of finishing the job. + */ + if (cancel_delayed_work(&vpu->watchdog_work)) { + if (result == VB2_BUF_STATE_DONE && ctx->codec_ops->done) + ctx->codec_ops->done(ctx); + hantro_job_finish(vpu, ctx, result); + } +} + +void hantro_watchdog(struct work_struct *work) +{ + struct hantro_dev *vpu; + struct hantro_ctx *ctx; + + vpu = container_of(to_delayed_work(work), + struct hantro_dev, watchdog_work); + ctx = v4l2_m2m_get_curr_priv(vpu->m2m_dev); + if (ctx) { + vpu_err("frame processing timed out!\n"); + if (ctx->codec_ops->reset) + ctx->codec_ops->reset(ctx); + hantro_job_finish(vpu, ctx, VB2_BUF_STATE_ERROR); + } +} + +void hantro_start_prepare_run(struct hantro_ctx *ctx) +{ + struct vb2_v4l2_buffer *src_buf; + + src_buf = hantro_get_src_buf(ctx); + v4l2_ctrl_request_setup(src_buf->vb2_buf.req_obj.req, + &ctx->ctrl_handler); + + if (!ctx->is_encoder && !ctx->dev->variant->late_postproc) { + if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) + hantro_postproc_enable(ctx); + else + hantro_postproc_disable(ctx); + } +} + +void hantro_end_prepare_run(struct hantro_ctx *ctx) +{ + struct vb2_v4l2_buffer *src_buf; + + if (!ctx->is_encoder && ctx->dev->variant->late_postproc) { + if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) + hantro_postproc_enable(ctx); + else + hantro_postproc_disable(ctx); + } + + src_buf = hantro_get_src_buf(ctx); + v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, + &ctx->ctrl_handler); + + /* Kick the watchdog. */ + schedule_delayed_work(&ctx->dev->watchdog_work, + msecs_to_jiffies(2000)); +} + +static void device_run(void *priv) +{ + struct hantro_ctx *ctx = priv; + struct vb2_v4l2_buffer *src, *dst; + int ret; + + src = hantro_get_src_buf(ctx); + dst = hantro_get_dst_buf(ctx); + + ret = pm_runtime_resume_and_get(ctx->dev->dev); + if (ret < 0) + goto err_cancel_job; + + ret = clk_bulk_enable(ctx->dev->variant->num_clocks, ctx->dev->clocks); + if (ret) + goto err_cancel_job; + + v4l2_m2m_buf_copy_metadata(src, dst, true); + + if (ctx->codec_ops->run(ctx)) + goto err_cancel_job; + + return; + +err_cancel_job: + hantro_job_finish_no_pm(ctx->dev, ctx, VB2_BUF_STATE_ERROR); +} + +static const struct v4l2_m2m_ops vpu_m2m_ops = { + .device_run = device_run, +}; + +static int +queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq) +{ + struct hantro_ctx *ctx = priv; + int ret; + + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + src_vq->io_modes = VB2_MMAP | VB2_DMABUF; + src_vq->drv_priv = ctx; + src_vq->ops = &hantro_queue_ops; + src_vq->mem_ops = &vb2_dma_contig_memops; + + /* + * Driver does mostly sequential access, so sacrifice TLB efficiency + * for faster allocation. Also, no CPU access on the source queue, + * so no kernel mapping needed. + */ + src_vq->dma_attrs = DMA_ATTR_ALLOC_SINGLE_PAGES | + DMA_ATTR_NO_KERNEL_MAPPING; + src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->lock = &ctx->dev->vpu_mutex; + src_vq->dev = ctx->dev->v4l2_dev.dev; + src_vq->supports_requests = true; + + ret = vb2_queue_init(src_vq); + if (ret) + return ret; + + dst_vq->bidirectional = true; + dst_vq->mem_ops = &vb2_dma_contig_memops; + dst_vq->dma_attrs = DMA_ATTR_ALLOC_SINGLE_PAGES; + /* + * The Kernel needs access to the JPEG destination buffer for the + * JPEG encoder to fill in the JPEG headers. + */ + if (!ctx->is_encoder) + dst_vq->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; + + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + dst_vq->io_modes = VB2_MMAP | VB2_DMABUF; + dst_vq->drv_priv = ctx; + dst_vq->ops = &hantro_queue_ops; + dst_vq->buf_struct_size = sizeof(struct hantro_decoded_buffer); + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->lock = &ctx->dev->vpu_mutex; + dst_vq->dev = ctx->dev->v4l2_dev.dev; + + return vb2_queue_init(dst_vq); +} + +static int hantro_try_ctrl(struct v4l2_ctrl *ctrl) +{ + struct hantro_ctx *ctx; + + ctx = container_of(ctrl->handler, + struct hantro_ctx, ctrl_handler); + + if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) { + const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps; + + if (sps->chroma_format_idc > 1) + /* Only 4:0:0 and 4:2:0 are supported */ + return -EINVAL; + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) + /* Luma and chroma bit depth mismatch */ + return -EINVAL; + if (sps->bit_depth_luma_minus8 != 0) + /* Only 8-bit is supported */ + return -EINVAL; + } else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) { + const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps; + + if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2) + /* Only 8-bit and 10-bit are supported */ + return -EINVAL; + + ctx->bit_depth = sps->bit_depth_luma_minus8 + 8; + } else if (ctrl->id == V4L2_CID_STATELESS_VP9_FRAME) { + const struct v4l2_ctrl_vp9_frame *dec_params = ctrl->p_new.p_vp9_frame; + + /* We only support profile 0 */ + if (dec_params->profile != 0) + return -EINVAL; + } + return 0; +} + +static int hantro_jpeg_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct hantro_ctx *ctx; + + ctx = container_of(ctrl->handler, + struct hantro_ctx, ctrl_handler); + + vpu_debug(1, "s_ctrl: id = %d, val = %d\n", ctrl->id, ctrl->val); + + switch (ctrl->id) { + case V4L2_CID_JPEG_COMPRESSION_QUALITY: + ctx->jpeg_quality = ctrl->val; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int hantro_vp9_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct hantro_ctx *ctx; + + ctx = container_of(ctrl->handler, + struct hantro_ctx, ctrl_handler); + + switch (ctrl->id) { + case V4L2_CID_STATELESS_VP9_FRAME: + ctx->bit_depth = ctrl->p_new.p_vp9_frame->bit_depth; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct v4l2_ctrl_ops hantro_ctrl_ops = { + .try_ctrl = hantro_try_ctrl, +}; + +static const struct v4l2_ctrl_ops hantro_jpeg_ctrl_ops = { + .s_ctrl = hantro_jpeg_s_ctrl, +}; + +static const struct v4l2_ctrl_ops hantro_vp9_ctrl_ops = { + .s_ctrl = hantro_vp9_s_ctrl, +}; + +#define HANTRO_JPEG_ACTIVE_MARKERS (V4L2_JPEG_ACTIVE_MARKER_APP0 | \ + V4L2_JPEG_ACTIVE_MARKER_COM | \ + V4L2_JPEG_ACTIVE_MARKER_DQT | \ + V4L2_JPEG_ACTIVE_MARKER_DHT) + +static const struct hantro_ctrl controls[] = { + { + .codec = HANTRO_JPEG_ENCODER, + .cfg = { + .id = V4L2_CID_JPEG_COMPRESSION_QUALITY, + .min = 5, + .max = 100, + .step = 1, + .def = 50, + .ops = &hantro_jpeg_ctrl_ops, + }, + }, { + .codec = HANTRO_JPEG_ENCODER, + .cfg = { + .id = V4L2_CID_JPEG_ACTIVE_MARKER, + .max = HANTRO_JPEG_ACTIVE_MARKERS, + .def = HANTRO_JPEG_ACTIVE_MARKERS, + /* + * Changing the set of active markers/segments also + * messes up the alignment of the JPEG header, which + * is needed to allow the hardware to write directly + * to the output buffer. Implementing this introduces + * a lot of complexity for little gain, as the markers + * enabled is already the minimum required set. + */ + .flags = V4L2_CTRL_FLAG_READ_ONLY, + }, + }, { + .codec = HANTRO_MPEG2_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_MPEG2_SEQUENCE, + }, + }, { + .codec = HANTRO_MPEG2_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_MPEG2_PICTURE, + }, + }, { + .codec = HANTRO_MPEG2_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_MPEG2_QUANTISATION, + }, + }, { + .codec = HANTRO_VP8_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_VP8_FRAME, + }, + }, { + .codec = HANTRO_H264_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS, + }, + }, { + .codec = HANTRO_H264_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_H264_SPS, + .ops = &hantro_ctrl_ops, + }, + }, { + .codec = HANTRO_H264_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_H264_PPS, + }, + }, { + .codec = HANTRO_H264_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX, + }, + }, { + .codec = HANTRO_H264_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_H264_DECODE_MODE, + .min = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, + .def = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, + .max = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, + }, + }, { + .codec = HANTRO_H264_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_H264_START_CODE, + .min = V4L2_STATELESS_H264_START_CODE_ANNEX_B, + .def = V4L2_STATELESS_H264_START_CODE_ANNEX_B, + .max = V4L2_STATELESS_H264_START_CODE_ANNEX_B, + }, + }, { + .codec = HANTRO_H264_DECODER, + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_H264_PROFILE, + .min = V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE, + .max = V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, + .menu_skip_mask = + BIT(V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED), + .def = V4L2_MPEG_VIDEO_H264_PROFILE_MAIN, + } + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, + .min = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + .max = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + .def = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, + }, + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_START_CODE, + .min = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, + .max = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, + .def = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, + }, + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_HEVC_PROFILE, + .min = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN, + .max = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10, + .def = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN, + }, + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_MPEG_VIDEO_HEVC_LEVEL, + .min = V4L2_MPEG_VIDEO_HEVC_LEVEL_1, + .max = V4L2_MPEG_VIDEO_HEVC_LEVEL_5_1, + }, + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_SPS, + .ops = &hantro_ctrl_ops, + }, + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_PPS, + }, + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, + }, + }, { + .codec = HANTRO_HEVC_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, + }, + }, { + .codec = HANTRO_VP9_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_VP9_FRAME, + .ops = &hantro_vp9_ctrl_ops, + }, + }, { + .codec = HANTRO_VP9_DECODER, + .cfg = { + .id = V4L2_CID_STATELESS_VP9_COMPRESSED_HDR, + }, + }, +}; + +static int hantro_ctrls_setup(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + int allowed_codecs) +{ + int i, num_ctrls = ARRAY_SIZE(controls); + + v4l2_ctrl_handler_init(&ctx->ctrl_handler, num_ctrls); + + for (i = 0; i < num_ctrls; i++) { + if (!(allowed_codecs & controls[i].codec)) + continue; + + v4l2_ctrl_new_custom(&ctx->ctrl_handler, + &controls[i].cfg, NULL); + if (ctx->ctrl_handler.error) { + vpu_err("Adding control (%d) failed %d\n", + controls[i].cfg.id, + ctx->ctrl_handler.error); + v4l2_ctrl_handler_free(&ctx->ctrl_handler); + return ctx->ctrl_handler.error; + } + } + return v4l2_ctrl_handler_setup(&ctx->ctrl_handler); +} + +/* + * V4L2 file operations. + */ + +static int hantro_open(struct file *filp) +{ + struct hantro_dev *vpu = video_drvdata(filp); + struct video_device *vdev = video_devdata(filp); + struct hantro_func *func = hantro_vdev_to_func(vdev); + struct hantro_ctx *ctx; + int allowed_codecs, ret; + + /* + * We do not need any extra locking here, because we operate only + * on local data here, except reading few fields from dev, which + * do not change through device's lifetime (which is guaranteed by + * reference on module from open()) and V4L2 internal objects (such + * as vdev and ctx->fh), which have proper locking done in respective + * helper functions used here. + */ + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->dev = vpu; + if (func->id == MEDIA_ENT_F_PROC_VIDEO_ENCODER) { + allowed_codecs = vpu->variant->codec & HANTRO_ENCODERS; + ctx->is_encoder = true; + } else if (func->id == MEDIA_ENT_F_PROC_VIDEO_DECODER) { + allowed_codecs = vpu->variant->codec & HANTRO_DECODERS; + ctx->is_encoder = false; + } else { + ret = -ENODEV; + goto err_ctx_free; + } + + ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(vpu->m2m_dev, ctx, queue_init); + if (IS_ERR(ctx->fh.m2m_ctx)) { + ret = PTR_ERR(ctx->fh.m2m_ctx); + goto err_ctx_free; + } + + v4l2_fh_init(&ctx->fh, vdev); + filp->private_data = &ctx->fh; + v4l2_fh_add(&ctx->fh); + + hantro_reset_fmts(ctx); + + ret = hantro_ctrls_setup(vpu, ctx, allowed_codecs); + if (ret) { + vpu_err("Failed to set up controls\n"); + goto err_fh_free; + } + ctx->fh.ctrl_handler = &ctx->ctrl_handler; + + return 0; + +err_fh_free: + v4l2_fh_del(&ctx->fh); + v4l2_fh_exit(&ctx->fh); +err_ctx_free: + kfree(ctx); + return ret; +} + +static int hantro_release(struct file *filp) +{ + struct hantro_ctx *ctx = + container_of(filp->private_data, struct hantro_ctx, fh); + + /* + * No need for extra locking because this was the last reference + * to this file. + */ + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); + v4l2_fh_del(&ctx->fh); + v4l2_fh_exit(&ctx->fh); + v4l2_ctrl_handler_free(&ctx->ctrl_handler); + kfree(ctx); + + return 0; +} + +static const struct v4l2_file_operations hantro_fops = { + .owner = THIS_MODULE, + .open = hantro_open, + .release = hantro_release, + .poll = v4l2_m2m_fop_poll, + .unlocked_ioctl = video_ioctl2, + .mmap = v4l2_m2m_fop_mmap, +}; + +static const struct of_device_id of_hantro_match[] = { +#ifdef CONFIG_VIDEO_HANTRO_ROCKCHIP + { .compatible = "rockchip,px30-vpu", .data = &px30_vpu_variant, }, + { .compatible = "rockchip,rk3036-vpu", .data = &rk3036_vpu_variant, }, + { .compatible = "rockchip,rk3066-vpu", .data = &rk3066_vpu_variant, }, + { .compatible = "rockchip,rk3288-vpu", .data = &rk3288_vpu_variant, }, + { .compatible = "rockchip,rk3328-vpu", .data = &rk3328_vpu_variant, }, + { .compatible = "rockchip,rk3399-vpu", .data = &rk3399_vpu_variant, }, + { .compatible = "rockchip,rk3568-vepu", .data = &rk3568_vepu_variant, }, + { .compatible = "rockchip,rk3568-vpu", .data = &rk3568_vpu_variant, }, +#endif +#ifdef CONFIG_VIDEO_HANTRO_IMX8M + { .compatible = "nxp,imx8mm-vpu-g1", .data = &imx8mm_vpu_g1_variant, }, + { .compatible = "nxp,imx8mq-vpu", .data = &imx8mq_vpu_variant, }, + { .compatible = "nxp,imx8mq-vpu-g1", .data = &imx8mq_vpu_g1_variant }, + { .compatible = "nxp,imx8mq-vpu-g2", .data = &imx8mq_vpu_g2_variant }, +#endif +#ifdef CONFIG_VIDEO_HANTRO_SAMA5D4 + { .compatible = "microchip,sama5d4-vdec", .data = &sama5d4_vdec_variant, }, +#endif +#ifdef CONFIG_VIDEO_HANTRO_SUNXI + { .compatible = "allwinner,sun50i-h6-vpu-g2", .data = &sunxi_vpu_variant, }, +#endif + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, of_hantro_match); + +static int hantro_register_entity(struct media_device *mdev, + struct media_entity *entity, + const char *entity_name, + struct media_pad *pads, int num_pads, + int function, struct video_device *vdev) +{ + char *name; + int ret; + + entity->obj_type = MEDIA_ENTITY_TYPE_BASE; + if (function == MEDIA_ENT_F_IO_V4L) { + entity->info.dev.major = VIDEO_MAJOR; + entity->info.dev.minor = vdev->minor; + } + + name = devm_kasprintf(mdev->dev, GFP_KERNEL, "%s-%s", vdev->name, + entity_name); + if (!name) + return -ENOMEM; + + entity->name = name; + entity->function = function; + + ret = media_entity_pads_init(entity, num_pads, pads); + if (ret) + return ret; + + ret = media_device_register_entity(mdev, entity); + if (ret) + return ret; + + return 0; +} + +static int hantro_attach_func(struct hantro_dev *vpu, + struct hantro_func *func) +{ + struct media_device *mdev = &vpu->mdev; + struct media_link *link; + int ret; + + /* Create the three encoder entities with their pads */ + func->source_pad.flags = MEDIA_PAD_FL_SOURCE; + ret = hantro_register_entity(mdev, &func->vdev.entity, "source", + &func->source_pad, 1, MEDIA_ENT_F_IO_V4L, + &func->vdev); + if (ret) + return ret; + + func->proc_pads[0].flags = MEDIA_PAD_FL_SINK; + func->proc_pads[1].flags = MEDIA_PAD_FL_SOURCE; + ret = hantro_register_entity(mdev, &func->proc, "proc", + func->proc_pads, 2, func->id, + &func->vdev); + if (ret) + goto err_rel_entity0; + + func->sink_pad.flags = MEDIA_PAD_FL_SINK; + ret = hantro_register_entity(mdev, &func->sink, "sink", + &func->sink_pad, 1, MEDIA_ENT_F_IO_V4L, + &func->vdev); + if (ret) + goto err_rel_entity1; + + /* Connect the three entities */ + ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 0, + MEDIA_LNK_FL_IMMUTABLE | + MEDIA_LNK_FL_ENABLED); + if (ret) + goto err_rel_entity2; + + ret = media_create_pad_link(&func->proc, 1, &func->sink, 0, + MEDIA_LNK_FL_IMMUTABLE | + MEDIA_LNK_FL_ENABLED); + if (ret) + goto err_rm_links0; + + /* Create video interface */ + func->intf_devnode = media_devnode_create(mdev, MEDIA_INTF_T_V4L_VIDEO, + 0, VIDEO_MAJOR, + func->vdev.minor); + if (!func->intf_devnode) { + ret = -ENOMEM; + goto err_rm_links1; + } + + /* Connect the two DMA engines to the interface */ + link = media_create_intf_link(&func->vdev.entity, + &func->intf_devnode->intf, + MEDIA_LNK_FL_IMMUTABLE | + MEDIA_LNK_FL_ENABLED); + if (!link) { + ret = -ENOMEM; + goto err_rm_devnode; + } + + link = media_create_intf_link(&func->sink, &func->intf_devnode->intf, + MEDIA_LNK_FL_IMMUTABLE | + MEDIA_LNK_FL_ENABLED); + if (!link) { + ret = -ENOMEM; + goto err_rm_devnode; + } + return 0; + +err_rm_devnode: + media_devnode_remove(func->intf_devnode); + +err_rm_links1: + media_entity_remove_links(&func->sink); + +err_rm_links0: + media_entity_remove_links(&func->proc); + media_entity_remove_links(&func->vdev.entity); + +err_rel_entity2: + media_device_unregister_entity(&func->sink); + +err_rel_entity1: + media_device_unregister_entity(&func->proc); + +err_rel_entity0: + media_device_unregister_entity(&func->vdev.entity); + return ret; +} + +static void hantro_detach_func(struct hantro_func *func) +{ + media_devnode_remove(func->intf_devnode); + media_entity_remove_links(&func->sink); + media_entity_remove_links(&func->proc); + media_entity_remove_links(&func->vdev.entity); + media_device_unregister_entity(&func->sink); + media_device_unregister_entity(&func->proc); + media_device_unregister_entity(&func->vdev.entity); +} + +static int hantro_add_func(struct hantro_dev *vpu, unsigned int funcid) +{ + const struct of_device_id *match; + struct hantro_func *func; + struct video_device *vfd; + int ret; + + match = of_match_node(of_hantro_match, vpu->dev->of_node); + func = devm_kzalloc(vpu->dev, sizeof(*func), GFP_KERNEL); + if (!func) { + v4l2_err(&vpu->v4l2_dev, "Failed to allocate video device\n"); + return -ENOMEM; + } + + func->id = funcid; + + vfd = &func->vdev; + vfd->fops = &hantro_fops; + vfd->release = video_device_release_empty; + vfd->lock = &vpu->vpu_mutex; + vfd->v4l2_dev = &vpu->v4l2_dev; + vfd->vfl_dir = VFL_DIR_M2M; + vfd->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_M2M_MPLANE; + vfd->ioctl_ops = &hantro_ioctl_ops; + snprintf(vfd->name, sizeof(vfd->name), "%s-%s", match->compatible, + funcid == MEDIA_ENT_F_PROC_VIDEO_ENCODER ? "enc" : "dec"); + + if (funcid == MEDIA_ENT_F_PROC_VIDEO_ENCODER) { + vpu->encoder = func; + v4l2_disable_ioctl(vfd, VIDIOC_TRY_DECODER_CMD); + v4l2_disable_ioctl(vfd, VIDIOC_DECODER_CMD); + } else { + vpu->decoder = func; + v4l2_disable_ioctl(vfd, VIDIOC_TRY_ENCODER_CMD); + v4l2_disable_ioctl(vfd, VIDIOC_ENCODER_CMD); + } + + video_set_drvdata(vfd, vpu); + + ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1); + if (ret) { + v4l2_err(&vpu->v4l2_dev, "Failed to register video device\n"); + return ret; + } + + ret = hantro_attach_func(vpu, func); + if (ret) { + v4l2_err(&vpu->v4l2_dev, + "Failed to attach functionality to the media device\n"); + goto err_unreg_dev; + } + + v4l2_info(&vpu->v4l2_dev, "registered %s as /dev/video%d\n", vfd->name, + vfd->num); + + return 0; + +err_unreg_dev: + video_unregister_device(vfd); + return ret; +} + +static int hantro_add_enc_func(struct hantro_dev *vpu) +{ + if (!vpu->variant->enc_fmts) + return 0; + + return hantro_add_func(vpu, MEDIA_ENT_F_PROC_VIDEO_ENCODER); +} + +static int hantro_add_dec_func(struct hantro_dev *vpu) +{ + if (!vpu->variant->dec_fmts) + return 0; + + return hantro_add_func(vpu, MEDIA_ENT_F_PROC_VIDEO_DECODER); +} + +static void hantro_remove_func(struct hantro_dev *vpu, + unsigned int funcid) +{ + struct hantro_func *func; + + if (funcid == MEDIA_ENT_F_PROC_VIDEO_ENCODER) + func = vpu->encoder; + else + func = vpu->decoder; + + if (!func) + return; + + hantro_detach_func(func); + video_unregister_device(&func->vdev); +} + +static void hantro_remove_enc_func(struct hantro_dev *vpu) +{ + hantro_remove_func(vpu, MEDIA_ENT_F_PROC_VIDEO_ENCODER); +} + +static void hantro_remove_dec_func(struct hantro_dev *vpu) +{ + hantro_remove_func(vpu, MEDIA_ENT_F_PROC_VIDEO_DECODER); +} + +static const struct media_device_ops hantro_m2m_media_ops = { + .req_validate = vb2_request_validate, + .req_queue = v4l2_m2m_request_queue, +}; + +static int hantro_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct hantro_dev *vpu; + struct resource *res; + int num_bases; + int i, ret; + + vpu = devm_kzalloc(&pdev->dev, sizeof(*vpu), GFP_KERNEL); + if (!vpu) + return -ENOMEM; + + vpu->dev = &pdev->dev; + vpu->pdev = pdev; + mutex_init(&vpu->vpu_mutex); + spin_lock_init(&vpu->irqlock); + + match = of_match_node(of_hantro_match, pdev->dev.of_node); + vpu->variant = match->data; + + /* + * Support for nxp,imx8mq-vpu is kept for backwards compatibility + * but it's deprecated. Please update your DTS file to use + * nxp,imx8mq-vpu-g1 or nxp,imx8mq-vpu-g2 instead. + */ + if (of_device_is_compatible(pdev->dev.of_node, "nxp,imx8mq-vpu")) + dev_warn(&pdev->dev, "%s compatible is deprecated\n", + match->compatible); + + INIT_DELAYED_WORK(&vpu->watchdog_work, hantro_watchdog); + + vpu->clocks = devm_kcalloc(&pdev->dev, vpu->variant->num_clocks, + sizeof(*vpu->clocks), GFP_KERNEL); + if (!vpu->clocks) + return -ENOMEM; + + if (vpu->variant->num_clocks > 1) { + for (i = 0; i < vpu->variant->num_clocks; i++) + vpu->clocks[i].id = vpu->variant->clk_names[i]; + + ret = devm_clk_bulk_get(&pdev->dev, vpu->variant->num_clocks, + vpu->clocks); + if (ret) + return ret; + } else { + /* + * If the driver has a single clk, chances are there will be no + * actual name in the DT bindings. + */ + vpu->clocks[0].clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(vpu->clocks[0].clk)) + return PTR_ERR(vpu->clocks[0].clk); + } + + vpu->resets = devm_reset_control_array_get(&pdev->dev, false, true); + if (IS_ERR(vpu->resets)) + return PTR_ERR(vpu->resets); + + num_bases = vpu->variant->num_regs ?: 1; + vpu->reg_bases = devm_kcalloc(&pdev->dev, num_bases, + sizeof(*vpu->reg_bases), GFP_KERNEL); + if (!vpu->reg_bases) + return -ENOMEM; + + for (i = 0; i < num_bases; i++) { + res = vpu->variant->reg_names ? + platform_get_resource_byname(vpu->pdev, IORESOURCE_MEM, + vpu->variant->reg_names[i]) : + platform_get_resource(vpu->pdev, IORESOURCE_MEM, 0); + vpu->reg_bases[i] = devm_ioremap_resource(vpu->dev, res); + if (IS_ERR(vpu->reg_bases[i])) + return PTR_ERR(vpu->reg_bases[i]); + } + vpu->enc_base = vpu->reg_bases[0] + vpu->variant->enc_offset; + vpu->dec_base = vpu->reg_bases[0] + vpu->variant->dec_offset; + + /** + * TODO: Eventually allow taking advantage of full 64-bit address space. + * Until then we assume the MSB portion of buffers' base addresses is + * always 0 due to this masking operation. + */ + ret = dma_set_coherent_mask(vpu->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(vpu->dev, "Could not set DMA coherent mask.\n"); + return ret; + } + vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32)); + + for (i = 0; i < vpu->variant->num_irqs; i++) { + const char *irq_name; + int irq; + + if (!vpu->variant->irqs[i].handler) + continue; + + if (vpu->variant->num_irqs > 1) { + irq_name = vpu->variant->irqs[i].name; + irq = platform_get_irq_byname(vpu->pdev, irq_name); + } else { + /* + * If the driver has a single IRQ, chances are there + * will be no actual name in the DT bindings. + */ + irq_name = "default"; + irq = platform_get_irq(vpu->pdev, 0); + } + if (irq <= 0) + return -ENXIO; + + ret = devm_request_irq(vpu->dev, irq, + vpu->variant->irqs[i].handler, 0, + dev_name(vpu->dev), vpu); + if (ret) { + dev_err(vpu->dev, "Could not request %s IRQ.\n", + irq_name); + return ret; + } + } + + if (vpu->variant->init) { + ret = vpu->variant->init(vpu); + if (ret) { + dev_err(&pdev->dev, "Failed to init VPU hardware\n"); + return ret; + } + } + + pm_runtime_set_autosuspend_delay(vpu->dev, 100); + pm_runtime_use_autosuspend(vpu->dev); + pm_runtime_enable(vpu->dev); + + ret = reset_control_deassert(vpu->resets); + if (ret) { + dev_err(&pdev->dev, "Failed to deassert resets\n"); + goto err_pm_disable; + } + + ret = clk_bulk_prepare(vpu->variant->num_clocks, vpu->clocks); + if (ret) { + dev_err(&pdev->dev, "Failed to prepare clocks\n"); + goto err_rst_assert; + } + + ret = v4l2_device_register(&pdev->dev, &vpu->v4l2_dev); + if (ret) { + dev_err(&pdev->dev, "Failed to register v4l2 device\n"); + goto err_clk_unprepare; + } + platform_set_drvdata(pdev, vpu); + + vpu->m2m_dev = v4l2_m2m_init(&vpu_m2m_ops); + if (IS_ERR(vpu->m2m_dev)) { + v4l2_err(&vpu->v4l2_dev, "Failed to init mem2mem device\n"); + ret = PTR_ERR(vpu->m2m_dev); + goto err_v4l2_unreg; + } + + vpu->mdev.dev = vpu->dev; + strscpy(vpu->mdev.model, DRIVER_NAME, sizeof(vpu->mdev.model)); + strscpy(vpu->mdev.bus_info, "platform: " DRIVER_NAME, + sizeof(vpu->mdev.bus_info)); + media_device_init(&vpu->mdev); + vpu->mdev.ops = &hantro_m2m_media_ops; + vpu->v4l2_dev.mdev = &vpu->mdev; + + ret = hantro_add_enc_func(vpu); + if (ret) { + dev_err(&pdev->dev, "Failed to register encoder\n"); + goto err_m2m_rel; + } + + ret = hantro_add_dec_func(vpu); + if (ret) { + dev_err(&pdev->dev, "Failed to register decoder\n"); + goto err_rm_enc_func; + } + + ret = media_device_register(&vpu->mdev); + if (ret) { + v4l2_err(&vpu->v4l2_dev, "Failed to register mem2mem media device\n"); + goto err_rm_dec_func; + } + + return 0; + +err_rm_dec_func: + hantro_remove_dec_func(vpu); +err_rm_enc_func: + hantro_remove_enc_func(vpu); +err_m2m_rel: + media_device_cleanup(&vpu->mdev); + v4l2_m2m_release(vpu->m2m_dev); +err_v4l2_unreg: + v4l2_device_unregister(&vpu->v4l2_dev); +err_clk_unprepare: + clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks); +err_rst_assert: + reset_control_assert(vpu->resets); +err_pm_disable: + pm_runtime_dont_use_autosuspend(vpu->dev); + pm_runtime_disable(vpu->dev); + return ret; +} + +static int hantro_remove(struct platform_device *pdev) +{ + struct hantro_dev *vpu = platform_get_drvdata(pdev); + + v4l2_info(&vpu->v4l2_dev, "Removing %s\n", pdev->name); + + media_device_unregister(&vpu->mdev); + hantro_remove_dec_func(vpu); + hantro_remove_enc_func(vpu); + media_device_cleanup(&vpu->mdev); + v4l2_m2m_release(vpu->m2m_dev); + v4l2_device_unregister(&vpu->v4l2_dev); + clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks); + reset_control_assert(vpu->resets); + pm_runtime_dont_use_autosuspend(vpu->dev); + pm_runtime_disable(vpu->dev); + return 0; +} + +#ifdef CONFIG_PM +static int hantro_runtime_resume(struct device *dev) +{ + struct hantro_dev *vpu = dev_get_drvdata(dev); + + if (vpu->variant->runtime_resume) + return vpu->variant->runtime_resume(vpu); + + return 0; +} +#endif + +static const struct dev_pm_ops hantro_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(NULL, hantro_runtime_resume, NULL) +}; + +static struct platform_driver hantro_driver = { + .probe = hantro_probe, + .remove = hantro_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = of_match_ptr(of_hantro_match), + .pm = &hantro_pm_ops, + }, +}; +module_platform_driver(hantro_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Alpha Lin <Alpha.Lin@Rock-Chips.com>"); +MODULE_AUTHOR("Tomasz Figa <tfiga@chromium.org>"); +MODULE_AUTHOR("Ezequiel Garcia <ezequiel@collabora.com>"); +MODULE_DESCRIPTION("Hantro VPU codec driver"); diff --git a/drivers/media/platform/verisilicon/hantro_g1.c b/drivers/media/platform/verisilicon/hantro_g1.c new file mode 100644 index 000000000..0ab1cee62 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g1.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + * Jeffy Chen <jeffy.chen@rock-chips.com> + * Copyright (C) 2019 Pengutronix, Philipp Zabel <kernel@pengutronix.de> + * Copyright (C) 2021 Collabora Ltd, Emil Velikov <emil.velikov@collabora.com> + */ + +#include "hantro.h" +#include "hantro_g1_regs.h" + +irqreturn_t hantro_g1_irq(int irq, void *dev_id) +{ + struct hantro_dev *vpu = dev_id; + enum vb2_buffer_state state; + u32 status; + + status = vdpu_read(vpu, G1_REG_INTERRUPT); + state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ? + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; + + vdpu_write(vpu, 0, G1_REG_INTERRUPT); + vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG); + + hantro_irq_done(vpu, state); + + return IRQ_HANDLED; +} + +void hantro_g1_reset(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + vdpu_write(vpu, G1_REG_INTERRUPT_DEC_IRQ_DIS, G1_REG_INTERRUPT); + vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG); + vdpu_write(vpu, 1, G1_REG_SOFT_RESET); +} diff --git a/drivers/media/platform/verisilicon/hantro_g1_h264_dec.c b/drivers/media/platform/verisilicon/hantro_g1_h264_dec.c new file mode 100644 index 000000000..9de7f05ef --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g1_h264_dec.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Rockchip RK3288 VPU codec driver + * + * Copyright (c) 2014 Rockchip Electronics Co., Ltd. + * Hertz Wong <hertz.wong@rock-chips.com> + * Herman Chen <herman.chen@rock-chips.com> + * + * Copyright (C) 2014 Google, Inc. + * Tomasz Figa <tfiga@chromium.org> + */ + +#include <linux/types.h> +#include <linux/sort.h> + +#include <media/v4l2-mem2mem.h> + +#include "hantro_g1_regs.h" +#include "hantro_hw.h" +#include "hantro_v4l2.h" + +static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) +{ + const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; + const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; + const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; + const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; + struct hantro_dev *vpu = ctx->dev; + u32 reg; + + /* Decoder control register 0. */ + reg = G1_REG_DEC_CTRL0_DEC_AXI_AUTO; + if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) + reg |= G1_REG_DEC_CTRL0_SEQ_MBAFF_E; + if (sps->profile_idc > 66) { + reg |= G1_REG_DEC_CTRL0_PICORD_COUNT_E; + if (dec_param->nal_ref_idc) + reg |= G1_REG_DEC_CTRL0_WRITE_MVS_E; + } + + if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) && + (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD || + dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)) + reg |= G1_REG_DEC_CTRL0_PIC_INTERLACE_E; + if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) + reg |= G1_REG_DEC_CTRL0_PIC_FIELDMODE_E; + if (!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)) + reg |= G1_REG_DEC_CTRL0_PIC_TOPFIELD_E; + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL0); + + /* Decoder control register 1. */ + reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) | + G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | + G1_REG_DEC_CTRL1_REF_FRAMES(sps->max_num_ref_frames); + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL1); + + /* Decoder control register 2. */ + reg = G1_REG_DEC_CTRL2_CH_QP_OFFSET(pps->chroma_qp_index_offset) | + G1_REG_DEC_CTRL2_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset); + + if (pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT) + reg |= G1_REG_DEC_CTRL2_TYPE1_QUANT_E; + if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)) + reg |= G1_REG_DEC_CTRL2_FIELDPIC_FLAG_E; + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL2); + + /* Decoder control register 3. */ + reg = G1_REG_DEC_CTRL3_START_CODE_E | + G1_REG_DEC_CTRL3_INIT_QP(pps->pic_init_qp_minus26 + 26) | + G1_REG_DEC_CTRL3_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL3); + + /* Decoder control register 4. */ + reg = G1_REG_DEC_CTRL4_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) | + G1_REG_DEC_CTRL4_FRAMENUM(dec_param->frame_num) | + G1_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc); + if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) + reg |= G1_REG_DEC_CTRL4_CABAC_E; + if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) + reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E; + if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0) + reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E; + if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) + reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E; + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL4); + + /* Decoder control register 5. */ + reg = G1_REG_DEC_CTRL5_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) | + G1_REG_DEC_CTRL5_IDR_PIC_ID(dec_param->idr_pic_id); + if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) + reg |= G1_REG_DEC_CTRL5_CONST_INTRA_E; + if (pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) + reg |= G1_REG_DEC_CTRL5_FILT_CTRL_PRES; + if (pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) + reg |= G1_REG_DEC_CTRL5_RDPIC_CNT_PRES; + if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) + reg |= G1_REG_DEC_CTRL5_8X8TRANS_FLAG_E; + if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) + reg |= G1_REG_DEC_CTRL5_IDR_PIC_E; + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL5); + + /* Decoder control register 6. */ + reg = G1_REG_DEC_CTRL6_PPS_ID(pps->pic_parameter_set_id) | + G1_REG_DEC_CTRL6_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) | + G1_REG_DEC_CTRL6_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) | + G1_REG_DEC_CTRL6_POC_LENGTH(dec_param->pic_order_cnt_bit_size); + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL6); + + /* Error concealment register. */ + vdpu_write_relaxed(vpu, 0, G1_REG_ERR_CONC); + + /* Prediction filter tap register. */ + vdpu_write_relaxed(vpu, + G1_REG_PRED_FLT_PRED_BC_TAP_0_0(1) | + G1_REG_PRED_FLT_PRED_BC_TAP_0_1(-5 & 0x3ff) | + G1_REG_PRED_FLT_PRED_BC_TAP_0_2(20), + G1_REG_PRED_FLT); + + /* Reference picture buffer control register. */ + vdpu_write_relaxed(vpu, 0, G1_REG_REF_BUF_CTRL); + + /* Reference picture buffer control register 2. */ + vdpu_write_relaxed(vpu, G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(8), + G1_REG_REF_BUF_CTRL2); +} + +static void set_ref(struct hantro_ctx *ctx) +{ + const struct v4l2_h264_reference *b0_reflist, *b1_reflist, *p_reflist; + struct hantro_dev *vpu = ctx->dev; + int reg_num; + u32 reg; + int i; + + vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_valid, G1_REG_VALID_REF); + vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_longterm, G1_REG_LT_REF); + + /* + * Set up reference frame picture numbers. + * + * Each G1_REG_REF_PIC(x) register contains numbers of two + * subsequential reference pictures. + */ + for (i = 0; i < HANTRO_H264_DPB_SIZE; i += 2) { + reg = G1_REG_REF_PIC_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, i)) | + G1_REG_REF_PIC_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, i + 1)); + vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(i / 2)); + } + + b0_reflist = ctx->h264_dec.reflists.b0; + b1_reflist = ctx->h264_dec.reflists.b1; + p_reflist = ctx->h264_dec.reflists.p; + + /* + * Each G1_REG_BD_REF_PIC(x) register contains three entries + * of each forward and backward picture list. + */ + reg_num = 0; + for (i = 0; i < 15; i += 3) { + reg = G1_REG_BD_REF_PIC_BINIT_RLIST_F0(b0_reflist[i].index) | + G1_REG_BD_REF_PIC_BINIT_RLIST_F1(b0_reflist[i + 1].index) | + G1_REG_BD_REF_PIC_BINIT_RLIST_F2(b0_reflist[i + 2].index) | + G1_REG_BD_REF_PIC_BINIT_RLIST_B0(b1_reflist[i].index) | + G1_REG_BD_REF_PIC_BINIT_RLIST_B1(b1_reflist[i + 1].index) | + G1_REG_BD_REF_PIC_BINIT_RLIST_B2(b1_reflist[i + 2].index); + vdpu_write_relaxed(vpu, reg, G1_REG_BD_REF_PIC(reg_num++)); + } + + /* + * G1_REG_BD_P_REF_PIC register contains last entries (index 15) + * of forward and backward reference picture lists and first 4 entries + * of P forward picture list. + */ + reg = G1_REG_BD_P_REF_PIC_BINIT_RLIST_F15(b0_reflist[15].index) | + G1_REG_BD_P_REF_PIC_BINIT_RLIST_B15(b1_reflist[15].index) | + G1_REG_BD_P_REF_PIC_PINIT_RLIST_F0(p_reflist[0].index) | + G1_REG_BD_P_REF_PIC_PINIT_RLIST_F1(p_reflist[1].index) | + G1_REG_BD_P_REF_PIC_PINIT_RLIST_F2(p_reflist[2].index) | + G1_REG_BD_P_REF_PIC_PINIT_RLIST_F3(p_reflist[3].index); + vdpu_write_relaxed(vpu, reg, G1_REG_BD_P_REF_PIC); + + /* + * Each G1_REG_FWD_PIC(x) register contains six consecutive + * entries of P forward picture list, starting from index 4. + */ + reg_num = 0; + for (i = 4; i < HANTRO_H264_DPB_SIZE; i += 6) { + reg = G1_REG_FWD_PIC_PINIT_RLIST_F0(p_reflist[i].index) | + G1_REG_FWD_PIC_PINIT_RLIST_F1(p_reflist[i + 1].index) | + G1_REG_FWD_PIC_PINIT_RLIST_F2(p_reflist[i + 2].index) | + G1_REG_FWD_PIC_PINIT_RLIST_F3(p_reflist[i + 3].index) | + G1_REG_FWD_PIC_PINIT_RLIST_F4(p_reflist[i + 4].index) | + G1_REG_FWD_PIC_PINIT_RLIST_F5(p_reflist[i + 5].index); + vdpu_write_relaxed(vpu, reg, G1_REG_FWD_PIC(reg_num++)); + } + + /* Set up addresses of DPB buffers. */ + for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) { + dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, i); + + vdpu_write_relaxed(vpu, dma_addr, G1_REG_ADDR_REF(i)); + } +} + +static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) +{ + const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; + struct vb2_v4l2_buffer *dst_buf; + struct hantro_dev *vpu = ctx->dev; + dma_addr_t src_dma, dst_dma; + size_t offset = 0; + + /* Source (stream) buffer. */ + src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + vdpu_write_relaxed(vpu, src_dma, G1_REG_ADDR_STR); + + /* Destination (decoded frame) buffer. */ + dst_buf = hantro_get_dst_buf(ctx); + dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf); + /* Adjust dma addr to start at second line for bottom field */ + if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) + offset = ALIGN(ctx->src_fmt.width, MB_DIM); + vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DST); + + /* Higher profiles require DMV buffer appended to reference frames. */ + if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) { + unsigned int bytes_per_mb = 384; + + /* DMV buffer for monochrome start directly after Y-plane */ + if (ctrls->sps->profile_idc >= 100 && + ctrls->sps->chroma_format_idc == 0) + bytes_per_mb = 256; + offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) * + MB_HEIGHT(ctx->src_fmt.height); + + /* + * DMV buffer is split in two for field encoded frames, + * adjust offset for bottom field + */ + if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) + offset += 32 * MB_WIDTH(ctx->src_fmt.width) * + MB_HEIGHT(ctx->src_fmt.height); + vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DIR_MV); + } + + /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */ + vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE); +} + +int hantro_g1_h264_dec_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf; + int ret; + + /* Prepare the H264 decoder context. */ + ret = hantro_h264_dec_prepare_run(ctx); + if (ret) + return ret; + + /* Configure hardware registers. */ + src_buf = hantro_get_src_buf(ctx); + set_params(ctx, src_buf); + set_ref(ctx); + set_buffers(ctx, src_buf); + + hantro_end_prepare_run(ctx); + + /* Start decoding! */ + vdpu_write_relaxed(vpu, + G1_REG_CONFIG_DEC_AXI_RD_ID(0xffu) | + G1_REG_CONFIG_DEC_TIMEOUT_E | + G1_REG_CONFIG_DEC_OUT_ENDIAN | + G1_REG_CONFIG_DEC_STRENDIAN_E | + G1_REG_CONFIG_DEC_MAX_BURST(16) | + G1_REG_CONFIG_DEC_OUTSWAP32_E | + G1_REG_CONFIG_DEC_INSWAP32_E | + G1_REG_CONFIG_DEC_STRSWAP32_E | + G1_REG_CONFIG_DEC_CLK_GATE_E, + G1_REG_CONFIG); + vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c b/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c new file mode 100644 index 000000000..9aea331e1 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + */ + +#include <asm/unaligned.h> +#include <linux/bitfield.h> +#include <media/v4l2-mem2mem.h> +#include "hantro.h" +#include "hantro_hw.h" +#include "hantro_g1_regs.h" + +#define G1_SWREG(nr) ((nr) * 4) + +#define G1_REG_RLC_VLC_BASE G1_SWREG(12) +#define G1_REG_DEC_OUT_BASE G1_SWREG(13) +#define G1_REG_REFER0_BASE G1_SWREG(14) +#define G1_REG_REFER1_BASE G1_SWREG(15) +#define G1_REG_REFER2_BASE G1_SWREG(16) +#define G1_REG_REFER3_BASE G1_SWREG(17) +#define G1_REG_QTABLE_BASE G1_SWREG(40) + +#define G1_REG_DEC_AXI_RD_ID(v) (((v) << 24) & GENMASK(31, 24)) +#define G1_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(23) : 0) +#define G1_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(22) : 0) +#define G1_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(21) : 0) +#define G1_REG_DEC_INSWAP32_E(v) ((v) ? BIT(20) : 0) +#define G1_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(19) : 0) +#define G1_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(18) : 0) +#define G1_REG_DEC_LATENCY(v) (((v) << 11) & GENMASK(16, 11)) +#define G1_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(10) : 0) +#define G1_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(9) : 0) +#define G1_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(8) : 0) +#define G1_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(6) : 0) +#define G1_REG_DEC_SCMD_DIS(v) ((v) ? BIT(5) : 0) +#define G1_REG_DEC_MAX_BURST(v) (((v) << 0) & GENMASK(4, 0)) + +#define G1_REG_DEC_MODE(v) (((v) << 28) & GENMASK(31, 28)) +#define G1_REG_RLC_MODE_E(v) ((v) ? BIT(27) : 0) +#define G1_REG_PIC_INTERLACE_E(v) ((v) ? BIT(23) : 0) +#define G1_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(22) : 0) +#define G1_REG_PIC_B_E(v) ((v) ? BIT(21) : 0) +#define G1_REG_PIC_INTER_E(v) ((v) ? BIT(20) : 0) +#define G1_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(19) : 0) +#define G1_REG_FWD_INTERLACE_E(v) ((v) ? BIT(18) : 0) +#define G1_REG_FILTERING_DIS(v) ((v) ? BIT(14) : 0) +#define G1_REG_WRITE_MVS_E(v) ((v) ? BIT(12) : 0) +#define G1_REG_DEC_AXI_WR_ID(v) (((v) << 0) & GENMASK(7, 0)) + +#define G1_REG_PIC_MB_WIDTH(v) (((v) << 23) & GENMASK(31, 23)) +#define G1_REG_PIC_MB_HEIGHT_P(v) (((v) << 11) & GENMASK(18, 11)) +#define G1_REG_ALT_SCAN_E(v) ((v) ? BIT(6) : 0) +#define G1_REG_TOPFIELDFIRST_E(v) ((v) ? BIT(5) : 0) + +#define G1_REG_STRM_START_BIT(v) (((v) << 26) & GENMASK(31, 26)) +#define G1_REG_QSCALE_TYPE(v) ((v) ? BIT(24) : 0) +#define G1_REG_CON_MV_E(v) ((v) ? BIT(4) : 0) +#define G1_REG_INTRA_DC_PREC(v) (((v) << 2) & GENMASK(3, 2)) +#define G1_REG_INTRA_VLC_TAB(v) ((v) ? BIT(1) : 0) +#define G1_REG_FRAME_PRED_DCT(v) ((v) ? BIT(0) : 0) + +#define G1_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25)) +#define G1_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0)) + +#define G1_REG_ALT_SCAN_FLAG_E(v) ((v) ? BIT(19) : 0) +#define G1_REG_FCODE_FWD_HOR(v) (((v) << 15) & GENMASK(18, 15)) +#define G1_REG_FCODE_FWD_VER(v) (((v) << 11) & GENMASK(14, 11)) +#define G1_REG_FCODE_BWD_HOR(v) (((v) << 7) & GENMASK(10, 7)) +#define G1_REG_FCODE_BWD_VER(v) (((v) << 3) & GENMASK(6, 3)) +#define G1_REG_MV_ACCURACY_FWD(v) ((v) ? BIT(2) : 0) +#define G1_REG_MV_ACCURACY_BWD(v) ((v) ? BIT(1) : 0) + +#define G1_REG_STARTMB_X(v) (((v) << 23) & GENMASK(31, 23)) +#define G1_REG_STARTMB_Y(v) (((v) << 15) & GENMASK(22, 15)) + +#define G1_REG_APF_THRESHOLD(v) (((v) << 0) & GENMASK(13, 0)) + +static void +hantro_g1_mpeg2_dec_set_quantisation(struct hantro_dev *vpu, + struct hantro_ctx *ctx) +{ + struct v4l2_ctrl_mpeg2_quantisation *q; + + q = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_MPEG2_QUANTISATION); + hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q); + vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, G1_REG_QTABLE_BASE); +} + +static void +hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx, + struct vb2_buffer *src_buf, + struct vb2_buffer *dst_buf, + const struct v4l2_ctrl_mpeg2_sequence *seq, + const struct v4l2_ctrl_mpeg2_picture *pic) +{ + dma_addr_t forward_addr = 0, backward_addr = 0; + dma_addr_t current_addr, addr; + + switch (pic->picture_coding_type) { + case V4L2_MPEG2_PIC_CODING_TYPE_B: + backward_addr = hantro_get_ref(ctx, pic->backward_ref_ts); + fallthrough; + case V4L2_MPEG2_PIC_CODING_TYPE_P: + forward_addr = hantro_get_ref(ctx, pic->forward_ref_ts); + } + + /* Source bitstream buffer */ + addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); + vdpu_write_relaxed(vpu, addr, G1_REG_RLC_VLC_BASE); + + /* Destination frame buffer */ + addr = hantro_get_dec_buf_addr(ctx, dst_buf); + current_addr = addr; + + if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) + addr += ALIGN(ctx->dst_fmt.width, 16); + vdpu_write_relaxed(vpu, addr, G1_REG_DEC_OUT_BASE); + + if (!forward_addr) + forward_addr = current_addr; + if (!backward_addr) + backward_addr = current_addr; + + /* Set forward ref frame (top/bottom field) */ + if (pic->picture_structure == V4L2_MPEG2_PIC_FRAME || + pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B || + (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD && + pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST) || + (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD && + !(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST))) { + vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER1_BASE); + } else if (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) { + vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, current_addr, G1_REG_REFER1_BASE); + } else if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) { + vdpu_write_relaxed(vpu, current_addr, G1_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER1_BASE); + } + + /* Set backward ref frame (top/bottom field) */ + vdpu_write_relaxed(vpu, backward_addr, G1_REG_REFER2_BASE); + vdpu_write_relaxed(vpu, backward_addr, G1_REG_REFER3_BASE); +} + +int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + const struct v4l2_ctrl_mpeg2_sequence *seq; + const struct v4l2_ctrl_mpeg2_picture *pic; + u32 reg; + + src_buf = hantro_get_src_buf(ctx); + dst_buf = hantro_get_dst_buf(ctx); + + /* Apply request controls if any */ + hantro_start_prepare_run(ctx); + + seq = hantro_get_ctrl(ctx, + V4L2_CID_STATELESS_MPEG2_SEQUENCE); + pic = hantro_get_ctrl(ctx, + V4L2_CID_STATELESS_MPEG2_PICTURE); + + reg = G1_REG_DEC_AXI_RD_ID(0) | + G1_REG_DEC_TIMEOUT_E(1) | + G1_REG_DEC_STRSWAP32_E(1) | + G1_REG_DEC_STRENDIAN_E(1) | + G1_REG_DEC_INSWAP32_E(1) | + G1_REG_DEC_OUTSWAP32_E(1) | + G1_REG_DEC_DATA_DISC_E(0) | + G1_REG_DEC_LATENCY(0) | + G1_REG_DEC_CLK_GATE_E(1) | + G1_REG_DEC_IN_ENDIAN(1) | + G1_REG_DEC_OUT_ENDIAN(1) | + G1_REG_DEC_ADV_PRE_DIS(0) | + G1_REG_DEC_SCMD_DIS(0) | + G1_REG_DEC_MAX_BURST(16); + vdpu_write_relaxed(vpu, reg, G1_SWREG(2)); + + reg = G1_REG_DEC_MODE(5) | + G1_REG_RLC_MODE_E(0) | + G1_REG_PIC_INTERLACE_E(!(seq->flags & V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE)) | + G1_REG_PIC_FIELDMODE_E(pic->picture_structure != V4L2_MPEG2_PIC_FRAME) | + G1_REG_PIC_B_E(pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B) | + G1_REG_PIC_INTER_E(pic->picture_coding_type != V4L2_MPEG2_PIC_CODING_TYPE_I) | + G1_REG_PIC_TOPFIELD_E(pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) | + G1_REG_FWD_INTERLACE_E(0) | + G1_REG_FILTERING_DIS(1) | + G1_REG_WRITE_MVS_E(0) | + G1_REG_DEC_AXI_WR_ID(0); + vdpu_write_relaxed(vpu, reg, G1_SWREG(3)); + + reg = G1_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) | + G1_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) | + G1_REG_ALT_SCAN_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) | + G1_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST); + vdpu_write_relaxed(vpu, reg, G1_SWREG(4)); + + reg = G1_REG_STRM_START_BIT(0) | + G1_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) | + G1_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) | + G1_REG_INTRA_DC_PREC(pic->intra_dc_precision) | + G1_REG_INTRA_VLC_TAB(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC) | + G1_REG_FRAME_PRED_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT); + vdpu_write_relaxed(vpu, reg, G1_SWREG(5)); + + reg = G1_REG_INIT_QP(1) | + G1_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); + vdpu_write_relaxed(vpu, reg, G1_SWREG(6)); + + reg = G1_REG_ALT_SCAN_FLAG_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) | + G1_REG_FCODE_FWD_HOR(pic->f_code[0][0]) | + G1_REG_FCODE_FWD_VER(pic->f_code[0][1]) | + G1_REG_FCODE_BWD_HOR(pic->f_code[1][0]) | + G1_REG_FCODE_BWD_VER(pic->f_code[1][1]) | + G1_REG_MV_ACCURACY_FWD(1) | + G1_REG_MV_ACCURACY_BWD(1); + vdpu_write_relaxed(vpu, reg, G1_SWREG(18)); + + reg = G1_REG_STARTMB_X(0) | + G1_REG_STARTMB_Y(0); + vdpu_write_relaxed(vpu, reg, G1_SWREG(48)); + + reg = G1_REG_APF_THRESHOLD(8); + vdpu_write_relaxed(vpu, reg, G1_SWREG(55)); + + hantro_g1_mpeg2_dec_set_quantisation(vpu, ctx); + hantro_g1_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf, + &dst_buf->vb2_buf, + seq, pic); + + hantro_end_prepare_run(ctx); + + vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/hantro_g1_regs.h b/drivers/media/platform/verisilicon/hantro_g1_regs.h new file mode 100644 index 000000000..c623b3b0b --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g1_regs.h @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hantro VPU codec driver + * + * Copyright 2018 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + */ + +#ifndef HANTRO_G1_REGS_H_ +#define HANTRO_G1_REGS_H_ + +#define G1_SWREG(nr) ((nr) * 4) + +/* Decoder registers. */ +#define G1_REG_INTERRUPT 0x004 +#define G1_REG_INTERRUPT_DEC_PIC_INF BIT(24) +#define G1_REG_INTERRUPT_DEC_TIMEOUT BIT(18) +#define G1_REG_INTERRUPT_DEC_SLICE_INT BIT(17) +#define G1_REG_INTERRUPT_DEC_ERROR_INT BIT(16) +#define G1_REG_INTERRUPT_DEC_ASO_INT BIT(15) +#define G1_REG_INTERRUPT_DEC_BUFFER_INT BIT(14) +#define G1_REG_INTERRUPT_DEC_BUS_INT BIT(13) +#define G1_REG_INTERRUPT_DEC_RDY_INT BIT(12) +#define G1_REG_INTERRUPT_DEC_IRQ BIT(8) +#define G1_REG_INTERRUPT_DEC_IRQ_DIS BIT(4) +#define G1_REG_INTERRUPT_DEC_E BIT(0) +#define G1_REG_CONFIG 0x008 +#define G1_REG_CONFIG_DEC_AXI_RD_ID(x) (((x) & 0xff) << 24) +#define G1_REG_CONFIG_DEC_TIMEOUT_E BIT(23) +#define G1_REG_CONFIG_DEC_STRSWAP32_E BIT(22) +#define G1_REG_CONFIG_DEC_STRENDIAN_E BIT(21) +#define G1_REG_CONFIG_DEC_INSWAP32_E BIT(20) +#define G1_REG_CONFIG_DEC_OUTSWAP32_E BIT(19) +#define G1_REG_CONFIG_DEC_DATA_DISC_E BIT(18) +#define G1_REG_CONFIG_TILED_MODE_MSB BIT(17) +#define G1_REG_CONFIG_DEC_OUT_TILED_E BIT(17) +#define G1_REG_CONFIG_DEC_LATENCY(x) (((x) & 0x3f) << 11) +#define G1_REG_CONFIG_DEC_CLK_GATE_E BIT(10) +#define G1_REG_CONFIG_DEC_IN_ENDIAN BIT(9) +#define G1_REG_CONFIG_DEC_OUT_ENDIAN BIT(8) +#define G1_REG_CONFIG_PRIORITY_MODE(x) (((x) & 0x7) << 5) +#define G1_REG_CONFIG_TILED_MODE_LSB BIT(7) +#define G1_REG_CONFIG_DEC_ADV_PRE_DIS BIT(6) +#define G1_REG_CONFIG_DEC_SCMD_DIS BIT(5) +#define G1_REG_CONFIG_DEC_MAX_BURST(x) (((x) & 0x1f) << 0) +#define G1_REG_DEC_CTRL0 0x00c +#define G1_REG_DEC_CTRL0_DEC_MODE(x) (((x) & 0xf) << 28) +#define G1_REG_DEC_CTRL0_RLC_MODE_E BIT(27) +#define G1_REG_DEC_CTRL0_SKIP_MODE BIT(26) +#define G1_REG_DEC_CTRL0_DIVX3_E BIT(25) +#define G1_REG_DEC_CTRL0_PJPEG_E BIT(24) +#define G1_REG_DEC_CTRL0_PIC_INTERLACE_E BIT(23) +#define G1_REG_DEC_CTRL0_PIC_FIELDMODE_E BIT(22) +#define G1_REG_DEC_CTRL0_PIC_B_E BIT(21) +#define G1_REG_DEC_CTRL0_PIC_INTER_E BIT(20) +#define G1_REG_DEC_CTRL0_PIC_TOPFIELD_E BIT(19) +#define G1_REG_DEC_CTRL0_FWD_INTERLACE_E BIT(18) +#define G1_REG_DEC_CTRL0_SORENSON_E BIT(17) +#define G1_REG_DEC_CTRL0_REF_TOPFIELD_E BIT(16) +#define G1_REG_DEC_CTRL0_DEC_OUT_DIS BIT(15) +#define G1_REG_DEC_CTRL0_FILTERING_DIS BIT(14) +#define G1_REG_DEC_CTRL0_WEBP_E BIT(13) +#define G1_REG_DEC_CTRL0_MVC_E BIT(13) +#define G1_REG_DEC_CTRL0_PIC_FIXED_QUANT BIT(13) +#define G1_REG_DEC_CTRL0_WRITE_MVS_E BIT(12) +#define G1_REG_DEC_CTRL0_REFTOPFIRST_E BIT(11) +#define G1_REG_DEC_CTRL0_SEQ_MBAFF_E BIT(10) +#define G1_REG_DEC_CTRL0_PICORD_COUNT_E BIT(9) +#define G1_REG_DEC_CTRL0_DEC_AHB_HLOCK_E BIT(8) +#define G1_REG_DEC_CTRL0_DEC_AXI_WR_ID(x) (((x) & 0xff) << 0) +/* Setting AXI ID to 0xff to get auto generated ID to avoid possible conflicts */ +#define G1_REG_DEC_CTRL0_DEC_AXI_AUTO G1_REG_DEC_CTRL0_DEC_AXI_WR_ID(0xff) +#define G1_REG_DEC_CTRL1 0x010 +#define G1_REG_DEC_CTRL1_PIC_MB_WIDTH(x) (((x) & 0x1ff) << 23) +#define G1_REG_DEC_CTRL1_MB_WIDTH_OFF(x) (((x) & 0xf) << 19) +#define G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(x) (((x) & 0xff) << 11) +#define G1_REG_DEC_CTRL1_MB_HEIGHT_OFF(x) (((x) & 0xf) << 7) +#define G1_REG_DEC_CTRL1_ALT_SCAN_E BIT(6) +#define G1_REG_DEC_CTRL1_TOPFIELDFIRST_E BIT(5) +#define G1_REG_DEC_CTRL1_REF_FRAMES(x) (((x) & 0x1f) << 0) +#define G1_REG_DEC_CTRL1_PIC_MB_W_EXT(x) (((x) & 0x7) << 3) +#define G1_REG_DEC_CTRL1_PIC_MB_H_EXT(x) (((x) & 0x7) << 0) +#define G1_REG_DEC_CTRL1_PIC_REFER_FLAG BIT(0) +#define G1_REG_DEC_CTRL2 0x014 +#define G1_REG_DEC_CTRL2_STRM_START_BIT(x) (((x) & 0x3f) << 26) +#define G1_REG_DEC_CTRL2_SYNC_MARKER_E BIT(25) +#define G1_REG_DEC_CTRL2_TYPE1_QUANT_E BIT(24) +#define G1_REG_DEC_CTRL2_CH_QP_OFFSET(x) (((x) & 0x1f) << 19) +#define G1_REG_DEC_CTRL2_CH_QP_OFFSET2(x) (((x) & 0x1f) << 14) +#define G1_REG_DEC_CTRL2_FIELDPIC_FLAG_E BIT(0) +#define G1_REG_DEC_CTRL2_INTRADC_VLC_THR(x) (((x) & 0x7) << 16) +#define G1_REG_DEC_CTRL2_VOP_TIME_INCR(x) (((x) & 0xffff) << 0) +#define G1_REG_DEC_CTRL2_DQ_PROFILE BIT(24) +#define G1_REG_DEC_CTRL2_DQBI_LEVEL BIT(23) +#define G1_REG_DEC_CTRL2_RANGE_RED_FRM_E BIT(22) +#define G1_REG_DEC_CTRL2_FAST_UVMC_E BIT(20) +#define G1_REG_DEC_CTRL2_TRANSDCTAB BIT(17) +#define G1_REG_DEC_CTRL2_TRANSACFRM(x) (((x) & 0x3) << 15) +#define G1_REG_DEC_CTRL2_TRANSACFRM2(x) (((x) & 0x3) << 13) +#define G1_REG_DEC_CTRL2_MB_MODE_TAB(x) (((x) & 0x7) << 10) +#define G1_REG_DEC_CTRL2_MVTAB(x) (((x) & 0x7) << 7) +#define G1_REG_DEC_CTRL2_CBPTAB(x) (((x) & 0x7) << 4) +#define G1_REG_DEC_CTRL2_2MV_BLK_PAT_TAB(x) (((x) & 0x3) << 2) +#define G1_REG_DEC_CTRL2_4MV_BLK_PAT_TAB(x) (((x) & 0x3) << 0) +#define G1_REG_DEC_CTRL2_QSCALE_TYPE BIT(24) +#define G1_REG_DEC_CTRL2_CON_MV_E BIT(4) +#define G1_REG_DEC_CTRL2_INTRA_DC_PREC(x) (((x) & 0x3) << 2) +#define G1_REG_DEC_CTRL2_INTRA_VLC_TAB BIT(1) +#define G1_REG_DEC_CTRL2_FRAME_PRED_DCT BIT(0) +#define G1_REG_DEC_CTRL2_JPEG_QTABLES(x) (((x) & 0x3) << 11) +#define G1_REG_DEC_CTRL2_JPEG_MODE(x) (((x) & 0x7) << 8) +#define G1_REG_DEC_CTRL2_JPEG_FILRIGHT_E BIT(7) +#define G1_REG_DEC_CTRL2_JPEG_STREAM_ALL BIT(6) +#define G1_REG_DEC_CTRL2_CR_AC_VLCTABLE BIT(5) +#define G1_REG_DEC_CTRL2_CB_AC_VLCTABLE BIT(4) +#define G1_REG_DEC_CTRL2_CR_DC_VLCTABLE BIT(3) +#define G1_REG_DEC_CTRL2_CB_DC_VLCTABLE BIT(2) +#define G1_REG_DEC_CTRL2_CR_DC_VLCTABLE3 BIT(1) +#define G1_REG_DEC_CTRL2_CB_DC_VLCTABLE3 BIT(0) +#define G1_REG_DEC_CTRL2_STRM1_START_BIT(x) (((x) & 0x3f) << 18) +#define G1_REG_DEC_CTRL2_HUFFMAN_E BIT(17) +#define G1_REG_DEC_CTRL2_MULTISTREAM_E BIT(16) +#define G1_REG_DEC_CTRL2_BOOLEAN_VALUE(x) (((x) & 0xff) << 8) +#define G1_REG_DEC_CTRL2_BOOLEAN_RANGE(x) (((x) & 0xff) << 0) +#define G1_REG_DEC_CTRL2_ALPHA_OFFSET(x) (((x) & 0x1f) << 5) +#define G1_REG_DEC_CTRL2_BETA_OFFSET(x) (((x) & 0x1f) << 0) +#define G1_REG_DEC_CTRL3 0x018 +#define G1_REG_DEC_CTRL3_START_CODE_E BIT(31) +#define G1_REG_DEC_CTRL3_INIT_QP(x) (((x) & 0x3f) << 25) +#define G1_REG_DEC_CTRL3_CH_8PIX_ILEAV_E BIT(24) +#define G1_REG_DEC_CTRL3_STREAM_LEN_EXT(x) (((x) & 0xff) << 24) +#define G1_REG_DEC_CTRL3_STREAM_LEN(x) (((x) & 0xffffff) << 0) +#define G1_REG_DEC_CTRL4 0x01c +#define G1_REG_DEC_CTRL4_CABAC_E BIT(31) +#define G1_REG_DEC_CTRL4_BLACKWHITE_E BIT(30) +#define G1_REG_DEC_CTRL4_DIR_8X8_INFER_E BIT(29) +#define G1_REG_DEC_CTRL4_WEIGHT_PRED_E BIT(28) +#define G1_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(x) (((x) & 0x3) << 26) +#define G1_REG_DEC_CTRL4_AVS_H264_H_EXT BIT(25) +#define G1_REG_DEC_CTRL4_FRAMENUM_LEN(x) (((x) & 0x1f) << 16) +#define G1_REG_DEC_CTRL4_FRAMENUM(x) (((x) & 0xffff) << 0) +#define G1_REG_DEC_CTRL4_BITPLANE0_E BIT(31) +#define G1_REG_DEC_CTRL4_BITPLANE1_E BIT(30) +#define G1_REG_DEC_CTRL4_BITPLANE2_E BIT(29) +#define G1_REG_DEC_CTRL4_ALT_PQUANT(x) (((x) & 0x1f) << 24) +#define G1_REG_DEC_CTRL4_DQ_EDGES(x) (((x) & 0xf) << 20) +#define G1_REG_DEC_CTRL4_TTMBF BIT(19) +#define G1_REG_DEC_CTRL4_PQINDEX(x) (((x) & 0x1f) << 14) +#define G1_REG_DEC_CTRL4_VC1_HEIGHT_EXT BIT(13) +#define G1_REG_DEC_CTRL4_BILIN_MC_E BIT(12) +#define G1_REG_DEC_CTRL4_UNIQP_E BIT(11) +#define G1_REG_DEC_CTRL4_HALFQP_E BIT(10) +#define G1_REG_DEC_CTRL4_TTFRM(x) (((x) & 0x3) << 8) +#define G1_REG_DEC_CTRL4_2ND_BYTE_EMUL_E BIT(7) +#define G1_REG_DEC_CTRL4_DQUANT_E BIT(6) +#define G1_REG_DEC_CTRL4_VC1_ADV_E BIT(5) +#define G1_REG_DEC_CTRL4_PJPEG_FILDOWN_E BIT(26) +#define G1_REG_DEC_CTRL4_PJPEG_WDIV8 BIT(25) +#define G1_REG_DEC_CTRL4_PJPEG_HDIV8 BIT(24) +#define G1_REG_DEC_CTRL4_PJPEG_AH(x) (((x) & 0xf) << 20) +#define G1_REG_DEC_CTRL4_PJPEG_AL(x) (((x) & 0xf) << 16) +#define G1_REG_DEC_CTRL4_PJPEG_SS(x) (((x) & 0xff) << 8) +#define G1_REG_DEC_CTRL4_PJPEG_SE(x) (((x) & 0xff) << 0) +#define G1_REG_DEC_CTRL4_DCT1_START_BIT(x) (((x) & 0x3f) << 26) +#define G1_REG_DEC_CTRL4_DCT2_START_BIT(x) (((x) & 0x3f) << 20) +#define G1_REG_DEC_CTRL4_CH_MV_RES BIT(13) +#define G1_REG_DEC_CTRL4_INIT_DC_MATCH0(x) (((x) & 0x7) << 9) +#define G1_REG_DEC_CTRL4_INIT_DC_MATCH1(x) (((x) & 0x7) << 6) +#define G1_REG_DEC_CTRL4_VP7_VERSION BIT(5) +#define G1_REG_DEC_CTRL5 0x020 +#define G1_REG_DEC_CTRL5_CONST_INTRA_E BIT(31) +#define G1_REG_DEC_CTRL5_FILT_CTRL_PRES BIT(30) +#define G1_REG_DEC_CTRL5_RDPIC_CNT_PRES BIT(29) +#define G1_REG_DEC_CTRL5_8X8TRANS_FLAG_E BIT(28) +#define G1_REG_DEC_CTRL5_REFPIC_MK_LEN(x) (((x) & 0x7ff) << 17) +#define G1_REG_DEC_CTRL5_IDR_PIC_E BIT(16) +#define G1_REG_DEC_CTRL5_IDR_PIC_ID(x) (((x) & 0xffff) << 0) +#define G1_REG_DEC_CTRL5_MV_SCALEFACTOR(x) (((x) & 0xff) << 24) +#define G1_REG_DEC_CTRL5_REF_DIST_FWD(x) (((x) & 0x1f) << 19) +#define G1_REG_DEC_CTRL5_REF_DIST_BWD(x) (((x) & 0x1f) << 14) +#define G1_REG_DEC_CTRL5_LOOP_FILT_LIMIT(x) (((x) & 0xf) << 14) +#define G1_REG_DEC_CTRL5_VARIANCE_TEST_E BIT(13) +#define G1_REG_DEC_CTRL5_MV_THRESHOLD(x) (((x) & 0x7) << 10) +#define G1_REG_DEC_CTRL5_VAR_THRESHOLD(x) (((x) & 0x3ff) << 0) +#define G1_REG_DEC_CTRL5_DIVX_IDCT_E BIT(8) +#define G1_REG_DEC_CTRL5_DIVX3_SLICE_SIZE(x) (((x) & 0xff) << 0) +#define G1_REG_DEC_CTRL5_PJPEG_REST_FREQ(x) (((x) & 0xffff) << 0) +#define G1_REG_DEC_CTRL5_RV_PROFILE(x) (((x) & 0x3) << 30) +#define G1_REG_DEC_CTRL5_RV_OSV_QUANT(x) (((x) & 0x3) << 28) +#define G1_REG_DEC_CTRL5_RV_FWD_SCALE(x) (((x) & 0x3fff) << 14) +#define G1_REG_DEC_CTRL5_RV_BWD_SCALE(x) (((x) & 0x3fff) << 0) +#define G1_REG_DEC_CTRL5_INIT_DC_COMP0(x) (((x) & 0xffff) << 16) +#define G1_REG_DEC_CTRL5_INIT_DC_COMP1(x) (((x) & 0xffff) << 0) +#define G1_REG_DEC_CTRL6 0x024 +#define G1_REG_DEC_CTRL6_PPS_ID(x) (((x) & 0xff) << 24) +#define G1_REG_DEC_CTRL6_REFIDX1_ACTIVE(x) (((x) & 0x1f) << 19) +#define G1_REG_DEC_CTRL6_REFIDX0_ACTIVE(x) (((x) & 0x1f) << 14) +#define G1_REG_DEC_CTRL6_POC_LENGTH(x) (((x) & 0xff) << 0) +#define G1_REG_DEC_CTRL6_ICOMP0_E BIT(24) +#define G1_REG_DEC_CTRL6_ISCALE0(x) (((x) & 0xff) << 16) +#define G1_REG_DEC_CTRL6_ISHIFT0(x) (((x) & 0xffff) << 0) +#define G1_REG_DEC_CTRL6_STREAM1_LEN(x) (((x) & 0xffffff) << 0) +#define G1_REG_DEC_CTRL6_PIC_SLICE_AM(x) (((x) & 0x1fff) << 0) +#define G1_REG_DEC_CTRL6_COEFFS_PART_AM(x) (((x) & 0xf) << 24) +#define G1_REG_FWD_PIC(i) (0x028 + ((i) * 0x4)) +#define G1_REG_FWD_PIC_PINIT_RLIST_F5(x) (((x) & 0x1f) << 25) +#define G1_REG_FWD_PIC_PINIT_RLIST_F4(x) (((x) & 0x1f) << 20) +#define G1_REG_FWD_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 15) +#define G1_REG_FWD_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 10) +#define G1_REG_FWD_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 5) +#define G1_REG_FWD_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 0) +#define G1_REG_FWD_PIC1_ICOMP1_E BIT(24) +#define G1_REG_FWD_PIC1_ISCALE1(x) (((x) & 0xff) << 16) +#define G1_REG_FWD_PIC1_ISHIFT1(x) (((x) & 0xffff) << 0) +#define G1_REG_FWD_PIC1_SEGMENT_BASE(x) ((x) << 0) +#define G1_REG_FWD_PIC1_SEGMENT_UPD_E BIT(1) +#define G1_REG_FWD_PIC1_SEGMENT_E BIT(0) +#define G1_REG_DEC_CTRL7 0x02c +#define G1_REG_DEC_CTRL7_PINIT_RLIST_F15(x) (((x) & 0x1f) << 25) +#define G1_REG_DEC_CTRL7_PINIT_RLIST_F14(x) (((x) & 0x1f) << 20) +#define G1_REG_DEC_CTRL7_PINIT_RLIST_F13(x) (((x) & 0x1f) << 15) +#define G1_REG_DEC_CTRL7_PINIT_RLIST_F12(x) (((x) & 0x1f) << 10) +#define G1_REG_DEC_CTRL7_PINIT_RLIST_F11(x) (((x) & 0x1f) << 5) +#define G1_REG_DEC_CTRL7_PINIT_RLIST_F10(x) (((x) & 0x1f) << 0) +#define G1_REG_DEC_CTRL7_ICOMP2_E BIT(24) +#define G1_REG_DEC_CTRL7_ISCALE2(x) (((x) & 0xff) << 16) +#define G1_REG_DEC_CTRL7_ISHIFT2(x) (((x) & 0xffff) << 0) +#define G1_REG_DEC_CTRL7_DCT3_START_BIT(x) (((x) & 0x3f) << 24) +#define G1_REG_DEC_CTRL7_DCT4_START_BIT(x) (((x) & 0x3f) << 18) +#define G1_REG_DEC_CTRL7_DCT5_START_BIT(x) (((x) & 0x3f) << 12) +#define G1_REG_DEC_CTRL7_DCT6_START_BIT(x) (((x) & 0x3f) << 6) +#define G1_REG_DEC_CTRL7_DCT7_START_BIT(x) (((x) & 0x3f) << 0) +#define G1_REG_ADDR_STR 0x030 +#define G1_REG_ADDR_DST 0x034 +#define G1_REG_ADDR_REF(i) (0x038 + ((i) * 0x4)) +#define G1_REG_ADDR_REF_FIELD_E BIT(1) +#define G1_REG_ADDR_REF_TOPC_E BIT(0) +#define G1_REG_REF_PIC(i) (0x078 + ((i) * 0x4)) +#define G1_REG_REF_PIC_FILT_TYPE_E BIT(31) +#define G1_REG_REF_PIC_FILT_SHARPNESS(x) (((x) & 0x7) << 28) +#define G1_REG_REF_PIC_MB_ADJ_0(x) (((x) & 0x7f) << 21) +#define G1_REG_REF_PIC_MB_ADJ_1(x) (((x) & 0x7f) << 14) +#define G1_REG_REF_PIC_MB_ADJ_2(x) (((x) & 0x7f) << 7) +#define G1_REG_REF_PIC_MB_ADJ_3(x) (((x) & 0x7f) << 0) +#define G1_REG_REF_PIC_REFER1_NBR(x) (((x) & 0xffff) << 16) +#define G1_REG_REF_PIC_REFER0_NBR(x) (((x) & 0xffff) << 0) +#define G1_REG_REF_PIC_LF_LEVEL_0(x) (((x) & 0x3f) << 18) +#define G1_REG_REF_PIC_LF_LEVEL_1(x) (((x) & 0x3f) << 12) +#define G1_REG_REF_PIC_LF_LEVEL_2(x) (((x) & 0x3f) << 6) +#define G1_REG_REF_PIC_LF_LEVEL_3(x) (((x) & 0x3f) << 0) +#define G1_REG_REF_PIC_QUANT_DELTA_0(x) (((x) & 0x1f) << 27) +#define G1_REG_REF_PIC_QUANT_DELTA_1(x) (((x) & 0x1f) << 22) +#define G1_REG_REF_PIC_QUANT_0(x) (((x) & 0x7ff) << 11) +#define G1_REG_REF_PIC_QUANT_1(x) (((x) & 0x7ff) << 0) +#define G1_REG_LT_REF 0x098 +#define G1_REG_VALID_REF 0x09c +#define G1_REG_ADDR_QTABLE 0x0a0 +#define G1_REG_ADDR_DIR_MV 0x0a4 +#define G1_REG_BD_REF_PIC(i) (0x0a8 + ((i) * 0x4)) +#define G1_REG_BD_REF_PIC_BINIT_RLIST_B2(x) (((x) & 0x1f) << 25) +#define G1_REG_BD_REF_PIC_BINIT_RLIST_F2(x) (((x) & 0x1f) << 20) +#define G1_REG_BD_REF_PIC_BINIT_RLIST_B1(x) (((x) & 0x1f) << 15) +#define G1_REG_BD_REF_PIC_BINIT_RLIST_F1(x) (((x) & 0x1f) << 10) +#define G1_REG_BD_REF_PIC_BINIT_RLIST_B0(x) (((x) & 0x1f) << 5) +#define G1_REG_BD_REF_PIC_BINIT_RLIST_F0(x) (((x) & 0x1f) << 0) +#define G1_REG_BD_REF_PIC_PRED_TAP_2_M1(x) (((x) & 0x3) << 10) +#define G1_REG_BD_REF_PIC_PRED_TAP_2_4(x) (((x) & 0x3) << 8) +#define G1_REG_BD_REF_PIC_PRED_TAP_4_M1(x) (((x) & 0x3) << 6) +#define G1_REG_BD_REF_PIC_PRED_TAP_4_4(x) (((x) & 0x3) << 4) +#define G1_REG_BD_REF_PIC_PRED_TAP_6_M1(x) (((x) & 0x3) << 2) +#define G1_REG_BD_REF_PIC_PRED_TAP_6_4(x) (((x) & 0x3) << 0) +#define G1_REG_BD_REF_PIC_QUANT_DELTA_2(x) (((x) & 0x1f) << 27) +#define G1_REG_BD_REF_PIC_QUANT_DELTA_3(x) (((x) & 0x1f) << 22) +#define G1_REG_BD_REF_PIC_QUANT_2(x) (((x) & 0x7ff) << 11) +#define G1_REG_BD_REF_PIC_QUANT_3(x) (((x) & 0x7ff) << 0) +#define G1_REG_BD_P_REF_PIC 0x0bc +#define G1_REG_BD_P_REF_PIC_QUANT_DELTA_4(x) (((x) & 0x1f) << 27) +#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 25) +#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 20) +#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 15) +#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 10) +#define G1_REG_BD_P_REF_PIC_BINIT_RLIST_B15(x) (((x) & 0x1f) << 5) +#define G1_REG_BD_P_REF_PIC_BINIT_RLIST_F15(x) (((x) & 0x1f) << 0) +#define G1_REG_ERR_CONC 0x0c0 +#define G1_REG_ERR_CONC_STARTMB_X(x) (((x) & 0x1ff) << 23) +#define G1_REG_ERR_CONC_STARTMB_Y(x) (((x) & 0xff) << 15) +#define G1_REG_PRED_FLT 0x0c4 +#define G1_REG_PRED_FLT_PRED_BC_TAP_0_0(x) (((x) & 0x3ff) << 22) +#define G1_REG_PRED_FLT_PRED_BC_TAP_0_1(x) (((x) & 0x3ff) << 12) +#define G1_REG_PRED_FLT_PRED_BC_TAP_0_2(x) (((x) & 0x3ff) << 2) +#define G1_REG_REF_BUF_CTRL 0x0cc +#define G1_REG_REF_BUF_CTRL_REFBU_E BIT(31) +#define G1_REG_REF_BUF_CTRL_REFBU_THR(x) (((x) & 0xfff) << 19) +#define G1_REG_REF_BUF_CTRL_REFBU_PICID(x) (((x) & 0x1f) << 14) +#define G1_REG_REF_BUF_CTRL_REFBU_EVAL_E BIT(13) +#define G1_REG_REF_BUF_CTRL_REFBU_FPARMOD_E BIT(12) +#define G1_REG_REF_BUF_CTRL_REFBU_Y_OFFSET(x) (((x) & 0x1ff) << 0) +#define G1_REG_REF_BUF_CTRL2 0x0dc +#define G1_REG_REF_BUF_CTRL2_REFBU2_BUF_E BIT(31) +#define G1_REG_REF_BUF_CTRL2_REFBU2_THR(x) (((x) & 0xfff) << 19) +#define G1_REG_REF_BUF_CTRL2_REFBU2_PICID(x) (((x) & 0x1f) << 14) +#define G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(x) (((x) & 0x3fff) << 0) +#define G1_REG_SOFT_RESET 0x194 + +/* Post-processor registers. */ +#define G1_REG_PP_INTERRUPT G1_SWREG(60) +#define G1_REG_PP_READY_IRQ BIT(12) +#define G1_REG_PP_IRQ BIT(8) +#define G1_REG_PP_IRQ_DIS BIT(4) +#define G1_REG_PP_PIPELINE_EN BIT(1) +#define G1_REG_PP_EXTERNAL_TRIGGER BIT(0) +#define G1_REG_PP_DEV_CONFIG G1_SWREG(61) +#define G1_REG_PP_AXI_RD_ID(v) (((v) << 24) & GENMASK(31, 24)) +#define G1_REG_PP_AXI_WR_ID(v) (((v) << 16) & GENMASK(23, 16)) +#define G1_REG_PP_INSWAP32_E(v) ((v) ? BIT(10) : 0) +#define G1_REG_PP_DATA_DISC_E(v) ((v) ? BIT(9) : 0) +#define G1_REG_PP_CLK_GATE_E(v) ((v) ? BIT(8) : 0) +#define G1_REG_PP_IN_ENDIAN(v) ((v) ? BIT(7) : 0) +#define G1_REG_PP_OUT_ENDIAN(v) ((v) ? BIT(6) : 0) +#define G1_REG_PP_OUTSWAP32_E(v) ((v) ? BIT(5) : 0) +#define G1_REG_PP_MAX_BURST(v) (((v) << 0) & GENMASK(4, 0)) +#define G1_REG_PP_IN_LUMA_BASE G1_SWREG(63) +#define G1_REG_PP_IN_CB_BASE G1_SWREG(64) +#define G1_REG_PP_IN_CR_BASE G1_SWREG(65) +#define G1_REG_PP_OUT_LUMA_BASE G1_SWREG(66) +#define G1_REG_PP_OUT_CHROMA_BASE G1_SWREG(67) +#define G1_REG_PP_CONTRAST_ADJUST G1_SWREG(68) +#define G1_REG_PP_COLOR_CONVERSION G1_SWREG(69) +#define G1_REG_PP_COLOR_CONVERSION0 G1_SWREG(70) +#define G1_REG_PP_COLOR_CONVERSION1 G1_SWREG(71) +#define G1_REG_PP_INPUT_SIZE G1_SWREG(72) +#define G1_REG_PP_INPUT_SIZE_HEIGHT(v) (((v) << 9) & GENMASK(16, 9)) +#define G1_REG_PP_INPUT_SIZE_WIDTH(v) (((v) << 0) & GENMASK(8, 0)) +#define G1_REG_PP_SCALING0 G1_SWREG(79) +#define G1_REG_PP_PADD_R(v) (((v) << 23) & GENMASK(27, 23)) +#define G1_REG_PP_PADD_G(v) (((v) << 18) & GENMASK(22, 18)) +#define G1_REG_PP_RANGEMAP_Y(v) ((v) ? BIT(31) : 0) +#define G1_REG_PP_RANGEMAP_C(v) ((v) ? BIT(30) : 0) +#define G1_REG_PP_YCBCR_RANGE(v) ((v) ? BIT(29) : 0) +#define G1_REG_PP_RGB_16(v) ((v) ? BIT(28) : 0) +#define G1_REG_PP_SCALING1 G1_SWREG(80) +#define G1_REG_PP_PADD_B(v) (((v) << 18) & GENMASK(22, 18)) +#define G1_REG_PP_MASK_R G1_SWREG(82) +#define G1_REG_PP_MASK_G G1_SWREG(83) +#define G1_REG_PP_MASK_B G1_SWREG(84) +#define G1_REG_PP_CONTROL G1_SWREG(85) +#define G1_REG_PP_CONTROL_IN_FMT(v) (((v) << 29) & GENMASK(31, 29)) +#define G1_REG_PP_CONTROL_OUT_FMT(v) (((v) << 26) & GENMASK(28, 26)) +#define G1_REG_PP_CONTROL_OUT_HEIGHT(v) (((v) << 15) & GENMASK(25, 15)) +#define G1_REG_PP_CONTROL_OUT_WIDTH(v) (((v) << 4) & GENMASK(14, 4)) +#define G1_REG_PP_MASK1_ORIG_WIDTH G1_SWREG(88) +#define G1_REG_PP_ORIG_WIDTH(v) (((v) << 23) & GENMASK(31, 23)) +#define G1_REG_PP_DISPLAY_WIDTH G1_SWREG(92) +#define G1_REG_PP_FUSE G1_SWREG(99) + +#endif /* HANTRO_G1_REGS_H_ */ diff --git a/drivers/media/platform/verisilicon/hantro_g1_vp8_dec.c b/drivers/media/platform/verisilicon/hantro_g1_vp8_dec.c new file mode 100644 index 000000000..851eb67f1 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g1_vp8_dec.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VP8 codec driver + * + * Copyright (C) 2019 Rockchip Electronics Co., Ltd. + * ZhiChao Yu <zhichao.yu@rock-chips.com> + * + * Copyright (C) 2019 Google, Inc. + * Tomasz Figa <tfiga@chromium.org> + */ + +#include <media/v4l2-mem2mem.h> + +#include "hantro_hw.h" +#include "hantro.h" +#include "hantro_g1_regs.h" + +/* DCT partition base address regs */ +static const struct hantro_reg vp8_dec_dct_base[8] = { + { G1_REG_ADDR_STR, 0, 0xffffffff }, + { G1_REG_ADDR_REF(8), 0, 0xffffffff }, + { G1_REG_ADDR_REF(9), 0, 0xffffffff }, + { G1_REG_ADDR_REF(10), 0, 0xffffffff }, + { G1_REG_ADDR_REF(11), 0, 0xffffffff }, + { G1_REG_ADDR_REF(12), 0, 0xffffffff }, + { G1_REG_ADDR_REF(14), 0, 0xffffffff }, + { G1_REG_ADDR_REF(15), 0, 0xffffffff }, +}; + +/* Loop filter level regs */ +static const struct hantro_reg vp8_dec_lf_level[4] = { + { G1_REG_REF_PIC(2), 18, 0x3f }, + { G1_REG_REF_PIC(2), 12, 0x3f }, + { G1_REG_REF_PIC(2), 6, 0x3f }, + { G1_REG_REF_PIC(2), 0, 0x3f }, +}; + +/* Macroblock loop filter level adjustment regs */ +static const struct hantro_reg vp8_dec_mb_adj[4] = { + { G1_REG_REF_PIC(0), 21, 0x7f }, + { G1_REG_REF_PIC(0), 14, 0x7f }, + { G1_REG_REF_PIC(0), 7, 0x7f }, + { G1_REG_REF_PIC(0), 0, 0x7f }, +}; + +/* Reference frame adjustment regs */ +static const struct hantro_reg vp8_dec_ref_adj[4] = { + { G1_REG_REF_PIC(1), 21, 0x7f }, + { G1_REG_REF_PIC(1), 14, 0x7f }, + { G1_REG_REF_PIC(1), 7, 0x7f }, + { G1_REG_REF_PIC(1), 0, 0x7f }, +}; + +/* Quantizer */ +static const struct hantro_reg vp8_dec_quant[4] = { + { G1_REG_REF_PIC(3), 11, 0x7ff }, + { G1_REG_REF_PIC(3), 0, 0x7ff }, + { G1_REG_BD_REF_PIC(4), 11, 0x7ff }, + { G1_REG_BD_REF_PIC(4), 0, 0x7ff }, +}; + +/* Quantizer delta regs */ +static const struct hantro_reg vp8_dec_quant_delta[5] = { + { G1_REG_REF_PIC(3), 27, 0x1f }, + { G1_REG_REF_PIC(3), 22, 0x1f }, + { G1_REG_BD_REF_PIC(4), 27, 0x1f }, + { G1_REG_BD_REF_PIC(4), 22, 0x1f }, + { G1_REG_BD_P_REF_PIC, 27, 0x1f }, +}; + +/* DCT partition start bits regs */ +static const struct hantro_reg vp8_dec_dct_start_bits[8] = { + { G1_REG_DEC_CTRL2, 26, 0x3f }, { G1_REG_DEC_CTRL4, 26, 0x3f }, + { G1_REG_DEC_CTRL4, 20, 0x3f }, { G1_REG_DEC_CTRL7, 24, 0x3f }, + { G1_REG_DEC_CTRL7, 18, 0x3f }, { G1_REG_DEC_CTRL7, 12, 0x3f }, + { G1_REG_DEC_CTRL7, 6, 0x3f }, { G1_REG_DEC_CTRL7, 0, 0x3f }, +}; + +/* Precision filter tap regs */ +static const struct hantro_reg vp8_dec_pred_bc_tap[8][4] = { + { + { G1_REG_PRED_FLT, 22, 0x3ff }, + { G1_REG_PRED_FLT, 12, 0x3ff }, + { G1_REG_PRED_FLT, 2, 0x3ff }, + { G1_REG_REF_PIC(4), 22, 0x3ff }, + }, + { + { G1_REG_REF_PIC(4), 12, 0x3ff }, + { G1_REG_REF_PIC(4), 2, 0x3ff }, + { G1_REG_REF_PIC(5), 22, 0x3ff }, + { G1_REG_REF_PIC(5), 12, 0x3ff }, + }, + { + { G1_REG_REF_PIC(5), 2, 0x3ff }, + { G1_REG_REF_PIC(6), 22, 0x3ff }, + { G1_REG_REF_PIC(6), 12, 0x3ff }, + { G1_REG_REF_PIC(6), 2, 0x3ff }, + }, + { + { G1_REG_REF_PIC(7), 22, 0x3ff }, + { G1_REG_REF_PIC(7), 12, 0x3ff }, + { G1_REG_REF_PIC(7), 2, 0x3ff }, + { G1_REG_LT_REF, 22, 0x3ff }, + }, + { + { G1_REG_LT_REF, 12, 0x3ff }, + { G1_REG_LT_REF, 2, 0x3ff }, + { G1_REG_VALID_REF, 22, 0x3ff }, + { G1_REG_VALID_REF, 12, 0x3ff }, + }, + { + { G1_REG_VALID_REF, 2, 0x3ff }, + { G1_REG_BD_REF_PIC(0), 22, 0x3ff }, + { G1_REG_BD_REF_PIC(0), 12, 0x3ff }, + { G1_REG_BD_REF_PIC(0), 2, 0x3ff }, + }, + { + { G1_REG_BD_REF_PIC(1), 22, 0x3ff }, + { G1_REG_BD_REF_PIC(1), 12, 0x3ff }, + { G1_REG_BD_REF_PIC(1), 2, 0x3ff }, + { G1_REG_BD_REF_PIC(2), 22, 0x3ff }, + }, + { + { G1_REG_BD_REF_PIC(2), 12, 0x3ff }, + { G1_REG_BD_REF_PIC(2), 2, 0x3ff }, + { G1_REG_BD_REF_PIC(3), 22, 0x3ff }, + { G1_REG_BD_REF_PIC(3), 12, 0x3ff }, + }, +}; + +/* + * Set loop filters + */ +static void cfg_lf(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + const struct v4l2_vp8_segment *seg = &hdr->segment; + const struct v4l2_vp8_loop_filter *lf = &hdr->lf; + struct hantro_dev *vpu = ctx->dev; + unsigned int i; + u32 reg; + + if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) { + hantro_reg_write(vpu, &vp8_dec_lf_level[0], lf->level); + } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) { + for (i = 0; i < 4; i++) { + u32 lf_level = clamp(lf->level + seg->lf_update[i], + 0, 63); + + hantro_reg_write(vpu, &vp8_dec_lf_level[i], lf_level); + } + } else { + for (i = 0; i < 4; i++) + hantro_reg_write(vpu, &vp8_dec_lf_level[i], + seg->lf_update[i]); + } + + reg = G1_REG_REF_PIC_FILT_SHARPNESS(lf->sharpness_level); + if (lf->flags & V4L2_VP8_LF_FILTER_TYPE_SIMPLE) + reg |= G1_REG_REF_PIC_FILT_TYPE_E; + vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(0)); + + if (lf->flags & V4L2_VP8_LF_ADJ_ENABLE) { + for (i = 0; i < 4; i++) { + hantro_reg_write(vpu, &vp8_dec_mb_adj[i], + lf->mb_mode_delta[i]); + hantro_reg_write(vpu, &vp8_dec_ref_adj[i], + lf->ref_frm_delta[i]); + } + } +} + +/* + * Set quantization parameters + */ +static void cfg_qp(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + const struct v4l2_vp8_quantization *q = &hdr->quant; + const struct v4l2_vp8_segment *seg = &hdr->segment; + struct hantro_dev *vpu = ctx->dev; + unsigned int i; + + if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) { + hantro_reg_write(vpu, &vp8_dec_quant[0], q->y_ac_qi); + } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) { + for (i = 0; i < 4; i++) { + u32 quant = clamp(q->y_ac_qi + seg->quant_update[i], + 0, 127); + + hantro_reg_write(vpu, &vp8_dec_quant[i], quant); + } + } else { + for (i = 0; i < 4; i++) + hantro_reg_write(vpu, &vp8_dec_quant[i], + seg->quant_update[i]); + } + + hantro_reg_write(vpu, &vp8_dec_quant_delta[0], q->y_dc_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[1], q->y2_dc_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[2], q->y2_ac_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[3], q->uv_dc_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[4], q->uv_ac_delta); +} + +/* + * set control partition and DCT partition regs + * + * VP8 frame stream data layout: + * + * first_part_size parttion_sizes[0] + * ^ ^ + * src_dma | | + * ^ +--------+------+ +-----+-----+ + * | | control part | | | + * +--------+----------------+------------------+-----------+-----+-----------+ + * | tag 3B | extra 7B | hdr | mb_data | DCT sz | DCT part0 | ... | DCT partn | + * +--------+-----------------------------------+-----------+-----+-----------+ + * | | | | + * v +----+---+ v + * mb_start | src_dma_end + * v + * DCT size part + * (num_dct-1)*3B + * Note: + * 1. only key-frames have extra 7-bytes + * 2. all offsets are base on src_dma + * 3. number of DCT parts is 1, 2, 4 or 8 + * 4. the addresses set to the VPU must be 64-bits aligned + */ +static void cfg_parts(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_src; + u32 first_part_offset = V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) ? 10 : 3; + u32 mb_size, mb_offset_bytes, mb_offset_bits, mb_start_bits; + u32 dct_size_part_size, dct_part_offset; + struct hantro_reg reg; + dma_addr_t src_dma; + u32 dct_part_total_len = 0; + u32 count = 0; + unsigned int i; + + vb2_src = hantro_get_src_buf(ctx); + src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0); + + /* + * Calculate control partition mb data info + * @first_part_header_bits: bits offset of mb data from first + * part start pos + * @mb_offset_bits: bits offset of mb data from src_dma + * base addr + * @mb_offset_byte: bytes offset of mb data from src_dma + * base addr + * @mb_start_bits: bits offset of mb data from mb data + * 64bits alignment addr + */ + mb_offset_bits = first_part_offset * 8 + + hdr->first_part_header_bits + 8; + mb_offset_bytes = mb_offset_bits / 8; + mb_start_bits = mb_offset_bits - + (mb_offset_bytes & (~DEC_8190_ALIGN_MASK)) * 8; + mb_size = hdr->first_part_size - + (mb_offset_bytes - first_part_offset) + + (mb_offset_bytes & DEC_8190_ALIGN_MASK); + + /* Macroblock data aligned base addr */ + vdpu_write_relaxed(vpu, (mb_offset_bytes & (~DEC_8190_ALIGN_MASK)) + + src_dma, G1_REG_ADDR_REF(13)); + + /* Macroblock data start bits */ + reg.base = G1_REG_DEC_CTRL2; + reg.mask = 0x3f; + reg.shift = 18; + hantro_reg_write(vpu, ®, mb_start_bits); + + /* Macroblock aligned data length */ + reg.base = G1_REG_DEC_CTRL6; + reg.mask = 0x3fffff; + reg.shift = 0; + hantro_reg_write(vpu, ®, mb_size + 1); + + /* + * Calculate DCT partition info + * @dct_size_part_size: Containing sizes of DCT part, every DCT part + * has 3 bytes to store its size, except the last + * DCT part + * @dct_part_offset: bytes offset of DCT parts from src_dma base addr + * @dct_part_total_len: total size of all DCT parts + */ + dct_size_part_size = (hdr->num_dct_parts - 1) * 3; + dct_part_offset = first_part_offset + hdr->first_part_size; + for (i = 0; i < hdr->num_dct_parts; i++) + dct_part_total_len += hdr->dct_part_sizes[i]; + dct_part_total_len += dct_size_part_size; + dct_part_total_len += (dct_part_offset & DEC_8190_ALIGN_MASK); + + /* Number of DCT partitions */ + reg.base = G1_REG_DEC_CTRL6; + reg.mask = 0xf; + reg.shift = 24; + hantro_reg_write(vpu, ®, hdr->num_dct_parts - 1); + + /* DCT partition length */ + vdpu_write_relaxed(vpu, + G1_REG_DEC_CTRL3_STREAM_LEN(dct_part_total_len), + G1_REG_DEC_CTRL3); + + /* DCT partitions base address */ + for (i = 0; i < hdr->num_dct_parts; i++) { + u32 byte_offset = dct_part_offset + dct_size_part_size + count; + u32 base_addr = byte_offset + src_dma; + + hantro_reg_write(vpu, &vp8_dec_dct_base[i], + base_addr & (~DEC_8190_ALIGN_MASK)); + + hantro_reg_write(vpu, &vp8_dec_dct_start_bits[i], + (byte_offset & DEC_8190_ALIGN_MASK) * 8); + + count += hdr->dct_part_sizes[i]; + } +} + +/* + * prediction filter taps + * normal 6-tap filters + */ +static void cfg_tap(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_reg reg; + u32 val = 0; + int i, j; + + reg.base = G1_REG_BD_REF_PIC(3); + reg.mask = 0xf; + + if ((hdr->version & 0x03) != 0) + return; /* Tap filter not used. */ + + for (i = 0; i < 8; i++) { + val = (hantro_vp8_dec_mc_filter[i][0] << 2) | + hantro_vp8_dec_mc_filter[i][5]; + + for (j = 0; j < 4; j++) + hantro_reg_write(vpu, &vp8_dec_pred_bc_tap[i][j], + hantro_vp8_dec_mc_filter[i][j + 1]); + + switch (i) { + case 2: + reg.shift = 8; + break; + case 4: + reg.shift = 4; + break; + case 6: + reg.shift = 0; + break; + default: + continue; + } + + hantro_reg_write(vpu, ®, val); + } +} + +static void cfg_ref(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) +{ + struct hantro_dev *vpu = ctx->dev; + dma_addr_t ref; + + + ref = hantro_get_ref(ctx, hdr->last_frame_ts); + if (!ref) { + vpu_debug(0, "failed to find last frame ts=%llu\n", + hdr->last_frame_ts); + ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + } + vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(0)); + + ref = hantro_get_ref(ctx, hdr->golden_frame_ts); + if (!ref && hdr->golden_frame_ts) + vpu_debug(0, "failed to find golden frame ts=%llu\n", + hdr->golden_frame_ts); + if (!ref) + ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN) + ref |= G1_REG_ADDR_REF_TOPC_E; + vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(4)); + + ref = hantro_get_ref(ctx, hdr->alt_frame_ts); + if (!ref && hdr->alt_frame_ts) + vpu_debug(0, "failed to find alt frame ts=%llu\n", + hdr->alt_frame_ts); + if (!ref) + ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT) + ref |= G1_REG_ADDR_REF_TOPC_E; + vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(5)); +} + +static void cfg_buffers(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) +{ + const struct v4l2_vp8_segment *seg = &hdr->segment; + struct hantro_dev *vpu = ctx->dev; + dma_addr_t dst_dma; + u32 reg; + + /* Set probability table buffer address */ + vdpu_write_relaxed(vpu, ctx->vp8_dec.prob_tbl.dma, + G1_REG_ADDR_QTABLE); + + /* Set segment map address */ + reg = G1_REG_FWD_PIC1_SEGMENT_BASE(ctx->vp8_dec.segment_map.dma); + if (seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED) { + reg |= G1_REG_FWD_PIC1_SEGMENT_E; + if (seg->flags & V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP) + reg |= G1_REG_FWD_PIC1_SEGMENT_UPD_E; + } + vdpu_write_relaxed(vpu, reg, G1_REG_FWD_PIC(0)); + + dst_dma = hantro_get_dec_buf_addr(ctx, &vb2_dst->vb2_buf); + vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DST); +} + +int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx) +{ + const struct v4l2_ctrl_vp8_frame *hdr; + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_dst; + size_t height = ctx->dst_fmt.height; + size_t width = ctx->dst_fmt.width; + u32 mb_width, mb_height; + u32 reg; + + hantro_start_prepare_run(ctx); + + hdr = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_VP8_FRAME); + if (WARN_ON(!hdr)) + return -EINVAL; + + /* Reset segment_map buffer in keyframe */ + if (V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) && ctx->vp8_dec.segment_map.cpu) + memset(ctx->vp8_dec.segment_map.cpu, 0, + ctx->vp8_dec.segment_map.size); + + hantro_vp8_prob_update(ctx, hdr); + + reg = G1_REG_CONFIG_DEC_TIMEOUT_E | + G1_REG_CONFIG_DEC_STRENDIAN_E | + G1_REG_CONFIG_DEC_INSWAP32_E | + G1_REG_CONFIG_DEC_STRSWAP32_E | + G1_REG_CONFIG_DEC_OUTSWAP32_E | + G1_REG_CONFIG_DEC_CLK_GATE_E | + G1_REG_CONFIG_DEC_IN_ENDIAN | + G1_REG_CONFIG_DEC_OUT_ENDIAN | + G1_REG_CONFIG_DEC_MAX_BURST(16); + vdpu_write_relaxed(vpu, reg, G1_REG_CONFIG); + + reg = G1_REG_DEC_CTRL0_DEC_MODE(10) | + G1_REG_DEC_CTRL0_DEC_AXI_AUTO; + if (!V4L2_VP8_FRAME_IS_KEY_FRAME(hdr)) + reg |= G1_REG_DEC_CTRL0_PIC_INTER_E; + if (!(hdr->flags & V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF)) + reg |= G1_REG_DEC_CTRL0_SKIP_MODE; + if (hdr->lf.level == 0) + reg |= G1_REG_DEC_CTRL0_FILTERING_DIS; + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL0); + + /* Frame dimensions */ + mb_width = MB_WIDTH(width); + mb_height = MB_HEIGHT(height); + reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(mb_width) | + G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(mb_height) | + G1_REG_DEC_CTRL1_PIC_MB_W_EXT(mb_width >> 9) | + G1_REG_DEC_CTRL1_PIC_MB_H_EXT(mb_height >> 8); + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL1); + + /* Boolean decoder */ + reg = G1_REG_DEC_CTRL2_BOOLEAN_RANGE(hdr->coder_state.range) + | G1_REG_DEC_CTRL2_BOOLEAN_VALUE(hdr->coder_state.value); + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL2); + + reg = 0; + if (hdr->version != 3) + reg |= G1_REG_DEC_CTRL4_VC1_HEIGHT_EXT; + if (hdr->version & 0x3) + reg |= G1_REG_DEC_CTRL4_BILIN_MC_E; + vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL4); + + cfg_lf(ctx, hdr); + cfg_qp(ctx, hdr); + cfg_parts(ctx, hdr); + cfg_tap(ctx, hdr); + + vb2_dst = hantro_get_dst_buf(ctx); + cfg_ref(ctx, hdr, vb2_dst); + cfg_buffers(ctx, hdr, vb2_dst); + + hantro_end_prepare_run(ctx); + + vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/hantro_g2.c b/drivers/media/platform/verisilicon/hantro_g2.c new file mode 100644 index 000000000..ee5f14c5f --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g2.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2021 Collabora Ltd, Andrzej Pietrasiewicz <andrzej.p@collabora.com> + */ + +#include "hantro_hw.h" +#include "hantro_g2_regs.h" + +void hantro_g2_check_idle(struct hantro_dev *vpu) +{ + int i; + + for (i = 0; i < 3; i++) { + u32 status; + + /* Make sure the VPU is idle */ + status = vdpu_read(vpu, G2_REG_INTERRUPT); + if (status & G2_REG_INTERRUPT_DEC_E) { + dev_warn(vpu->dev, "device still running, aborting"); + status |= G2_REG_INTERRUPT_DEC_ABORT_E | G2_REG_INTERRUPT_DEC_IRQ_DIS; + vdpu_write(vpu, status, G2_REG_INTERRUPT); + } + } +} + +irqreturn_t hantro_g2_irq(int irq, void *dev_id) +{ + struct hantro_dev *vpu = dev_id; + enum vb2_buffer_state state; + u32 status; + + status = vdpu_read(vpu, G2_REG_INTERRUPT); + state = (status & G2_REG_INTERRUPT_DEC_RDY_INT) ? + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; + + vdpu_write(vpu, 0, G2_REG_INTERRUPT); + vdpu_write(vpu, G2_REG_CONFIG_DEC_CLK_GATE_E, G2_REG_CONFIG); + + hantro_irq_done(vpu, state); + + return IRQ_HANDLED; +} diff --git a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c new file mode 100644 index 000000000..a9d4ac84a --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU HEVC codec driver + * + * Copyright (C) 2020 Safran Passenger Innovations LLC + */ + +#include "hantro_hw.h" +#include "hantro_g2_regs.h" + +#define G2_ALIGN 16 + +static size_t hantro_hevc_chroma_offset(struct hantro_ctx *ctx) +{ + return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8; +} + +static size_t hantro_hevc_motion_vectors_offset(struct hantro_ctx *ctx) +{ + size_t cr_offset = hantro_hevc_chroma_offset(ctx); + + return ALIGN((cr_offset * 3) / 2, G2_ALIGN); +} + +static void prepare_tile_info_buffer(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps; + const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; + u16 *p = (u16 *)((u8 *)ctx->hevc_dec.tile_sizes.cpu); + unsigned int num_tile_rows = pps->num_tile_rows_minus1 + 1; + unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1; + unsigned int pic_width_in_ctbs, pic_height_in_ctbs; + unsigned int max_log2_ctb_size, ctb_size; + bool tiles_enabled, uniform_spacing; + u32 no_chroma = 0; + + tiles_enabled = !!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED); + uniform_spacing = !!(pps->flags & V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING); + + hantro_reg_write(vpu, &g2_tile_e, tiles_enabled); + + max_log2_ctb_size = sps->log2_min_luma_coding_block_size_minus3 + 3 + + sps->log2_diff_max_min_luma_coding_block_size; + pic_width_in_ctbs = (sps->pic_width_in_luma_samples + + (1 << max_log2_ctb_size) - 1) >> max_log2_ctb_size; + pic_height_in_ctbs = (sps->pic_height_in_luma_samples + (1 << max_log2_ctb_size) - 1) + >> max_log2_ctb_size; + ctb_size = 1 << max_log2_ctb_size; + + vpu_debug(1, "Preparing tile sizes buffer for %dx%d CTBs (CTB size %d)\n", + pic_width_in_ctbs, pic_height_in_ctbs, ctb_size); + + if (tiles_enabled) { + unsigned int i, j, h; + + vpu_debug(1, "Tiles enabled! %dx%d\n", num_tile_cols, num_tile_rows); + + hantro_reg_write(vpu, &g2_num_tile_rows, num_tile_rows); + hantro_reg_write(vpu, &g2_num_tile_cols, num_tile_cols); + + /* write width + height for each tile in pic */ + if (!uniform_spacing) { + u32 tmp_w = 0, tmp_h = 0; + + for (i = 0; i < num_tile_rows; i++) { + if (i == num_tile_rows - 1) + h = pic_height_in_ctbs - tmp_h; + else + h = pps->row_height_minus1[i] + 1; + tmp_h += h; + if (i == 0 && h == 1 && ctb_size == 16) + no_chroma = 1; + for (j = 0, tmp_w = 0; j < num_tile_cols - 1; j++) { + tmp_w += pps->column_width_minus1[j] + 1; + *p++ = pps->column_width_minus1[j] + 1; + *p++ = h; + if (i == 0 && h == 1 && ctb_size == 16) + no_chroma = 1; + } + /* last column */ + *p++ = pic_width_in_ctbs - tmp_w; + *p++ = h; + } + } else { /* uniform spacing */ + u32 tmp, prev_h, prev_w; + + for (i = 0, prev_h = 0; i < num_tile_rows; i++) { + tmp = (i + 1) * pic_height_in_ctbs / num_tile_rows; + h = tmp - prev_h; + prev_h = tmp; + if (i == 0 && h == 1 && ctb_size == 16) + no_chroma = 1; + for (j = 0, prev_w = 0; j < num_tile_cols; j++) { + tmp = (j + 1) * pic_width_in_ctbs / num_tile_cols; + *p++ = tmp - prev_w; + *p++ = h; + if (j == 0 && + (pps->column_width_minus1[0] + 1) == 1 && + ctb_size == 16) + no_chroma = 1; + prev_w = tmp; + } + } + } + } else { + hantro_reg_write(vpu, &g2_num_tile_rows, 1); + hantro_reg_write(vpu, &g2_num_tile_cols, 1); + + /* There's one tile, with dimensions equal to pic size. */ + p[0] = pic_width_in_ctbs; + p[1] = pic_height_in_ctbs; + } + + if (no_chroma) + vpu_debug(1, "%s: no chroma!\n", __func__); +} + +static int compute_header_skip_length(struct hantro_ctx *ctx) +{ + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params; + const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; + const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps; + int skip = 0; + + if (pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT) + /* size of pic_output_flag */ + skip++; + + if (sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE) + /* size of pic_order_cnt_lsb */ + skip += 2; + + if (!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC)) { + /* size of pic_order_cnt_lsb */ + skip += sps->log2_max_pic_order_cnt_lsb_minus4 + 4; + + /* size of short_term_ref_pic_set_sps_flag */ + skip++; + + if (decode_params->short_term_ref_pic_set_size) + /* size of st_ref_pic_set( num_short_term_ref_pic_sets ) */ + skip += decode_params->short_term_ref_pic_set_size; + else if (sps->num_short_term_ref_pic_sets > 1) + skip += fls(sps->num_short_term_ref_pic_sets - 1); + + skip += decode_params->long_term_ref_pic_set_size; + } + + return skip; +} + +static void set_params(struct hantro_ctx *ctx) +{ + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; + const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps; + const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params; + struct hantro_dev *vpu = ctx->dev; + u32 min_log2_cb_size, max_log2_ctb_size, min_cb_size, max_ctb_size; + u32 pic_width_in_min_cbs, pic_height_in_min_cbs; + u32 pic_width_aligned, pic_height_aligned; + u32 partial_ctb_x, partial_ctb_y; + + hantro_reg_write(vpu, &g2_bit_depth_y_minus8, sps->bit_depth_luma_minus8); + hantro_reg_write(vpu, &g2_bit_depth_c_minus8, sps->bit_depth_chroma_minus8); + + hantro_reg_write(vpu, &g2_hdr_skip_length, compute_header_skip_length(ctx)); + + min_log2_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3; + max_log2_ctb_size = min_log2_cb_size + sps->log2_diff_max_min_luma_coding_block_size; + + hantro_reg_write(vpu, &g2_min_cb_size, min_log2_cb_size); + hantro_reg_write(vpu, &g2_max_cb_size, max_log2_ctb_size); + + min_cb_size = 1 << min_log2_cb_size; + max_ctb_size = 1 << max_log2_ctb_size; + + pic_width_in_min_cbs = sps->pic_width_in_luma_samples / min_cb_size; + pic_height_in_min_cbs = sps->pic_height_in_luma_samples / min_cb_size; + pic_width_aligned = ALIGN(sps->pic_width_in_luma_samples, max_ctb_size); + pic_height_aligned = ALIGN(sps->pic_height_in_luma_samples, max_ctb_size); + + partial_ctb_x = !!(sps->pic_width_in_luma_samples != pic_width_aligned); + partial_ctb_y = !!(sps->pic_height_in_luma_samples != pic_height_aligned); + + hantro_reg_write(vpu, &g2_partial_ctb_x, partial_ctb_x); + hantro_reg_write(vpu, &g2_partial_ctb_y, partial_ctb_y); + + hantro_reg_write(vpu, &g2_pic_width_in_cbs, pic_width_in_min_cbs); + hantro_reg_write(vpu, &g2_pic_height_in_cbs, pic_height_in_min_cbs); + + hantro_reg_write(vpu, &g2_pic_width_4x4, + (pic_width_in_min_cbs * min_cb_size) / 4); + hantro_reg_write(vpu, &g2_pic_height_4x4, + (pic_height_in_min_cbs * min_cb_size) / 4); + + hantro_reg_write(vpu, &hevc_max_inter_hierdepth, + sps->max_transform_hierarchy_depth_inter); + hantro_reg_write(vpu, &hevc_max_intra_hierdepth, + sps->max_transform_hierarchy_depth_intra); + hantro_reg_write(vpu, &hevc_min_trb_size, + sps->log2_min_luma_transform_block_size_minus2 + 2); + hantro_reg_write(vpu, &hevc_max_trb_size, + sps->log2_min_luma_transform_block_size_minus2 + 2 + + sps->log2_diff_max_min_luma_transform_block_size); + + hantro_reg_write(vpu, &g2_tempor_mvp_e, + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) && + !(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC)); + hantro_reg_write(vpu, &g2_strong_smooth_e, + !!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED)); + hantro_reg_write(vpu, &g2_asym_pred_e, + !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED)); + hantro_reg_write(vpu, &g2_sao_e, + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET)); + hantro_reg_write(vpu, &g2_sign_data_hide, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED)); + + if (pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED) { + hantro_reg_write(vpu, &g2_cu_qpd_e, 1); + hantro_reg_write(vpu, &g2_max_cu_qpd_depth, pps->diff_cu_qp_delta_depth); + } else { + hantro_reg_write(vpu, &g2_cu_qpd_e, 0); + hantro_reg_write(vpu, &g2_max_cu_qpd_depth, 0); + } + + hantro_reg_write(vpu, &g2_cb_qp_offset, pps->pps_cb_qp_offset); + hantro_reg_write(vpu, &g2_cr_qp_offset, pps->pps_cr_qp_offset); + + hantro_reg_write(vpu, &g2_filt_offset_beta, pps->pps_beta_offset_div2); + hantro_reg_write(vpu, &g2_filt_offset_tc, pps->pps_tc_offset_div2); + hantro_reg_write(vpu, &g2_slice_hdr_ext_e, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT)); + hantro_reg_write(vpu, &g2_slice_hdr_ext_bits, pps->num_extra_slice_header_bits); + hantro_reg_write(vpu, &g2_slice_chqp_present, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT)); + hantro_reg_write(vpu, &g2_weight_bipr_idc, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED)); + hantro_reg_write(vpu, &g2_transq_bypass, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED)); + hantro_reg_write(vpu, &g2_list_mod_e, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT)); + hantro_reg_write(vpu, &g2_entropy_sync_e, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)); + hantro_reg_write(vpu, &g2_cabac_init_present, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT)); + hantro_reg_write(vpu, &g2_idr_pic_e, + !!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC)); + hantro_reg_write(vpu, &hevc_parallel_merge, + pps->log2_parallel_merge_level_minus2 + 2); + hantro_reg_write(vpu, &g2_pcm_filt_d, + !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED)); + hantro_reg_write(vpu, &g2_pcm_e, + !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)); + if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED) { + hantro_reg_write(vpu, &g2_max_pcm_size, + sps->log2_diff_max_min_pcm_luma_coding_block_size + + sps->log2_min_pcm_luma_coding_block_size_minus3 + 3); + hantro_reg_write(vpu, &g2_min_pcm_size, + sps->log2_min_pcm_luma_coding_block_size_minus3 + 3); + hantro_reg_write(vpu, &g2_bit_depth_pcm_y, + sps->pcm_sample_bit_depth_luma_minus1 + 1); + hantro_reg_write(vpu, &g2_bit_depth_pcm_c, + sps->pcm_sample_bit_depth_chroma_minus1 + 1); + } else { + hantro_reg_write(vpu, &g2_max_pcm_size, 0); + hantro_reg_write(vpu, &g2_min_pcm_size, 0); + hantro_reg_write(vpu, &g2_bit_depth_pcm_y, 0); + hantro_reg_write(vpu, &g2_bit_depth_pcm_c, 0); + } + + hantro_reg_write(vpu, &g2_start_code_e, 1); + hantro_reg_write(vpu, &g2_init_qp, pps->init_qp_minus26 + 26); + hantro_reg_write(vpu, &g2_weight_pred_e, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED)); + hantro_reg_write(vpu, &g2_cabac_init_present, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT)); + hantro_reg_write(vpu, &g2_const_intra_e, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED)); + hantro_reg_write(vpu, &g2_transform_skip, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED)); + hantro_reg_write(vpu, &g2_out_filtering_dis, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER)); + hantro_reg_write(vpu, &g2_filt_ctrl_pres, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)); + hantro_reg_write(vpu, &g2_dependent_slice, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED)); + hantro_reg_write(vpu, &g2_filter_override, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED)); + hantro_reg_write(vpu, &g2_refidx0_active, + pps->num_ref_idx_l0_default_active_minus1 + 1); + hantro_reg_write(vpu, &g2_refidx1_active, + pps->num_ref_idx_l1_default_active_minus1 + 1); + hantro_reg_write(vpu, &g2_apf_threshold, 8); +} + +static void set_ref_pic_list(struct hantro_ctx *ctx) +{ + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + struct hantro_dev *vpu = ctx->dev; + const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params; + u32 list0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {}; + u32 list1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {}; + static const struct hantro_reg ref_pic_regs0[] = { + hevc_rlist_f0, + hevc_rlist_f1, + hevc_rlist_f2, + hevc_rlist_f3, + hevc_rlist_f4, + hevc_rlist_f5, + hevc_rlist_f6, + hevc_rlist_f7, + hevc_rlist_f8, + hevc_rlist_f9, + hevc_rlist_f10, + hevc_rlist_f11, + hevc_rlist_f12, + hevc_rlist_f13, + hevc_rlist_f14, + hevc_rlist_f15, + }; + static const struct hantro_reg ref_pic_regs1[] = { + hevc_rlist_b0, + hevc_rlist_b1, + hevc_rlist_b2, + hevc_rlist_b3, + hevc_rlist_b4, + hevc_rlist_b5, + hevc_rlist_b6, + hevc_rlist_b7, + hevc_rlist_b8, + hevc_rlist_b9, + hevc_rlist_b10, + hevc_rlist_b11, + hevc_rlist_b12, + hevc_rlist_b13, + hevc_rlist_b14, + hevc_rlist_b15, + }; + unsigned int i, j; + + /* List 0 contains: short term before, short term after and long term */ + j = 0; + for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list0); i++) + list0[j++] = decode_params->poc_st_curr_before[i]; + for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list0); i++) + list0[j++] = decode_params->poc_st_curr_after[i]; + for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list0); i++) + list0[j++] = decode_params->poc_lt_curr[i]; + + /* Fill the list, copying over and over */ + i = 0; + while (j < ARRAY_SIZE(list0)) + list0[j++] = list0[i++]; + + j = 0; + for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list1); i++) + list1[j++] = decode_params->poc_st_curr_after[i]; + for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list1); i++) + list1[j++] = decode_params->poc_st_curr_before[i]; + for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list1); i++) + list1[j++] = decode_params->poc_lt_curr[i]; + + i = 0; + while (j < ARRAY_SIZE(list1)) + list1[j++] = list1[i++]; + + for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) { + hantro_reg_write(vpu, &ref_pic_regs0[i], list0[i]); + hantro_reg_write(vpu, &ref_pic_regs1[i], list1[i]); + } +} + +static int set_ref(struct hantro_ctx *ctx) +{ + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps; + const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params; + const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb; + dma_addr_t luma_addr, chroma_addr, mv_addr = 0; + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_dst; + struct hantro_decoded_buffer *dst; + size_t cr_offset = hantro_hevc_chroma_offset(ctx); + size_t mv_offset = hantro_hevc_motion_vectors_offset(ctx); + u32 max_ref_frames; + u16 dpb_longterm_e; + static const struct hantro_reg cur_poc[] = { + hevc_cur_poc_00, + hevc_cur_poc_01, + hevc_cur_poc_02, + hevc_cur_poc_03, + hevc_cur_poc_04, + hevc_cur_poc_05, + hevc_cur_poc_06, + hevc_cur_poc_07, + hevc_cur_poc_08, + hevc_cur_poc_09, + hevc_cur_poc_10, + hevc_cur_poc_11, + hevc_cur_poc_12, + hevc_cur_poc_13, + hevc_cur_poc_14, + hevc_cur_poc_15, + }; + unsigned int i; + + max_ref_frames = decode_params->num_poc_lt_curr + + decode_params->num_poc_st_curr_before + + decode_params->num_poc_st_curr_after; + /* + * Set max_ref_frames to non-zero to avoid HW hang when decoding + * badly marked I-frames. + */ + max_ref_frames = max_ref_frames ? max_ref_frames : 1; + hantro_reg_write(vpu, &g2_num_ref_frames, max_ref_frames); + hantro_reg_write(vpu, &g2_filter_over_slices, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED)); + hantro_reg_write(vpu, &g2_filter_over_tiles, + !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED)); + + /* + * Write POC count diff from current pic. + */ + for (i = 0; i < decode_params->num_active_dpb_entries && i < ARRAY_SIZE(cur_poc); i++) { + char poc_diff = decode_params->pic_order_cnt_val - dpb[i].pic_order_cnt_val; + + hantro_reg_write(vpu, &cur_poc[i], poc_diff); + } + + if (i < ARRAY_SIZE(cur_poc)) { + /* + * After the references, fill one entry pointing to itself, + * i.e. difference is zero. + */ + hantro_reg_write(vpu, &cur_poc[i], 0); + i++; + } + + /* Fill the rest with the current picture */ + for (; i < ARRAY_SIZE(cur_poc); i++) + hantro_reg_write(vpu, &cur_poc[i], decode_params->pic_order_cnt_val); + + set_ref_pic_list(ctx); + + /* We will only keep the reference pictures that are still used */ + hantro_hevc_ref_init(ctx); + + /* Set up addresses of DPB buffers */ + dpb_longterm_e = 0; + for (i = 0; i < decode_params->num_active_dpb_entries && + i < (V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1); i++) { + luma_addr = hantro_hevc_get_ref_buf(ctx, dpb[i].pic_order_cnt_val); + if (!luma_addr) + return -ENOMEM; + + chroma_addr = luma_addr + cr_offset; + mv_addr = luma_addr + mv_offset; + + if (dpb[i].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE) + dpb_longterm_e |= BIT(V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1 - i); + + hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr); + hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr); + hantro_write_addr(vpu, G2_REF_MV_ADDR(i), mv_addr); + } + + vb2_dst = hantro_get_dst_buf(ctx); + dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf); + luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf); + if (!luma_addr) + return -ENOMEM; + + if (hantro_hevc_add_ref_buf(ctx, decode_params->pic_order_cnt_val, luma_addr)) + return -EINVAL; + + chroma_addr = luma_addr + cr_offset; + mv_addr = luma_addr + mv_offset; + + hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr); + hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr); + hantro_write_addr(vpu, G2_REF_MV_ADDR(i++), mv_addr); + + hantro_write_addr(vpu, G2_OUT_LUMA_ADDR, luma_addr); + hantro_write_addr(vpu, G2_OUT_CHROMA_ADDR, chroma_addr); + hantro_write_addr(vpu, G2_OUT_MV_ADDR, mv_addr); + + for (; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) { + hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), 0); + hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), 0); + hantro_write_addr(vpu, G2_REF_MV_ADDR(i), 0); + } + + hantro_reg_write(vpu, &g2_refer_lterm_e, dpb_longterm_e); + + return 0; +} + +static void set_buffers(struct hantro_ctx *ctx) +{ + struct vb2_v4l2_buffer *src_buf; + struct hantro_dev *vpu = ctx->dev; + dma_addr_t src_dma; + u32 src_len, src_buf_len; + + src_buf = hantro_get_src_buf(ctx); + + /* Source (stream) buffer. */ + src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + src_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0); + src_buf_len = vb2_plane_size(&src_buf->vb2_buf, 0); + + hantro_write_addr(vpu, G2_STREAM_ADDR, src_dma); + hantro_reg_write(vpu, &g2_stream_len, src_len); + hantro_reg_write(vpu, &g2_strm_buffer_len, src_buf_len); + hantro_reg_write(vpu, &g2_strm_start_offset, 0); + hantro_reg_write(vpu, &g2_write_mvs_e, 1); + + hantro_write_addr(vpu, G2_TILE_SIZES_ADDR, ctx->hevc_dec.tile_sizes.dma); + hantro_write_addr(vpu, G2_TILE_FILTER_ADDR, ctx->hevc_dec.tile_filter.dma); + hantro_write_addr(vpu, G2_TILE_SAO_ADDR, ctx->hevc_dec.tile_sao.dma); + hantro_write_addr(vpu, G2_TILE_BSD_ADDR, ctx->hevc_dec.tile_bsd.dma); +} + +static void prepare_scaling_list_buffer(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + const struct v4l2_ctrl_hevc_scaling_matrix *sc = ctrls->scaling; + const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; + u8 *p = ((u8 *)ctx->hevc_dec.scaling_lists.cpu); + unsigned int scaling_list_enabled; + unsigned int i, j, k; + + scaling_list_enabled = !!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED); + hantro_reg_write(vpu, &g2_scaling_list_e, scaling_list_enabled); + + if (!scaling_list_enabled) + return; + + for (i = 0; i < ARRAY_SIZE(sc->scaling_list_dc_coef_16x16); i++) + *p++ = sc->scaling_list_dc_coef_16x16[i]; + + for (i = 0; i < ARRAY_SIZE(sc->scaling_list_dc_coef_32x32); i++) + *p++ = sc->scaling_list_dc_coef_32x32[i]; + + /* 128-bit boundary */ + p += 8; + + /* write scaling lists column by column */ + + for (i = 0; i < 6; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 4; k++) + *p++ = sc->scaling_list_4x4[i][4 * k + j]; + + for (i = 0; i < 6; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k++) + *p++ = sc->scaling_list_8x8[i][8 * k + j]; + + for (i = 0; i < 6; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k++) + *p++ = sc->scaling_list_16x16[i][8 * k + j]; + + for (i = 0; i < 2; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 8; k++) + *p++ = sc->scaling_list_32x32[i][8 * k + j]; + + hantro_write_addr(vpu, G2_HEVC_SCALING_LIST_ADDR, ctx->hevc_dec.scaling_lists.dma); +} + +int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + int ret; + + hantro_g2_check_idle(vpu); + + /* Prepare HEVC decoder context. */ + ret = hantro_hevc_dec_prepare_run(ctx); + if (ret) + return ret; + + /* Configure hardware registers. */ + set_params(ctx); + + /* set reference pictures */ + ret = set_ref(ctx); + if (ret) + return ret; + + set_buffers(ctx); + prepare_tile_info_buffer(ctx); + + prepare_scaling_list_buffer(ctx); + + hantro_end_prepare_run(ctx); + + hantro_reg_write(vpu, &g2_mode, HEVC_DEC_MODE); + hantro_reg_write(vpu, &g2_clk_gate_e, 1); + + /* Don't disable output */ + hantro_reg_write(vpu, &g2_out_dis, 0); + + /* Don't compress buffers */ + hantro_reg_write(vpu, &g2_ref_compress_bypass, 1); + + /* Bus width and max burst */ + hantro_reg_write(vpu, &g2_buswidth, BUS_WIDTH_128); + hantro_reg_write(vpu, &g2_max_burst, 16); + + /* Swap */ + hantro_reg_write(vpu, &g2_strm_swap, 0xf); + hantro_reg_write(vpu, &g2_dirmv_swap, 0xf); + hantro_reg_write(vpu, &g2_compress_swap, 0xf); + + /* Start decoding! */ + vdpu_write(vpu, G2_REG_INTERRUPT_DEC_E, G2_REG_INTERRUPT); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/hantro_g2_regs.h b/drivers/media/platform/verisilicon/hantro_g2_regs.h new file mode 100644 index 000000000..826067835 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g2_regs.h @@ -0,0 +1,325 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, Collabora + * + * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com> + */ + +#ifndef HANTRO_G2_REGS_H_ +#define HANTRO_G2_REGS_H_ + +#include "hantro.h" + +#define G2_SWREG(nr) ((nr) * 4) + +#define G2_DEC_REG(b, s, m) \ + ((const struct hantro_reg) { \ + .base = G2_SWREG(b), \ + .shift = s, \ + .mask = m, \ + }) + +#define G2_REG_VERSION G2_SWREG(0) + +#define G2_REG_INTERRUPT G2_SWREG(1) +#define G2_REG_INTERRUPT_DEC_RDY_INT BIT(12) +#define G2_REG_INTERRUPT_DEC_ABORT_E BIT(5) +#define G2_REG_INTERRUPT_DEC_IRQ_DIS BIT(4) +#define G2_REG_INTERRUPT_DEC_E BIT(0) + +#define HEVC_DEC_MODE 0xc +#define VP9_DEC_MODE 0xd + +#define BUS_WIDTH_32 0 +#define BUS_WIDTH_64 1 +#define BUS_WIDTH_128 2 +#define BUS_WIDTH_256 3 + +#define g2_strm_swap G2_DEC_REG(2, 28, 0xf) +#define g2_strm_swap_old G2_DEC_REG(2, 27, 0x1f) +#define g2_pic_swap G2_DEC_REG(2, 22, 0x1f) +#define g2_dirmv_swap G2_DEC_REG(2, 20, 0xf) +#define g2_dirmv_swap_old G2_DEC_REG(2, 17, 0x1f) +#define g2_tab0_swap_old G2_DEC_REG(2, 12, 0x1f) +#define g2_tab1_swap_old G2_DEC_REG(2, 7, 0x1f) +#define g2_tab2_swap_old G2_DEC_REG(2, 2, 0x1f) + +#define g2_mode G2_DEC_REG(3, 27, 0x1f) +#define g2_compress_swap G2_DEC_REG(3, 20, 0xf) +#define g2_ref_compress_bypass G2_DEC_REG(3, 17, 0x1) +#define g2_out_rs_e G2_DEC_REG(3, 16, 0x1) +#define g2_out_dis G2_DEC_REG(3, 15, 0x1) +#define g2_out_filtering_dis G2_DEC_REG(3, 14, 0x1) +#define g2_write_mvs_e G2_DEC_REG(3, 12, 0x1) +#define g2_tab3_swap_old G2_DEC_REG(3, 7, 0x1f) +#define g2_rscan_swap G2_DEC_REG(3, 2, 0x1f) + +#define g2_pic_width_in_cbs G2_DEC_REG(4, 19, 0x1fff) +#define g2_pic_height_in_cbs G2_DEC_REG(4, 6, 0x1fff) +#define g2_num_ref_frames G2_DEC_REG(4, 0, 0x1f) + +#define g2_start_bit G2_DEC_REG(5, 25, 0x7f) +#define g2_scaling_list_e G2_DEC_REG(5, 24, 0x1) +#define g2_cb_qp_offset G2_DEC_REG(5, 19, 0x1f) +#define g2_cr_qp_offset G2_DEC_REG(5, 14, 0x1f) +#define g2_sign_data_hide G2_DEC_REG(5, 12, 0x1) +#define g2_tempor_mvp_e G2_DEC_REG(5, 11, 0x1) +#define g2_max_cu_qpd_depth G2_DEC_REG(5, 5, 0x3f) +#define g2_cu_qpd_e G2_DEC_REG(5, 4, 0x1) +#define g2_pix_shift G2_DEC_REG(5, 0, 0xf) + +#define g2_stream_len G2_DEC_REG(6, 0, 0xffffffff) + +#define g2_cabac_init_present G2_DEC_REG(7, 31, 0x1) +#define g2_weight_pred_e G2_DEC_REG(7, 28, 0x1) +#define g2_weight_bipr_idc G2_DEC_REG(7, 26, 0x3) +#define g2_filter_over_slices G2_DEC_REG(7, 25, 0x1) +#define g2_filter_over_tiles G2_DEC_REG(7, 24, 0x1) +#define g2_asym_pred_e G2_DEC_REG(7, 23, 0x1) +#define g2_sao_e G2_DEC_REG(7, 22, 0x1) +#define g2_pcm_filt_d G2_DEC_REG(7, 21, 0x1) +#define g2_slice_chqp_present G2_DEC_REG(7, 20, 0x1) +#define g2_dependent_slice G2_DEC_REG(7, 19, 0x1) +#define g2_filter_override G2_DEC_REG(7, 18, 0x1) +#define g2_strong_smooth_e G2_DEC_REG(7, 17, 0x1) +#define g2_filt_offset_beta G2_DEC_REG(7, 12, 0x1f) +#define g2_filt_offset_tc G2_DEC_REG(7, 7, 0x1f) +#define g2_slice_hdr_ext_e G2_DEC_REG(7, 6, 0x1) +#define g2_slice_hdr_ext_bits G2_DEC_REG(7, 3, 0x7) + +#define g2_const_intra_e G2_DEC_REG(8, 31, 0x1) +#define g2_filt_ctrl_pres G2_DEC_REG(8, 30, 0x1) +#define g2_bit_depth_y G2_DEC_REG(8, 21, 0xf) +#define g2_bit_depth_c G2_DEC_REG(8, 17, 0xf) +#define g2_idr_pic_e G2_DEC_REG(8, 16, 0x1) +#define g2_bit_depth_pcm_y G2_DEC_REG(8, 12, 0xf) +#define g2_bit_depth_pcm_c G2_DEC_REG(8, 8, 0xf) +#define g2_bit_depth_y_minus8 G2_DEC_REG(8, 6, 0x3) +#define g2_bit_depth_c_minus8 G2_DEC_REG(8, 4, 0x3) +#define g2_rs_out_bit_depth G2_DEC_REG(8, 4, 0xf) +#define g2_output_8_bits G2_DEC_REG(8, 3, 0x1) +#define g2_output_format G2_DEC_REG(8, 0, 0x7) +#define g2_pp_pix_shift G2_DEC_REG(8, 0, 0xf) + +#define g2_refidx1_active G2_DEC_REG(9, 19, 0x1f) +#define g2_refidx0_active G2_DEC_REG(9, 14, 0x1f) +#define g2_hdr_skip_length G2_DEC_REG(9, 0, 0x3fff) + +#define g2_start_code_e G2_DEC_REG(10, 31, 0x1) +#define g2_init_qp_old G2_DEC_REG(10, 25, 0x3f) +#define g2_init_qp G2_DEC_REG(10, 24, 0x7f) +#define g2_num_tile_cols_old G2_DEC_REG(10, 20, 0x1f) +#define g2_num_tile_cols G2_DEC_REG(10, 19, 0x1f) +#define g2_num_tile_rows_old G2_DEC_REG(10, 15, 0x1f) +#define g2_num_tile_rows G2_DEC_REG(10, 14, 0x1f) +#define g2_tile_e G2_DEC_REG(10, 1, 0x1) +#define g2_entropy_sync_e G2_DEC_REG(10, 0, 0x1) + +#define vp9_transform_mode G2_DEC_REG(11, 27, 0x7) +#define vp9_filt_sharpness G2_DEC_REG(11, 21, 0x7) +#define vp9_mcomp_filt_type G2_DEC_REG(11, 8, 0x7) +#define vp9_high_prec_mv_e G2_DEC_REG(11, 7, 0x1) +#define vp9_comp_pred_mode G2_DEC_REG(11, 4, 0x3) +#define vp9_gref_sign_bias G2_DEC_REG(11, 2, 0x1) +#define vp9_aref_sign_bias G2_DEC_REG(11, 0, 0x1) + +#define g2_refer_lterm_e G2_DEC_REG(12, 16, 0xffff) +#define g2_min_cb_size G2_DEC_REG(12, 13, 0x7) +#define g2_max_cb_size G2_DEC_REG(12, 10, 0x7) +#define g2_min_pcm_size G2_DEC_REG(12, 7, 0x7) +#define g2_max_pcm_size G2_DEC_REG(12, 4, 0x7) +#define g2_pcm_e G2_DEC_REG(12, 3, 0x1) +#define g2_transform_skip G2_DEC_REG(12, 2, 0x1) +#define g2_transq_bypass G2_DEC_REG(12, 1, 0x1) +#define g2_list_mod_e G2_DEC_REG(12, 0, 0x1) + +#define hevc_min_trb_size G2_DEC_REG(13, 13, 0x7) +#define hevc_max_trb_size G2_DEC_REG(13, 10, 0x7) +#define hevc_max_intra_hierdepth G2_DEC_REG(13, 7, 0x7) +#define hevc_max_inter_hierdepth G2_DEC_REG(13, 4, 0x7) +#define hevc_parallel_merge G2_DEC_REG(13, 0, 0xf) + +#define hevc_rlist_f0 G2_DEC_REG(14, 0, 0x1f) +#define hevc_rlist_f1 G2_DEC_REG(14, 10, 0x1f) +#define hevc_rlist_f2 G2_DEC_REG(14, 20, 0x1f) +#define hevc_rlist_b0 G2_DEC_REG(14, 5, 0x1f) +#define hevc_rlist_b1 G2_DEC_REG(14, 15, 0x1f) +#define hevc_rlist_b2 G2_DEC_REG(14, 25, 0x1f) + +#define hevc_rlist_f3 G2_DEC_REG(15, 0, 0x1f) +#define hevc_rlist_f4 G2_DEC_REG(15, 10, 0x1f) +#define hevc_rlist_f5 G2_DEC_REG(15, 20, 0x1f) +#define hevc_rlist_b3 G2_DEC_REG(15, 5, 0x1f) +#define hevc_rlist_b4 G2_DEC_REG(15, 15, 0x1f) +#define hevc_rlist_b5 G2_DEC_REG(15, 25, 0x1f) + +#define hevc_rlist_f6 G2_DEC_REG(16, 0, 0x1f) +#define hevc_rlist_f7 G2_DEC_REG(16, 10, 0x1f) +#define hevc_rlist_f8 G2_DEC_REG(16, 20, 0x1f) +#define hevc_rlist_b6 G2_DEC_REG(16, 5, 0x1f) +#define hevc_rlist_b7 G2_DEC_REG(16, 15, 0x1f) +#define hevc_rlist_b8 G2_DEC_REG(16, 25, 0x1f) + +#define hevc_rlist_f9 G2_DEC_REG(17, 0, 0x1f) +#define hevc_rlist_f10 G2_DEC_REG(17, 10, 0x1f) +#define hevc_rlist_f11 G2_DEC_REG(17, 20, 0x1f) +#define hevc_rlist_b9 G2_DEC_REG(17, 5, 0x1f) +#define hevc_rlist_b10 G2_DEC_REG(17, 15, 0x1f) +#define hevc_rlist_b11 G2_DEC_REG(17, 25, 0x1f) + +#define hevc_rlist_f12 G2_DEC_REG(18, 0, 0x1f) +#define hevc_rlist_f13 G2_DEC_REG(18, 10, 0x1f) +#define hevc_rlist_f14 G2_DEC_REG(18, 20, 0x1f) +#define hevc_rlist_b12 G2_DEC_REG(18, 5, 0x1f) +#define hevc_rlist_b13 G2_DEC_REG(18, 15, 0x1f) +#define hevc_rlist_b14 G2_DEC_REG(18, 25, 0x1f) + +#define hevc_rlist_f15 G2_DEC_REG(19, 0, 0x1f) +#define hevc_rlist_b15 G2_DEC_REG(19, 5, 0x1f) + +#define g2_partial_ctb_x G2_DEC_REG(20, 31, 0x1) +#define g2_partial_ctb_y G2_DEC_REG(20, 30, 0x1) +#define g2_pic_width_4x4 G2_DEC_REG(20, 16, 0xfff) +#define g2_pic_height_4x4 G2_DEC_REG(20, 0, 0xfff) + +#define vp9_qp_delta_y_dc G2_DEC_REG(13, 23, 0x3f) +#define vp9_qp_delta_ch_dc G2_DEC_REG(13, 17, 0x3f) +#define vp9_qp_delta_ch_ac G2_DEC_REG(13, 11, 0x3f) +#define vp9_last_sign_bias G2_DEC_REG(13, 10, 0x1) +#define vp9_lossless_e G2_DEC_REG(13, 9, 0x1) +#define vp9_comp_pred_var_ref1 G2_DEC_REG(13, 7, 0x3) +#define vp9_comp_pred_var_ref0 G2_DEC_REG(13, 5, 0x3) +#define vp9_comp_pred_fixed_ref G2_DEC_REG(13, 3, 0x3) +#define vp9_segment_temp_upd_e G2_DEC_REG(13, 2, 0x1) +#define vp9_segment_upd_e G2_DEC_REG(13, 1, 0x1) +#define vp9_segment_e G2_DEC_REG(13, 0, 0x1) + +#define vp9_filt_level G2_DEC_REG(14, 18, 0x3f) +#define vp9_refpic_seg0 G2_DEC_REG(14, 15, 0x7) +#define vp9_skip_seg0 G2_DEC_REG(14, 14, 0x1) +#define vp9_filt_level_seg0 G2_DEC_REG(14, 8, 0x3f) +#define vp9_quant_seg0 G2_DEC_REG(14, 0, 0xff) + +#define vp9_refpic_seg1 G2_DEC_REG(15, 15, 0x7) +#define vp9_skip_seg1 G2_DEC_REG(15, 14, 0x1) +#define vp9_filt_level_seg1 G2_DEC_REG(15, 8, 0x3f) +#define vp9_quant_seg1 G2_DEC_REG(15, 0, 0xff) + +#define vp9_refpic_seg2 G2_DEC_REG(16, 15, 0x7) +#define vp9_skip_seg2 G2_DEC_REG(16, 14, 0x1) +#define vp9_filt_level_seg2 G2_DEC_REG(16, 8, 0x3f) +#define vp9_quant_seg2 G2_DEC_REG(16, 0, 0xff) + +#define vp9_refpic_seg3 G2_DEC_REG(17, 15, 0x7) +#define vp9_skip_seg3 G2_DEC_REG(17, 14, 0x1) +#define vp9_filt_level_seg3 G2_DEC_REG(17, 8, 0x3f) +#define vp9_quant_seg3 G2_DEC_REG(17, 0, 0xff) + +#define vp9_refpic_seg4 G2_DEC_REG(18, 15, 0x7) +#define vp9_skip_seg4 G2_DEC_REG(18, 14, 0x1) +#define vp9_filt_level_seg4 G2_DEC_REG(18, 8, 0x3f) +#define vp9_quant_seg4 G2_DEC_REG(18, 0, 0xff) + +#define vp9_refpic_seg5 G2_DEC_REG(19, 15, 0x7) +#define vp9_skip_seg5 G2_DEC_REG(19, 14, 0x1) +#define vp9_filt_level_seg5 G2_DEC_REG(19, 8, 0x3f) +#define vp9_quant_seg5 G2_DEC_REG(19, 0, 0xff) + +#define hevc_cur_poc_00 G2_DEC_REG(46, 24, 0xff) +#define hevc_cur_poc_01 G2_DEC_REG(46, 16, 0xff) +#define hevc_cur_poc_02 G2_DEC_REG(46, 8, 0xff) +#define hevc_cur_poc_03 G2_DEC_REG(46, 0, 0xff) + +#define hevc_cur_poc_04 G2_DEC_REG(47, 24, 0xff) +#define hevc_cur_poc_05 G2_DEC_REG(47, 16, 0xff) +#define hevc_cur_poc_06 G2_DEC_REG(47, 8, 0xff) +#define hevc_cur_poc_07 G2_DEC_REG(47, 0, 0xff) + +#define hevc_cur_poc_08 G2_DEC_REG(48, 24, 0xff) +#define hevc_cur_poc_09 G2_DEC_REG(48, 16, 0xff) +#define hevc_cur_poc_10 G2_DEC_REG(48, 8, 0xff) +#define hevc_cur_poc_11 G2_DEC_REG(48, 0, 0xff) + +#define hevc_cur_poc_12 G2_DEC_REG(49, 24, 0xff) +#define hevc_cur_poc_13 G2_DEC_REG(49, 16, 0xff) +#define hevc_cur_poc_14 G2_DEC_REG(49, 8, 0xff) +#define hevc_cur_poc_15 G2_DEC_REG(49, 0, 0xff) + +#define vp9_refpic_seg6 G2_DEC_REG(31, 15, 0x7) +#define vp9_skip_seg6 G2_DEC_REG(31, 14, 0x1) +#define vp9_filt_level_seg6 G2_DEC_REG(31, 8, 0x3f) +#define vp9_quant_seg6 G2_DEC_REG(31, 0, 0xff) + +#define vp9_refpic_seg7 G2_DEC_REG(32, 15, 0x7) +#define vp9_skip_seg7 G2_DEC_REG(32, 14, 0x1) +#define vp9_filt_level_seg7 G2_DEC_REG(32, 8, 0x3f) +#define vp9_quant_seg7 G2_DEC_REG(32, 0, 0xff) + +#define vp9_lref_width G2_DEC_REG(33, 16, 0xffff) +#define vp9_lref_height G2_DEC_REG(33, 0, 0xffff) + +#define vp9_gref_width G2_DEC_REG(34, 16, 0xffff) +#define vp9_gref_height G2_DEC_REG(34, 0, 0xffff) + +#define vp9_aref_width G2_DEC_REG(35, 16, 0xffff) +#define vp9_aref_height G2_DEC_REG(35, 0, 0xffff) + +#define vp9_lref_hor_scale G2_DEC_REG(36, 16, 0xffff) +#define vp9_lref_ver_scale G2_DEC_REG(36, 0, 0xffff) + +#define vp9_gref_hor_scale G2_DEC_REG(37, 16, 0xffff) +#define vp9_gref_ver_scale G2_DEC_REG(37, 0, 0xffff) + +#define vp9_aref_hor_scale G2_DEC_REG(38, 16, 0xffff) +#define vp9_aref_ver_scale G2_DEC_REG(38, 0, 0xffff) + +#define vp9_filt_ref_adj_0 G2_DEC_REG(46, 24, 0x7f) +#define vp9_filt_ref_adj_1 G2_DEC_REG(46, 16, 0x7f) +#define vp9_filt_ref_adj_2 G2_DEC_REG(46, 8, 0x7f) +#define vp9_filt_ref_adj_3 G2_DEC_REG(46, 0, 0x7f) + +#define vp9_filt_mb_adj_0 G2_DEC_REG(47, 24, 0x7f) +#define vp9_filt_mb_adj_1 G2_DEC_REG(47, 16, 0x7f) +#define vp9_filt_mb_adj_2 G2_DEC_REG(47, 8, 0x7f) +#define vp9_filt_mb_adj_3 G2_DEC_REG(47, 0, 0x7f) + +#define g2_apf_threshold G2_DEC_REG(55, 0, 0xffff) + +#define g2_clk_gate_e G2_DEC_REG(58, 16, 0x1) +#define g2_double_buffer_e G2_DEC_REG(58, 15, 0x1) +#define g2_buswidth G2_DEC_REG(58, 8, 0x7) +#define g2_max_burst G2_DEC_REG(58, 0, 0xff) + +#define g2_down_scale_e G2_DEC_REG(184, 7, 0x1) +#define g2_down_scale_y G2_DEC_REG(184, 2, 0x3) +#define g2_down_scale_x G2_DEC_REG(184, 0, 0x3) + +#define G2_REG_CONFIG G2_SWREG(58) +#define G2_REG_CONFIG_DEC_CLK_GATE_E BIT(16) +#define G2_REG_CONFIG_DEC_CLK_GATE_IDLE_E BIT(17) + +#define G2_OUT_LUMA_ADDR (G2_SWREG(65)) +#define G2_REF_LUMA_ADDR(i) (G2_SWREG(67) + ((i) * 0x8)) +#define G2_VP9_SEGMENT_WRITE_ADDR (G2_SWREG(79)) +#define G2_VP9_SEGMENT_READ_ADDR (G2_SWREG(81)) +#define G2_OUT_CHROMA_ADDR (G2_SWREG(99)) +#define G2_REF_CHROMA_ADDR(i) (G2_SWREG(101) + ((i) * 0x8)) +#define G2_OUT_MV_ADDR (G2_SWREG(133)) +#define G2_REF_MV_ADDR(i) (G2_SWREG(135) + ((i) * 0x8)) +#define G2_TILE_SIZES_ADDR (G2_SWREG(167)) +#define G2_STREAM_ADDR (G2_SWREG(169)) +#define G2_HEVC_SCALING_LIST_ADDR (G2_SWREG(171)) +#define G2_VP9_CTX_COUNT_ADDR (G2_SWREG(171)) +#define G2_VP9_PROBS_ADDR (G2_SWREG(173)) +#define G2_RS_OUT_LUMA_ADDR (G2_SWREG(175)) +#define G2_RS_OUT_CHROMA_ADDR (G2_SWREG(177)) +#define G2_TILE_FILTER_ADDR (G2_SWREG(179)) +#define G2_TILE_SAO_ADDR (G2_SWREG(181)) +#define G2_TILE_BSD_ADDR (G2_SWREG(183)) +#define G2_DS_DST (G2_SWREG(186)) +#define G2_DS_DST_CHR (G2_SWREG(188)) + +#define g2_strm_buffer_len G2_DEC_REG(258, 0, 0xffffffff) +#define g2_strm_start_offset G2_DEC_REG(259, 0, 0xffffffff) + +#endif diff --git a/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c new file mode 100644 index 000000000..6fc4b5555 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c @@ -0,0 +1,1014 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VP9 codec driver + * + * Copyright (C) 2021 Collabora Ltd. + */ +#include "media/videobuf2-core.h" +#include "media/videobuf2-dma-contig.h" +#include "media/videobuf2-v4l2.h" +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <media/v4l2-mem2mem.h> +#include <media/v4l2-vp9.h> + +#include "hantro.h" +#include "hantro_vp9.h" +#include "hantro_g2_regs.h" + +#define G2_ALIGN 16 + +enum hantro_ref_frames { + INTRA_FRAME = 0, + LAST_FRAME = 1, + GOLDEN_FRAME = 2, + ALTREF_FRAME = 3, + MAX_REF_FRAMES = 4 +}; + +static int start_prepare_run(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame **dec_params) +{ + const struct v4l2_ctrl_vp9_compressed_hdr *prob_updates; + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + struct v4l2_ctrl *ctrl; + unsigned int fctx_idx; + + /* v4l2-specific stuff */ + hantro_start_prepare_run(ctx); + + ctrl = v4l2_ctrl_find(&ctx->ctrl_handler, V4L2_CID_STATELESS_VP9_FRAME); + if (WARN_ON(!ctrl)) + return -EINVAL; + *dec_params = ctrl->p_cur.p; + + ctrl = v4l2_ctrl_find(&ctx->ctrl_handler, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR); + if (WARN_ON(!ctrl)) + return -EINVAL; + prob_updates = ctrl->p_cur.p; + vp9_ctx->cur.tx_mode = prob_updates->tx_mode; + + /* + * vp9 stuff + * + * by this point the userspace has done all parts of 6.2 uncompressed_header() + * except this fragment: + * if ( FrameIsIntra || error_resilient_mode ) { + * setup_past_independence ( ) + * if ( frame_type == KEY_FRAME || error_resilient_mode == 1 || + * reset_frame_context == 3 ) { + * for ( i = 0; i < 4; i ++ ) { + * save_probs( i ) + * } + * } else if ( reset_frame_context == 2 ) { + * save_probs( frame_context_idx ) + * } + * frame_context_idx = 0 + * } + */ + fctx_idx = v4l2_vp9_reset_frame_ctx(*dec_params, vp9_ctx->frame_context); + vp9_ctx->cur.frame_context_idx = fctx_idx; + + /* 6.1 frame(sz): load_probs() and load_probs2() */ + vp9_ctx->probability_tables = vp9_ctx->frame_context[fctx_idx]; + + /* + * The userspace has also performed 6.3 compressed_header(), but handling the + * probs in a special way. All probs which need updating, except MV-related, + * have been read from the bitstream and translated through inv_map_table[], + * but no 6.3.6 inv_recenter_nonneg(v, m) has been performed. The values passed + * by userspace are either translated values (there are no 0 values in + * inv_map_table[]), or zero to indicate no update. All MV-related probs which need + * updating have been read from the bitstream and (mv_prob << 1) | 1 has been + * performed. The values passed by userspace are either new values + * to replace old ones (the above mentioned shift and bitwise or never result in + * a zero) or zero to indicate no update. + * fw_update_probs() performs actual probs updates or leaves probs as-is + * for values for which a zero was passed from userspace. + */ + v4l2_vp9_fw_update_probs(&vp9_ctx->probability_tables, prob_updates, *dec_params); + + return 0; +} + +static size_t chroma_offset(const struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp9_frame *dec_params) +{ + int bytes_per_pixel = dec_params->bit_depth == 8 ? 1 : 2; + + return ctx->src_fmt.width * ctx->src_fmt.height * bytes_per_pixel; +} + +static size_t mv_offset(const struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp9_frame *dec_params) +{ + size_t cr_offset = chroma_offset(ctx, dec_params); + + return ALIGN((cr_offset * 3) / 2, G2_ALIGN); +} + +static struct hantro_decoded_buffer * +get_ref_buf(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *dst, u64 timestamp) +{ + struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; + struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q; + struct vb2_buffer *buf; + + /* + * If a ref is unused or invalid, address of current destination + * buffer is returned. + */ + buf = vb2_find_buffer(cap_q, timestamp); + if (!buf) + buf = &dst->vb2_buf; + + return vb2_to_hantro_decoded_buf(buf); +} + +static void update_dec_buf_info(struct hantro_decoded_buffer *buf, + const struct v4l2_ctrl_vp9_frame *dec_params) +{ + buf->vp9.width = dec_params->frame_width_minus_1 + 1; + buf->vp9.height = dec_params->frame_height_minus_1 + 1; + buf->vp9.bit_depth = dec_params->bit_depth; +} + +static void update_ctx_cur_info(struct hantro_vp9_dec_hw_ctx *vp9_ctx, + struct hantro_decoded_buffer *buf, + const struct v4l2_ctrl_vp9_frame *dec_params) +{ + vp9_ctx->cur.valid = true; + vp9_ctx->cur.reference_mode = dec_params->reference_mode; + vp9_ctx->cur.interpolation_filter = dec_params->interpolation_filter; + vp9_ctx->cur.flags = dec_params->flags; + vp9_ctx->cur.timestamp = buf->base.vb.vb2_buf.timestamp; +} + +static void config_output(struct hantro_ctx *ctx, + struct hantro_decoded_buffer *dst, + const struct v4l2_ctrl_vp9_frame *dec_params) +{ + dma_addr_t luma_addr, chroma_addr, mv_addr; + + hantro_reg_write(ctx->dev, &g2_out_dis, 0); + if (!ctx->dev->variant->legacy_regs) + hantro_reg_write(ctx->dev, &g2_output_format, 0); + + luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf); + hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr); + + chroma_addr = luma_addr + chroma_offset(ctx, dec_params); + hantro_write_addr(ctx->dev, G2_OUT_CHROMA_ADDR, chroma_addr); + + mv_addr = luma_addr + mv_offset(ctx, dec_params); + hantro_write_addr(ctx->dev, G2_OUT_MV_ADDR, mv_addr); +} + +struct hantro_vp9_ref_reg { + const struct hantro_reg width; + const struct hantro_reg height; + const struct hantro_reg hor_scale; + const struct hantro_reg ver_scale; + u32 y_base; + u32 c_base; +}; + +static void config_ref(struct hantro_ctx *ctx, + struct hantro_decoded_buffer *dst, + const struct hantro_vp9_ref_reg *ref_reg, + const struct v4l2_ctrl_vp9_frame *dec_params, + u64 ref_ts) +{ + struct hantro_decoded_buffer *buf; + dma_addr_t luma_addr, chroma_addr; + u32 refw, refh; + + buf = get_ref_buf(ctx, &dst->base.vb, ref_ts); + refw = buf->vp9.width; + refh = buf->vp9.height; + + hantro_reg_write(ctx->dev, &ref_reg->width, refw); + hantro_reg_write(ctx->dev, &ref_reg->height, refh); + + hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) / dst->vp9.width); + hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) / dst->vp9.height); + + luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf); + hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr); + + chroma_addr = luma_addr + chroma_offset(ctx, dec_params); + hantro_write_addr(ctx->dev, ref_reg->c_base, chroma_addr); +} + +static void config_ref_registers(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp9_frame *dec_params, + struct hantro_decoded_buffer *dst, + struct hantro_decoded_buffer *mv_ref) +{ + static const struct hantro_vp9_ref_reg ref_regs[] = { + { + /* Last */ + .width = vp9_lref_width, + .height = vp9_lref_height, + .hor_scale = vp9_lref_hor_scale, + .ver_scale = vp9_lref_ver_scale, + .y_base = G2_REF_LUMA_ADDR(0), + .c_base = G2_REF_CHROMA_ADDR(0), + }, { + /* Golden */ + .width = vp9_gref_width, + .height = vp9_gref_height, + .hor_scale = vp9_gref_hor_scale, + .ver_scale = vp9_gref_ver_scale, + .y_base = G2_REF_LUMA_ADDR(4), + .c_base = G2_REF_CHROMA_ADDR(4), + }, { + /* Altref */ + .width = vp9_aref_width, + .height = vp9_aref_height, + .hor_scale = vp9_aref_hor_scale, + .ver_scale = vp9_aref_ver_scale, + .y_base = G2_REF_LUMA_ADDR(5), + .c_base = G2_REF_CHROMA_ADDR(5), + }, + }; + dma_addr_t mv_addr; + + config_ref(ctx, dst, &ref_regs[0], dec_params, dec_params->last_frame_ts); + config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params->golden_frame_ts); + config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params->alt_frame_ts); + + mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) + + mv_offset(ctx, dec_params); + hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr); + + hantro_reg_write(ctx->dev, &vp9_last_sign_bias, + dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_LAST ? 1 : 0); + + hantro_reg_write(ctx->dev, &vp9_gref_sign_bias, + dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_GOLDEN ? 1 : 0); + + hantro_reg_write(ctx->dev, &vp9_aref_sign_bias, + dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_ALT ? 1 : 0); +} + +static void recompute_tile_info(unsigned short *tile_info, unsigned int tiles, unsigned int sbs) +{ + int i; + unsigned int accumulated = 0; + unsigned int next_accumulated; + + for (i = 1; i <= tiles; ++i) { + next_accumulated = i * sbs / tiles; + *tile_info++ = next_accumulated - accumulated; + accumulated = next_accumulated; + } +} + +static void +recompute_tile_rc_info(struct hantro_ctx *ctx, + unsigned int tile_r, unsigned int tile_c, + unsigned int sbs_r, unsigned int sbs_c) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + + recompute_tile_info(vp9_ctx->tile_r_info, tile_r, sbs_r); + recompute_tile_info(vp9_ctx->tile_c_info, tile_c, sbs_c); + + vp9_ctx->last_tile_r = tile_r; + vp9_ctx->last_tile_c = tile_c; + vp9_ctx->last_sbs_r = sbs_r; + vp9_ctx->last_sbs_c = sbs_c; +} + +static inline unsigned int first_tile_row(unsigned int tile_r, unsigned int sbs_r) +{ + if (tile_r == sbs_r + 1) + return 1; + + if (tile_r == sbs_r + 2) + return 2; + + return 0; +} + +static void +fill_tile_info(struct hantro_ctx *ctx, + unsigned int tile_r, unsigned int tile_c, + unsigned int sbs_r, unsigned int sbs_c, + unsigned short *tile_mem) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + unsigned int i, j; + bool first = true; + + for (i = first_tile_row(tile_r, sbs_r); i < tile_r; ++i) { + unsigned short r_info = vp9_ctx->tile_r_info[i]; + + if (first) { + if (i > 0) + r_info += vp9_ctx->tile_r_info[0]; + if (i == 2) + r_info += vp9_ctx->tile_r_info[1]; + first = false; + } + for (j = 0; j < tile_c; ++j) { + *tile_mem++ = vp9_ctx->tile_c_info[j]; + *tile_mem++ = r_info; + } + } +} + +static void +config_tiles(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp9_frame *dec_params, + struct hantro_decoded_buffer *dst) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + struct hantro_aux_buf *misc = &vp9_ctx->misc; + struct hantro_aux_buf *tile_edge = &vp9_ctx->tile_edge; + dma_addr_t addr; + unsigned short *tile_mem; + unsigned int rows, cols; + + addr = misc->dma + vp9_ctx->tile_info_offset; + hantro_write_addr(ctx->dev, G2_TILE_SIZES_ADDR, addr); + + tile_mem = misc->cpu + vp9_ctx->tile_info_offset; + if (dec_params->tile_cols_log2 || dec_params->tile_rows_log2) { + unsigned int tile_r = (1 << dec_params->tile_rows_log2); + unsigned int tile_c = (1 << dec_params->tile_cols_log2); + unsigned int sbs_r = hantro_vp9_num_sbs(dst->vp9.height); + unsigned int sbs_c = hantro_vp9_num_sbs(dst->vp9.width); + + if (tile_r != vp9_ctx->last_tile_r || tile_c != vp9_ctx->last_tile_c || + sbs_r != vp9_ctx->last_sbs_r || sbs_c != vp9_ctx->last_sbs_c) + recompute_tile_rc_info(ctx, tile_r, tile_c, sbs_r, sbs_c); + + fill_tile_info(ctx, tile_r, tile_c, sbs_r, sbs_c, tile_mem); + + cols = tile_c; + rows = tile_r; + hantro_reg_write(ctx->dev, &g2_tile_e, 1); + } else { + tile_mem[0] = hantro_vp9_num_sbs(dst->vp9.width); + tile_mem[1] = hantro_vp9_num_sbs(dst->vp9.height); + + cols = 1; + rows = 1; + hantro_reg_write(ctx->dev, &g2_tile_e, 0); + } + + if (ctx->dev->variant->legacy_regs) { + hantro_reg_write(ctx->dev, &g2_num_tile_cols_old, cols); + hantro_reg_write(ctx->dev, &g2_num_tile_rows_old, rows); + } else { + hantro_reg_write(ctx->dev, &g2_num_tile_cols, cols); + hantro_reg_write(ctx->dev, &g2_num_tile_rows, rows); + } + + /* provide aux buffers even if no tiles are used */ + addr = tile_edge->dma; + hantro_write_addr(ctx->dev, G2_TILE_FILTER_ADDR, addr); + + addr = tile_edge->dma + vp9_ctx->bsd_ctrl_offset; + hantro_write_addr(ctx->dev, G2_TILE_BSD_ADDR, addr); +} + +static void +update_feat_and_flag(struct hantro_vp9_dec_hw_ctx *vp9_ctx, + const struct v4l2_vp9_segmentation *seg, + unsigned int feature, + unsigned int segid) +{ + u8 mask = V4L2_VP9_SEGMENT_FEATURE_ENABLED(feature); + + vp9_ctx->feature_data[segid][feature] = seg->feature_data[segid][feature]; + vp9_ctx->feature_enabled[segid] &= ~mask; + vp9_ctx->feature_enabled[segid] |= (seg->feature_enabled[segid] & mask); +} + +static inline s16 clip3(s16 x, s16 y, s16 z) +{ + return (z < x) ? x : (z > y) ? y : z; +} + +static s16 feat_val_clip3(s16 feat_val, s16 feature_data, bool absolute, u8 clip) +{ + if (absolute) + return feature_data; + + return clip3(0, 255, feat_val + feature_data); +} + +static void config_segment(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + const struct v4l2_vp9_segmentation *seg; + s16 feat_val; + unsigned char feat_id; + unsigned int segid; + bool segment_enabled, absolute, update_data; + + static const struct hantro_reg seg_regs[8][V4L2_VP9_SEG_LVL_MAX] = { + { vp9_quant_seg0, vp9_filt_level_seg0, vp9_refpic_seg0, vp9_skip_seg0 }, + { vp9_quant_seg1, vp9_filt_level_seg1, vp9_refpic_seg1, vp9_skip_seg1 }, + { vp9_quant_seg2, vp9_filt_level_seg2, vp9_refpic_seg2, vp9_skip_seg2 }, + { vp9_quant_seg3, vp9_filt_level_seg3, vp9_refpic_seg3, vp9_skip_seg3 }, + { vp9_quant_seg4, vp9_filt_level_seg4, vp9_refpic_seg4, vp9_skip_seg4 }, + { vp9_quant_seg5, vp9_filt_level_seg5, vp9_refpic_seg5, vp9_skip_seg5 }, + { vp9_quant_seg6, vp9_filt_level_seg6, vp9_refpic_seg6, vp9_skip_seg6 }, + { vp9_quant_seg7, vp9_filt_level_seg7, vp9_refpic_seg7, vp9_skip_seg7 }, + }; + + segment_enabled = !!(dec_params->seg.flags & V4L2_VP9_SEGMENTATION_FLAG_ENABLED); + hantro_reg_write(ctx->dev, &vp9_segment_e, segment_enabled); + hantro_reg_write(ctx->dev, &vp9_segment_upd_e, + !!(dec_params->seg.flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP)); + hantro_reg_write(ctx->dev, &vp9_segment_temp_upd_e, + !!(dec_params->seg.flags & V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE)); + + seg = &dec_params->seg; + absolute = !!(seg->flags & V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE); + update_data = !!(seg->flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA); + + for (segid = 0; segid < 8; ++segid) { + /* Quantizer segment feature */ + feat_id = V4L2_VP9_SEG_LVL_ALT_Q; + feat_val = dec_params->quant.base_q_idx; + if (segment_enabled) { + if (update_data) + update_feat_and_flag(vp9_ctx, seg, feat_id, segid); + if (v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled, feat_id, segid)) + feat_val = feat_val_clip3(feat_val, + vp9_ctx->feature_data[segid][feat_id], + absolute, 255); + } + hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val); + + /* Loop filter segment feature */ + feat_id = V4L2_VP9_SEG_LVL_ALT_L; + feat_val = dec_params->lf.level; + if (segment_enabled) { + if (update_data) + update_feat_and_flag(vp9_ctx, seg, feat_id, segid); + if (v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled, feat_id, segid)) + feat_val = feat_val_clip3(feat_val, + vp9_ctx->feature_data[segid][feat_id], + absolute, 63); + } + hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val); + + /* Reference frame segment feature */ + feat_id = V4L2_VP9_SEG_LVL_REF_FRAME; + feat_val = 0; + if (segment_enabled) { + if (update_data) + update_feat_and_flag(vp9_ctx, seg, feat_id, segid); + if (!(dec_params->flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME) && + v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled, feat_id, segid)) + feat_val = vp9_ctx->feature_data[segid][feat_id] + 1; + } + hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val); + + /* Skip segment feature */ + feat_id = V4L2_VP9_SEG_LVL_SKIP; + feat_val = 0; + if (segment_enabled) { + if (update_data) + update_feat_and_flag(vp9_ctx, seg, feat_id, segid); + feat_val = v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled, + feat_id, segid) ? 1 : 0; + } + hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val); + } +} + +static void config_loop_filter(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params) +{ + bool d = dec_params->lf.flags & V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED; + + hantro_reg_write(ctx->dev, &vp9_filt_level, dec_params->lf.level); + hantro_reg_write(ctx->dev, &g2_out_filtering_dis, dec_params->lf.level == 0); + hantro_reg_write(ctx->dev, &vp9_filt_sharpness, dec_params->lf.sharpness); + + hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_0, d ? dec_params->lf.ref_deltas[0] : 0); + hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_1, d ? dec_params->lf.ref_deltas[1] : 0); + hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_2, d ? dec_params->lf.ref_deltas[2] : 0); + hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_3, d ? dec_params->lf.ref_deltas[3] : 0); + hantro_reg_write(ctx->dev, &vp9_filt_mb_adj_0, d ? dec_params->lf.mode_deltas[0] : 0); + hantro_reg_write(ctx->dev, &vp9_filt_mb_adj_1, d ? dec_params->lf.mode_deltas[1] : 0); +} + +static void config_picture_dimensions(struct hantro_ctx *ctx, struct hantro_decoded_buffer *dst) +{ + u32 pic_w_4x4, pic_h_4x4; + + hantro_reg_write(ctx->dev, &g2_pic_width_in_cbs, (dst->vp9.width + 7) / 8); + hantro_reg_write(ctx->dev, &g2_pic_height_in_cbs, (dst->vp9.height + 7) / 8); + pic_w_4x4 = roundup(dst->vp9.width, 8) >> 2; + pic_h_4x4 = roundup(dst->vp9.height, 8) >> 2; + hantro_reg_write(ctx->dev, &g2_pic_width_4x4, pic_w_4x4); + hantro_reg_write(ctx->dev, &g2_pic_height_4x4, pic_h_4x4); +} + +static void +config_bit_depth(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params) +{ + if (ctx->dev->variant->legacy_regs) { + hantro_reg_write(ctx->dev, &g2_bit_depth_y, dec_params->bit_depth); + hantro_reg_write(ctx->dev, &g2_bit_depth_c, dec_params->bit_depth); + hantro_reg_write(ctx->dev, &g2_pix_shift, 0); + } else { + hantro_reg_write(ctx->dev, &g2_bit_depth_y_minus8, dec_params->bit_depth - 8); + hantro_reg_write(ctx->dev, &g2_bit_depth_c_minus8, dec_params->bit_depth - 8); + } +} + +static inline bool is_lossless(const struct v4l2_vp9_quantization *quant) +{ + return quant->base_q_idx == 0 && quant->delta_q_uv_ac == 0 && + quant->delta_q_uv_dc == 0 && quant->delta_q_y_dc == 0; +} + +static void +config_quant(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params) +{ + hantro_reg_write(ctx->dev, &vp9_qp_delta_y_dc, dec_params->quant.delta_q_y_dc); + hantro_reg_write(ctx->dev, &vp9_qp_delta_ch_dc, dec_params->quant.delta_q_uv_dc); + hantro_reg_write(ctx->dev, &vp9_qp_delta_ch_ac, dec_params->quant.delta_q_uv_ac); + hantro_reg_write(ctx->dev, &vp9_lossless_e, is_lossless(&dec_params->quant)); +} + +static u32 +hantro_interp_filter_from_v4l2(unsigned int interpolation_filter) +{ + switch (interpolation_filter) { + case V4L2_VP9_INTERP_FILTER_EIGHTTAP: + return 0x1; + case V4L2_VP9_INTERP_FILTER_EIGHTTAP_SMOOTH: + return 0; + case V4L2_VP9_INTERP_FILTER_EIGHTTAP_SHARP: + return 0x2; + case V4L2_VP9_INTERP_FILTER_BILINEAR: + return 0x3; + case V4L2_VP9_INTERP_FILTER_SWITCHABLE: + return 0x4; + } + + return 0; +} + +static void +config_others(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params, + bool intra_only, bool resolution_change) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + + hantro_reg_write(ctx->dev, &g2_idr_pic_e, intra_only); + + hantro_reg_write(ctx->dev, &vp9_transform_mode, vp9_ctx->cur.tx_mode); + + hantro_reg_write(ctx->dev, &vp9_mcomp_filt_type, intra_only ? + 0 : hantro_interp_filter_from_v4l2(dec_params->interpolation_filter)); + + hantro_reg_write(ctx->dev, &vp9_high_prec_mv_e, + !!(dec_params->flags & V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV)); + + hantro_reg_write(ctx->dev, &vp9_comp_pred_mode, dec_params->reference_mode); + + hantro_reg_write(ctx->dev, &g2_tempor_mvp_e, + !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) && + !(dec_params->flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME) && + !(vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME) && + !(dec_params->flags & V4L2_VP9_FRAME_FLAG_INTRA_ONLY) && + !resolution_change && + vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_SHOW_FRAME + ); + + hantro_reg_write(ctx->dev, &g2_write_mvs_e, + !(dec_params->flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME)); +} + +static void +config_compound_reference(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp9_frame *dec_params) +{ + u32 comp_fixed_ref, comp_var_ref[2]; + bool last_ref_frame_sign_bias; + bool golden_ref_frame_sign_bias; + bool alt_ref_frame_sign_bias; + bool comp_ref_allowed = 0; + + comp_fixed_ref = 0; + comp_var_ref[0] = 0; + comp_var_ref[1] = 0; + + last_ref_frame_sign_bias = dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_LAST; + golden_ref_frame_sign_bias = dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_GOLDEN; + alt_ref_frame_sign_bias = dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_ALT; + + /* 6.3.12 Frame reference mode syntax */ + comp_ref_allowed |= golden_ref_frame_sign_bias != last_ref_frame_sign_bias; + comp_ref_allowed |= alt_ref_frame_sign_bias != last_ref_frame_sign_bias; + + if (comp_ref_allowed) { + if (last_ref_frame_sign_bias == + golden_ref_frame_sign_bias) { + comp_fixed_ref = ALTREF_FRAME; + comp_var_ref[0] = LAST_FRAME; + comp_var_ref[1] = GOLDEN_FRAME; + } else if (last_ref_frame_sign_bias == + alt_ref_frame_sign_bias) { + comp_fixed_ref = GOLDEN_FRAME; + comp_var_ref[0] = LAST_FRAME; + comp_var_ref[1] = ALTREF_FRAME; + } else { + comp_fixed_ref = LAST_FRAME; + comp_var_ref[0] = GOLDEN_FRAME; + comp_var_ref[1] = ALTREF_FRAME; + } + } + + hantro_reg_write(ctx->dev, &vp9_comp_pred_fixed_ref, comp_fixed_ref); + hantro_reg_write(ctx->dev, &vp9_comp_pred_var_ref0, comp_var_ref[0]); + hantro_reg_write(ctx->dev, &vp9_comp_pred_var_ref1, comp_var_ref[1]); +} + +#define INNER_LOOP \ +do { \ + for (m = 0; m < ARRAY_SIZE(adaptive->coef[0][0][0][0]); ++m) { \ + memcpy(adaptive->coef[i][j][k][l][m], \ + probs->coef[i][j][k][l][m], \ + sizeof(probs->coef[i][j][k][l][m])); \ + \ + adaptive->coef[i][j][k][l][m][3] = 0; \ + } \ +} while (0) + +static void config_probs(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + struct hantro_aux_buf *misc = &vp9_ctx->misc; + struct hantro_g2_all_probs *all_probs = misc->cpu; + struct hantro_g2_probs *adaptive; + struct hantro_g2_mv_probs *mv; + const struct v4l2_vp9_segmentation *seg = &dec_params->seg; + const struct v4l2_vp9_frame_context *probs = &vp9_ctx->probability_tables; + int i, j, k, l, m; + + for (i = 0; i < ARRAY_SIZE(all_probs->kf_y_mode_prob); ++i) + for (j = 0; j < ARRAY_SIZE(all_probs->kf_y_mode_prob[0]); ++j) { + memcpy(all_probs->kf_y_mode_prob[i][j], + v4l2_vp9_kf_y_mode_prob[i][j], + ARRAY_SIZE(all_probs->kf_y_mode_prob[i][j])); + + all_probs->kf_y_mode_prob_tail[i][j][0] = + v4l2_vp9_kf_y_mode_prob[i][j][8]; + } + + memcpy(all_probs->mb_segment_tree_probs, seg->tree_probs, + sizeof(all_probs->mb_segment_tree_probs)); + + memcpy(all_probs->segment_pred_probs, seg->pred_probs, + sizeof(all_probs->segment_pred_probs)); + + for (i = 0; i < ARRAY_SIZE(all_probs->kf_uv_mode_prob); ++i) { + memcpy(all_probs->kf_uv_mode_prob[i], v4l2_vp9_kf_uv_mode_prob[i], + ARRAY_SIZE(all_probs->kf_uv_mode_prob[i])); + + all_probs->kf_uv_mode_prob_tail[i][0] = v4l2_vp9_kf_uv_mode_prob[i][8]; + } + + adaptive = &all_probs->probs; + + for (i = 0; i < ARRAY_SIZE(adaptive->inter_mode); ++i) { + memcpy(adaptive->inter_mode[i], probs->inter_mode[i], + ARRAY_SIZE(probs->inter_mode[i])); + + adaptive->inter_mode[i][3] = 0; + } + + memcpy(adaptive->is_inter, probs->is_inter, sizeof(adaptive->is_inter)); + + for (i = 0; i < ARRAY_SIZE(adaptive->uv_mode); ++i) { + memcpy(adaptive->uv_mode[i], probs->uv_mode[i], + sizeof(adaptive->uv_mode[i])); + adaptive->uv_mode_tail[i][0] = probs->uv_mode[i][8]; + } + + memcpy(adaptive->tx8, probs->tx8, sizeof(adaptive->tx8)); + memcpy(adaptive->tx16, probs->tx16, sizeof(adaptive->tx16)); + memcpy(adaptive->tx32, probs->tx32, sizeof(adaptive->tx32)); + + for (i = 0; i < ARRAY_SIZE(adaptive->y_mode); ++i) { + memcpy(adaptive->y_mode[i], probs->y_mode[i], + ARRAY_SIZE(adaptive->y_mode[i])); + + adaptive->y_mode_tail[i][0] = probs->y_mode[i][8]; + } + + for (i = 0; i < ARRAY_SIZE(adaptive->partition[0]); ++i) { + memcpy(adaptive->partition[0][i], v4l2_vp9_kf_partition_probs[i], + sizeof(v4l2_vp9_kf_partition_probs[i])); + + adaptive->partition[0][i][3] = 0; + } + + for (i = 0; i < ARRAY_SIZE(adaptive->partition[1]); ++i) { + memcpy(adaptive->partition[1][i], probs->partition[i], + sizeof(probs->partition[i])); + + adaptive->partition[1][i][3] = 0; + } + + memcpy(adaptive->interp_filter, probs->interp_filter, + sizeof(adaptive->interp_filter)); + + memcpy(adaptive->comp_mode, probs->comp_mode, sizeof(adaptive->comp_mode)); + + memcpy(adaptive->skip, probs->skip, sizeof(adaptive->skip)); + + mv = &adaptive->mv; + + memcpy(mv->joint, probs->mv.joint, sizeof(mv->joint)); + memcpy(mv->sign, probs->mv.sign, sizeof(mv->sign)); + memcpy(mv->class0_bit, probs->mv.class0_bit, sizeof(mv->class0_bit)); + memcpy(mv->fr, probs->mv.fr, sizeof(mv->fr)); + memcpy(mv->class0_hp, probs->mv.class0_hp, sizeof(mv->class0_hp)); + memcpy(mv->hp, probs->mv.hp, sizeof(mv->hp)); + memcpy(mv->classes, probs->mv.classes, sizeof(mv->classes)); + memcpy(mv->class0_fr, probs->mv.class0_fr, sizeof(mv->class0_fr)); + memcpy(mv->bits, probs->mv.bits, sizeof(mv->bits)); + + memcpy(adaptive->single_ref, probs->single_ref, sizeof(adaptive->single_ref)); + + memcpy(adaptive->comp_ref, probs->comp_ref, sizeof(adaptive->comp_ref)); + + for (i = 0; i < ARRAY_SIZE(adaptive->coef); ++i) + for (j = 0; j < ARRAY_SIZE(adaptive->coef[0]); ++j) + for (k = 0; k < ARRAY_SIZE(adaptive->coef[0][0]); ++k) + for (l = 0; l < ARRAY_SIZE(adaptive->coef[0][0][0]); ++l) + INNER_LOOP; + + hantro_write_addr(ctx->dev, G2_VP9_PROBS_ADDR, misc->dma); +} + +static void config_counts(struct hantro_ctx *ctx) +{ + struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec; + struct hantro_aux_buf *misc = &vp9_dec->misc; + dma_addr_t addr = misc->dma + vp9_dec->ctx_counters_offset; + + hantro_write_addr(ctx->dev, G2_VP9_CTX_COUNT_ADDR, addr); +} + +static void config_seg_map(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp9_frame *dec_params, + bool intra_only, bool update_map) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + struct hantro_aux_buf *segment_map = &vp9_ctx->segment_map; + dma_addr_t addr; + + if (intra_only || + (dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT)) { + memset(segment_map->cpu, 0, segment_map->size); + memset(vp9_ctx->feature_data, 0, sizeof(vp9_ctx->feature_data)); + memset(vp9_ctx->feature_enabled, 0, sizeof(vp9_ctx->feature_enabled)); + } + + addr = segment_map->dma + vp9_ctx->active_segment * vp9_ctx->segment_map_size; + hantro_write_addr(ctx->dev, G2_VP9_SEGMENT_READ_ADDR, addr); + + addr = segment_map->dma + (1 - vp9_ctx->active_segment) * vp9_ctx->segment_map_size; + hantro_write_addr(ctx->dev, G2_VP9_SEGMENT_WRITE_ADDR, addr); + + if (update_map) + vp9_ctx->active_segment = 1 - vp9_ctx->active_segment; +} + +static void +config_source(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params, + struct vb2_v4l2_buffer *vb2_src) +{ + dma_addr_t stream_base, tmp_addr; + unsigned int headres_size; + u32 src_len, start_bit, src_buf_len; + + headres_size = dec_params->uncompressed_header_size + + dec_params->compressed_header_size; + + stream_base = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0); + + tmp_addr = stream_base + headres_size; + if (ctx->dev->variant->legacy_regs) + hantro_write_addr(ctx->dev, G2_STREAM_ADDR, (tmp_addr & ~0xf)); + else + hantro_write_addr(ctx->dev, G2_STREAM_ADDR, stream_base); + + start_bit = (tmp_addr & 0xf) * 8; + hantro_reg_write(ctx->dev, &g2_start_bit, start_bit); + + src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0); + src_len += start_bit / 8 - headres_size; + hantro_reg_write(ctx->dev, &g2_stream_len, src_len); + + if (!ctx->dev->variant->legacy_regs) { + tmp_addr &= ~0xf; + hantro_reg_write(ctx->dev, &g2_strm_start_offset, tmp_addr - stream_base); + src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0); + hantro_reg_write(ctx->dev, &g2_strm_buffer_len, src_buf_len); + } +} + +static void +config_registers(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params, + struct vb2_v4l2_buffer *vb2_src, struct vb2_v4l2_buffer *vb2_dst) +{ + struct hantro_decoded_buffer *dst, *last, *mv_ref; + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + const struct v4l2_vp9_segmentation *seg; + bool intra_only, resolution_change; + + /* vp9 stuff */ + dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf); + + if (vp9_ctx->last.valid) + last = get_ref_buf(ctx, &dst->base.vb, vp9_ctx->last.timestamp); + else + last = dst; + + update_dec_buf_info(dst, dec_params); + update_ctx_cur_info(vp9_ctx, dst, dec_params); + seg = &dec_params->seg; + + intra_only = !!(dec_params->flags & + (V4L2_VP9_FRAME_FLAG_KEY_FRAME | + V4L2_VP9_FRAME_FLAG_INTRA_ONLY)); + + if (!intra_only && + !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) && + vp9_ctx->last.valid) + mv_ref = last; + else + mv_ref = dst; + + resolution_change = dst->vp9.width != last->vp9.width || + dst->vp9.height != last->vp9.height; + + /* configure basic registers */ + hantro_reg_write(ctx->dev, &g2_mode, VP9_DEC_MODE); + if (!ctx->dev->variant->legacy_regs) { + hantro_reg_write(ctx->dev, &g2_strm_swap, 0xf); + hantro_reg_write(ctx->dev, &g2_dirmv_swap, 0xf); + hantro_reg_write(ctx->dev, &g2_compress_swap, 0xf); + hantro_reg_write(ctx->dev, &g2_ref_compress_bypass, 1); + } else { + hantro_reg_write(ctx->dev, &g2_strm_swap_old, 0x1f); + hantro_reg_write(ctx->dev, &g2_pic_swap, 0x10); + hantro_reg_write(ctx->dev, &g2_dirmv_swap_old, 0x10); + hantro_reg_write(ctx->dev, &g2_tab0_swap_old, 0x10); + hantro_reg_write(ctx->dev, &g2_tab1_swap_old, 0x10); + hantro_reg_write(ctx->dev, &g2_tab2_swap_old, 0x10); + hantro_reg_write(ctx->dev, &g2_tab3_swap_old, 0x10); + hantro_reg_write(ctx->dev, &g2_rscan_swap, 0x10); + } + hantro_reg_write(ctx->dev, &g2_buswidth, BUS_WIDTH_128); + hantro_reg_write(ctx->dev, &g2_max_burst, 16); + hantro_reg_write(ctx->dev, &g2_apf_threshold, 8); + hantro_reg_write(ctx->dev, &g2_clk_gate_e, 1); + hantro_reg_write(ctx->dev, &g2_max_cb_size, 6); + hantro_reg_write(ctx->dev, &g2_min_cb_size, 3); + if (ctx->dev->variant->double_buffer) + hantro_reg_write(ctx->dev, &g2_double_buffer_e, 1); + + config_output(ctx, dst, dec_params); + + if (!intra_only) + config_ref_registers(ctx, dec_params, dst, mv_ref); + + config_tiles(ctx, dec_params, dst); + config_segment(ctx, dec_params); + config_loop_filter(ctx, dec_params); + config_picture_dimensions(ctx, dst); + config_bit_depth(ctx, dec_params); + config_quant(ctx, dec_params); + config_others(ctx, dec_params, intra_only, resolution_change); + config_compound_reference(ctx, dec_params); + config_probs(ctx, dec_params); + config_counts(ctx); + config_seg_map(ctx, dec_params, intra_only, + seg->flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP); + config_source(ctx, dec_params, vb2_src); +} + +int hantro_g2_vp9_dec_run(struct hantro_ctx *ctx) +{ + const struct v4l2_ctrl_vp9_frame *decode_params; + struct vb2_v4l2_buffer *src; + struct vb2_v4l2_buffer *dst; + int ret; + + hantro_g2_check_idle(ctx->dev); + + ret = start_prepare_run(ctx, &decode_params); + if (ret) { + hantro_end_prepare_run(ctx); + return ret; + } + + src = hantro_get_src_buf(ctx); + dst = hantro_get_dst_buf(ctx); + + config_registers(ctx, decode_params, src, dst); + + hantro_end_prepare_run(ctx); + + vdpu_write(ctx->dev, G2_REG_INTERRUPT_DEC_E, G2_REG_INTERRUPT); + + return 0; +} + +#define copy_tx_and_skip(p1, p2) \ +do { \ + memcpy((p1)->tx8, (p2)->tx8, sizeof((p1)->tx8)); \ + memcpy((p1)->tx16, (p2)->tx16, sizeof((p1)->tx16)); \ + memcpy((p1)->tx32, (p2)->tx32, sizeof((p1)->tx32)); \ + memcpy((p1)->skip, (p2)->skip, sizeof((p1)->skip)); \ +} while (0) + +void hantro_g2_vp9_dec_done(struct hantro_ctx *ctx) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + unsigned int fctx_idx; + + if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX)) + goto out_update_last; + + fctx_idx = vp9_ctx->cur.frame_context_idx; + + if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE)) { + /* error_resilient_mode == 0 && frame_parallel_decoding_mode == 0 */ + struct v4l2_vp9_frame_context *probs = &vp9_ctx->probability_tables; + bool frame_is_intra = vp9_ctx->cur.flags & + (V4L2_VP9_FRAME_FLAG_KEY_FRAME | V4L2_VP9_FRAME_FLAG_INTRA_ONLY); + struct tx_and_skip { + u8 tx8[2][1]; + u8 tx16[2][2]; + u8 tx32[2][3]; + u8 skip[3]; + } _tx_skip, *tx_skip = &_tx_skip; + struct v4l2_vp9_frame_symbol_counts *counts; + struct symbol_counts *hantro_cnts; + u32 tx16p[2][4]; + int i; + + /* buffer the forward-updated TX and skip probs */ + if (frame_is_intra) + copy_tx_and_skip(tx_skip, probs); + + /* 6.1.2 refresh_probs(): load_probs() and load_probs2() */ + *probs = vp9_ctx->frame_context[fctx_idx]; + + /* if FrameIsIntra then undo the effect of load_probs2() */ + if (frame_is_intra) + copy_tx_and_skip(probs, tx_skip); + + counts = &vp9_ctx->cnts; + hantro_cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset; + for (i = 0; i < ARRAY_SIZE(tx16p); ++i) { + memcpy(tx16p[i], + hantro_cnts->tx16x16_count[i], + sizeof(hantro_cnts->tx16x16_count[0])); + tx16p[i][3] = 0; + } + counts->tx16p = &tx16p; + + v4l2_vp9_adapt_coef_probs(probs, counts, + !vp9_ctx->last.valid || + vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME, + frame_is_intra); + + if (!frame_is_intra) { + /* load_probs2() already done */ + u32 mv_mode[7][4]; + + for (i = 0; i < ARRAY_SIZE(mv_mode); ++i) { + mv_mode[i][0] = hantro_cnts->inter_mode_counts[i][1][0]; + mv_mode[i][1] = hantro_cnts->inter_mode_counts[i][2][0]; + mv_mode[i][2] = hantro_cnts->inter_mode_counts[i][0][0]; + mv_mode[i][3] = hantro_cnts->inter_mode_counts[i][2][1]; + } + counts->mv_mode = &mv_mode; + v4l2_vp9_adapt_noncoef_probs(&vp9_ctx->probability_tables, counts, + vp9_ctx->cur.reference_mode, + vp9_ctx->cur.interpolation_filter, + vp9_ctx->cur.tx_mode, vp9_ctx->cur.flags); + } + } + + vp9_ctx->frame_context[fctx_idx] = vp9_ctx->probability_tables; + +out_update_last: + vp9_ctx->last = vp9_ctx->cur; +} diff --git a/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c b/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c new file mode 100644 index 000000000..12d69503d --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + */ + +#include <asm/unaligned.h> +#include <media/v4l2-mem2mem.h> +#include "hantro_jpeg.h" +#include "hantro.h" +#include "hantro_v4l2.h" +#include "hantro_hw.h" +#include "hantro_h1_regs.h" + +#define H1_JPEG_QUANT_TABLE_COUNT 16 + +static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu, + struct hantro_ctx *ctx) +{ + u32 overfill_r, overfill_b; + u32 reg; + + /* + * The format width and height are already macroblock aligned + * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination + * format width and height can be further modified by + * .vidioc_s_selection(), and the width is 4-aligned. + */ + overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width; + overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height; + + reg = H1_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width) + | H1_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4) + | H1_REG_IN_IMG_CTRL_OVRFLB(overfill_b) + | H1_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt); + vepu_write_relaxed(vpu, reg, H1_REG_IN_IMG_CTRL); +} + +static void hantro_h1_jpeg_enc_set_buffers(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + struct vb2_buffer *src_buf, + struct vb2_buffer *dst_buf) +{ + struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt; + dma_addr_t src[3]; + u32 size_left; + + size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size; + if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size)) + size_left = 0; + + WARN_ON(pix_fmt->num_planes > 3); + + vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) + + ctx->vpu_dst_fmt->header_size, + H1_REG_ADDR_OUTPUT_STREAM); + vepu_write_relaxed(vpu, size_left, H1_REG_STR_BUF_LIMIT); + + if (pix_fmt->num_planes == 1) { + src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0); + /* single plane formats we supported are all interlaced */ + vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0); + } else if (pix_fmt->num_planes == 2) { + src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0); + src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1); + vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0); + vepu_write_relaxed(vpu, src[1], H1_REG_ADDR_IN_PLANE_1); + } else { + src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0); + src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1); + src[2] = vb2_dma_contig_plane_dma_addr(src_buf, 2); + vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0); + vepu_write_relaxed(vpu, src[1], H1_REG_ADDR_IN_PLANE_1); + vepu_write_relaxed(vpu, src[2], H1_REG_ADDR_IN_PLANE_2); + } +} + +static void +hantro_h1_jpeg_enc_set_qtable(struct hantro_dev *vpu, + unsigned char *luma_qtable, + unsigned char *chroma_qtable) +{ + u32 reg, i; + __be32 *luma_qtable_p; + __be32 *chroma_qtable_p; + + luma_qtable_p = (__be32 *)luma_qtable; + chroma_qtable_p = (__be32 *)chroma_qtable; + + /* + * Quantization table registers must be written in contiguous blocks. + * DO NOT collapse the below two "for" loops into one. + */ + for (i = 0; i < H1_JPEG_QUANT_TABLE_COUNT; i++) { + reg = get_unaligned_be32(&luma_qtable_p[i]); + vepu_write_relaxed(vpu, reg, H1_REG_JPEG_LUMA_QUAT(i)); + } + + for (i = 0; i < H1_JPEG_QUANT_TABLE_COUNT; i++) { + reg = get_unaligned_be32(&chroma_qtable_p[i]); + vepu_write_relaxed(vpu, reg, H1_REG_JPEG_CHROMA_QUAT(i)); + } +} + +int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + struct hantro_jpeg_ctx jpeg_ctx; + u32 reg; + + src_buf = hantro_get_src_buf(ctx); + dst_buf = hantro_get_dst_buf(ctx); + + hantro_start_prepare_run(ctx); + + memset(&jpeg_ctx, 0, sizeof(jpeg_ctx)); + jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0); + jpeg_ctx.width = ctx->dst_fmt.width; + jpeg_ctx.height = ctx->dst_fmt.height; + jpeg_ctx.quality = ctx->jpeg_quality; + hantro_jpeg_header_assemble(&jpeg_ctx); + + /* Switch to JPEG encoder mode before writing registers */ + vepu_write_relaxed(vpu, H1_REG_ENC_CTRL_ENC_MODE_JPEG, + H1_REG_ENC_CTRL); + + hantro_h1_set_src_img_ctrl(vpu, ctx); + hantro_h1_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf, + &dst_buf->vb2_buf); + hantro_h1_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable, + jpeg_ctx.hw_chroma_qtable); + + reg = H1_REG_AXI_CTRL_OUTPUT_SWAP16 + | H1_REG_AXI_CTRL_INPUT_SWAP16 + | H1_REG_AXI_CTRL_BURST_LEN(16) + | H1_REG_AXI_CTRL_OUTPUT_SWAP32 + | H1_REG_AXI_CTRL_INPUT_SWAP32 + | H1_REG_AXI_CTRL_OUTPUT_SWAP8 + | H1_REG_AXI_CTRL_INPUT_SWAP8; + /* Make sure that all registers are written at this point. */ + vepu_write(vpu, reg, H1_REG_AXI_CTRL); + + reg = H1_REG_ENC_CTRL_WIDTH(MB_WIDTH(ctx->src_fmt.width)) + | H1_REG_ENC_CTRL_HEIGHT(MB_HEIGHT(ctx->src_fmt.height)) + | H1_REG_ENC_CTRL_ENC_MODE_JPEG + | H1_REG_ENC_PIC_INTRA + | H1_REG_ENC_CTRL_EN_BIT; + + hantro_end_prepare_run(ctx); + + vepu_write(vpu, reg, H1_REG_ENC_CTRL); + + return 0; +} + +void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + u32 bytesused = vepu_read(vpu, H1_REG_STR_BUF_LIMIT) / 8; + struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx); + + vb2_set_plane_payload(&dst_buf->vb2_buf, 0, + ctx->vpu_dst_fmt->header_size + bytesused); +} diff --git a/drivers/media/platform/verisilicon/hantro_h1_regs.h b/drivers/media/platform/verisilicon/hantro_h1_regs.h new file mode 100644 index 000000000..30e7e7b92 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_h1_regs.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hantro VPU codec driver + * + * Copyright 2018 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + */ + +#ifndef HANTRO_H1_REGS_H_ +#define HANTRO_H1_REGS_H_ + +/* Encoder registers. */ +#define H1_REG_INTERRUPT 0x004 +#define H1_REG_INTERRUPT_FRAME_RDY BIT(2) +#define H1_REG_INTERRUPT_DIS_BIT BIT(1) +#define H1_REG_INTERRUPT_BIT BIT(0) +#define H1_REG_AXI_CTRL 0x008 +#define H1_REG_AXI_CTRL_OUTPUT_SWAP16 BIT(15) +#define H1_REG_AXI_CTRL_INPUT_SWAP16 BIT(14) +#define H1_REG_AXI_CTRL_BURST_LEN(x) ((x) << 8) +#define H1_REG_AXI_CTRL_GATE_BIT BIT(4) +#define H1_REG_AXI_CTRL_OUTPUT_SWAP32 BIT(3) +#define H1_REG_AXI_CTRL_INPUT_SWAP32 BIT(2) +#define H1_REG_AXI_CTRL_OUTPUT_SWAP8 BIT(1) +#define H1_REG_AXI_CTRL_INPUT_SWAP8 BIT(0) +#define H1_REG_ADDR_OUTPUT_STREAM 0x014 +#define H1_REG_ADDR_OUTPUT_CTRL 0x018 +#define H1_REG_ADDR_REF_LUMA 0x01c +#define H1_REG_ADDR_REF_CHROMA 0x020 +#define H1_REG_ADDR_REC_LUMA 0x024 +#define H1_REG_ADDR_REC_CHROMA 0x028 +#define H1_REG_ADDR_IN_PLANE_0 0x02c +#define H1_REG_ADDR_IN_PLANE_1 0x030 +#define H1_REG_ADDR_IN_PLANE_2 0x034 +#define H1_REG_ENC_CTRL 0x038 +#define H1_REG_ENC_CTRL_TIMEOUT_EN BIT(31) +#define H1_REG_ENC_CTRL_NAL_MODE_BIT BIT(29) +#define H1_REG_ENC_CTRL_WIDTH(w) ((w) << 19) +#define H1_REG_ENC_CTRL_HEIGHT(h) ((h) << 10) +#define H1_REG_ENC_PIC_INTER (0x0 << 3) +#define H1_REG_ENC_PIC_INTRA (0x1 << 3) +#define H1_REG_ENC_PIC_MVCINTER (0x2 << 3) +#define H1_REG_ENC_CTRL_ENC_MODE_H264 (0x3 << 1) +#define H1_REG_ENC_CTRL_ENC_MODE_JPEG (0x2 << 1) +#define H1_REG_ENC_CTRL_ENC_MODE_VP8 (0x1 << 1) +#define H1_REG_ENC_CTRL_EN_BIT BIT(0) +#define H1_REG_IN_IMG_CTRL 0x03c +#define H1_REG_IN_IMG_CTRL_ROW_LEN(x) ((x) << 12) +#define H1_REG_IN_IMG_CTRL_OVRFLR_D4(x) ((x) << 10) +#define H1_REG_IN_IMG_CTRL_OVRFLB(x) ((x) << 6) +#define H1_REG_IN_IMG_CTRL_FMT(x) ((x) << 2) +#define H1_REG_ENC_CTRL0 0x040 +#define H1_REG_ENC_CTRL0_INIT_QP(x) ((x) << 26) +#define H1_REG_ENC_CTRL0_SLICE_ALPHA(x) ((x) << 22) +#define H1_REG_ENC_CTRL0_SLICE_BETA(x) ((x) << 18) +#define H1_REG_ENC_CTRL0_CHROMA_QP_OFFSET(x) ((x) << 13) +#define H1_REG_ENC_CTRL0_FILTER_DIS(x) ((x) << 5) +#define H1_REG_ENC_CTRL0_IDR_PICID(x) ((x) << 1) +#define H1_REG_ENC_CTRL0_CONSTR_INTRA_PRED BIT(0) +#define H1_REG_ENC_CTRL1 0x044 +#define H1_REG_ENC_CTRL1_PPS_ID(x) ((x) << 24) +#define H1_REG_ENC_CTRL1_INTRA_PRED_MODE(x) ((x) << 16) +#define H1_REG_ENC_CTRL1_FRAME_NUM(x) ((x)) +#define H1_REG_ENC_CTRL2 0x048 +#define H1_REG_ENC_CTRL2_DEBLOCKING_FILETER_MODE(x) ((x) << 30) +#define H1_REG_ENC_CTRL2_H264_SLICE_SIZE(x) ((x) << 23) +#define H1_REG_ENC_CTRL2_DISABLE_QUARTER_PIXMV BIT(22) +#define H1_REG_ENC_CTRL2_TRANS8X8_MODE_EN BIT(21) +#define H1_REG_ENC_CTRL2_CABAC_INIT_IDC(x) ((x) << 19) +#define H1_REG_ENC_CTRL2_ENTROPY_CODING_MODE BIT(18) +#define H1_REG_ENC_CTRL2_H264_INTER4X4_MODE BIT(17) +#define H1_REG_ENC_CTRL2_H264_STREAM_MODE BIT(16) +#define H1_REG_ENC_CTRL2_INTRA16X16_MODE(x) ((x)) +#define H1_REG_ENC_CTRL3 0x04c +#define H1_REG_ENC_CTRL3_MUTIMV_EN BIT(30) +#define H1_REG_ENC_CTRL3_MV_PENALTY_1_4P(x) ((x) << 20) +#define H1_REG_ENC_CTRL3_MV_PENALTY_4P(x) ((x) << 10) +#define H1_REG_ENC_CTRL3_MV_PENALTY_1P(x) ((x)) +#define H1_REG_ENC_CTRL4 0x050 +#define H1_REG_ENC_CTRL4_MV_PENALTY_16X8_8X16(x) ((x) << 20) +#define H1_REG_ENC_CTRL4_MV_PENALTY_8X8(x) ((x) << 10) +#define H1_REG_ENC_CTRL4_8X4_4X8(x) ((x)) +#define H1_REG_ENC_CTRL5 0x054 +#define H1_REG_ENC_CTRL5_MACROBLOCK_PENALTY(x) ((x) << 24) +#define H1_REG_ENC_CTRL5_COMPLETE_SLICES(x) ((x) << 16) +#define H1_REG_ENC_CTRL5_INTER_MODE(x) ((x)) +#define H1_REG_STR_HDR_REM_MSB 0x058 +#define H1_REG_STR_HDR_REM_LSB 0x05c +#define H1_REG_STR_BUF_LIMIT 0x060 +#define H1_REG_MAD_CTRL 0x064 +#define H1_REG_MAD_CTRL_QP_ADJUST(x) ((x) << 28) +#define H1_REG_MAD_CTRL_MAD_THREDHOLD(x) ((x) << 22) +#define H1_REG_MAD_CTRL_QP_SUM_DIV2(x) ((x)) +#define H1_REG_ADDR_VP8_PROB_CNT 0x068 +#define H1_REG_QP_VAL 0x06c +#define H1_REG_QP_VAL_LUM(x) ((x) << 26) +#define H1_REG_QP_VAL_MAX(x) ((x) << 20) +#define H1_REG_QP_VAL_MIN(x) ((x) << 14) +#define H1_REG_QP_VAL_CHECKPOINT_DISTAN(x) ((x)) +#define H1_REG_VP8_QP_VAL(i) (0x06c + ((i) * 0x4)) +#define H1_REG_CHECKPOINT(i) (0x070 + ((i) * 0x4)) +#define H1_REG_CHECKPOINT_CHECK0(x) (((x) & 0xffff)) +#define H1_REG_CHECKPOINT_CHECK1(x) (((x) & 0xffff) << 16) +#define H1_REG_CHECKPOINT_RESULT(x) ((((x) >> (16 - 16 \ + * (i & 1))) & 0xffff) \ + * 32) +#define H1_REG_CHKPT_WORD_ERR(i) (0x084 + ((i) * 0x4)) +#define H1_REG_CHKPT_WORD_ERR_CHK0(x) (((x) & 0xffff)) +#define H1_REG_CHKPT_WORD_ERR_CHK1(x) (((x) & 0xffff) << 16) +#define H1_REG_VP8_BOOL_ENC 0x08c +#define H1_REG_CHKPT_DELTA_QP 0x090 +#define H1_REG_CHKPT_DELTA_QP_CHK0(x) (((x) & 0x0f) << 0) +#define H1_REG_CHKPT_DELTA_QP_CHK1(x) (((x) & 0x0f) << 4) +#define H1_REG_CHKPT_DELTA_QP_CHK2(x) (((x) & 0x0f) << 8) +#define H1_REG_CHKPT_DELTA_QP_CHK3(x) (((x) & 0x0f) << 12) +#define H1_REG_CHKPT_DELTA_QP_CHK4(x) (((x) & 0x0f) << 16) +#define H1_REG_CHKPT_DELTA_QP_CHK5(x) (((x) & 0x0f) << 20) +#define H1_REG_CHKPT_DELTA_QP_CHK6(x) (((x) & 0x0f) << 24) +#define H1_REG_VP8_CTRL0 0x090 +#define H1_REG_RLC_CTRL 0x094 +#define H1_REG_RLC_CTRL_STR_OFFS_SHIFT 23 +#define H1_REG_RLC_CTRL_STR_OFFS_MASK (0x3f << 23) +#define H1_REG_RLC_CTRL_RLC_SUM(x) ((x)) +#define H1_REG_MB_CTRL 0x098 +#define H1_REG_MB_CNT_OUT(x) (((x) & 0xffff)) +#define H1_REG_MB_CNT_SET(x) (((x) & 0xffff) << 16) +#define H1_REG_ADDR_NEXT_PIC 0x09c +#define H1_REG_JPEG_LUMA_QUAT(i) (0x100 + ((i) * 0x4)) +#define H1_REG_JPEG_CHROMA_QUAT(i) (0x140 + ((i) * 0x4)) +#define H1_REG_STABILIZATION_OUTPUT 0x0A0 +#define H1_REG_ADDR_CABAC_TBL 0x0cc +#define H1_REG_ADDR_MV_OUT 0x0d0 +#define H1_REG_RGB_YUV_COEFF(i) (0x0d4 + ((i) * 0x4)) +#define H1_REG_RGB_MASK_MSB 0x0dc +#define H1_REG_INTRA_AREA_CTRL 0x0e0 +#define H1_REG_CIR_INTRA_CTRL 0x0e4 +#define H1_REG_INTRA_SLICE_BITMAP(i) (0x0e8 + ((i) * 0x4)) +#define H1_REG_ADDR_VP8_DCT_PART(i) (0x0e8 + ((i) * 0x4)) +#define H1_REG_FIRST_ROI_AREA 0x0f0 +#define H1_REG_SECOND_ROI_AREA 0x0f4 +#define H1_REG_MVC_CTRL 0x0f8 +#define H1_REG_MVC_CTRL_MV16X16_FAVOR(x) ((x) << 28) +#define H1_REG_VP8_INTRA_PENALTY(i) (0x100 + ((i) * 0x4)) +#define H1_REG_ADDR_VP8_SEG_MAP 0x11c +#define H1_REG_VP8_SEG_QP(i) (0x120 + ((i) * 0x4)) +#define H1_REG_DMV_4P_1P_PENALTY(i) (0x180 + ((i) * 0x4)) +#define H1_REG_DMV_4P_1P_PENALTY_BIT(x, i) ((x) << (i) * 8) +#define H1_REG_DMV_QPEL_PENALTY(i) (0x200 + ((i) * 0x4)) +#define H1_REG_DMV_QPEL_PENALTY_BIT(x, i) ((x) << (i) * 8) +#define H1_REG_VP8_CTRL1 0x280 +#define H1_REG_VP8_BIT_COST_GOLDEN 0x284 +#define H1_REG_VP8_LOOP_FLT_DELTA(i) (0x288 + ((i) * 0x4)) + +#endif /* HANTRO_H1_REGS_H_ */ diff --git a/drivers/media/platform/verisilicon/hantro_h264.c b/drivers/media/platform/verisilicon/hantro_h264.c new file mode 100644 index 000000000..4e9a0ecf5 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_h264.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Rockchip RK3288 VPU codec driver + * + * Copyright (c) 2014 Rockchip Electronics Co., Ltd. + * Hertz Wong <hertz.wong@rock-chips.com> + * Herman Chen <herman.chen@rock-chips.com> + * + * Copyright (C) 2014 Google, Inc. + * Tomasz Figa <tfiga@chromium.org> + */ + +#include <linux/types.h> +#include <media/v4l2-h264.h> +#include <media/v4l2-mem2mem.h> + +#include "hantro.h" +#include "hantro_hw.h" + +/* Size with u32 units. */ +#define CABAC_INIT_BUFFER_SIZE (460 * 2) +#define POC_BUFFER_SIZE 34 +#define SCALING_LIST_SIZE (6 * 16 + 2 * 64) + +/* + * For valid and long term reference marking, index are reversed, so bit 31 + * indicates the status of the picture 0. + */ +#define REF_BIT(i) BIT(32 - 1 - (i)) + +/* Data structure describing auxiliary buffer format. */ +struct hantro_h264_dec_priv_tbl { + u32 cabac_table[CABAC_INIT_BUFFER_SIZE]; + u32 poc[POC_BUFFER_SIZE]; + u8 scaling_list[SCALING_LIST_SIZE]; +}; + +/* + * Constant CABAC table. + * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c + * in https://chromium.googlesource.com/chromiumos/third_party/kernel, + * chromeos-3.14 branch. + */ +static const u32 h264_cabac_table[] = { + 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137, + 0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72, + 0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a, + 0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d, + 0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e, + 0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13, + 0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357, + 0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47, + 0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09, + 0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e, + 0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37, + 0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4, + 0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8, + 0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47, + 0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27, + 0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41, + 0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360, + 0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261, + 0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a, + 0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424, + 0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955, + 0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f, + 0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37, + 0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17, + 0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32, + 0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0, + 0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00, + 0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d, + 0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259, + 0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1, + 0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37, + 0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256, + 0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03, + 0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, + 0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541, + 0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68, + 0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637, + 0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a, + 0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, + 0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053, + 0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39, + 0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45, + 0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f, + 0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24, + 0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038, + 0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f, + 0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e, + 0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e, + 0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24, + 0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000, + 0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b, + 0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940, + 0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852, + 0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a, + 0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f, + 0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228, + 0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e, + 0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943, + 0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e, + 0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f, + 0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28, + 0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe, + 0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6, + 0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00, + 0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f, + 0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728, + 0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947, + 0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41, + 0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923, + 0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951, + 0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3, + 0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1, + 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429, + 0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902, + 0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b, + 0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51, + 0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f, + 0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60, + 0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e, + 0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737, + 0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e, + 0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848, + 0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d, + 0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546, + 0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e, + 0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f, + 0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb, + 0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7, + 0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12, + 0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d, + 0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42, + 0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b, + 0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a, + 0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704, + 0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e, + 0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f, + 0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045, + 0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42, + 0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25, + 0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa, + 0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef, + 0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a, + 0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4, + 0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15, + 0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920, + 0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743, + 0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, + 0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952, + 0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d, + 0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39, + 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10, + 0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39, + 0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539, + 0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f, + 0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c, + 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758, + 0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a, + 0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b, + 0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52, + 0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a, + 0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934, + 0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b, + 0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51, + 0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a, + 0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a, + 0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d, + 0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311, + 0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24, + 0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873, + 0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443, + 0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946, + 0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753, + 0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657, + 0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178, + 0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d, + 0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240, + 0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46, + 0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d, + 0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8, + 0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f, + 0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f, + 0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a, + 0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b, + 0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447, + 0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, + 0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b, + 0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46, + 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107, + 0x1f0c2517, 0x1f261440 +}; + +static void +assemble_scaling_list(struct hantro_ctx *ctx) +{ + const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; + const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling; + const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; + const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4); + const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]); + const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]); + struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu; + u32 *dst = (u32 *)tbl->scaling_list; + const u32 *src; + int i, j; + + if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) + return; + + for (i = 0; i < num_list_4x4; i++) { + src = (u32 *)&scaling->scaling_list_4x4[i]; + for (j = 0; j < list_len_4x4 / 4; j++) + *dst++ = swab32(src[j]); + } + + /* Only Intra/Inter Y lists */ + for (i = 0; i < 2; i++) { + src = (u32 *)&scaling->scaling_list_8x8[i]; + for (j = 0; j < list_len_8x8 / 4; j++) + *dst++ = swab32(src[j]); + } +} + +static void prepare_table(struct hantro_ctx *ctx) +{ + const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; + const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; + const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; + struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu; + const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; + u32 dpb_longterm = 0; + u32 dpb_valid = 0; + int i; + + for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { + tbl->poc[i * 2] = dpb[i].top_field_order_cnt; + tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt; + + if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) + continue; + + /* + * Set up bit maps of valid and long term DPBs. + * NOTE: The bits are reversed, i.e. MSb is DPB 0. For frame + * decoding, bit 31 to 15 are used, while for field decoding, + * all bits are used, with bit 31 being a top field, 30 a bottom + * field and so on. + */ + if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) { + if (dpb[i].fields & V4L2_H264_TOP_FIELD_REF) + dpb_valid |= REF_BIT(i * 2); + + if (dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF) + dpb_valid |= REF_BIT(i * 2 + 1); + + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) { + dpb_longterm |= REF_BIT(i * 2); + dpb_longterm |= REF_BIT(i * 2 + 1); + } + } else { + dpb_valid |= REF_BIT(i); + + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) + dpb_longterm |= REF_BIT(i); + } + } + ctx->h264_dec.dpb_valid = dpb_valid; + ctx->h264_dec.dpb_longterm = dpb_longterm; + + if ((dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) || + !(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) { + tbl->poc[32] = ctx->h264_dec.cur_poc; + tbl->poc[33] = 0; + } else { + tbl->poc[32] = dec_param->top_field_order_cnt; + tbl->poc[33] = dec_param->bottom_field_order_cnt; + } + + assemble_scaling_list(ctx); +} + +static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a, + const struct v4l2_h264_dpb_entry *b) +{ + return a->reference_ts == b->reference_ts; +} + +static void update_dpb(struct hantro_ctx *ctx) +{ + const struct v4l2_ctrl_h264_decode_params *dec_param; + DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, }; + unsigned int i, j; + + dec_param = ctx->h264_dec.ctrls.decode; + + /* Disable all entries by default. */ + for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++) + ctx->h264_dec.dpb[i].flags = 0; + + /* Try to match new DPB entries with existing ones by their POCs. */ + for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) { + const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; + + if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) + continue; + + /* + * To cut off some comparisons, iterate only on target DPB + * entries which are not used yet. + */ + for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) { + struct v4l2_h264_dpb_entry *cdpb; + + cdpb = &ctx->h264_dec.dpb[j]; + if (!dpb_entry_match(cdpb, ndpb)) + continue; + + *cdpb = *ndpb; + set_bit(j, used); + break; + } + + if (j == ARRAY_SIZE(ctx->h264_dec.dpb)) + set_bit(i, new); + } + + /* For entries that could not be matched, use remaining free slots. */ + for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) { + const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; + struct v4l2_h264_dpb_entry *cdpb; + + /* + * Both arrays are of the same sizes, so there is no way + * we can end up with no space in target array, unless + * something is buggy. + */ + j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb)); + if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb))) + return; + + cdpb = &ctx->h264_dec.dpb[j]; + *cdpb = *ndpb; + set_bit(j, used); + } +} + +dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, + unsigned int dpb_idx) +{ + struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; + dma_addr_t dma_addr = 0; + s32 cur_poc = ctx->h264_dec.cur_poc; + u32 flags; + + if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) + dma_addr = hantro_get_ref(ctx, dpb[dpb_idx].reference_ts); + + if (!dma_addr) { + struct vb2_v4l2_buffer *dst_buf; + struct vb2_buffer *buf; + + /* + * If a DPB entry is unused or invalid, address of current + * destination buffer is returned. + */ + dst_buf = hantro_get_dst_buf(ctx); + buf = &dst_buf->vb2_buf; + dma_addr = hantro_get_dec_buf_addr(ctx, buf); + } + + flags = dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD ? 0x2 : 0; + flags |= abs(dpb[dpb_idx].top_field_order_cnt - cur_poc) < + abs(dpb[dpb_idx].bottom_field_order_cnt - cur_poc) ? + 0x1 : 0; + + return dma_addr | flags; +} + +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx) +{ + const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx]; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + return 0; + return dpb->frame_num; +} + +/* + * Removes all references with the same parity as the current picture from the + * reference list. The remaining list will have references with the opposite + * parity. This is effectively a deduplication of references since each buffer + * stores two fields. For this reason, each buffer is found twice in the + * reference list. + * + * This technique has been chosen through trial and error. This simple approach + * resulted in the highest conformance score. Note that this method may suffer + * worse quality in the case an opposite reference frame has been lost. If this + * becomes a problem in the future, it should be possible to add a preprocessing + * to identify un-paired fields and avoid removing them. + */ +static void deduplicate_reflist(struct v4l2_h264_reflist_builder *b, + struct v4l2_h264_reference *reflist) +{ + int write_idx = 0; + int i; + + if (b->cur_pic_fields == V4L2_H264_FRAME_REF) { + write_idx = b->num_valid; + goto done; + } + + for (i = 0; i < b->num_valid; i++) { + if (!(b->cur_pic_fields == reflist[i].fields)) { + reflist[write_idx++] = reflist[i]; + continue; + } + } + +done: + /* Should not happen unless we have a bug in the reflist builder. */ + if (WARN_ON(write_idx > 16)) + write_idx = 16; + + /* Clear the remaining, some streams fails otherwise */ + for (; write_idx < 16; write_idx++) + reflist[write_idx].index = 15; +} + +int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx) +{ + struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec; + struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls; + struct v4l2_h264_reflist_builder reflist_builder; + + hantro_start_prepare_run(ctx); + + ctrls->scaling = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); + if (WARN_ON(!ctrls->scaling)) + return -EINVAL; + + ctrls->decode = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); + if (WARN_ON(!ctrls->decode)) + return -EINVAL; + + ctrls->sps = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SPS); + if (WARN_ON(!ctrls->sps)) + return -EINVAL; + + ctrls->pps = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_PPS); + if (WARN_ON(!ctrls->pps)) + return -EINVAL; + + /* Update the DPB with new refs. */ + update_dpb(ctx); + + /* Build the P/B{0,1} ref lists. */ + v4l2_h264_init_reflist_builder(&reflist_builder, ctrls->decode, + ctrls->sps, ctx->h264_dec.dpb); + h264_ctx->cur_poc = reflist_builder.cur_pic_order_count; + + /* Prepare data in memory. */ + prepare_table(ctx); + + v4l2_h264_build_p_ref_list(&reflist_builder, h264_ctx->reflists.p); + v4l2_h264_build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0, + h264_ctx->reflists.b1); + + /* + * Reduce ref lists to at most 16 entries, Hantro hardware will deduce + * the actual picture lists in field through the dpb_valid, + * dpb_longterm bitmap along with the current frame parity. + */ + if (reflist_builder.cur_pic_fields != V4L2_H264_FRAME_REF) { + deduplicate_reflist(&reflist_builder, h264_ctx->reflists.p); + deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b0); + deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b1); + } + + return 0; +} + +void hantro_h264_dec_exit(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec; + struct hantro_aux_buf *priv = &h264_dec->priv; + + dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma); +} + +int hantro_h264_dec_init(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec; + struct hantro_aux_buf *priv = &h264_dec->priv; + struct hantro_h264_dec_priv_tbl *tbl; + + priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma, + GFP_KERNEL); + if (!priv->cpu) + return -ENOMEM; + + priv->size = sizeof(*tbl); + tbl = priv->cpu; + memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table)); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/hantro_hevc.c b/drivers/media/platform/verisilicon/hantro_hevc.c new file mode 100644 index 000000000..9383fb708 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_hevc.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU HEVC codec driver + * + * Copyright (C) 2020 Safran Passenger Innovations LLC + */ + +#include <linux/types.h> +#include <media/v4l2-mem2mem.h> + +#include "hantro.h" +#include "hantro_hw.h" + +#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */ +/* + * BSD control data of current picture at tile border + * 128 bits per 4x4 tile = 128/(8*4) bytes per row + */ +#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */ +/* tile border coefficients of filter */ +#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */ + +#define SCALING_LIST_SIZE (16 * 64) + +#define MAX_TILE_COLS 20 +#define MAX_TILE_ROWS 22 + +void hantro_hevc_ref_init(struct hantro_ctx *ctx) +{ + struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; + + hevc_dec->ref_bufs_used = 0; +} + +dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, + s32 poc) +{ + struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; + int i; + + /* Find the reference buffer in already known ones */ + for (i = 0; i < NUM_REF_PICTURES; i++) { + if (hevc_dec->ref_bufs_poc[i] == poc) { + hevc_dec->ref_bufs_used |= 1 << i; + return hevc_dec->ref_bufs[i].dma; + } + } + + return 0; +} + +int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr) +{ + struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; + int i; + + /* Add a new reference buffer */ + for (i = 0; i < NUM_REF_PICTURES; i++) { + if (!(hevc_dec->ref_bufs_used & 1 << i)) { + hevc_dec->ref_bufs_used |= 1 << i; + hevc_dec->ref_bufs_poc[i] = poc; + hevc_dec->ref_bufs[i].dma = addr; + return 0; + } + } + + return -EINVAL; +} + +static int tile_buffer_reallocate(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; + const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; + const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps; + const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; + unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1; + unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63; + unsigned int size; + + if (num_tile_cols <= 1 || + num_tile_cols <= hevc_dec->num_tile_cols_allocated) + return 0; + + /* Need to reallocate due to tiles passed via PPS */ + if (hevc_dec->tile_filter.cpu) { + dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size, + hevc_dec->tile_filter.cpu, + hevc_dec->tile_filter.dma); + hevc_dec->tile_filter.cpu = NULL; + } + + if (hevc_dec->tile_sao.cpu) { + dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size, + hevc_dec->tile_sao.cpu, + hevc_dec->tile_sao.dma); + hevc_dec->tile_sao.cpu = NULL; + } + + if (hevc_dec->tile_bsd.cpu) { + dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size, + hevc_dec->tile_bsd.cpu, + hevc_dec->tile_bsd.dma); + hevc_dec->tile_bsd.cpu = NULL; + } + + size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8; + hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size, + &hevc_dec->tile_filter.dma, + GFP_KERNEL); + if (!hevc_dec->tile_filter.cpu) + goto err_free_tile_buffers; + hevc_dec->tile_filter.size = size; + + size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8; + hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size, + &hevc_dec->tile_sao.dma, + GFP_KERNEL); + if (!hevc_dec->tile_sao.cpu) + goto err_free_tile_buffers; + hevc_dec->tile_sao.size = size; + + size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1); + hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size, + &hevc_dec->tile_bsd.dma, + GFP_KERNEL); + if (!hevc_dec->tile_bsd.cpu) + goto err_free_tile_buffers; + hevc_dec->tile_bsd.size = size; + + hevc_dec->num_tile_cols_allocated = num_tile_cols; + + return 0; + +err_free_tile_buffers: + if (hevc_dec->tile_filter.cpu) + dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size, + hevc_dec->tile_filter.cpu, + hevc_dec->tile_filter.dma); + hevc_dec->tile_filter.cpu = NULL; + + if (hevc_dec->tile_sao.cpu) + dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size, + hevc_dec->tile_sao.cpu, + hevc_dec->tile_sao.dma); + hevc_dec->tile_sao.cpu = NULL; + + if (hevc_dec->tile_bsd.cpu) + dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size, + hevc_dec->tile_bsd.cpu, + hevc_dec->tile_bsd.dma); + hevc_dec->tile_bsd.cpu = NULL; + + return -ENOMEM; +} + +static int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps) +{ + /* + * for tile pixel format check if the width and height match + * hardware constraints + */ + if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) { + if (ctx->dst_fmt.width != + ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width)) + return -EINVAL; + + if (ctx->dst_fmt.height != + ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height)) + return -EINVAL; + } + + return 0; +} + +int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx) +{ + struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec; + struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls; + int ret; + + hantro_start_prepare_run(ctx); + + ctrls->decode_params = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS); + if (WARN_ON(!ctrls->decode_params)) + return -EINVAL; + + ctrls->scaling = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); + if (WARN_ON(!ctrls->scaling)) + return -EINVAL; + + ctrls->sps = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS); + if (WARN_ON(!ctrls->sps)) + return -EINVAL; + + ret = hantro_hevc_validate_sps(ctx, ctrls->sps); + if (ret) + return ret; + + ctrls->pps = + hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS); + if (WARN_ON(!ctrls->pps)) + return -EINVAL; + + ret = tile_buffer_reallocate(ctx); + if (ret) + return ret; + + return 0; +} + +void hantro_hevc_dec_exit(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; + + if (hevc_dec->tile_sizes.cpu) + dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size, + hevc_dec->tile_sizes.cpu, + hevc_dec->tile_sizes.dma); + hevc_dec->tile_sizes.cpu = NULL; + + if (hevc_dec->scaling_lists.cpu) + dma_free_coherent(vpu->dev, hevc_dec->scaling_lists.size, + hevc_dec->scaling_lists.cpu, + hevc_dec->scaling_lists.dma); + hevc_dec->scaling_lists.cpu = NULL; + + if (hevc_dec->tile_filter.cpu) + dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size, + hevc_dec->tile_filter.cpu, + hevc_dec->tile_filter.dma); + hevc_dec->tile_filter.cpu = NULL; + + if (hevc_dec->tile_sao.cpu) + dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size, + hevc_dec->tile_sao.cpu, + hevc_dec->tile_sao.dma); + hevc_dec->tile_sao.cpu = NULL; + + if (hevc_dec->tile_bsd.cpu) + dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size, + hevc_dec->tile_bsd.cpu, + hevc_dec->tile_bsd.dma); + hevc_dec->tile_bsd.cpu = NULL; +} + +int hantro_hevc_dec_init(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; + unsigned int size; + + memset(hevc_dec, 0, sizeof(*hevc_dec)); + + /* + * Maximum number of tiles times width and height (2 bytes each), + * rounding up to next 16 bytes boundary + one extra 16 byte + * chunk (HW guys wanted to have this). + */ + size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16); + hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size, + &hevc_dec->tile_sizes.dma, + GFP_KERNEL); + if (!hevc_dec->tile_sizes.cpu) + return -ENOMEM; + + hevc_dec->tile_sizes.size = size; + + hevc_dec->scaling_lists.cpu = dma_alloc_coherent(vpu->dev, SCALING_LIST_SIZE, + &hevc_dec->scaling_lists.dma, + GFP_KERNEL); + if (!hevc_dec->scaling_lists.cpu) + return -ENOMEM; + + hevc_dec->scaling_lists.size = SCALING_LIST_SIZE; + + hantro_hevc_ref_init(ctx); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h new file mode 100644 index 000000000..e83f0c523 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_hw.h @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hantro VPU codec driver + * + * Copyright 2018 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + */ + +#ifndef HANTRO_HW_H_ +#define HANTRO_HW_H_ + +#include <linux/interrupt.h> +#include <linux/v4l2-controls.h> +#include <media/v4l2-ctrls.h> +#include <media/v4l2-vp9.h> +#include <media/videobuf2-core.h> + +#define DEC_8190_ALIGN_MASK 0x07U + +#define MB_DIM 16 +#define TILE_MB_DIM 4 +#define MB_WIDTH(w) DIV_ROUND_UP(w, MB_DIM) +#define MB_HEIGHT(h) DIV_ROUND_UP(h, MB_DIM) + +#define FMT_MIN_WIDTH 48 +#define FMT_MIN_HEIGHT 48 +#define FMT_HD_WIDTH 1280 +#define FMT_HD_HEIGHT 720 +#define FMT_FHD_WIDTH 1920 +#define FMT_FHD_HEIGHT 1088 +#define FMT_UHD_WIDTH 3840 +#define FMT_UHD_HEIGHT 2160 +#define FMT_4K_WIDTH 4096 +#define FMT_4K_HEIGHT 2304 + +#define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1) + +struct hantro_dev; +struct hantro_ctx; +struct hantro_buf; +struct hantro_variant; + +/** + * struct hantro_aux_buf - auxiliary DMA buffer for hardware data + * + * @cpu: CPU pointer to the buffer. + * @dma: DMA address of the buffer. + * @size: Size of the buffer. + * @attrs: Attributes of the DMA mapping. + */ +struct hantro_aux_buf { + void *cpu; + dma_addr_t dma; + size_t size; + unsigned long attrs; +}; + +/* Max. number of entries in the DPB (HW limitation). */ +#define HANTRO_H264_DPB_SIZE 16 + +/** + * struct hantro_h264_dec_ctrls + * + * @decode: Decode params + * @scaling: Scaling info + * @sps: SPS info + * @pps: PPS info + */ +struct hantro_h264_dec_ctrls { + const struct v4l2_ctrl_h264_decode_params *decode; + const struct v4l2_ctrl_h264_scaling_matrix *scaling; + const struct v4l2_ctrl_h264_sps *sps; + const struct v4l2_ctrl_h264_pps *pps; +}; + +/** + * struct hantro_h264_dec_reflists + * + * @p: P reflist + * @b0: B0 reflist + * @b1: B1 reflist + */ +struct hantro_h264_dec_reflists { + struct v4l2_h264_reference p[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference b0[V4L2_H264_REF_LIST_LEN]; + struct v4l2_h264_reference b1[V4L2_H264_REF_LIST_LEN]; +}; + +/** + * struct hantro_h264_dec_hw_ctx + * + * @priv: Private auxiliary buffer for hardware. + * @dpb: DPB + * @reflists: P/B0/B1 reflists + * @ctrls: V4L2 controls attached to a run + * @dpb_longterm: DPB long-term + * @dpb_valid: DPB valid + * @cur_poc: Current picture order count + */ +struct hantro_h264_dec_hw_ctx { + struct hantro_aux_buf priv; + struct v4l2_h264_dpb_entry dpb[HANTRO_H264_DPB_SIZE]; + struct hantro_h264_dec_reflists reflists; + struct hantro_h264_dec_ctrls ctrls; + u32 dpb_longterm; + u32 dpb_valid; + s32 cur_poc; +}; + +/** + * struct hantro_hevc_dec_ctrls + * @decode_params: Decode params + * @scaling: Scaling matrix + * @sps: SPS info + * @pps: PPS info + * @hevc_hdr_skip_length: the number of data (in bits) to skip in the + * slice segment header syntax after 'slice type' + * token + */ +struct hantro_hevc_dec_ctrls { + const struct v4l2_ctrl_hevc_decode_params *decode_params; + const struct v4l2_ctrl_hevc_scaling_matrix *scaling; + const struct v4l2_ctrl_hevc_sps *sps; + const struct v4l2_ctrl_hevc_pps *pps; + u32 hevc_hdr_skip_length; +}; + +/** + * struct hantro_hevc_dec_hw_ctx + * @tile_sizes: Tile sizes buffer + * @tile_filter: Tile vertical filter buffer + * @tile_sao: Tile SAO buffer + * @tile_bsd: Tile BSD control buffer + * @ref_bufs: Internal reference buffers + * @scaling_lists: Scaling lists buffer + * @ref_bufs_poc: Internal reference buffers picture order count + * @ref_bufs_used: Bitfield of used reference buffers + * @ctrls: V4L2 controls attached to a run + * @num_tile_cols_allocated: number of allocated tiles + */ +struct hantro_hevc_dec_hw_ctx { + struct hantro_aux_buf tile_sizes; + struct hantro_aux_buf tile_filter; + struct hantro_aux_buf tile_sao; + struct hantro_aux_buf tile_bsd; + struct hantro_aux_buf ref_bufs[NUM_REF_PICTURES]; + struct hantro_aux_buf scaling_lists; + s32 ref_bufs_poc[NUM_REF_PICTURES]; + u32 ref_bufs_used; + struct hantro_hevc_dec_ctrls ctrls; + unsigned int num_tile_cols_allocated; +}; + +/** + * struct hantro_mpeg2_dec_hw_ctx + * + * @qtable: Quantization table + */ +struct hantro_mpeg2_dec_hw_ctx { + struct hantro_aux_buf qtable; +}; + +/** + * struct hantro_vp8_dec_hw_ctx + * + * @segment_map: Segment map buffer. + * @prob_tbl: Probability table buffer. + */ +struct hantro_vp8_dec_hw_ctx { + struct hantro_aux_buf segment_map; + struct hantro_aux_buf prob_tbl; +}; + +/** + * struct hantro_vp9_frame_info + * + * @valid: frame info valid flag + * @frame_context_idx: index of frame context + * @reference_mode: inter prediction type + * @tx_mode: transform mode + * @interpolation_filter: filter selection for inter prediction + * @flags: frame flags + * @timestamp: frame timestamp + */ +struct hantro_vp9_frame_info { + u32 valid : 1; + u32 frame_context_idx : 2; + u32 reference_mode : 2; + u32 tx_mode : 3; + u32 interpolation_filter : 3; + u32 flags; + u64 timestamp; +}; + +#define MAX_SB_COLS 64 +#define MAX_SB_ROWS 34 + +/** + * struct hantro_vp9_dec_hw_ctx + * + * @tile_edge: auxiliary DMA buffer for tile edge processing + * @segment_map: auxiliary DMA buffer for segment map + * @misc: auxiliary DMA buffer for tile info, probabilities and hw counters + * @cnts: vp9 library struct for abstracting hw counters access + * @probability_tables: VP9 probability tables implied by the spec + * @frame_context: VP9 frame contexts + * @cur: current frame information + * @last: last frame information + * @bsd_ctrl_offset: bsd offset into tile_edge + * @segment_map_size: size of segment map + * @ctx_counters_offset: hw counters offset into misc + * @tile_info_offset: tile info offset into misc + * @tile_r_info: per-tile information array + * @tile_c_info: per-tile information array + * @last_tile_r: last number of tile rows + * @last_tile_c: last number of tile cols + * @last_sbs_r: last number of superblock rows + * @last_sbs_c: last number of superblock cols + * @active_segment: number of active segment (alternating between 0 and 1) + * @feature_enabled: segmentation feature enabled flags + * @feature_data: segmentation feature data + */ +struct hantro_vp9_dec_hw_ctx { + struct hantro_aux_buf tile_edge; + struct hantro_aux_buf segment_map; + struct hantro_aux_buf misc; + struct v4l2_vp9_frame_symbol_counts cnts; + struct v4l2_vp9_frame_context probability_tables; + struct v4l2_vp9_frame_context frame_context[4]; + struct hantro_vp9_frame_info cur; + struct hantro_vp9_frame_info last; + + unsigned int bsd_ctrl_offset; + unsigned int segment_map_size; + unsigned int ctx_counters_offset; + unsigned int tile_info_offset; + + unsigned short tile_r_info[MAX_SB_ROWS]; + unsigned short tile_c_info[MAX_SB_COLS]; + unsigned int last_tile_r; + unsigned int last_tile_c; + unsigned int last_sbs_r; + unsigned int last_sbs_c; + + unsigned int active_segment; + u8 feature_enabled[8]; + s16 feature_data[8][4]; +}; + +/** + * struct hantro_postproc_ctx + * + * @dec_q: References buffers, in decoder format. + */ +struct hantro_postproc_ctx { + struct hantro_aux_buf dec_q[VB2_MAX_FRAME]; +}; + +/** + * struct hantro_postproc_ops - post-processor operations + * + * @enable: Enable the post-processor block. Optional. + * @disable: Disable the post-processor block. Optional. + * @enum_framesizes: Enumerate possible scaled output formats. + * Returns zero if OK, a negative value in error cases. + * Optional. + */ +struct hantro_postproc_ops { + void (*enable)(struct hantro_ctx *ctx); + void (*disable)(struct hantro_ctx *ctx); + int (*enum_framesizes)(struct hantro_ctx *ctx, struct v4l2_frmsizeenum *fsize); +}; + +/** + * struct hantro_codec_ops - codec mode specific operations + * + * @init: If needed, can be used for initialization. + * Optional and called from process context. + * @exit: If needed, can be used to undo the .init phase. + * Optional and called from process context. + * @run: Start single {en,de)coding job. Called from atomic context + * to indicate that a pair of buffers is ready and the hardware + * should be programmed and started. Returns zero if OK, a + * negative value in error cases. + * @done: Read back processing results and additional data from hardware. + * @reset: Reset the hardware in case of a timeout. + */ +struct hantro_codec_ops { + int (*init)(struct hantro_ctx *ctx); + void (*exit)(struct hantro_ctx *ctx); + int (*run)(struct hantro_ctx *ctx); + void (*done)(struct hantro_ctx *ctx); + void (*reset)(struct hantro_ctx *ctx); +}; + +/** + * enum hantro_enc_fmt - source format ID for hardware registers. + * + * @ROCKCHIP_VPU_ENC_FMT_YUV420P: Y/CbCr 4:2:0 planar format + * @ROCKCHIP_VPU_ENC_FMT_YUV420SP: Y/CbCr 4:2:0 semi-planar format + * @ROCKCHIP_VPU_ENC_FMT_YUYV422: YUV 4:2:2 packed format (YUYV) + * @ROCKCHIP_VPU_ENC_FMT_UYVY422: YUV 4:2:2 packed format (UYVY) + */ +enum hantro_enc_fmt { + ROCKCHIP_VPU_ENC_FMT_YUV420P = 0, + ROCKCHIP_VPU_ENC_FMT_YUV420SP = 1, + ROCKCHIP_VPU_ENC_FMT_YUYV422 = 2, + ROCKCHIP_VPU_ENC_FMT_UYVY422 = 3, +}; + +extern const struct hantro_variant imx8mm_vpu_g1_variant; +extern const struct hantro_variant imx8mq_vpu_g1_variant; +extern const struct hantro_variant imx8mq_vpu_g2_variant; +extern const struct hantro_variant imx8mq_vpu_variant; +extern const struct hantro_variant px30_vpu_variant; +extern const struct hantro_variant rk3036_vpu_variant; +extern const struct hantro_variant rk3066_vpu_variant; +extern const struct hantro_variant rk3288_vpu_variant; +extern const struct hantro_variant rk3328_vpu_variant; +extern const struct hantro_variant rk3399_vpu_variant; +extern const struct hantro_variant rk3568_vepu_variant; +extern const struct hantro_variant rk3568_vpu_variant; +extern const struct hantro_variant sama5d4_vdec_variant; +extern const struct hantro_variant sunxi_vpu_variant; + +extern const struct hantro_postproc_ops hantro_g1_postproc_ops; +extern const struct hantro_postproc_ops hantro_g2_postproc_ops; + +extern const u32 hantro_vp8_dec_mc_filter[8][6]; + +void hantro_watchdog(struct work_struct *work); +void hantro_run(struct hantro_ctx *ctx); +void hantro_irq_done(struct hantro_dev *vpu, + enum vb2_buffer_state result); +void hantro_start_prepare_run(struct hantro_ctx *ctx); +void hantro_end_prepare_run(struct hantro_ctx *ctx); + +irqreturn_t hantro_g1_irq(int irq, void *dev_id); +void hantro_g1_reset(struct hantro_ctx *ctx); + +int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx); +int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx); +void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx); +void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx); + +dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, + unsigned int dpb_idx); +u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, + unsigned int dpb_idx); +int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx); +int rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx); +int hantro_g1_h264_dec_run(struct hantro_ctx *ctx); +int hantro_h264_dec_init(struct hantro_ctx *ctx); +void hantro_h264_dec_exit(struct hantro_ctx *ctx); + +int hantro_hevc_dec_init(struct hantro_ctx *ctx); +void hantro_hevc_dec_exit(struct hantro_ctx *ctx); +int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx); +int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx); +void hantro_hevc_ref_init(struct hantro_ctx *ctx); +dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc); +int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr); + + +static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension) +{ + return (dimension + 63) / 64; +} + +static inline size_t +hantro_vp9_mv_size(unsigned int width, unsigned int height) +{ + int num_ctbs; + + /* + * There can be up to (CTBs x 64) number of blocks, + * and the motion vector for each block needs 16 bytes. + */ + num_ctbs = hantro_vp9_num_sbs(width) * hantro_vp9_num_sbs(height); + return (num_ctbs * 64) * 16; +} + +static inline size_t +hantro_h264_mv_size(unsigned int width, unsigned int height) +{ + /* + * A decoded 8-bit 4:2:0 NV12 frame may need memory for up to + * 448 bytes per macroblock with additional 32 bytes on + * multi-core variants. + * + * The H264 decoder needs extra space on the output buffers + * to store motion vectors. This is needed for reference + * frames and only if the format is non-post-processed NV12. + * + * Memory layout is as follow: + * + * +---------------------------+ + * | Y-plane 256 bytes x MBs | + * +---------------------------+ + * | UV-plane 128 bytes x MBs | + * +---------------------------+ + * | MV buffer 64 bytes x MBs | + * +---------------------------+ + * | MC sync 32 bytes | + * +---------------------------+ + */ + return 64 * MB_WIDTH(width) * MB_WIDTH(height) + 32; +} + +static inline size_t +hantro_hevc_mv_size(unsigned int width, unsigned int height) +{ + /* + * A CTB can be 64x64, 32x32 or 16x16. + * Allocated memory for the "worse" case: 16x16 + */ + return width * height / 16; +} + +int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx); +int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx); +void hantro_mpeg2_dec_copy_qtable(u8 *qtable, + const struct v4l2_ctrl_mpeg2_quantisation *ctrl); +int hantro_mpeg2_dec_init(struct hantro_ctx *ctx); +void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx); + +int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx); +int rockchip_vpu2_vp8_dec_run(struct hantro_ctx *ctx); +int hantro_vp8_dec_init(struct hantro_ctx *ctx); +void hantro_vp8_dec_exit(struct hantro_ctx *ctx); +void hantro_vp8_prob_update(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr); + +int hantro_g2_vp9_dec_run(struct hantro_ctx *ctx); +void hantro_g2_vp9_dec_done(struct hantro_ctx *ctx); +int hantro_vp9_dec_init(struct hantro_ctx *ctx); +void hantro_vp9_dec_exit(struct hantro_ctx *ctx); +void hantro_g2_check_idle(struct hantro_dev *vpu); +irqreturn_t hantro_g2_irq(int irq, void *dev_id); + +#endif /* HANTRO_HW_H_ */ diff --git a/drivers/media/platform/verisilicon/hantro_jpeg.c b/drivers/media/platform/verisilicon/hantro_jpeg.c new file mode 100644 index 000000000..d07b1b449 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_jpeg.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) Collabora, Ltd. + * + * Based on GSPCA and CODA drivers: + * Copyright (C) Jean-Francois Moine (http://moinejf.free.fr) + * Copyright (C) 2014 Philipp Zabel, Pengutronix + */ + +#include <linux/align.h> +#include <linux/build_bug.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include "hantro_jpeg.h" +#include "hantro.h" + +#define LUMA_QUANT_OFF 25 +#define CHROMA_QUANT_OFF 90 +#define HEIGHT_OFF 159 +#define WIDTH_OFF 161 + +#define HUFF_LUMA_DC_OFF 178 +#define HUFF_LUMA_AC_OFF 211 +#define HUFF_CHROMA_DC_OFF 394 +#define HUFF_CHROMA_AC_OFF 427 + +/* Default tables from JPEG ITU-T.81 + * (ISO/IEC 10918-1) Annex K, tables K.1 and K.2 + */ +static const unsigned char luma_q_table[] = { + 0x10, 0x0b, 0x0a, 0x10, 0x18, 0x28, 0x33, 0x3d, + 0x0c, 0x0c, 0x0e, 0x13, 0x1a, 0x3a, 0x3c, 0x37, + 0x0e, 0x0d, 0x10, 0x18, 0x28, 0x39, 0x45, 0x38, + 0x0e, 0x11, 0x16, 0x1d, 0x33, 0x57, 0x50, 0x3e, + 0x12, 0x16, 0x25, 0x38, 0x44, 0x6d, 0x67, 0x4d, + 0x18, 0x23, 0x37, 0x40, 0x51, 0x68, 0x71, 0x5c, + 0x31, 0x40, 0x4e, 0x57, 0x67, 0x79, 0x78, 0x65, + 0x48, 0x5c, 0x5f, 0x62, 0x70, 0x64, 0x67, 0x63 +}; + +static const unsigned char chroma_q_table[] = { + 0x11, 0x12, 0x18, 0x2f, 0x63, 0x63, 0x63, 0x63, + 0x12, 0x15, 0x1a, 0x42, 0x63, 0x63, 0x63, 0x63, + 0x18, 0x1a, 0x38, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x2f, 0x42, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, + 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63 +}; + +static const unsigned char zigzag[] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +static const u32 hw_reorder[] = { + 0, 8, 16, 24, 1, 9, 17, 25, + 32, 40, 48, 56, 33, 41, 49, 57, + 2, 10, 18, 26, 3, 11, 19, 27, + 34, 42, 50, 58, 35, 43, 51, 59, + 4, 12, 20, 28, 5, 13, 21, 29, + 36, 44, 52, 60, 37, 45, 53, 61, + 6, 14, 22, 30, 7, 15, 23, 31, + 38, 46, 54, 62, 39, 47, 55, 63 +}; + +/* Huffman tables are shared with CODA */ +static const unsigned char luma_dc_table[] = { + 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, +}; + +static const unsigned char chroma_dc_table[] = { + 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, +}; + +static const unsigned char luma_ac_table[] = { + 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, + 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d, + 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa, +}; + +static const unsigned char chroma_ac_table[] = { + 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, + 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77, + 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa, +}; + +/* For simplicity, we keep a pre-formatted JPEG header, + * and we'll use fixed offsets to change the width, height + * quantization tables, etc. + */ +static const unsigned char hantro_jpeg_header[] = { + /* SOI */ + 0xff, 0xd8, + + /* JFIF-APP0 */ + 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, + 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x00, + + /* DQT */ + 0xff, 0xdb, 0x00, 0x84, + + 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* SOF */ + 0xff, 0xc0, 0x00, 0x11, 0x08, 0x00, 0xf0, 0x01, + 0x40, 0x03, 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, + 0x03, 0x11, 0x01, + + /* DHT */ + 0xff, 0xc4, 0x00, 0x1f, 0x00, + + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + /* DHT */ + 0xff, 0xc4, 0x00, 0xb5, 0x10, + + 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* DHT */ + 0xff, 0xc4, 0x00, 0x1f, 0x01, + + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + /* DHT */ + 0xff, 0xc4, 0x00, 0xb5, 0x11, + + 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* COM */ + 0xff, 0xfe, 0x00, 0x03, 0x00, + + /* SOS */ + 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, + 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00, +}; + +/* + * JPEG_HEADER_SIZE is used in other parts of the driver in lieu of + * "sizeof(hantro_jpeg_header)". The two must be equal. + */ +static_assert(sizeof(hantro_jpeg_header) == JPEG_HEADER_SIZE); + +/* + * hantro_jpeg_header is padded with a COM segment, so that the payload + * of the SOS segment (the entropy-encoded image scan), which should + * trail the whole header, is 8-byte aligned for the hardware to write + * to directly. + */ +static_assert(IS_ALIGNED(sizeof(hantro_jpeg_header), 8), + "Hantro JPEG header size needs to be 8-byte aligned."); + +static unsigned char jpeg_scale_qp(const unsigned char qp, int scale) +{ + unsigned int temp; + + temp = DIV_ROUND_CLOSEST((unsigned int)qp * scale, 100); + if (temp <= 0) + temp = 1; + if (temp > 255) + temp = 255; + + return (unsigned char)temp; +} + +static void +jpeg_scale_quant_table(unsigned char *file_q_tab, + unsigned char *reordered_q_tab, + const unsigned char *tab, int scale) +{ + int i; + + BUILD_BUG_ON(ARRAY_SIZE(zigzag) != JPEG_QUANT_SIZE); + BUILD_BUG_ON(ARRAY_SIZE(hw_reorder) != JPEG_QUANT_SIZE); + + for (i = 0; i < JPEG_QUANT_SIZE; i++) { + file_q_tab[i] = jpeg_scale_qp(tab[zigzag[i]], scale); + reordered_q_tab[i] = jpeg_scale_qp(tab[hw_reorder[i]], scale); + } +} + +static void jpeg_set_quality(struct hantro_jpeg_ctx *ctx) +{ + int scale; + + /* + * Non-linear scaling factor: + * [5,50] -> [1000..100], [51,100] -> [98..0] + */ + if (ctx->quality < 50) + scale = 5000 / ctx->quality; + else + scale = 200 - 2 * ctx->quality; + + BUILD_BUG_ON(ARRAY_SIZE(luma_q_table) != JPEG_QUANT_SIZE); + BUILD_BUG_ON(ARRAY_SIZE(chroma_q_table) != JPEG_QUANT_SIZE); + BUILD_BUG_ON(ARRAY_SIZE(ctx->hw_luma_qtable) != JPEG_QUANT_SIZE); + BUILD_BUG_ON(ARRAY_SIZE(ctx->hw_chroma_qtable) != JPEG_QUANT_SIZE); + + jpeg_scale_quant_table(ctx->buffer + LUMA_QUANT_OFF, + ctx->hw_luma_qtable, luma_q_table, scale); + jpeg_scale_quant_table(ctx->buffer + CHROMA_QUANT_OFF, + ctx->hw_chroma_qtable, chroma_q_table, scale); +} + +void hantro_jpeg_header_assemble(struct hantro_jpeg_ctx *ctx) +{ + char *buf = ctx->buffer; + + memcpy(buf, hantro_jpeg_header, + sizeof(hantro_jpeg_header)); + + buf[HEIGHT_OFF + 0] = ctx->height >> 8; + buf[HEIGHT_OFF + 1] = ctx->height; + buf[WIDTH_OFF + 0] = ctx->width >> 8; + buf[WIDTH_OFF + 1] = ctx->width; + + memcpy(buf + HUFF_LUMA_DC_OFF, luma_dc_table, sizeof(luma_dc_table)); + memcpy(buf + HUFF_LUMA_AC_OFF, luma_ac_table, sizeof(luma_ac_table)); + memcpy(buf + HUFF_CHROMA_DC_OFF, chroma_dc_table, + sizeof(chroma_dc_table)); + memcpy(buf + HUFF_CHROMA_AC_OFF, chroma_ac_table, + sizeof(chroma_ac_table)); + + jpeg_set_quality(ctx); +} diff --git a/drivers/media/platform/verisilicon/hantro_jpeg.h b/drivers/media/platform/verisilicon/hantro_jpeg.h new file mode 100644 index 000000000..0b49d0b82 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_jpeg.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#define JPEG_HEADER_SIZE 624 +#define JPEG_QUANT_SIZE 64 + +struct hantro_jpeg_ctx { + int width; + int height; + int quality; + unsigned char *buffer; + unsigned char hw_luma_qtable[JPEG_QUANT_SIZE]; + unsigned char hw_chroma_qtable[JPEG_QUANT_SIZE]; +}; + +void hantro_jpeg_header_assemble(struct hantro_jpeg_ctx *ctx); diff --git a/drivers/media/platform/verisilicon/hantro_mpeg2.c b/drivers/media/platform/verisilicon/hantro_mpeg2.c new file mode 100644 index 000000000..04e545eb0 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_mpeg2.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + */ + +#include "hantro.h" + +static const u8 zigzag[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +void hantro_mpeg2_dec_copy_qtable(u8 *qtable, + const struct v4l2_ctrl_mpeg2_quantisation *ctrl) +{ + int i, n; + + if (!qtable || !ctrl) + return; + + for (i = 0; i < ARRAY_SIZE(zigzag); i++) { + n = zigzag[i]; + qtable[n + 0] = ctrl->intra_quantiser_matrix[i]; + qtable[n + 64] = ctrl->non_intra_quantiser_matrix[i]; + qtable[n + 128] = ctrl->chroma_intra_quantiser_matrix[i]; + qtable[n + 192] = ctrl->chroma_non_intra_quantiser_matrix[i]; + } +} + +int hantro_mpeg2_dec_init(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + ctx->mpeg2_dec.qtable.size = ARRAY_SIZE(zigzag) * 4; + ctx->mpeg2_dec.qtable.cpu = + dma_alloc_coherent(vpu->dev, + ctx->mpeg2_dec.qtable.size, + &ctx->mpeg2_dec.qtable.dma, + GFP_KERNEL); + if (!ctx->mpeg2_dec.qtable.cpu) + return -ENOMEM; + return 0; +} + +void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + dma_free_coherent(vpu->dev, + ctx->mpeg2_dec.qtable.size, + ctx->mpeg2_dec.qtable.cpu, + ctx->mpeg2_dec.qtable.dma); +} diff --git a/drivers/media/platform/verisilicon/hantro_postproc.c b/drivers/media/platform/verisilicon/hantro_postproc.c new file mode 100644 index 000000000..708095cf0 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_postproc.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro G1 post-processor support + * + * Copyright (C) 2019 Collabora, Ltd. + */ + +#include <linux/dma-mapping.h> +#include <linux/types.h> + +#include "hantro.h" +#include "hantro_hw.h" +#include "hantro_g1_regs.h" +#include "hantro_g2_regs.h" +#include "hantro_v4l2.h" + +#define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \ +{ \ + hantro_reg_write(vpu, \ + &hantro_g1_postproc_regs.reg_name, \ + val); \ +} + +#define HANTRO_PP_REG_WRITE_S(vpu, reg_name, val) \ +{ \ + hantro_reg_write_s(vpu, \ + &hantro_g1_postproc_regs.reg_name, \ + val); \ +} + +#define VPU_PP_IN_YUYV 0x0 +#define VPU_PP_IN_NV12 0x1 +#define VPU_PP_IN_YUV420 0x2 +#define VPU_PP_IN_YUV240_TILED 0x5 +#define VPU_PP_OUT_RGB 0x0 +#define VPU_PP_OUT_YUYV 0x3 + +static const struct hantro_postproc_regs hantro_g1_postproc_regs = { + .pipeline_en = {G1_REG_PP_INTERRUPT, 1, 0x1}, + .max_burst = {G1_REG_PP_DEV_CONFIG, 0, 0x1f}, + .clk_gate = {G1_REG_PP_DEV_CONFIG, 1, 0x1}, + .out_swap32 = {G1_REG_PP_DEV_CONFIG, 5, 0x1}, + .out_endian = {G1_REG_PP_DEV_CONFIG, 6, 0x1}, + .out_luma_base = {G1_REG_PP_OUT_LUMA_BASE, 0, 0xffffffff}, + .input_width = {G1_REG_PP_INPUT_SIZE, 0, 0x1ff}, + .input_height = {G1_REG_PP_INPUT_SIZE, 9, 0x1ff}, + .output_width = {G1_REG_PP_CONTROL, 4, 0x7ff}, + .output_height = {G1_REG_PP_CONTROL, 15, 0x7ff}, + .input_fmt = {G1_REG_PP_CONTROL, 29, 0x7}, + .output_fmt = {G1_REG_PP_CONTROL, 26, 0x7}, + .orig_width = {G1_REG_PP_MASK1_ORIG_WIDTH, 23, 0x1ff}, + .display_width = {G1_REG_PP_DISPLAY_WIDTH, 0, 0xfff}, +}; + +bool hantro_needs_postproc(const struct hantro_ctx *ctx, + const struct hantro_fmt *fmt) +{ + if (ctx->is_encoder) + return false; + return fmt->postprocessed; +} + +static void hantro_postproc_g1_enable(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *dst_buf; + u32 src_pp_fmt, dst_pp_fmt; + dma_addr_t dst_dma; + + /* Turn on pipeline mode. Must be done first. */ + HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x1); + + src_pp_fmt = VPU_PP_IN_NV12; + + switch (ctx->vpu_dst_fmt->fourcc) { + case V4L2_PIX_FMT_YUYV: + dst_pp_fmt = VPU_PP_OUT_YUYV; + break; + default: + WARN(1, "output format %d not supported by the post-processor, this wasn't expected.", + ctx->vpu_dst_fmt->fourcc); + dst_pp_fmt = 0; + break; + } + + dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); + dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); + + HANTRO_PP_REG_WRITE(vpu, clk_gate, 0x1); + HANTRO_PP_REG_WRITE(vpu, out_endian, 0x1); + HANTRO_PP_REG_WRITE(vpu, out_swap32, 0x1); + HANTRO_PP_REG_WRITE(vpu, max_burst, 16); + HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma); + HANTRO_PP_REG_WRITE(vpu, input_width, MB_WIDTH(ctx->dst_fmt.width)); + HANTRO_PP_REG_WRITE(vpu, input_height, MB_HEIGHT(ctx->dst_fmt.height)); + HANTRO_PP_REG_WRITE(vpu, input_fmt, src_pp_fmt); + HANTRO_PP_REG_WRITE(vpu, output_fmt, dst_pp_fmt); + HANTRO_PP_REG_WRITE(vpu, output_width, ctx->dst_fmt.width); + HANTRO_PP_REG_WRITE(vpu, output_height, ctx->dst_fmt.height); + HANTRO_PP_REG_WRITE(vpu, orig_width, MB_WIDTH(ctx->dst_fmt.width)); + HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width); +} + +static int down_scale_factor(struct hantro_ctx *ctx) +{ + if (ctx->src_fmt.width <= ctx->dst_fmt.width) + return 0; + + return DIV_ROUND_CLOSEST(ctx->src_fmt.width, ctx->dst_fmt.width); +} + +static void hantro_postproc_g2_enable(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *dst_buf; + int down_scale = down_scale_factor(ctx); + int out_depth; + size_t chroma_offset; + dma_addr_t dst_dma; + + dst_buf = hantro_get_dst_buf(ctx); + dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); + chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline * + ctx->dst_fmt.height; + + if (down_scale) { + hantro_reg_write(vpu, &g2_down_scale_e, 1); + hantro_reg_write(vpu, &g2_down_scale_y, down_scale >> 2); + hantro_reg_write(vpu, &g2_down_scale_x, down_scale >> 2); + hantro_write_addr(vpu, G2_DS_DST, dst_dma); + hantro_write_addr(vpu, G2_DS_DST_CHR, dst_dma + (chroma_offset >> down_scale)); + } else { + hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma); + hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + chroma_offset); + } + + out_depth = hantro_get_format_depth(ctx->dst_fmt.pixelformat); + if (ctx->dev->variant->legacy_regs) { + u8 pp_shift = 0; + + if (out_depth > 8) + pp_shift = 16 - out_depth; + + hantro_reg_write(ctx->dev, &g2_rs_out_bit_depth, out_depth); + hantro_reg_write(ctx->dev, &g2_pp_pix_shift, pp_shift); + } else { + hantro_reg_write(vpu, &g2_output_8_bits, out_depth > 8 ? 0 : 1); + hantro_reg_write(vpu, &g2_output_format, out_depth > 8 ? 1 : 0); + } + hantro_reg_write(vpu, &g2_out_rs_e, 1); +} + +static int hantro_postproc_g2_enum_framesizes(struct hantro_ctx *ctx, + struct v4l2_frmsizeenum *fsize) +{ + /** + * G2 scaler can scale down by 0, 2, 4 or 8 + * use fsize->index has power of 2 diviser + **/ + if (fsize->index > 3) + return -EINVAL; + + if (!ctx->src_fmt.width || !ctx->src_fmt.height) + return -EINVAL; + + fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; + fsize->discrete.width = ctx->src_fmt.width >> fsize->index; + fsize->discrete.height = ctx->src_fmt.height >> fsize->index; + + return 0; +} + +void hantro_postproc_free(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + unsigned int i; + + for (i = 0; i < VB2_MAX_FRAME; ++i) { + struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i]; + + if (priv->cpu) { + dma_free_attrs(vpu->dev, priv->size, priv->cpu, + priv->dma, priv->attrs); + priv->cpu = NULL; + } + } +} + +int hantro_postproc_alloc(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; + struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q; + unsigned int num_buffers = cap_queue->num_buffers; + struct v4l2_pix_format_mplane pix_mp; + const struct hantro_fmt *fmt; + unsigned int i, buf_size; + + /* this should always pick native format */ + fmt = hantro_get_default_fmt(ctx, false); + if (!fmt) + return -EINVAL; + v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width, + ctx->src_fmt.height); + + buf_size = pix_mp.plane_fmt[0].sizeimage; + if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE) + buf_size += hantro_h264_mv_size(pix_mp.width, + pix_mp.height); + else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME) + buf_size += hantro_vp9_mv_size(pix_mp.width, + pix_mp.height); + else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) + buf_size += hantro_hevc_mv_size(pix_mp.width, + pix_mp.height); + + for (i = 0; i < num_buffers; ++i) { + struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i]; + + /* + * The buffers on this queue are meant as intermediate + * buffers for the decoder, so no mapping is needed. + */ + priv->attrs = DMA_ATTR_NO_KERNEL_MAPPING; + priv->cpu = dma_alloc_attrs(vpu->dev, buf_size, &priv->dma, + GFP_KERNEL, priv->attrs); + if (!priv->cpu) + return -ENOMEM; + priv->size = buf_size; + } + return 0; +} + +static void hantro_postproc_g1_disable(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0); +} + +static void hantro_postproc_g2_disable(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + hantro_reg_write(vpu, &g2_out_rs_e, 0); +} + +void hantro_postproc_disable(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->disable) + vpu->variant->postproc_ops->disable(ctx); +} + +void hantro_postproc_enable(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enable) + vpu->variant->postproc_ops->enable(ctx); +} + +int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx, + struct v4l2_frmsizeenum *fsize) +{ + struct hantro_dev *vpu = ctx->dev; + + if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enum_framesizes) + return vpu->variant->postproc_ops->enum_framesizes(ctx, fsize); + + return -EINVAL; +} + +const struct hantro_postproc_ops hantro_g1_postproc_ops = { + .enable = hantro_postproc_g1_enable, + .disable = hantro_postproc_g1_disable, +}; + +const struct hantro_postproc_ops hantro_g2_postproc_ops = { + .enable = hantro_postproc_g2_enable, + .disable = hantro_postproc_g2_disable, + .enum_framesizes = hantro_postproc_g2_enum_framesizes, +}; diff --git a/drivers/media/platform/verisilicon/hantro_v4l2.c b/drivers/media/platform/verisilicon/hantro_v4l2.c new file mode 100644 index 000000000..b2da48936 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_v4l2.c @@ -0,0 +1,996 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Collabora, Ltd. + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + * Alpha Lin <Alpha.Lin@rock-chips.com> + * Jeffy Chen <jeffy.chen@rock-chips.com> + * + * Copyright 2018 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + * + * Based on s5p-mfc driver by Samsung Electronics Co., Ltd. + * Copyright (C) 2010-2011 Samsung Electronics Co., Ltd. + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/pm_runtime.h> +#include <linux/videodev2.h> +#include <linux/workqueue.h> +#include <media/v4l2-ctrls.h> +#include <media/v4l2-event.h> +#include <media/v4l2-mem2mem.h> + +#include "hantro.h" +#include "hantro_hw.h" +#include "hantro_v4l2.h" + +static int hantro_set_fmt_out(struct hantro_ctx *ctx, + struct v4l2_pix_format_mplane *pix_mp); +static int hantro_set_fmt_cap(struct hantro_ctx *ctx, + struct v4l2_pix_format_mplane *pix_mp); + +static const struct hantro_fmt * +hantro_get_formats(const struct hantro_ctx *ctx, unsigned int *num_fmts) +{ + const struct hantro_fmt *formats; + + if (ctx->is_encoder) { + formats = ctx->dev->variant->enc_fmts; + *num_fmts = ctx->dev->variant->num_enc_fmts; + } else { + formats = ctx->dev->variant->dec_fmts; + *num_fmts = ctx->dev->variant->num_dec_fmts; + } + + return formats; +} + +static const struct hantro_fmt * +hantro_get_postproc_formats(const struct hantro_ctx *ctx, + unsigned int *num_fmts) +{ + struct hantro_dev *vpu = ctx->dev; + + if (ctx->is_encoder || !vpu->variant->postproc_fmts) { + *num_fmts = 0; + return NULL; + } + + *num_fmts = ctx->dev->variant->num_postproc_fmts; + return ctx->dev->variant->postproc_fmts; +} + +int hantro_get_format_depth(u32 fourcc) +{ + switch (fourcc) { + case V4L2_PIX_FMT_P010: + case V4L2_PIX_FMT_P010_4L4: + return 10; + default: + return 8; + } +} + +static bool +hantro_check_depth_match(const struct hantro_ctx *ctx, + const struct hantro_fmt *fmt) +{ + int fmt_depth, ctx_depth = 8; + + if (!fmt->match_depth && !fmt->postprocessed) + return true; + + /* 0 means default depth, which is 8 */ + if (ctx->bit_depth) + ctx_depth = ctx->bit_depth; + + fmt_depth = hantro_get_format_depth(fmt->fourcc); + + /* + * Allow only downconversion for postproc formats for now. + * It may be possible to relax that on some HW. + */ + if (!fmt->match_depth) + return fmt_depth <= ctx_depth; + + return fmt_depth == ctx_depth; +} + +static const struct hantro_fmt * +hantro_find_format(const struct hantro_ctx *ctx, u32 fourcc) +{ + const struct hantro_fmt *formats; + unsigned int i, num_fmts; + + formats = hantro_get_formats(ctx, &num_fmts); + for (i = 0; i < num_fmts; i++) + if (formats[i].fourcc == fourcc) + return &formats[i]; + + formats = hantro_get_postproc_formats(ctx, &num_fmts); + for (i = 0; i < num_fmts; i++) + if (formats[i].fourcc == fourcc) + return &formats[i]; + return NULL; +} + +const struct hantro_fmt * +hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream) +{ + const struct hantro_fmt *formats; + unsigned int i, num_fmts; + + formats = hantro_get_formats(ctx, &num_fmts); + for (i = 0; i < num_fmts; i++) { + if (bitstream == (formats[i].codec_mode != + HANTRO_MODE_NONE) && + hantro_check_depth_match(ctx, &formats[i])) + return &formats[i]; + } + return NULL; +} + +static int vidioc_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + struct hantro_dev *vpu = video_drvdata(file); + struct video_device *vdev = video_devdata(file); + + strscpy(cap->driver, vpu->dev->driver->name, sizeof(cap->driver)); + strscpy(cap->card, vdev->name, sizeof(cap->card)); + snprintf(cap->bus_info, sizeof(cap->bus_info), "platform: %s", + vpu->dev->driver->name); + return 0; +} + +static int vidioc_enum_framesizes(struct file *file, void *priv, + struct v4l2_frmsizeenum *fsize) +{ + struct hantro_ctx *ctx = fh_to_ctx(priv); + const struct hantro_fmt *fmt; + + fmt = hantro_find_format(ctx, fsize->pixel_format); + if (!fmt) { + vpu_debug(0, "unsupported bitstream format (%08x)\n", + fsize->pixel_format); + return -EINVAL; + } + + /* For non-coded formats check if postprocessing scaling is possible */ + if (fmt->codec_mode == HANTRO_MODE_NONE) { + if (hantro_needs_postproc(ctx, fmt)) + return hanto_postproc_enum_framesizes(ctx, fsize); + else + return -ENOTTY; + } else if (fsize->index != 0) { + vpu_debug(0, "invalid frame size index (expected 0, got %d)\n", + fsize->index); + return -EINVAL; + } + + fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; + fsize->stepwise = fmt->frmsize; + + return 0; +} + +static int vidioc_enum_fmt(struct file *file, void *priv, + struct v4l2_fmtdesc *f, bool capture) + +{ + struct hantro_ctx *ctx = fh_to_ctx(priv); + const struct hantro_fmt *fmt, *formats; + unsigned int num_fmts, i, j = 0; + bool skip_mode_none; + + /* + * When dealing with an encoder: + * - on the capture side we want to filter out all MODE_NONE formats. + * - on the output side we want to filter out all formats that are + * not MODE_NONE. + * When dealing with a decoder: + * - on the capture side we want to filter out all formats that are + * not MODE_NONE. + * - on the output side we want to filter out all MODE_NONE formats. + */ + skip_mode_none = capture == ctx->is_encoder; + + formats = hantro_get_formats(ctx, &num_fmts); + for (i = 0; i < num_fmts; i++) { + bool mode_none = formats[i].codec_mode == HANTRO_MODE_NONE; + fmt = &formats[i]; + + if (skip_mode_none == mode_none) + continue; + if (!hantro_check_depth_match(ctx, fmt)) + continue; + if (j == f->index) { + f->pixelformat = fmt->fourcc; + return 0; + } + ++j; + } + + /* + * Enumerate post-processed formats. As per the specification, + * we enumerated these formats after natively decoded formats + * as a hint for applications on what's the preferred fomat. + */ + if (!capture) + return -EINVAL; + formats = hantro_get_postproc_formats(ctx, &num_fmts); + for (i = 0; i < num_fmts; i++) { + fmt = &formats[i]; + + if (!hantro_check_depth_match(ctx, fmt)) + continue; + if (j == f->index) { + f->pixelformat = fmt->fourcc; + return 0; + } + ++j; + } + + return -EINVAL; +} + +static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt(file, priv, f, true); +} + +static int vidioc_enum_fmt_vid_out(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt(file, priv, f, false); +} + +static int vidioc_g_fmt_out_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp; + struct hantro_ctx *ctx = fh_to_ctx(priv); + + vpu_debug(4, "f->type = %d\n", f->type); + + *pix_mp = ctx->src_fmt; + + return 0; +} + +static int vidioc_g_fmt_cap_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp; + struct hantro_ctx *ctx = fh_to_ctx(priv); + + vpu_debug(4, "f->type = %d\n", f->type); + + *pix_mp = ctx->dst_fmt; + + return 0; +} + +static int hantro_try_fmt(const struct hantro_ctx *ctx, + struct v4l2_pix_format_mplane *pix_mp, + enum v4l2_buf_type type) +{ + const struct hantro_fmt *fmt, *vpu_fmt; + bool capture = V4L2_TYPE_IS_CAPTURE(type); + bool coded; + + coded = capture == ctx->is_encoder; + + vpu_debug(4, "trying format %c%c%c%c\n", + (pix_mp->pixelformat & 0x7f), + (pix_mp->pixelformat >> 8) & 0x7f, + (pix_mp->pixelformat >> 16) & 0x7f, + (pix_mp->pixelformat >> 24) & 0x7f); + + fmt = hantro_find_format(ctx, pix_mp->pixelformat); + if (!fmt) { + fmt = hantro_get_default_fmt(ctx, coded); + pix_mp->pixelformat = fmt->fourcc; + } + + if (coded) { + pix_mp->num_planes = 1; + vpu_fmt = fmt; + } else if (ctx->is_encoder) { + vpu_fmt = ctx->vpu_dst_fmt; + } else { + vpu_fmt = fmt; + /* + * Width/height on the CAPTURE end of a decoder are ignored and + * replaced by the OUTPUT ones. + */ + pix_mp->width = ctx->src_fmt.width; + pix_mp->height = ctx->src_fmt.height; + } + + pix_mp->field = V4L2_FIELD_NONE; + + v4l2_apply_frmsize_constraints(&pix_mp->width, &pix_mp->height, + &vpu_fmt->frmsize); + + if (!coded) { + /* Fill remaining fields */ + v4l2_fill_pixfmt_mp(pix_mp, fmt->fourcc, pix_mp->width, + pix_mp->height); + if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE && + !hantro_needs_postproc(ctx, fmt)) + pix_mp->plane_fmt[0].sizeimage += + hantro_h264_mv_size(pix_mp->width, + pix_mp->height); + else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME && + !hantro_needs_postproc(ctx, fmt)) + pix_mp->plane_fmt[0].sizeimage += + hantro_vp9_mv_size(pix_mp->width, + pix_mp->height); + else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE && + !hantro_needs_postproc(ctx, fmt)) + pix_mp->plane_fmt[0].sizeimage += + hantro_hevc_mv_size(pix_mp->width, + pix_mp->height); + } else if (!pix_mp->plane_fmt[0].sizeimage) { + /* + * For coded formats the application can specify + * sizeimage. If the application passes a zero sizeimage, + * let's default to the maximum frame size. + */ + pix_mp->plane_fmt[0].sizeimage = fmt->header_size + + pix_mp->width * pix_mp->height * fmt->max_depth; + } + + return 0; +} + +static int vidioc_try_fmt_cap_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + return hantro_try_fmt(fh_to_ctx(priv), &f->fmt.pix_mp, f->type); +} + +static int vidioc_try_fmt_out_mplane(struct file *file, void *priv, + struct v4l2_format *f) +{ + return hantro_try_fmt(fh_to_ctx(priv), &f->fmt.pix_mp, f->type); +} + +static void +hantro_reset_fmt(struct v4l2_pix_format_mplane *fmt, + const struct hantro_fmt *vpu_fmt) +{ + memset(fmt, 0, sizeof(*fmt)); + + fmt->pixelformat = vpu_fmt->fourcc; + fmt->field = V4L2_FIELD_NONE; + fmt->colorspace = V4L2_COLORSPACE_JPEG; + fmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT; + fmt->quantization = V4L2_QUANTIZATION_DEFAULT; + fmt->xfer_func = V4L2_XFER_FUNC_DEFAULT; +} + +static void +hantro_reset_encoded_fmt(struct hantro_ctx *ctx) +{ + const struct hantro_fmt *vpu_fmt; + struct v4l2_pix_format_mplane *fmt; + + vpu_fmt = hantro_get_default_fmt(ctx, true); + + if (ctx->is_encoder) { + ctx->vpu_dst_fmt = vpu_fmt; + fmt = &ctx->dst_fmt; + } else { + ctx->vpu_src_fmt = vpu_fmt; + fmt = &ctx->src_fmt; + } + + hantro_reset_fmt(fmt, vpu_fmt); + fmt->width = vpu_fmt->frmsize.min_width; + fmt->height = vpu_fmt->frmsize.min_height; + if (ctx->is_encoder) + hantro_set_fmt_cap(ctx, fmt); + else + hantro_set_fmt_out(ctx, fmt); +} + +static void +hantro_reset_raw_fmt(struct hantro_ctx *ctx) +{ + const struct hantro_fmt *raw_vpu_fmt; + struct v4l2_pix_format_mplane *raw_fmt, *encoded_fmt; + + raw_vpu_fmt = hantro_get_default_fmt(ctx, false); + + if (ctx->is_encoder) { + ctx->vpu_src_fmt = raw_vpu_fmt; + raw_fmt = &ctx->src_fmt; + encoded_fmt = &ctx->dst_fmt; + } else { + ctx->vpu_dst_fmt = raw_vpu_fmt; + raw_fmt = &ctx->dst_fmt; + encoded_fmt = &ctx->src_fmt; + } + + hantro_reset_fmt(raw_fmt, raw_vpu_fmt); + raw_fmt->width = encoded_fmt->width; + raw_fmt->height = encoded_fmt->height; + if (ctx->is_encoder) + hantro_set_fmt_out(ctx, raw_fmt); + else + hantro_set_fmt_cap(ctx, raw_fmt); +} + +void hantro_reset_fmts(struct hantro_ctx *ctx) +{ + hantro_reset_encoded_fmt(ctx); + hantro_reset_raw_fmt(ctx); +} + +static void +hantro_update_requires_request(struct hantro_ctx *ctx, u32 fourcc) +{ + switch (fourcc) { + case V4L2_PIX_FMT_JPEG: + ctx->fh.m2m_ctx->out_q_ctx.q.requires_requests = false; + break; + case V4L2_PIX_FMT_MPEG2_SLICE: + case V4L2_PIX_FMT_VP8_FRAME: + case V4L2_PIX_FMT_H264_SLICE: + case V4L2_PIX_FMT_HEVC_SLICE: + case V4L2_PIX_FMT_VP9_FRAME: + ctx->fh.m2m_ctx->out_q_ctx.q.requires_requests = true; + break; + default: + break; + } +} + +static void +hantro_update_requires_hold_capture_buf(struct hantro_ctx *ctx, u32 fourcc) +{ + struct vb2_queue *vq; + + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + + switch (fourcc) { + case V4L2_PIX_FMT_JPEG: + case V4L2_PIX_FMT_MPEG2_SLICE: + case V4L2_PIX_FMT_VP8_FRAME: + case V4L2_PIX_FMT_HEVC_SLICE: + case V4L2_PIX_FMT_VP9_FRAME: + vq->subsystem_flags &= ~(VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF); + break; + case V4L2_PIX_FMT_H264_SLICE: + vq->subsystem_flags |= VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF; + break; + default: + break; + } +} + +static int hantro_set_fmt_out(struct hantro_ctx *ctx, + struct v4l2_pix_format_mplane *pix_mp) +{ + struct vb2_queue *vq; + int ret; + + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + ret = hantro_try_fmt(ctx, pix_mp, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + if (ret) + return ret; + + if (!ctx->is_encoder) { + struct vb2_queue *peer_vq; + + /* + * In order to support dynamic resolution change, + * the decoder admits a resolution change, as long + * as the pixelformat remains. Can't be done if streaming. + */ + if (vb2_is_streaming(vq) || (vb2_is_busy(vq) && + pix_mp->pixelformat != ctx->src_fmt.pixelformat)) + return -EBUSY; + /* + * Since format change on the OUTPUT queue will reset + * the CAPTURE queue, we can't allow doing so + * when the CAPTURE queue has buffers allocated. + */ + peer_vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + if (vb2_is_busy(peer_vq)) + return -EBUSY; + } else { + /* + * The encoder doesn't admit a format change if + * there are OUTPUT buffers allocated. + */ + if (vb2_is_busy(vq)) + return -EBUSY; + } + + ctx->vpu_src_fmt = hantro_find_format(ctx, pix_mp->pixelformat); + ctx->src_fmt = *pix_mp; + + /* + * Current raw format might have become invalid with newly + * selected codec, so reset it to default just to be safe and + * keep internal driver state sane. User is mandated to set + * the raw format again after we return, so we don't need + * anything smarter. + * Note that hantro_reset_raw_fmt() also propagates size + * changes to the raw format. + */ + if (!ctx->is_encoder) + hantro_reset_raw_fmt(ctx); + + /* Colorimetry information are always propagated. */ + ctx->dst_fmt.colorspace = pix_mp->colorspace; + ctx->dst_fmt.ycbcr_enc = pix_mp->ycbcr_enc; + ctx->dst_fmt.xfer_func = pix_mp->xfer_func; + ctx->dst_fmt.quantization = pix_mp->quantization; + + hantro_update_requires_request(ctx, pix_mp->pixelformat); + hantro_update_requires_hold_capture_buf(ctx, pix_mp->pixelformat); + + vpu_debug(0, "OUTPUT codec mode: %d\n", ctx->vpu_src_fmt->codec_mode); + vpu_debug(0, "fmt - w: %d, h: %d\n", + pix_mp->width, pix_mp->height); + return 0; +} + +static int hantro_set_fmt_cap(struct hantro_ctx *ctx, + struct v4l2_pix_format_mplane *pix_mp) +{ + struct vb2_queue *vq; + int ret; + + /* Change not allowed if queue is busy. */ + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + if (vb2_is_busy(vq)) + return -EBUSY; + + if (ctx->is_encoder) { + struct vb2_queue *peer_vq; + + /* + * Since format change on the CAPTURE queue will reset + * the OUTPUT queue, we can't allow doing so + * when the OUTPUT queue has buffers allocated. + */ + peer_vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + if (vb2_is_busy(peer_vq) && + (pix_mp->pixelformat != ctx->dst_fmt.pixelformat || + pix_mp->height != ctx->dst_fmt.height || + pix_mp->width != ctx->dst_fmt.width)) + return -EBUSY; + } + + ret = hantro_try_fmt(ctx, pix_mp, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + if (ret) + return ret; + + ctx->vpu_dst_fmt = hantro_find_format(ctx, pix_mp->pixelformat); + ctx->dst_fmt = *pix_mp; + + /* + * Current raw format might have become invalid with newly + * selected codec, so reset it to default just to be safe and + * keep internal driver state sane. User is mandated to set + * the raw format again after we return, so we don't need + * anything smarter. + * Note that hantro_reset_raw_fmt() also propagates size + * changes to the raw format. + */ + if (ctx->is_encoder) + hantro_reset_raw_fmt(ctx); + + /* Colorimetry information are always propagated. */ + ctx->src_fmt.colorspace = pix_mp->colorspace; + ctx->src_fmt.ycbcr_enc = pix_mp->ycbcr_enc; + ctx->src_fmt.xfer_func = pix_mp->xfer_func; + ctx->src_fmt.quantization = pix_mp->quantization; + + vpu_debug(0, "CAPTURE codec mode: %d\n", ctx->vpu_dst_fmt->codec_mode); + vpu_debug(0, "fmt - w: %d, h: %d\n", + pix_mp->width, pix_mp->height); + + hantro_update_requires_request(ctx, pix_mp->pixelformat); + + return 0; +} + +static int +vidioc_s_fmt_out_mplane(struct file *file, void *priv, struct v4l2_format *f) +{ + return hantro_set_fmt_out(fh_to_ctx(priv), &f->fmt.pix_mp); +} + +static int +vidioc_s_fmt_cap_mplane(struct file *file, void *priv, struct v4l2_format *f) +{ + return hantro_set_fmt_cap(fh_to_ctx(priv), &f->fmt.pix_mp); +} + +static int vidioc_g_selection(struct file *file, void *priv, + struct v4l2_selection *sel) +{ + struct hantro_ctx *ctx = fh_to_ctx(priv); + + /* Crop only supported on source. */ + if (!ctx->is_encoder || + sel->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) + return -EINVAL; + + switch (sel->target) { + case V4L2_SEL_TGT_CROP_DEFAULT: + case V4L2_SEL_TGT_CROP_BOUNDS: + sel->r.top = 0; + sel->r.left = 0; + sel->r.width = ctx->src_fmt.width; + sel->r.height = ctx->src_fmt.height; + break; + case V4L2_SEL_TGT_CROP: + sel->r.top = 0; + sel->r.left = 0; + sel->r.width = ctx->dst_fmt.width; + sel->r.height = ctx->dst_fmt.height; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int vidioc_s_selection(struct file *file, void *priv, + struct v4l2_selection *sel) +{ + struct hantro_ctx *ctx = fh_to_ctx(priv); + struct v4l2_rect *rect = &sel->r; + struct vb2_queue *vq; + + /* Crop only supported on source. */ + if (!ctx->is_encoder || + sel->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) + return -EINVAL; + + /* Change not allowed if the queue is streaming. */ + vq = v4l2_m2m_get_src_vq(ctx->fh.m2m_ctx); + if (vb2_is_streaming(vq)) + return -EBUSY; + + if (sel->target != V4L2_SEL_TGT_CROP) + return -EINVAL; + + /* + * We do not support offsets, and we can crop only inside + * right-most or bottom-most macroblocks. + */ + if (rect->left != 0 || rect->top != 0 || + round_up(rect->width, MB_DIM) != ctx->src_fmt.width || + round_up(rect->height, MB_DIM) != ctx->src_fmt.height) { + /* Default to full frame for incorrect settings. */ + rect->left = 0; + rect->top = 0; + rect->width = ctx->src_fmt.width; + rect->height = ctx->src_fmt.height; + } else { + /* We support widths aligned to 4 pixels and arbitrary heights. */ + rect->width = round_up(rect->width, 4); + } + + ctx->dst_fmt.width = rect->width; + ctx->dst_fmt.height = rect->height; + + return 0; +} + +static const struct v4l2_event hantro_eos_event = { + .type = V4L2_EVENT_EOS +}; + +static int vidioc_encoder_cmd(struct file *file, void *priv, + struct v4l2_encoder_cmd *ec) +{ + struct hantro_ctx *ctx = fh_to_ctx(priv); + int ret; + + ret = v4l2_m2m_ioctl_try_encoder_cmd(file, priv, ec); + if (ret < 0) + return ret; + + if (!vb2_is_streaming(v4l2_m2m_get_src_vq(ctx->fh.m2m_ctx)) || + !vb2_is_streaming(v4l2_m2m_get_dst_vq(ctx->fh.m2m_ctx))) + return 0; + + ret = v4l2_m2m_ioctl_encoder_cmd(file, priv, ec); + if (ret < 0) + return ret; + + if (ec->cmd == V4L2_ENC_CMD_STOP && + v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) + v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event); + + if (ec->cmd == V4L2_ENC_CMD_START) + vb2_clear_last_buffer_dequeued(&ctx->fh.m2m_ctx->cap_q_ctx.q); + + return 0; +} + +const struct v4l2_ioctl_ops hantro_ioctl_ops = { + .vidioc_querycap = vidioc_querycap, + .vidioc_enum_framesizes = vidioc_enum_framesizes, + + .vidioc_try_fmt_vid_cap_mplane = vidioc_try_fmt_cap_mplane, + .vidioc_try_fmt_vid_out_mplane = vidioc_try_fmt_out_mplane, + .vidioc_s_fmt_vid_out_mplane = vidioc_s_fmt_out_mplane, + .vidioc_s_fmt_vid_cap_mplane = vidioc_s_fmt_cap_mplane, + .vidioc_g_fmt_vid_out_mplane = vidioc_g_fmt_out_mplane, + .vidioc_g_fmt_vid_cap_mplane = vidioc_g_fmt_cap_mplane, + .vidioc_enum_fmt_vid_out = vidioc_enum_fmt_vid_out, + .vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap, + + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, + .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, + .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, + .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, + .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, + + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, + + .vidioc_streamon = v4l2_m2m_ioctl_streamon, + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, + + .vidioc_g_selection = vidioc_g_selection, + .vidioc_s_selection = vidioc_s_selection, + + .vidioc_decoder_cmd = v4l2_m2m_ioctl_stateless_decoder_cmd, + .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_stateless_try_decoder_cmd, + + .vidioc_try_encoder_cmd = v4l2_m2m_ioctl_try_encoder_cmd, + .vidioc_encoder_cmd = vidioc_encoder_cmd, +}; + +static int +hantro_queue_setup(struct vb2_queue *vq, unsigned int *num_buffers, + unsigned int *num_planes, unsigned int sizes[], + struct device *alloc_devs[]) +{ + struct hantro_ctx *ctx = vb2_get_drv_priv(vq); + struct v4l2_pix_format_mplane *pixfmt; + int i; + + switch (vq->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: + pixfmt = &ctx->dst_fmt; + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: + pixfmt = &ctx->src_fmt; + break; + default: + vpu_err("invalid queue type: %d\n", vq->type); + return -EINVAL; + } + + if (*num_planes) { + if (*num_planes != pixfmt->num_planes) + return -EINVAL; + for (i = 0; i < pixfmt->num_planes; ++i) + if (sizes[i] < pixfmt->plane_fmt[i].sizeimage) + return -EINVAL; + return 0; + } + + *num_planes = pixfmt->num_planes; + for (i = 0; i < pixfmt->num_planes; ++i) + sizes[i] = pixfmt->plane_fmt[i].sizeimage; + return 0; +} + +static int +hantro_buf_plane_check(struct vb2_buffer *vb, + struct v4l2_pix_format_mplane *pixfmt) +{ + unsigned int sz; + int i; + + for (i = 0; i < pixfmt->num_planes; ++i) { + sz = pixfmt->plane_fmt[i].sizeimage; + vpu_debug(4, "plane %d size: %ld, sizeimage: %u\n", + i, vb2_plane_size(vb, i), sz); + if (vb2_plane_size(vb, i) < sz) { + vpu_err("plane %d is too small for output\n", i); + return -EINVAL; + } + } + return 0; +} + +static int hantro_buf_prepare(struct vb2_buffer *vb) +{ + struct vb2_queue *vq = vb->vb2_queue; + struct hantro_ctx *ctx = vb2_get_drv_priv(vq); + struct v4l2_pix_format_mplane *pix_fmt; + int ret; + + if (V4L2_TYPE_IS_OUTPUT(vq->type)) + pix_fmt = &ctx->src_fmt; + else + pix_fmt = &ctx->dst_fmt; + ret = hantro_buf_plane_check(vb, pix_fmt); + if (ret) + return ret; + /* + * Buffer's bytesused must be written by driver for CAPTURE buffers. + * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets + * it to buffer length). + */ + if (V4L2_TYPE_IS_CAPTURE(vq->type)) { + if (ctx->is_encoder) + vb2_set_plane_payload(vb, 0, 0); + else + vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage); + } + + return 0; +} + +static void hantro_buf_queue(struct vb2_buffer *vb) +{ + struct hantro_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + + if (V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type) && + vb2_is_streaming(vb->vb2_queue) && + v4l2_m2m_dst_buf_is_last(ctx->fh.m2m_ctx)) { + unsigned int i; + + for (i = 0; i < vb->num_planes; i++) + vb2_set_plane_payload(vb, i, 0); + + vbuf->field = V4L2_FIELD_NONE; + vbuf->sequence = ctx->sequence_cap++; + + v4l2_m2m_last_buffer_done(ctx->fh.m2m_ctx, vbuf); + v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event); + return; + } + + v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); +} + +static bool hantro_vq_is_coded(struct vb2_queue *q) +{ + struct hantro_ctx *ctx = vb2_get_drv_priv(q); + + return ctx->is_encoder != V4L2_TYPE_IS_OUTPUT(q->type); +} + +static int hantro_start_streaming(struct vb2_queue *q, unsigned int count) +{ + struct hantro_ctx *ctx = vb2_get_drv_priv(q); + int ret = 0; + + v4l2_m2m_update_start_streaming_state(ctx->fh.m2m_ctx, q); + + if (V4L2_TYPE_IS_OUTPUT(q->type)) + ctx->sequence_out = 0; + else + ctx->sequence_cap = 0; + + if (hantro_vq_is_coded(q)) { + enum hantro_codec_mode codec_mode; + + if (V4L2_TYPE_IS_OUTPUT(q->type)) + codec_mode = ctx->vpu_src_fmt->codec_mode; + else + codec_mode = ctx->vpu_dst_fmt->codec_mode; + + vpu_debug(4, "Codec mode = %d\n", codec_mode); + ctx->codec_ops = &ctx->dev->variant->codec_ops[codec_mode]; + if (ctx->codec_ops->init) { + ret = ctx->codec_ops->init(ctx); + if (ret) + return ret; + } + + if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) { + ret = hantro_postproc_alloc(ctx); + if (ret) + goto err_codec_exit; + } + } + return ret; + +err_codec_exit: + if (ctx->codec_ops->exit) + ctx->codec_ops->exit(ctx); + return ret; +} + +static void +hantro_return_bufs(struct vb2_queue *q, + struct vb2_v4l2_buffer *(*buf_remove)(struct v4l2_m2m_ctx *)) +{ + struct hantro_ctx *ctx = vb2_get_drv_priv(q); + + for (;;) { + struct vb2_v4l2_buffer *vbuf; + + vbuf = buf_remove(ctx->fh.m2m_ctx); + if (!vbuf) + break; + v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req, + &ctx->ctrl_handler); + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); + } +} + +static void hantro_stop_streaming(struct vb2_queue *q) +{ + struct hantro_ctx *ctx = vb2_get_drv_priv(q); + + if (hantro_vq_is_coded(q)) { + hantro_postproc_free(ctx); + if (ctx->codec_ops && ctx->codec_ops->exit) + ctx->codec_ops->exit(ctx); + } + + /* + * The mem2mem framework calls v4l2_m2m_cancel_job before + * .stop_streaming, so there isn't any job running and + * it is safe to return all the buffers. + */ + if (V4L2_TYPE_IS_OUTPUT(q->type)) + hantro_return_bufs(q, v4l2_m2m_src_buf_remove); + else + hantro_return_bufs(q, v4l2_m2m_dst_buf_remove); + + v4l2_m2m_update_stop_streaming_state(ctx->fh.m2m_ctx, q); + + if (V4L2_TYPE_IS_OUTPUT(q->type) && + v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) + v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event); +} + +static void hantro_buf_request_complete(struct vb2_buffer *vb) +{ + struct hantro_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + + v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->ctrl_handler); +} + +static int hantro_buf_out_validate(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + + vbuf->field = V4L2_FIELD_NONE; + return 0; +} + +const struct vb2_ops hantro_queue_ops = { + .queue_setup = hantro_queue_setup, + .buf_prepare = hantro_buf_prepare, + .buf_queue = hantro_buf_queue, + .buf_out_validate = hantro_buf_out_validate, + .buf_request_complete = hantro_buf_request_complete, + .start_streaming = hantro_start_streaming, + .stop_streaming = hantro_stop_streaming, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, +}; diff --git a/drivers/media/platform/verisilicon/hantro_v4l2.h b/drivers/media/platform/verisilicon/hantro_v4l2.h new file mode 100644 index 000000000..64f6f57e9 --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_v4l2.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + * Alpha Lin <Alpha.Lin@rock-chips.com> + * Jeffy Chen <jeffy.chen@rock-chips.com> + * + * Copyright 2018 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + * + * Based on s5p-mfc driver by Samsung Electronics Co., Ltd. + * Copyright (C) 2011 Samsung Electronics Co., Ltd. + */ + +#ifndef HANTRO_V4L2_H_ +#define HANTRO_V4L2_H_ + +#include "hantro.h" + +extern const struct v4l2_ioctl_ops hantro_ioctl_ops; +extern const struct vb2_ops hantro_queue_ops; + +void hantro_reset_fmts(struct hantro_ctx *ctx); +int hantro_get_format_depth(u32 fourcc); +const struct hantro_fmt * +hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream); + +#endif /* HANTRO_V4L2_H_ */ diff --git a/drivers/media/platform/verisilicon/hantro_vp8.c b/drivers/media/platform/verisilicon/hantro_vp8.c new file mode 100644 index 000000000..381bc1d3b --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_vp8.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + */ + +#include "hantro.h" + +/* + * probs table with packed + */ +struct vp8_prob_tbl_packed { + u8 prob_mb_skip_false; + u8 prob_intra; + u8 prob_ref_last; + u8 prob_ref_golden; + u8 prob_segment[3]; + u8 padding0; + + u8 prob_luma_16x16_pred_mode[4]; + u8 prob_chroma_pred_mode[3]; + u8 padding1; + + /* mv prob */ + u8 prob_mv_context[2][V4L2_VP8_MV_PROB_CNT]; + u8 padding2[2]; + + /* coeff probs */ + u8 prob_coeffs[4][8][3][V4L2_VP8_COEFF_PROB_CNT]; + u8 padding3[96]; +}; + +/* + * filter taps taken to 7-bit precision, + * reference RFC6386#Page-16, filters[8][6] + */ +const u32 hantro_vp8_dec_mc_filter[8][6] = { + { 0, 0, 128, 0, 0, 0 }, + { 0, -6, 123, 12, -1, 0 }, + { 2, -11, 108, 36, -8, 1 }, + { 0, -9, 93, 50, -6, 0 }, + { 3, -16, 77, 77, -16, 3 }, + { 0, -6, 50, 93, -9, 0 }, + { 1, -8, 36, 108, -11, 2 }, + { 0, -1, 12, 123, -6, 0 } +}; + +void hantro_vp8_prob_update(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + const struct v4l2_vp8_entropy *entropy = &hdr->entropy; + u32 i, j, k; + u8 *dst; + + /* first probs */ + dst = ctx->vp8_dec.prob_tbl.cpu; + + dst[0] = hdr->prob_skip_false; + dst[1] = hdr->prob_intra; + dst[2] = hdr->prob_last; + dst[3] = hdr->prob_gf; + dst[4] = hdr->segment.segment_probs[0]; + dst[5] = hdr->segment.segment_probs[1]; + dst[6] = hdr->segment.segment_probs[2]; + dst[7] = 0; + + dst += 8; + dst[0] = entropy->y_mode_probs[0]; + dst[1] = entropy->y_mode_probs[1]; + dst[2] = entropy->y_mode_probs[2]; + dst[3] = entropy->y_mode_probs[3]; + dst[4] = entropy->uv_mode_probs[0]; + dst[5] = entropy->uv_mode_probs[1]; + dst[6] = entropy->uv_mode_probs[2]; + dst[7] = 0; /*unused */ + + /* mv probs */ + dst += 8; + dst[0] = entropy->mv_probs[0][0]; /* is short */ + dst[1] = entropy->mv_probs[1][0]; + dst[2] = entropy->mv_probs[0][1]; /* sign */ + dst[3] = entropy->mv_probs[1][1]; + dst[4] = entropy->mv_probs[0][8 + 9]; + dst[5] = entropy->mv_probs[0][9 + 9]; + dst[6] = entropy->mv_probs[1][8 + 9]; + dst[7] = entropy->mv_probs[1][9 + 9]; + dst += 8; + for (i = 0; i < 2; ++i) { + for (j = 0; j < 8; j += 4) { + dst[0] = entropy->mv_probs[i][j + 9 + 0]; + dst[1] = entropy->mv_probs[i][j + 9 + 1]; + dst[2] = entropy->mv_probs[i][j + 9 + 2]; + dst[3] = entropy->mv_probs[i][j + 9 + 3]; + dst += 4; + } + } + for (i = 0; i < 2; ++i) { + dst[0] = entropy->mv_probs[i][0 + 2]; + dst[1] = entropy->mv_probs[i][1 + 2]; + dst[2] = entropy->mv_probs[i][2 + 2]; + dst[3] = entropy->mv_probs[i][3 + 2]; + dst[4] = entropy->mv_probs[i][4 + 2]; + dst[5] = entropy->mv_probs[i][5 + 2]; + dst[6] = entropy->mv_probs[i][6 + 2]; + dst[7] = 0; /*unused */ + dst += 8; + } + + /* coeff probs (header part) */ + dst = ctx->vp8_dec.prob_tbl.cpu; + dst += (8 * 7); + for (i = 0; i < 4; ++i) { + for (j = 0; j < 8; ++j) { + for (k = 0; k < 3; ++k) { + dst[0] = entropy->coeff_probs[i][j][k][0]; + dst[1] = entropy->coeff_probs[i][j][k][1]; + dst[2] = entropy->coeff_probs[i][j][k][2]; + dst[3] = entropy->coeff_probs[i][j][k][3]; + dst += 4; + } + } + } + + /* coeff probs (footer part) */ + dst = ctx->vp8_dec.prob_tbl.cpu; + dst += (8 * 55); + for (i = 0; i < 4; ++i) { + for (j = 0; j < 8; ++j) { + for (k = 0; k < 3; ++k) { + dst[0] = entropy->coeff_probs[i][j][k][4]; + dst[1] = entropy->coeff_probs[i][j][k][5]; + dst[2] = entropy->coeff_probs[i][j][k][6]; + dst[3] = entropy->coeff_probs[i][j][k][7]; + dst[4] = entropy->coeff_probs[i][j][k][8]; + dst[5] = entropy->coeff_probs[i][j][k][9]; + dst[6] = entropy->coeff_probs[i][j][k][10]; + dst[7] = 0; /*unused */ + dst += 8; + } + } + } +} + +int hantro_vp8_dec_init(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_aux_buf *aux_buf; + unsigned int mb_width, mb_height; + size_t segment_map_size; + int ret; + + /* segment map table size calculation */ + mb_width = DIV_ROUND_UP(ctx->dst_fmt.width, 16); + mb_height = DIV_ROUND_UP(ctx->dst_fmt.height, 16); + segment_map_size = round_up(DIV_ROUND_UP(mb_width * mb_height, 4), 64); + + /* + * In context init the dma buffer for segment map must be allocated. + * And the data in segment map buffer must be set to all zero. + */ + aux_buf = &ctx->vp8_dec.segment_map; + aux_buf->size = segment_map_size; + aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size, + &aux_buf->dma, GFP_KERNEL); + if (!aux_buf->cpu) + return -ENOMEM; + + /* + * Allocate probability table buffer, + * total 1208 bytes, 4K page is far enough. + */ + aux_buf = &ctx->vp8_dec.prob_tbl; + aux_buf->size = sizeof(struct vp8_prob_tbl_packed); + aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size, + &aux_buf->dma, GFP_KERNEL); + if (!aux_buf->cpu) { + ret = -ENOMEM; + goto err_free_seg_map; + } + + return 0; + +err_free_seg_map: + dma_free_coherent(vpu->dev, ctx->vp8_dec.segment_map.size, + ctx->vp8_dec.segment_map.cpu, + ctx->vp8_dec.segment_map.dma); + + return ret; +} + +void hantro_vp8_dec_exit(struct hantro_ctx *ctx) +{ + struct hantro_vp8_dec_hw_ctx *vp8_dec = &ctx->vp8_dec; + struct hantro_dev *vpu = ctx->dev; + + dma_free_coherent(vpu->dev, vp8_dec->segment_map.size, + vp8_dec->segment_map.cpu, vp8_dec->segment_map.dma); + dma_free_coherent(vpu->dev, vp8_dec->prob_tbl.size, + vp8_dec->prob_tbl.cpu, vp8_dec->prob_tbl.dma); +} diff --git a/drivers/media/platform/verisilicon/hantro_vp9.c b/drivers/media/platform/verisilicon/hantro_vp9.c new file mode 100644 index 000000000..566cd376c --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_vp9.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VP9 codec driver + * + * Copyright (C) 2021 Collabora Ltd. + */ + +#include <linux/types.h> +#include <media/v4l2-mem2mem.h> + +#include "hantro.h" +#include "hantro_hw.h" +#include "hantro_vp9.h" + +#define POW2(x) (1 << (x)) + +#define MAX_LOG2_TILE_COLUMNS 6 +#define MAX_NUM_TILE_COLS POW2(MAX_LOG2_TILE_COLUMNS) +#define MAX_TILE_COLS 20 +#define MAX_TILE_ROWS 22 + +static size_t hantro_vp9_tile_filter_size(unsigned int height) +{ + u32 h, height32, size; + + h = roundup(height, 8); + + height32 = roundup(h, 64); + size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */ + + return size; +} + +static size_t hantro_vp9_bsd_control_size(unsigned int height) +{ + u32 h, height32; + + h = roundup(height, 8); + height32 = roundup(h, 64); + + return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1); +} + +static size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height) +{ + u32 w, h; + int num_ctbs; + + w = roundup(width, 8); + h = roundup(height, 8); + num_ctbs = ((w + 63) / 64) * ((h + 63) / 64); + + return num_ctbs * 32; +} + +static inline size_t hantro_vp9_prob_tab_size(void) +{ + return roundup(sizeof(struct hantro_g2_all_probs), 16); +} + +static inline size_t hantro_vp9_count_tab_size(void) +{ + return roundup(sizeof(struct symbol_counts), 16); +} + +static inline size_t hantro_vp9_tile_info_size(void) +{ + return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16); +} + +static void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m) +{ + if (i == 0) + return &cnts->count_coeffs[j][k][l][m]; + + if (i == 1) + return &cnts->count_coeffs8x8[j][k][l][m]; + + if (i == 2) + return &cnts->count_coeffs16x16[j][k][l][m]; + + if (i == 3) + return &cnts->count_coeffs32x32[j][k][l][m]; + + return NULL; +} + +static void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m) +{ + if (i == 0) + return &cnts->count_coeffs[j][k][l][m][3]; + + if (i == 1) + return &cnts->count_coeffs8x8[j][k][l][m][3]; + + if (i == 2) + return &cnts->count_coeffs16x16[j][k][l][m][3]; + + if (i == 3) + return &cnts->count_coeffs32x32[j][k][l][m][3]; + + return NULL; +} + +#define INNER_LOOP \ + do { \ + for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); ++m) { \ + vp9_ctx->cnts.coeff[i][j][k][l][m] = \ + get_coeffs_arr(cnts, i, j, k, l, m); \ + vp9_ctx->cnts.eob[i][j][k][l][m][0] = \ + &cnts->count_eobs[i][j][k][l][m]; \ + vp9_ctx->cnts.eob[i][j][k][l][m][1] = \ + get_eobs1(cnts, i, j, k, l, m); \ + } \ + } while (0) + +static void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx) +{ + struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; + struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset; + int i, j, k, l, m; + + vp9_ctx->cnts.partition = &cnts->partition_counts; + vp9_ctx->cnts.skip = &cnts->mbskip_count; + vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count; + vp9_ctx->cnts.tx32p = &cnts->tx32x32_count; + /* + * g2 hardware uses tx16x16_count[2][3], while the api + * expects tx16p[2][4], so this must be explicitly copied + * into vp9_ctx->cnts.tx16p when passing the data to the + * vp9 library function + */ + vp9_ctx->cnts.tx8p = &cnts->tx8x8_count; + + vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts; + vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts; + vp9_ctx->cnts.comp = &cnts->comp_inter_count; + vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count; + vp9_ctx->cnts.single_ref = &cnts->single_ref_count; + vp9_ctx->cnts.filter = &cnts->switchable_interp_counts; + vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints; + vp9_ctx->cnts.sign = &cnts->mv_counts.sign; + vp9_ctx->cnts.classes = &cnts->mv_counts.classes; + vp9_ctx->cnts.class0 = &cnts->mv_counts.class0; + vp9_ctx->cnts.bits = &cnts->mv_counts.bits; + vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp; + vp9_ctx->cnts.fp = &cnts->mv_counts.fp; + vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp; + vp9_ctx->cnts.hp = &cnts->mv_counts.hp; + + for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i) + for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j) + for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k) + for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l) + INNER_LOOP; +} + +int hantro_vp9_dec_init(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + const struct hantro_variant *variant = vpu->variant; + struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec; + struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge; + struct hantro_aux_buf *segment_map = &vp9_dec->segment_map; + struct hantro_aux_buf *misc = &vp9_dec->misc; + u32 i, max_width, max_height, size; + + if (variant->num_dec_fmts < 1) + return -EINVAL; + + for (i = 0; i < variant->num_dec_fmts; ++i) + if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME) + break; + + if (i == variant->num_dec_fmts) + return -EINVAL; + + max_width = vpu->variant->dec_fmts[i].frmsize.max_width; + max_height = vpu->variant->dec_fmts[i].frmsize.max_height; + + size = hantro_vp9_tile_filter_size(max_height); + vp9_dec->bsd_ctrl_offset = size; + size += hantro_vp9_bsd_control_size(max_height); + + tile_edge->cpu = dma_alloc_coherent(vpu->dev, size, &tile_edge->dma, GFP_KERNEL); + if (!tile_edge->cpu) + return -ENOMEM; + + tile_edge->size = size; + memset(tile_edge->cpu, 0, size); + + size = hantro_vp9_segment_map_size(max_width, max_height); + vp9_dec->segment_map_size = size; + size *= 2; /* we need two areas of this size, used alternately */ + + segment_map->cpu = dma_alloc_coherent(vpu->dev, size, &segment_map->dma, GFP_KERNEL); + if (!segment_map->cpu) + goto err_segment_map; + + segment_map->size = size; + memset(segment_map->cpu, 0, size); + + size = hantro_vp9_prob_tab_size(); + vp9_dec->ctx_counters_offset = size; + size += hantro_vp9_count_tab_size(); + vp9_dec->tile_info_offset = size; + size += hantro_vp9_tile_info_size(); + + misc->cpu = dma_alloc_coherent(vpu->dev, size, &misc->dma, GFP_KERNEL); + if (!misc->cpu) + goto err_misc; + + misc->size = size; + memset(misc->cpu, 0, size); + + init_v4l2_vp9_count_tbl(ctx); + + return 0; + +err_misc: + dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma); + +err_segment_map: + dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma); + + return -ENOMEM; +} + +void hantro_vp9_dec_exit(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec; + struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge; + struct hantro_aux_buf *segment_map = &vp9_dec->segment_map; + struct hantro_aux_buf *misc = &vp9_dec->misc; + + dma_free_coherent(vpu->dev, misc->size, misc->cpu, misc->dma); + dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma); + dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma); +} diff --git a/drivers/media/platform/verisilicon/hantro_vp9.h b/drivers/media/platform/verisilicon/hantro_vp9.h new file mode 100644 index 000000000..26b69275f --- /dev/null +++ b/drivers/media/platform/verisilicon/hantro_vp9.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hantro VP9 codec driver + * + * Copyright (C) 2021 Collabora Ltd. + */ + +struct hantro_g2_mv_probs { + u8 joint[3]; + u8 sign[2]; + u8 class0_bit[2][1]; + u8 fr[2][3]; + u8 class0_hp[2]; + u8 hp[2]; + u8 classes[2][10]; + u8 class0_fr[2][2][3]; + u8 bits[2][10]; +}; + +struct hantro_g2_probs { + u8 inter_mode[7][4]; + u8 is_inter[4]; + u8 uv_mode[10][8]; + u8 tx8[2][1]; + u8 tx16[2][2]; + u8 tx32[2][3]; + u8 y_mode_tail[4][1]; + u8 y_mode[4][8]; + u8 partition[2][16][4]; /* [keyframe][][], [inter][][] */ + u8 uv_mode_tail[10][1]; + u8 interp_filter[4][2]; + u8 comp_mode[5]; + u8 skip[3]; + + u8 pad1[1]; + + struct hantro_g2_mv_probs mv; + + u8 single_ref[5][2]; + u8 comp_ref[5]; + + u8 pad2[17]; + + u8 coef[4][2][2][6][6][4]; +}; + +struct hantro_g2_all_probs { + u8 kf_y_mode_prob[10][10][8]; + + u8 kf_y_mode_prob_tail[10][10][1]; + u8 ref_pred_probs[3]; + u8 mb_segment_tree_probs[7]; + u8 segment_pred_probs[3]; + u8 ref_scores[4]; + u8 prob_comppred[2]; + + u8 pad1[9]; + + u8 kf_uv_mode_prob[10][8]; + u8 kf_uv_mode_prob_tail[10][1]; + + u8 pad2[6]; + + struct hantro_g2_probs probs; +}; + +struct mv_counts { + u32 joints[4]; + u32 sign[2][2]; + u32 classes[2][11]; + u32 class0[2][2]; + u32 bits[2][10][2]; + u32 class0_fp[2][2][4]; + u32 fp[2][4]; + u32 class0_hp[2][2]; + u32 hp[2][2]; +}; + +struct symbol_counts { + u32 inter_mode_counts[7][3][2]; + u32 sb_ymode_counts[4][10]; + u32 uv_mode_counts[10][10]; + u32 partition_counts[16][4]; + u32 switchable_interp_counts[4][3]; + u32 intra_inter_count[4][2]; + u32 comp_inter_count[5][2]; + u32 single_ref_count[5][2][2]; + u32 comp_ref_count[5][2]; + u32 tx32x32_count[2][4]; + u32 tx16x16_count[2][3]; + u32 tx8x8_count[2][2]; + u32 mbskip_count[3][2]; + + struct mv_counts mv_counts; + + u32 count_coeffs[2][2][6][6][4]; + u32 count_coeffs8x8[2][2][6][6][4]; + u32 count_coeffs16x16[2][2][6][6][4]; + u32 count_coeffs32x32[2][2][6][6][4]; + + u32 count_eobs[4][2][2][6][6]; +}; diff --git a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c new file mode 100644 index 000000000..b390228fd --- /dev/null +++ b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c @@ -0,0 +1,400 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2019 Pengutronix, Philipp Zabel <kernel@pengutronix.de> + */ + +#include <linux/clk.h> +#include <linux/delay.h> + +#include "hantro.h" +#include "hantro_jpeg.h" +#include "hantro_g1_regs.h" +#include "hantro_g2_regs.h" + +#define CTRL_SOFT_RESET 0x00 +#define RESET_G1 BIT(1) +#define RESET_G2 BIT(0) + +#define CTRL_CLOCK_ENABLE 0x04 +#define CLOCK_G1 BIT(1) +#define CLOCK_G2 BIT(0) + +#define CTRL_G1_DEC_FUSE 0x08 +#define CTRL_G1_PP_FUSE 0x0c +#define CTRL_G2_DEC_FUSE 0x10 + +static void imx8m_soft_reset(struct hantro_dev *vpu, u32 reset_bits) +{ + u32 val; + + /* Assert */ + val = readl(vpu->ctrl_base + CTRL_SOFT_RESET); + val &= ~reset_bits; + writel(val, vpu->ctrl_base + CTRL_SOFT_RESET); + + udelay(2); + + /* Release */ + val = readl(vpu->ctrl_base + CTRL_SOFT_RESET); + val |= reset_bits; + writel(val, vpu->ctrl_base + CTRL_SOFT_RESET); +} + +static void imx8m_clk_enable(struct hantro_dev *vpu, u32 clock_bits) +{ + u32 val; + + val = readl(vpu->ctrl_base + CTRL_CLOCK_ENABLE); + val |= clock_bits; + writel(val, vpu->ctrl_base + CTRL_CLOCK_ENABLE); +} + +static int imx8mq_runtime_resume(struct hantro_dev *vpu) +{ + int ret; + + ret = clk_bulk_prepare_enable(vpu->variant->num_clocks, vpu->clocks); + if (ret) { + dev_err(vpu->dev, "Failed to enable clocks\n"); + return ret; + } + + imx8m_soft_reset(vpu, RESET_G1 | RESET_G2); + imx8m_clk_enable(vpu, CLOCK_G1 | CLOCK_G2); + + /* Set values of the fuse registers */ + writel(0xffffffff, vpu->ctrl_base + CTRL_G1_DEC_FUSE); + writel(0xffffffff, vpu->ctrl_base + CTRL_G1_PP_FUSE); + writel(0xffffffff, vpu->ctrl_base + CTRL_G2_DEC_FUSE); + + clk_bulk_disable_unprepare(vpu->variant->num_clocks, vpu->clocks); + + return 0; +} + +/* + * Supported formats. + */ + +static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_YUYV, + .codec_mode = HANTRO_MODE_NONE, + .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt imx8m_vpu_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, + .codec_mode = HANTRO_MODE_MPEG2_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8_FRAME, + .codec_mode = HANTRO_MODE_VP8_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_P010, + .codec_mode = HANTRO_MODE_NONE, + .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12_4L4, + .codec_mode = HANTRO_MODE_NONE, + .match_depth = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = TILE_MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = TILE_MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_P010_4L4, + .codec_mode = HANTRO_MODE_NONE, + .match_depth = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = TILE_MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = TILE_MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_HEVC_SLICE, + .codec_mode = HANTRO_MODE_HEVC_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = TILE_MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = TILE_MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP9_FRAME, + .codec_mode = HANTRO_MODE_VP9_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = TILE_MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = TILE_MB_DIM, + }, + }, +}; + +static irqreturn_t imx8m_vpu_g1_irq(int irq, void *dev_id) +{ + struct hantro_dev *vpu = dev_id; + enum vb2_buffer_state state; + u32 status; + + status = vdpu_read(vpu, G1_REG_INTERRUPT); + state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ? + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; + + vdpu_write(vpu, 0, G1_REG_INTERRUPT); + vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG); + + hantro_irq_done(vpu, state); + + return IRQ_HANDLED; +} + +static int imx8mq_vpu_hw_init(struct hantro_dev *vpu) +{ + vpu->ctrl_base = vpu->reg_bases[vpu->variant->num_regs - 1]; + + return 0; +} + +static void imx8m_vpu_g1_reset(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + imx8m_soft_reset(vpu, RESET_G1); +} + +/* + * Supported codec ops. + */ + +static const struct hantro_codec_ops imx8mq_vpu_codec_ops[] = { + [HANTRO_MODE_MPEG2_DEC] = { + .run = hantro_g1_mpeg2_dec_run, + .reset = imx8m_vpu_g1_reset, + .init = hantro_mpeg2_dec_init, + .exit = hantro_mpeg2_dec_exit, + }, + [HANTRO_MODE_VP8_DEC] = { + .run = hantro_g1_vp8_dec_run, + .reset = imx8m_vpu_g1_reset, + .init = hantro_vp8_dec_init, + .exit = hantro_vp8_dec_exit, + }, + [HANTRO_MODE_H264_DEC] = { + .run = hantro_g1_h264_dec_run, + .reset = imx8m_vpu_g1_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, +}; + +static const struct hantro_codec_ops imx8mq_vpu_g1_codec_ops[] = { + [HANTRO_MODE_MPEG2_DEC] = { + .run = hantro_g1_mpeg2_dec_run, + .init = hantro_mpeg2_dec_init, + .exit = hantro_mpeg2_dec_exit, + }, + [HANTRO_MODE_VP8_DEC] = { + .run = hantro_g1_vp8_dec_run, + .init = hantro_vp8_dec_init, + .exit = hantro_vp8_dec_exit, + }, + [HANTRO_MODE_H264_DEC] = { + .run = hantro_g1_h264_dec_run, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, +}; + +static const struct hantro_codec_ops imx8mq_vpu_g2_codec_ops[] = { + [HANTRO_MODE_HEVC_DEC] = { + .run = hantro_g2_hevc_dec_run, + .init = hantro_hevc_dec_init, + .exit = hantro_hevc_dec_exit, + }, + [HANTRO_MODE_VP9_DEC] = { + .run = hantro_g2_vp9_dec_run, + .done = hantro_g2_vp9_dec_done, + .init = hantro_vp9_dec_init, + .exit = hantro_vp9_dec_exit, + }, +}; + +/* + * VPU variants. + */ + +static const struct hantro_irq imx8mq_irqs[] = { + { "g1", imx8m_vpu_g1_irq }, +}; + +static const struct hantro_irq imx8mq_g2_irqs[] = { + { "g2", hantro_g2_irq }, +}; + +static const char * const imx8mq_clk_names[] = { "g1", "g2", "bus" }; +static const char * const imx8mq_reg_names[] = { "g1", "g2", "ctrl" }; +static const char * const imx8mq_g1_clk_names[] = { "g1" }; +static const char * const imx8mq_g2_clk_names[] = { "g2" }; + +const struct hantro_variant imx8mq_vpu_variant = { + .dec_fmts = imx8m_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_dec_fmts), + .postproc_fmts = imx8m_vpu_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_postproc_fmts), + .postproc_ops = &hantro_g1_postproc_ops, + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | + HANTRO_H264_DECODER, + .codec_ops = imx8mq_vpu_codec_ops, + .init = imx8mq_vpu_hw_init, + .runtime_resume = imx8mq_runtime_resume, + .irqs = imx8mq_irqs, + .num_irqs = ARRAY_SIZE(imx8mq_irqs), + .clk_names = imx8mq_clk_names, + .num_clocks = ARRAY_SIZE(imx8mq_clk_names), + .reg_names = imx8mq_reg_names, + .num_regs = ARRAY_SIZE(imx8mq_reg_names) +}; + +const struct hantro_variant imx8mq_vpu_g1_variant = { + .dec_fmts = imx8m_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_dec_fmts), + .postproc_fmts = imx8m_vpu_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_postproc_fmts), + .postproc_ops = &hantro_g1_postproc_ops, + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | + HANTRO_H264_DECODER, + .codec_ops = imx8mq_vpu_g1_codec_ops, + .irqs = imx8mq_irqs, + .num_irqs = ARRAY_SIZE(imx8mq_irqs), + .clk_names = imx8mq_g1_clk_names, + .num_clocks = ARRAY_SIZE(imx8mq_g1_clk_names), +}; + +const struct hantro_variant imx8mq_vpu_g2_variant = { + .dec_offset = 0x0, + .dec_fmts = imx8m_vpu_g2_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts), + .postproc_fmts = imx8m_vpu_g2_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts), + .postproc_ops = &hantro_g2_postproc_ops, + .codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER, + .codec_ops = imx8mq_vpu_g2_codec_ops, + .irqs = imx8mq_g2_irqs, + .num_irqs = ARRAY_SIZE(imx8mq_g2_irqs), + .clk_names = imx8mq_g2_clk_names, + .num_clocks = ARRAY_SIZE(imx8mq_g2_clk_names), +}; + +const struct hantro_variant imx8mm_vpu_g1_variant = { + .dec_fmts = imx8m_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_dec_fmts), + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | + HANTRO_H264_DECODER, + .codec_ops = imx8mq_vpu_g1_codec_ops, + .irqs = imx8mq_irqs, + .num_irqs = ARRAY_SIZE(imx8mq_irqs), + .clk_names = imx8mq_g1_clk_names, + .num_clocks = ARRAY_SIZE(imx8mq_g1_clk_names), +}; diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c new file mode 100644 index 000000000..46c1a83bc --- /dev/null +++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (c) 2014 Rockchip Electronics Co., Ltd. + * Hertz Wong <hertz.wong@rock-chips.com> + * Herman Chen <herman.chen@rock-chips.com> + * + * Copyright (C) 2014 Google, Inc. + * Tomasz Figa <tfiga@chromium.org> + */ + +#include <linux/types.h> +#include <linux/sort.h> + +#include <media/v4l2-mem2mem.h> + +#include "hantro_hw.h" +#include "hantro_v4l2.h" + +#define VDPU_SWREG(nr) ((nr) * 4) + +#define VDPU_REG_DEC_OUT_BASE VDPU_SWREG(63) +#define VDPU_REG_RLC_VLC_BASE VDPU_SWREG(64) +#define VDPU_REG_QTABLE_BASE VDPU_SWREG(61) +#define VDPU_REG_DIR_MV_BASE VDPU_SWREG(62) +#define VDPU_REG_REFER_BASE(i) (VDPU_SWREG(84 + (i))) +#define VDPU_REG_DEC_E(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(11) : 0) +#define VDPU_REG_DEC_SCMD_DIS(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_FILTERING_DIS(v) ((v) ? BIT(8) : 0) +#define VDPU_REG_PIC_FIXED_QUANT(v) ((v) ? BIT(7) : 0) +#define VDPU_REG_DEC_LATENCY(v) (((v) << 1) & GENMASK(6, 1)) + +#define VDPU_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25)) +#define VDPU_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0)) + +#define VDPU_REG_APF_THRESHOLD(v) (((v) << 17) & GENMASK(30, 17)) +#define VDPU_REG_STARTMB_X(v) (((v) << 8) & GENMASK(16, 8)) +#define VDPU_REG_STARTMB_Y(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_DEC_MODE(v) (((v) << 0) & GENMASK(3, 0)) + +#define VDPU_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(4) : 0) +#define VDPU_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(3) : 0) +#define VDPU_REG_DEC_INSWAP32_E(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(22) : 0) +#define VDPU_REG_DEC_MAX_BURST(v) (((v) << 16) & GENMASK(20, 16)) +#define VDPU_REG_DEC_AXI_WR_ID(v) (((v) << 8) & GENMASK(15, 8)) +#define VDPU_REG_DEC_AXI_RD_ID(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_START_CODE_E(v) ((v) ? BIT(22) : 0) +#define VDPU_REG_CH_8PIX_ILEAV_E(v) ((v) ? BIT(21) : 0) +#define VDPU_REG_RLC_MODE_E(v) ((v) ? BIT(20) : 0) +#define VDPU_REG_PIC_INTERLACE_E(v) ((v) ? BIT(17) : 0) +#define VDPU_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(16) : 0) +#define VDPU_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(13) : 0) +#define VDPU_REG_WRITE_MVS_E(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_SEQ_MBAFF_E(v) ((v) ? BIT(7) : 0) +#define VDPU_REG_PICORD_COUNT_E(v) ((v) ? BIT(6) : 0) +#define VDPU_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(4) : 0) + +#define VDPU_REG_PRED_BC_TAP_0_0(v) (((v) << 22) & GENMASK(31, 22)) +#define VDPU_REG_PRED_BC_TAP_0_1(v) (((v) << 12) & GENMASK(21, 12)) +#define VDPU_REG_PRED_BC_TAP_0_2(v) (((v) << 2) & GENMASK(11, 2)) + +#define VDPU_REG_REFBU_E(v) ((v) ? BIT(31) : 0) + +#define VDPU_REG_PINIT_RLIST_F9(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_PINIT_RLIST_F8(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_PINIT_RLIST_F7(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_PINIT_RLIST_F6(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_PINIT_RLIST_F5(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_PINIT_RLIST_F4(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_PINIT_RLIST_F15(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_PINIT_RLIST_F14(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_PINIT_RLIST_F13(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_PINIT_RLIST_F12(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_PINIT_RLIST_F11(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_PINIT_RLIST_F10(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_REFER1_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER0_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER3_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER2_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER5_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER4_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER7_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER6_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER9_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER8_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER11_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER10_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER13_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER12_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFER15_NBR(v) (((v) << 16) & GENMASK(31, 16)) +#define VDPU_REG_REFER14_NBR(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_BINIT_RLIST_F5(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_F4(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_F3(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_F2(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_F1(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_F0(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_F11(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_F10(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_F9(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_F8(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_F7(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_F6(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_F15(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_F14(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_F13(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_F12(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_B5(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_B4(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_B3(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_B2(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_B1(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_B0(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_B11(v) (((v) << 25) & GENMASK(29, 25)) +#define VDPU_REG_BINIT_RLIST_B10(v) (((v) << 20) & GENMASK(24, 20)) +#define VDPU_REG_BINIT_RLIST_B9(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_B8(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_B7(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_B6(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_BINIT_RLIST_B15(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_BINIT_RLIST_B14(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_BINIT_RLIST_B13(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_BINIT_RLIST_B12(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_PINIT_RLIST_F3(v) (((v) << 15) & GENMASK(19, 15)) +#define VDPU_REG_PINIT_RLIST_F2(v) (((v) << 10) & GENMASK(14, 10)) +#define VDPU_REG_PINIT_RLIST_F1(v) (((v) << 5) & GENMASK(9, 5)) +#define VDPU_REG_PINIT_RLIST_F0(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_REFER_LTERM_E(v) (((v) << 0) & GENMASK(31, 0)) + +#define VDPU_REG_REFER_VALID_E(v) (((v) << 0) & GENMASK(31, 0)) + +#define VDPU_REG_STRM_START_BIT(v) (((v) << 0) & GENMASK(5, 0)) + +#define VDPU_REG_CH_QP_OFFSET2(v) (((v) << 22) & GENMASK(26, 22)) +#define VDPU_REG_CH_QP_OFFSET(v) (((v) << 17) & GENMASK(21, 17)) +#define VDPU_REG_PIC_MB_HEIGHT_P(v) (((v) << 9) & GENMASK(16, 9)) +#define VDPU_REG_PIC_MB_WIDTH(v) (((v) << 0) & GENMASK(8, 0)) + +#define VDPU_REG_WEIGHT_BIPR_IDC(v) (((v) << 16) & GENMASK(17, 16)) +#define VDPU_REG_REF_FRAMES(v) (((v) << 0) & GENMASK(4, 0)) + +#define VDPU_REG_FILT_CTRL_PRES(v) ((v) ? BIT(31) : 0) +#define VDPU_REG_RDPIC_CNT_PRES(v) ((v) ? BIT(30) : 0) +#define VDPU_REG_FRAMENUM_LEN(v) (((v) << 16) & GENMASK(20, 16)) +#define VDPU_REG_FRAMENUM(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_REFPIC_MK_LEN(v) (((v) << 16) & GENMASK(26, 16)) +#define VDPU_REG_IDR_PIC_ID(v) (((v) << 0) & GENMASK(15, 0)) + +#define VDPU_REG_PPS_ID(v) (((v) << 24) & GENMASK(31, 24)) +#define VDPU_REG_REFIDX1_ACTIVE(v) (((v) << 19) & GENMASK(23, 19)) +#define VDPU_REG_REFIDX0_ACTIVE(v) (((v) << 14) & GENMASK(18, 14)) +#define VDPU_REG_POC_LENGTH(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_IDR_PIC_E(v) ((v) ? BIT(8) : 0) +#define VDPU_REG_DIR_8X8_INFER_E(v) ((v) ? BIT(7) : 0) +#define VDPU_REG_BLACKWHITE_E(v) ((v) ? BIT(6) : 0) +#define VDPU_REG_CABAC_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_WEIGHT_PRED_E(v) ((v) ? BIT(4) : 0) +#define VDPU_REG_CONST_INTRA_E(v) ((v) ? BIT(3) : 0) +#define VDPU_REG_8X8TRANS_FLAG_E(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_TYPE1_QUANT_E(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_FIELDPIC_FLAG_E(v) ((v) ? BIT(0) : 0) + +static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) +{ + const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; + const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; + const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; + const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; + struct hantro_dev *vpu = ctx->dev; + u32 reg; + + reg = VDPU_REG_DEC_ADV_PRE_DIS(0) | + VDPU_REG_DEC_SCMD_DIS(0) | + VDPU_REG_FILTERING_DIS(0) | + VDPU_REG_PIC_FIXED_QUANT(0) | + VDPU_REG_DEC_LATENCY(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(50)); + + reg = VDPU_REG_INIT_QP(pps->pic_init_qp_minus26 + 26) | + VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(51)); + + reg = VDPU_REG_APF_THRESHOLD(8) | + VDPU_REG_STARTMB_X(0) | + VDPU_REG_STARTMB_Y(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(52)); + + reg = VDPU_REG_DEC_MODE(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(53)); + + reg = VDPU_REG_DEC_STRENDIAN_E(1) | + VDPU_REG_DEC_STRSWAP32_E(1) | + VDPU_REG_DEC_OUTSWAP32_E(1) | + VDPU_REG_DEC_INSWAP32_E(1) | + VDPU_REG_DEC_OUT_ENDIAN(1) | + VDPU_REG_DEC_IN_ENDIAN(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(54)); + + reg = VDPU_REG_DEC_DATA_DISC_E(0) | + VDPU_REG_DEC_MAX_BURST(16) | + VDPU_REG_DEC_AXI_WR_ID(0) | + VDPU_REG_DEC_AXI_RD_ID(0xff); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(56)); + + reg = VDPU_REG_START_CODE_E(1) | + VDPU_REG_CH_8PIX_ILEAV_E(0) | + VDPU_REG_RLC_MODE_E(0) | + VDPU_REG_PIC_INTERLACE_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) && + (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD || + dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)) | + VDPU_REG_PIC_FIELDMODE_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) | + VDPU_REG_PIC_TOPFIELD_E(!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)) | + VDPU_REG_WRITE_MVS_E((sps->profile_idc > 66) && dec_param->nal_ref_idc) | + VDPU_REG_SEQ_MBAFF_E(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) | + VDPU_REG_PICORD_COUNT_E(sps->profile_idc > 66) | + VDPU_REG_DEC_TIMEOUT_E(1) | + VDPU_REG_DEC_CLK_GATE_E(1); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(57)); + + reg = VDPU_REG_PRED_BC_TAP_0_0(1) | + VDPU_REG_PRED_BC_TAP_0_1((u32)-5) | + VDPU_REG_PRED_BC_TAP_0_2(20); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(59)); + + reg = VDPU_REG_REFBU_E(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(65)); + + reg = VDPU_REG_STRM_START_BIT(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(109)); + + reg = VDPU_REG_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset) | + VDPU_REG_CH_QP_OFFSET(pps->chroma_qp_index_offset) | + VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | + VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(110)); + + reg = VDPU_REG_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc) | + VDPU_REG_REF_FRAMES(sps->max_num_ref_frames); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(111)); + + reg = VDPU_REG_FILT_CTRL_PRES(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) | + VDPU_REG_RDPIC_CNT_PRES(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) | + VDPU_REG_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) | + VDPU_REG_FRAMENUM(dec_param->frame_num); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(112)); + + reg = VDPU_REG_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) | + VDPU_REG_IDR_PIC_ID(dec_param->idr_pic_id); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(113)); + + reg = VDPU_REG_PPS_ID(pps->pic_parameter_set_id) | + VDPU_REG_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) | + VDPU_REG_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) | + VDPU_REG_POC_LENGTH(dec_param->pic_order_cnt_bit_size); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(114)); + + reg = VDPU_REG_IDR_PIC_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) | + VDPU_REG_DIR_8X8_INFER_E(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) | + VDPU_REG_BLACKWHITE_E(sps->profile_idc >= 100 && sps->chroma_format_idc == 0) | + VDPU_REG_CABAC_E(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) | + VDPU_REG_WEIGHT_PRED_E(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) | + VDPU_REG_CONST_INTRA_E(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) | + VDPU_REG_8X8TRANS_FLAG_E(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) | + VDPU_REG_TYPE1_QUANT_E(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT) | + VDPU_REG_FIELDPIC_FLAG_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(115)); +} + +static void set_ref(struct hantro_ctx *ctx) +{ + const struct v4l2_h264_reference *b0_reflist, *b1_reflist, *p_reflist; + struct hantro_dev *vpu = ctx->dev; + u32 reg; + int i; + + b0_reflist = ctx->h264_dec.reflists.b0; + b1_reflist = ctx->h264_dec.reflists.b1; + p_reflist = ctx->h264_dec.reflists.p; + + reg = VDPU_REG_PINIT_RLIST_F9(p_reflist[9].index) | + VDPU_REG_PINIT_RLIST_F8(p_reflist[8].index) | + VDPU_REG_PINIT_RLIST_F7(p_reflist[7].index) | + VDPU_REG_PINIT_RLIST_F6(p_reflist[6].index) | + VDPU_REG_PINIT_RLIST_F5(p_reflist[5].index) | + VDPU_REG_PINIT_RLIST_F4(p_reflist[4].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(74)); + + reg = VDPU_REG_PINIT_RLIST_F15(p_reflist[15].index) | + VDPU_REG_PINIT_RLIST_F14(p_reflist[14].index) | + VDPU_REG_PINIT_RLIST_F13(p_reflist[13].index) | + VDPU_REG_PINIT_RLIST_F12(p_reflist[12].index) | + VDPU_REG_PINIT_RLIST_F11(p_reflist[11].index) | + VDPU_REG_PINIT_RLIST_F10(p_reflist[10].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(75)); + + reg = VDPU_REG_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, 1)) | + VDPU_REG_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, 0)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(76)); + + reg = VDPU_REG_REFER3_NBR(hantro_h264_get_ref_nbr(ctx, 3)) | + VDPU_REG_REFER2_NBR(hantro_h264_get_ref_nbr(ctx, 2)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(77)); + + reg = VDPU_REG_REFER5_NBR(hantro_h264_get_ref_nbr(ctx, 5)) | + VDPU_REG_REFER4_NBR(hantro_h264_get_ref_nbr(ctx, 4)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(78)); + + reg = VDPU_REG_REFER7_NBR(hantro_h264_get_ref_nbr(ctx, 7)) | + VDPU_REG_REFER6_NBR(hantro_h264_get_ref_nbr(ctx, 6)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(79)); + + reg = VDPU_REG_REFER9_NBR(hantro_h264_get_ref_nbr(ctx, 9)) | + VDPU_REG_REFER8_NBR(hantro_h264_get_ref_nbr(ctx, 8)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(80)); + + reg = VDPU_REG_REFER11_NBR(hantro_h264_get_ref_nbr(ctx, 11)) | + VDPU_REG_REFER10_NBR(hantro_h264_get_ref_nbr(ctx, 10)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(81)); + + reg = VDPU_REG_REFER13_NBR(hantro_h264_get_ref_nbr(ctx, 13)) | + VDPU_REG_REFER12_NBR(hantro_h264_get_ref_nbr(ctx, 12)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(82)); + + reg = VDPU_REG_REFER15_NBR(hantro_h264_get_ref_nbr(ctx, 15)) | + VDPU_REG_REFER14_NBR(hantro_h264_get_ref_nbr(ctx, 14)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(83)); + + reg = VDPU_REG_BINIT_RLIST_F5(b0_reflist[5].index) | + VDPU_REG_BINIT_RLIST_F4(b0_reflist[4].index) | + VDPU_REG_BINIT_RLIST_F3(b0_reflist[3].index) | + VDPU_REG_BINIT_RLIST_F2(b0_reflist[2].index) | + VDPU_REG_BINIT_RLIST_F1(b0_reflist[1].index) | + VDPU_REG_BINIT_RLIST_F0(b0_reflist[0].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(100)); + + reg = VDPU_REG_BINIT_RLIST_F11(b0_reflist[11].index) | + VDPU_REG_BINIT_RLIST_F10(b0_reflist[10].index) | + VDPU_REG_BINIT_RLIST_F9(b0_reflist[9].index) | + VDPU_REG_BINIT_RLIST_F8(b0_reflist[8].index) | + VDPU_REG_BINIT_RLIST_F7(b0_reflist[7].index) | + VDPU_REG_BINIT_RLIST_F6(b0_reflist[6].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(101)); + + reg = VDPU_REG_BINIT_RLIST_F15(b0_reflist[15].index) | + VDPU_REG_BINIT_RLIST_F14(b0_reflist[14].index) | + VDPU_REG_BINIT_RLIST_F13(b0_reflist[13].index) | + VDPU_REG_BINIT_RLIST_F12(b0_reflist[12].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(102)); + + reg = VDPU_REG_BINIT_RLIST_B5(b1_reflist[5].index) | + VDPU_REG_BINIT_RLIST_B4(b1_reflist[4].index) | + VDPU_REG_BINIT_RLIST_B3(b1_reflist[3].index) | + VDPU_REG_BINIT_RLIST_B2(b1_reflist[2].index) | + VDPU_REG_BINIT_RLIST_B1(b1_reflist[1].index) | + VDPU_REG_BINIT_RLIST_B0(b1_reflist[0].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(103)); + + reg = VDPU_REG_BINIT_RLIST_B11(b1_reflist[11].index) | + VDPU_REG_BINIT_RLIST_B10(b1_reflist[10].index) | + VDPU_REG_BINIT_RLIST_B9(b1_reflist[9].index) | + VDPU_REG_BINIT_RLIST_B8(b1_reflist[8].index) | + VDPU_REG_BINIT_RLIST_B7(b1_reflist[7].index) | + VDPU_REG_BINIT_RLIST_B6(b1_reflist[6].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(104)); + + reg = VDPU_REG_BINIT_RLIST_B15(b1_reflist[15].index) | + VDPU_REG_BINIT_RLIST_B14(b1_reflist[14].index) | + VDPU_REG_BINIT_RLIST_B13(b1_reflist[13].index) | + VDPU_REG_BINIT_RLIST_B12(b1_reflist[12].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(105)); + + reg = VDPU_REG_PINIT_RLIST_F3(p_reflist[3].index) | + VDPU_REG_PINIT_RLIST_F2(p_reflist[2].index) | + VDPU_REG_PINIT_RLIST_F1(p_reflist[1].index) | + VDPU_REG_PINIT_RLIST_F0(p_reflist[0].index); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(106)); + + reg = VDPU_REG_REFER_LTERM_E(ctx->h264_dec.dpb_longterm); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(107)); + + reg = VDPU_REG_REFER_VALID_E(ctx->h264_dec.dpb_valid); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(108)); + + /* Set up addresses of DPB buffers. */ + for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) { + dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, i); + + vdpu_write_relaxed(vpu, dma_addr, VDPU_REG_REFER_BASE(i)); + } +} + +static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) +{ + const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; + struct vb2_v4l2_buffer *dst_buf; + struct hantro_dev *vpu = ctx->dev; + dma_addr_t src_dma, dst_dma; + size_t offset = 0; + + /* Source (stream) buffer. */ + src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + vdpu_write_relaxed(vpu, src_dma, VDPU_REG_RLC_VLC_BASE); + + /* Destination (decoded frame) buffer. */ + dst_buf = hantro_get_dst_buf(ctx); + dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf); + /* Adjust dma addr to start at second line for bottom field */ + if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) + offset = ALIGN(ctx->src_fmt.width, MB_DIM); + vdpu_write_relaxed(vpu, dst_dma + offset, VDPU_REG_DEC_OUT_BASE); + + /* Higher profiles require DMV buffer appended to reference frames. */ + if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) { + unsigned int bytes_per_mb = 384; + + /* DMV buffer for monochrome start directly after Y-plane */ + if (ctrls->sps->profile_idc >= 100 && + ctrls->sps->chroma_format_idc == 0) + bytes_per_mb = 256; + offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) * + MB_HEIGHT(ctx->src_fmt.height); + + /* + * DMV buffer is split in two for field encoded frames, + * adjust offset for bottom field + */ + if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) + offset += 32 * MB_WIDTH(ctx->src_fmt.width) * + MB_HEIGHT(ctx->src_fmt.height); + vdpu_write_relaxed(vpu, dst_dma + offset, VDPU_REG_DIR_MV_BASE); + } + + /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */ + vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, VDPU_REG_QTABLE_BASE); +} + +int rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf; + u32 reg; + int ret; + + /* Prepare the H264 decoder context. */ + ret = hantro_h264_dec_prepare_run(ctx); + if (ret) + return ret; + + src_buf = hantro_get_src_buf(ctx); + set_params(ctx, src_buf); + set_ref(ctx); + set_buffers(ctx, src_buf); + + hantro_end_prepare_run(ctx); + + /* Start decoding! */ + reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1); + vdpu_write(vpu, reg, VDPU_SWREG(57)); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c new file mode 100644 index 000000000..8395c4d48 --- /dev/null +++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + * + * JPEG encoder + * ------------ + * The VPU JPEG encoder produces JPEG baseline sequential format. + * The quantization coefficients are 8-bit values, complying with + * the baseline specification. Therefore, it requires + * luma and chroma quantization tables. The hardware does entropy + * encoding using internal Huffman tables, as specified in the JPEG + * specification. + * + * In other words, only the luma and chroma quantization tables are + * required for the encoding operation. + * + * Quantization luma table values are written to registers + * VEPU_swreg_0-VEPU_swreg_15, and chroma table values to + * VEPU_swreg_16-VEPU_swreg_31. A special order is needed, neither + * zigzag, nor linear. + */ + +#include <asm/unaligned.h> +#include <media/v4l2-mem2mem.h> +#include "hantro_jpeg.h" +#include "hantro.h" +#include "hantro_v4l2.h" +#include "hantro_hw.h" +#include "rockchip_vpu2_regs.h" + +#define VEPU_JPEG_QUANT_TABLE_COUNT 16 + +static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev *vpu, + struct hantro_ctx *ctx) +{ + u32 overfill_r, overfill_b; + u32 reg; + + /* + * The format width and height are already macroblock aligned + * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination + * format width and height can be further modified by + * .vidioc_s_selection(), and the width is 4-aligned. + */ + overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width; + overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height; + + reg = VEPU_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width); + vepu_write_relaxed(vpu, reg, VEPU_REG_INPUT_LUMA_INFO); + + reg = VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4) | + VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b); + /* + * This register controls the input crop, as the offset + * from the right/bottom within the last macroblock. The offset from the + * right must be divided by 4 and so the crop must be aligned to 4 pixels + * horizontally. + */ + vepu_write_relaxed(vpu, reg, VEPU_REG_ENC_OVER_FILL_STRM_OFFSET); + + reg = VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt); + vepu_write_relaxed(vpu, reg, VEPU_REG_ENC_CTRL1); +} + +static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + struct vb2_buffer *src_buf, + struct vb2_buffer *dst_buf) +{ + struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt; + dma_addr_t src[3]; + u32 size_left; + + size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size; + if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size)) + size_left = 0; + + WARN_ON(pix_fmt->num_planes > 3); + + vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) + + ctx->vpu_dst_fmt->header_size, + VEPU_REG_ADDR_OUTPUT_STREAM); + vepu_write_relaxed(vpu, size_left, VEPU_REG_STR_BUF_LIMIT); + + if (pix_fmt->num_planes == 1) { + src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0); + vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0); + } else if (pix_fmt->num_planes == 2) { + src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0); + src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1); + vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0); + vepu_write_relaxed(vpu, src[1], VEPU_REG_ADDR_IN_PLANE_1); + } else { + src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0); + src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1); + src[2] = vb2_dma_contig_plane_dma_addr(src_buf, 2); + vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0); + vepu_write_relaxed(vpu, src[1], VEPU_REG_ADDR_IN_PLANE_1); + vepu_write_relaxed(vpu, src[2], VEPU_REG_ADDR_IN_PLANE_2); + } +} + +static void +rockchip_vpu2_jpeg_enc_set_qtable(struct hantro_dev *vpu, + unsigned char *luma_qtable, + unsigned char *chroma_qtable) +{ + u32 reg, i; + __be32 *luma_qtable_p; + __be32 *chroma_qtable_p; + + luma_qtable_p = (__be32 *)luma_qtable; + chroma_qtable_p = (__be32 *)chroma_qtable; + + /* + * Quantization table registers must be written in contiguous blocks. + * DO NOT collapse the below two "for" loops into one. + */ + for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) { + reg = get_unaligned_be32(&luma_qtable_p[i]); + vepu_write_relaxed(vpu, reg, VEPU_REG_JPEG_LUMA_QUAT(i)); + } + + for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) { + reg = get_unaligned_be32(&chroma_qtable_p[i]); + vepu_write_relaxed(vpu, reg, VEPU_REG_JPEG_CHROMA_QUAT(i)); + } +} + +int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + struct hantro_jpeg_ctx jpeg_ctx; + u32 reg; + + src_buf = hantro_get_src_buf(ctx); + dst_buf = hantro_get_dst_buf(ctx); + + hantro_start_prepare_run(ctx); + + memset(&jpeg_ctx, 0, sizeof(jpeg_ctx)); + jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0); + if (!jpeg_ctx.buffer) + return -ENOMEM; + + jpeg_ctx.width = ctx->dst_fmt.width; + jpeg_ctx.height = ctx->dst_fmt.height; + jpeg_ctx.quality = ctx->jpeg_quality; + hantro_jpeg_header_assemble(&jpeg_ctx); + + /* Switch to JPEG encoder mode before writing registers */ + vepu_write_relaxed(vpu, VEPU_REG_ENCODE_FORMAT_JPEG, + VEPU_REG_ENCODE_START); + + rockchip_vpu2_set_src_img_ctrl(vpu, ctx); + rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf, + &dst_buf->vb2_buf); + rockchip_vpu2_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable, + jpeg_ctx.hw_chroma_qtable); + + reg = VEPU_REG_OUTPUT_SWAP32 + | VEPU_REG_OUTPUT_SWAP16 + | VEPU_REG_OUTPUT_SWAP8 + | VEPU_REG_INPUT_SWAP8 + | VEPU_REG_INPUT_SWAP16 + | VEPU_REG_INPUT_SWAP32; + /* Make sure that all registers are written at this point. */ + vepu_write(vpu, reg, VEPU_REG_DATA_ENDIAN); + + reg = VEPU_REG_AXI_CTRL_BURST_LEN(16); + vepu_write_relaxed(vpu, reg, VEPU_REG_AXI_CTRL); + + reg = VEPU_REG_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) + | VEPU_REG_MB_HEIGHT(MB_HEIGHT(ctx->src_fmt.height)) + | VEPU_REG_FRAME_TYPE_INTRA + | VEPU_REG_ENCODE_FORMAT_JPEG + | VEPU_REG_ENCODE_ENABLE; + + /* Kick the watchdog and start encoding */ + hantro_end_prepare_run(ctx); + vepu_write(vpu, reg, VEPU_REG_ENCODE_START); + + return 0; +} + +void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + u32 bytesused = vepu_read(vpu, VEPU_REG_STR_BUF_LIMIT) / 8; + struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx); + + vb2_set_plane_payload(&dst_buf->vb2_buf, 0, + ctx->vpu_dst_fmt->header_size + bytesused); +} diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c new file mode 100644 index 000000000..b66737fab --- /dev/null +++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + */ + +#include <asm/unaligned.h> +#include <linux/bitfield.h> +#include <media/v4l2-mem2mem.h> +#include "hantro.h" +#include "hantro_hw.h" + +#define VDPU_SWREG(nr) ((nr) * 4) + +#define VDPU_REG_DEC_OUT_BASE VDPU_SWREG(63) +#define VDPU_REG_RLC_VLC_BASE VDPU_SWREG(64) +#define VDPU_REG_QTABLE_BASE VDPU_SWREG(61) +#define VDPU_REG_REFER0_BASE VDPU_SWREG(131) +#define VDPU_REG_REFER2_BASE VDPU_SWREG(134) +#define VDPU_REG_REFER3_BASE VDPU_SWREG(135) +#define VDPU_REG_REFER1_BASE VDPU_SWREG(148) +#define VDPU_REG_DEC_E(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(11) : 0) +#define VDPU_REG_DEC_SCMD_DIS(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_FILTERING_DIS(v) ((v) ? BIT(8) : 0) +#define VDPU_REG_DEC_LATENCY(v) (((v) << 1) & GENMASK(6, 1)) + +#define VDPU_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25)) +#define VDPU_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0)) + +#define VDPU_REG_APF_THRESHOLD(v) (((v) << 17) & GENMASK(30, 17)) +#define VDPU_REG_STARTMB_X(v) (((v) << 8) & GENMASK(16, 8)) +#define VDPU_REG_STARTMB_Y(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_DEC_MODE(v) (((v) << 0) & GENMASK(3, 0)) + +#define VDPU_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(4) : 0) +#define VDPU_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(3) : 0) +#define VDPU_REG_DEC_INSWAP32_E(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(22) : 0) +#define VDPU_REG_DEC_MAX_BURST(v) (((v) << 16) & GENMASK(20, 16)) +#define VDPU_REG_DEC_AXI_WR_ID(v) (((v) << 8) & GENMASK(15, 8)) +#define VDPU_REG_DEC_AXI_RD_ID(v) (((v) << 0) & GENMASK(7, 0)) + +#define VDPU_REG_RLC_MODE_E(v) ((v) ? BIT(20) : 0) +#define VDPU_REG_PIC_INTERLACE_E(v) ((v) ? BIT(17) : 0) +#define VDPU_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(16) : 0) +#define VDPU_REG_PIC_B_E(v) ((v) ? BIT(15) : 0) +#define VDPU_REG_PIC_INTER_E(v) ((v) ? BIT(14) : 0) +#define VDPU_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(13) : 0) +#define VDPU_REG_FWD_INTERLACE_E(v) ((v) ? BIT(12) : 0) +#define VDPU_REG_WRITE_MVS_E(v) ((v) ? BIT(10) : 0) +#define VDPU_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(5) : 0) +#define VDPU_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(4) : 0) + +#define VDPU_REG_PIC_MB_WIDTH(v) (((v) << 23) & GENMASK(31, 23)) +#define VDPU_REG_PIC_MB_HEIGHT_P(v) (((v) << 11) & GENMASK(18, 11)) +#define VDPU_REG_ALT_SCAN_E(v) ((v) ? BIT(6) : 0) +#define VDPU_REG_TOPFIELDFIRST_E(v) ((v) ? BIT(5) : 0) + +#define VDPU_REG_STRM_START_BIT(v) (((v) << 26) & GENMASK(31, 26)) +#define VDPU_REG_QSCALE_TYPE(v) ((v) ? BIT(24) : 0) +#define VDPU_REG_CON_MV_E(v) ((v) ? BIT(4) : 0) +#define VDPU_REG_INTRA_DC_PREC(v) (((v) << 2) & GENMASK(3, 2)) +#define VDPU_REG_INTRA_VLC_TAB(v) ((v) ? BIT(1) : 0) +#define VDPU_REG_FRAME_PRED_DCT(v) ((v) ? BIT(0) : 0) + +#define VDPU_REG_ALT_SCAN_FLAG_E(v) ((v) ? BIT(19) : 0) +#define VDPU_REG_FCODE_FWD_HOR(v) (((v) << 15) & GENMASK(18, 15)) +#define VDPU_REG_FCODE_FWD_VER(v) (((v) << 11) & GENMASK(14, 11)) +#define VDPU_REG_FCODE_BWD_HOR(v) (((v) << 7) & GENMASK(10, 7)) +#define VDPU_REG_FCODE_BWD_VER(v) (((v) << 3) & GENMASK(6, 3)) +#define VDPU_REG_MV_ACCURACY_FWD(v) ((v) ? BIT(2) : 0) +#define VDPU_REG_MV_ACCURACY_BWD(v) ((v) ? BIT(1) : 0) + +static void +rockchip_vpu2_mpeg2_dec_set_quantisation(struct hantro_dev *vpu, + struct hantro_ctx *ctx) +{ + struct v4l2_ctrl_mpeg2_quantisation *q; + + q = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_MPEG2_QUANTISATION); + hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q); + vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, VDPU_REG_QTABLE_BASE); +} + +static void +rockchip_vpu2_mpeg2_dec_set_buffers(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + struct vb2_buffer *src_buf, + struct vb2_buffer *dst_buf, + const struct v4l2_ctrl_mpeg2_sequence *seq, + const struct v4l2_ctrl_mpeg2_picture *pic) +{ + dma_addr_t forward_addr = 0, backward_addr = 0; + dma_addr_t current_addr, addr; + + switch (pic->picture_coding_type) { + case V4L2_MPEG2_PIC_CODING_TYPE_B: + backward_addr = hantro_get_ref(ctx, pic->backward_ref_ts); + fallthrough; + case V4L2_MPEG2_PIC_CODING_TYPE_P: + forward_addr = hantro_get_ref(ctx, pic->forward_ref_ts); + } + + /* Source bitstream buffer */ + addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); + vdpu_write_relaxed(vpu, addr, VDPU_REG_RLC_VLC_BASE); + + /* Destination frame buffer */ + addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0); + current_addr = addr; + + if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) + addr += ALIGN(ctx->dst_fmt.width, 16); + vdpu_write_relaxed(vpu, addr, VDPU_REG_DEC_OUT_BASE); + + if (!forward_addr) + forward_addr = current_addr; + if (!backward_addr) + backward_addr = current_addr; + + /* Set forward ref frame (top/bottom field) */ + if (pic->picture_structure == V4L2_MPEG2_PIC_FRAME || + pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B || + (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD && + pic->flags & V4L2_MPEG2_PIC_TOP_FIELD) || + (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD && + !(pic->flags & V4L2_MPEG2_PIC_TOP_FIELD))) { + vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER1_BASE); + } else if (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) { + vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, current_addr, VDPU_REG_REFER1_BASE); + } else if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) { + vdpu_write_relaxed(vpu, current_addr, VDPU_REG_REFER0_BASE); + vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER1_BASE); + } + + /* Set backward ref frame (top/bottom field) */ + vdpu_write_relaxed(vpu, backward_addr, VDPU_REG_REFER2_BASE); + vdpu_write_relaxed(vpu, backward_addr, VDPU_REG_REFER3_BASE); +} + +int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + const struct v4l2_ctrl_mpeg2_sequence *seq; + const struct v4l2_ctrl_mpeg2_picture *pic; + u32 reg; + + src_buf = hantro_get_src_buf(ctx); + dst_buf = hantro_get_dst_buf(ctx); + + hantro_start_prepare_run(ctx); + + seq = hantro_get_ctrl(ctx, + V4L2_CID_STATELESS_MPEG2_SEQUENCE); + pic = hantro_get_ctrl(ctx, + V4L2_CID_STATELESS_MPEG2_PICTURE); + + reg = VDPU_REG_DEC_ADV_PRE_DIS(0) | + VDPU_REG_DEC_SCMD_DIS(0) | + VDPU_REG_FILTERING_DIS(1) | + VDPU_REG_DEC_LATENCY(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(50)); + + reg = VDPU_REG_INIT_QP(1) | + VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(51)); + + reg = VDPU_REG_APF_THRESHOLD(8) | + VDPU_REG_STARTMB_X(0) | + VDPU_REG_STARTMB_Y(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(52)); + + reg = VDPU_REG_DEC_MODE(5); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(53)); + + reg = VDPU_REG_DEC_STRENDIAN_E(1) | + VDPU_REG_DEC_STRSWAP32_E(1) | + VDPU_REG_DEC_OUTSWAP32_E(1) | + VDPU_REG_DEC_INSWAP32_E(1) | + VDPU_REG_DEC_OUT_ENDIAN(1) | + VDPU_REG_DEC_IN_ENDIAN(1); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(54)); + + reg = VDPU_REG_DEC_DATA_DISC_E(0) | + VDPU_REG_DEC_MAX_BURST(16) | + VDPU_REG_DEC_AXI_WR_ID(0) | + VDPU_REG_DEC_AXI_RD_ID(0); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(56)); + + reg = VDPU_REG_RLC_MODE_E(0) | + VDPU_REG_PIC_INTERLACE_E(!(seq->flags & V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE)) | + VDPU_REG_PIC_FIELDMODE_E(pic->picture_structure != V4L2_MPEG2_PIC_FRAME) | + VDPU_REG_PIC_B_E(pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B) | + VDPU_REG_PIC_INTER_E(pic->picture_coding_type != V4L2_MPEG2_PIC_CODING_TYPE_I) | + VDPU_REG_PIC_TOPFIELD_E(pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) | + VDPU_REG_FWD_INTERLACE_E(0) | + VDPU_REG_WRITE_MVS_E(0) | + VDPU_REG_DEC_TIMEOUT_E(1) | + VDPU_REG_DEC_CLK_GATE_E(1); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(57)); + + reg = VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) | + VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) | + VDPU_REG_ALT_SCAN_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) | + VDPU_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(120)); + + reg = VDPU_REG_STRM_START_BIT(0) | + VDPU_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) | + VDPU_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) | + VDPU_REG_INTRA_DC_PREC(pic->intra_dc_precision) | + VDPU_REG_INTRA_VLC_TAB(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC) | + VDPU_REG_FRAME_PRED_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(122)); + + reg = VDPU_REG_ALT_SCAN_FLAG_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) | + VDPU_REG_FCODE_FWD_HOR(pic->f_code[0][0]) | + VDPU_REG_FCODE_FWD_VER(pic->f_code[0][1]) | + VDPU_REG_FCODE_BWD_HOR(pic->f_code[1][0]) | + VDPU_REG_FCODE_BWD_VER(pic->f_code[1][1]) | + VDPU_REG_MV_ACCURACY_FWD(1) | + VDPU_REG_MV_ACCURACY_BWD(1); + vdpu_write_relaxed(vpu, reg, VDPU_SWREG(136)); + + rockchip_vpu2_mpeg2_dec_set_quantisation(vpu, ctx); + + rockchip_vpu2_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf, + &dst_buf->vb2_buf, seq, pic); + + /* Kick the watchdog and start decoding */ + hantro_end_prepare_run(ctx); + + reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1); + vdpu_write(vpu, reg, VDPU_SWREG(57)); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_vp8_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_vp8_dec.c new file mode 100644 index 000000000..d07907544 --- /dev/null +++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_vp8_dec.c @@ -0,0 +1,600 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Rockchip VPU codec vp8 decode driver + * + * Copyright (C) 2014 Rockchip Electronics Co., Ltd. + * ZhiChao Yu <zhichao.yu@rock-chips.com> + * + * Copyright (C) 2014 Google LLC. + * Tomasz Figa <tfiga@chromium.org> + * + * Copyright (C) 2015 Rockchip Electronics Co., Ltd. + * Alpha Lin <alpha.lin@rock-chips.com> + */ + +#include <media/v4l2-mem2mem.h> + +#include "hantro_hw.h" +#include "hantro.h" +#include "hantro_g1_regs.h" + +#define VDPU_REG_DEC_CTRL0 0x0c8 +#define VDPU_REG_STREAM_LEN 0x0cc +#define VDPU_REG_DEC_FORMAT 0x0d4 +#define VDPU_REG_DEC_CTRL0_DEC_MODE(x) (((x) & 0xf) << 0) +#define VDPU_REG_DATA_ENDIAN 0x0d8 +#define VDPU_REG_CONFIG_DEC_STRENDIAN_E BIT(5) +#define VDPU_REG_CONFIG_DEC_STRSWAP32_E BIT(4) +#define VDPU_REG_CONFIG_DEC_OUTSWAP32_E BIT(3) +#define VDPU_REG_CONFIG_DEC_INSWAP32_E BIT(2) +#define VDPU_REG_CONFIG_DEC_OUT_ENDIAN BIT(1) +#define VDPU_REG_CONFIG_DEC_IN_ENDIAN BIT(0) +#define VDPU_REG_AXI_CTRL 0x0e0 +#define VDPU_REG_CONFIG_DEC_MAX_BURST(x) (((x) & 0x1f) << 16) +#define VDPU_REG_EN_FLAGS 0x0e4 +#define VDPU_REG_DEC_CTRL0_PIC_INTER_E BIT(14) +#define VDPU_REG_CONFIG_DEC_TIMEOUT_E BIT(5) +#define VDPU_REG_CONFIG_DEC_CLK_GATE_E BIT(4) +#define VDPU_REG_PRED_FLT 0x0ec +#define VDPU_REG_ADDR_QTABLE 0x0f4 +#define VDPU_REG_ADDR_DST 0x0fc +#define VDPU_REG_ADDR_STR 0x100 +#define VDPU_REG_VP8_PIC_MB_SIZE 0x1e0 +#define VDPU_REG_VP8_DCT_START_BIT 0x1e4 +#define VDPU_REG_DEC_CTRL4_VC1_HEIGHT_EXT BIT(13) +#define VDPU_REG_DEC_CTRL4_BILIN_MC_E BIT(12) +#define VDPU_REG_VP8_CTRL0 0x1e8 +#define VDPU_REG_VP8_DATA_VAL 0x1f0 +#define VDPU_REG_PRED_FLT7 0x1f4 +#define VDPU_REG_PRED_FLT8 0x1f8 +#define VDPU_REG_PRED_FLT9 0x1fc +#define VDPU_REG_PRED_FLT10 0x200 +#define VDPU_REG_FILTER_LEVEL 0x204 +#define VDPU_REG_VP8_QUANTER0 0x208 +#define VDPU_REG_VP8_ADDR_REF0 0x20c +#define VDPU_REG_FILTER_MB_ADJ 0x210 +#define VDPU_REG_REF_PIC_FILT_TYPE_E BIT(31) +#define VDPU_REG_REF_PIC_FILT_SHARPNESS(x) (((x) & 0x7) << 28) +#define VDPU_REG_FILTER_REF_ADJ 0x214 +#define VDPU_REG_VP8_ADDR_REF2_5(i) (0x218 + ((i) * 0x4)) +#define VDPU_REG_VP8_GREF_SIGN_BIAS BIT(0) +#define VDPU_REG_VP8_AREF_SIGN_BIAS BIT(0) +#define VDPU_REG_VP8_DCT_BASE(i) \ + (0x230 + ((((i) < 5) ? (i) : ((i) + 1)) * 0x4)) +#define VDPU_REG_VP8_ADDR_CTRL_PART 0x244 +#define VDPU_REG_VP8_SEGMENT_VAL 0x254 +#define VDPU_REG_FWD_PIC1_SEGMENT_BASE(x) ((x) << 0) +#define VDPU_REG_FWD_PIC1_SEGMENT_UPD_E BIT(1) +#define VDPU_REG_FWD_PIC1_SEGMENT_E BIT(0) +#define VDPU_REG_VP8_DCT_START_BIT2 0x258 +#define VDPU_REG_VP8_QUANTER1 0x25c +#define VDPU_REG_VP8_QUANTER2 0x260 +#define VDPU_REG_PRED_FLT1 0x264 +#define VDPU_REG_PRED_FLT2 0x268 +#define VDPU_REG_PRED_FLT3 0x26c +#define VDPU_REG_PRED_FLT4 0x270 +#define VDPU_REG_PRED_FLT5 0x274 +#define VDPU_REG_PRED_FLT6 0x278 + +static const struct hantro_reg vp8_dec_dct_base[8] = { + { VDPU_REG_ADDR_STR, 0, 0xffffffff }, + { VDPU_REG_VP8_DCT_BASE(0), 0, 0xffffffff }, + { VDPU_REG_VP8_DCT_BASE(1), 0, 0xffffffff }, + { VDPU_REG_VP8_DCT_BASE(2), 0, 0xffffffff }, + { VDPU_REG_VP8_DCT_BASE(3), 0, 0xffffffff }, + { VDPU_REG_VP8_DCT_BASE(4), 0, 0xffffffff }, + { VDPU_REG_VP8_DCT_BASE(5), 0, 0xffffffff }, + { VDPU_REG_VP8_DCT_BASE(6), 0, 0xffffffff }, +}; + +static const struct hantro_reg vp8_dec_lf_level[4] = { + { VDPU_REG_FILTER_LEVEL, 18, 0x3f }, + { VDPU_REG_FILTER_LEVEL, 12, 0x3f }, + { VDPU_REG_FILTER_LEVEL, 6, 0x3f }, + { VDPU_REG_FILTER_LEVEL, 0, 0x3f }, +}; + +static const struct hantro_reg vp8_dec_mb_adj[4] = { + { VDPU_REG_FILTER_MB_ADJ, 21, 0x7f }, + { VDPU_REG_FILTER_MB_ADJ, 14, 0x7f }, + { VDPU_REG_FILTER_MB_ADJ, 7, 0x7f }, + { VDPU_REG_FILTER_MB_ADJ, 0, 0x7f }, +}; + +static const struct hantro_reg vp8_dec_ref_adj[4] = { + { VDPU_REG_FILTER_REF_ADJ, 21, 0x7f }, + { VDPU_REG_FILTER_REF_ADJ, 14, 0x7f }, + { VDPU_REG_FILTER_REF_ADJ, 7, 0x7f }, + { VDPU_REG_FILTER_REF_ADJ, 0, 0x7f }, +}; + +static const struct hantro_reg vp8_dec_quant[4] = { + { VDPU_REG_VP8_QUANTER0, 11, 0x7ff }, + { VDPU_REG_VP8_QUANTER0, 0, 0x7ff }, + { VDPU_REG_VP8_QUANTER1, 11, 0x7ff }, + { VDPU_REG_VP8_QUANTER1, 0, 0x7ff }, +}; + +static const struct hantro_reg vp8_dec_quant_delta[5] = { + { VDPU_REG_VP8_QUANTER0, 27, 0x1f }, + { VDPU_REG_VP8_QUANTER0, 22, 0x1f }, + { VDPU_REG_VP8_QUANTER1, 27, 0x1f }, + { VDPU_REG_VP8_QUANTER1, 22, 0x1f }, + { VDPU_REG_VP8_QUANTER2, 27, 0x1f }, +}; + +static const struct hantro_reg vp8_dec_dct_start_bits[8] = { + { VDPU_REG_VP8_CTRL0, 26, 0x3f }, + { VDPU_REG_VP8_DCT_START_BIT, 26, 0x3f }, + { VDPU_REG_VP8_DCT_START_BIT, 20, 0x3f }, + { VDPU_REG_VP8_DCT_START_BIT2, 24, 0x3f }, + { VDPU_REG_VP8_DCT_START_BIT2, 18, 0x3f }, + { VDPU_REG_VP8_DCT_START_BIT2, 12, 0x3f }, + { VDPU_REG_VP8_DCT_START_BIT2, 6, 0x3f }, + { VDPU_REG_VP8_DCT_START_BIT2, 0, 0x3f }, +}; + +static const struct hantro_reg vp8_dec_pred_bc_tap[8][6] = { + { + { 0, 0, 0}, + { VDPU_REG_PRED_FLT, 22, 0x3ff }, + { VDPU_REG_PRED_FLT, 12, 0x3ff }, + { VDPU_REG_PRED_FLT, 2, 0x3ff }, + { VDPU_REG_PRED_FLT1, 22, 0x3ff }, + { 0, 0, 0}, + }, { + { 0, 0, 0}, + { VDPU_REG_PRED_FLT1, 12, 0x3ff }, + { VDPU_REG_PRED_FLT1, 2, 0x3ff }, + { VDPU_REG_PRED_FLT2, 22, 0x3ff }, + { VDPU_REG_PRED_FLT2, 12, 0x3ff }, + { 0, 0, 0}, + }, { + { VDPU_REG_PRED_FLT10, 10, 0x3 }, + { VDPU_REG_PRED_FLT2, 2, 0x3ff }, + { VDPU_REG_PRED_FLT3, 22, 0x3ff }, + { VDPU_REG_PRED_FLT3, 12, 0x3ff }, + { VDPU_REG_PRED_FLT3, 2, 0x3ff }, + { VDPU_REG_PRED_FLT10, 8, 0x3}, + }, { + { 0, 0, 0}, + { VDPU_REG_PRED_FLT4, 22, 0x3ff }, + { VDPU_REG_PRED_FLT4, 12, 0x3ff }, + { VDPU_REG_PRED_FLT4, 2, 0x3ff }, + { VDPU_REG_PRED_FLT5, 22, 0x3ff }, + { 0, 0, 0}, + }, { + { VDPU_REG_PRED_FLT10, 6, 0x3 }, + { VDPU_REG_PRED_FLT5, 12, 0x3ff }, + { VDPU_REG_PRED_FLT5, 2, 0x3ff }, + { VDPU_REG_PRED_FLT6, 22, 0x3ff }, + { VDPU_REG_PRED_FLT6, 12, 0x3ff }, + { VDPU_REG_PRED_FLT10, 4, 0x3 }, + }, { + { 0, 0, 0}, + { VDPU_REG_PRED_FLT6, 2, 0x3ff }, + { VDPU_REG_PRED_FLT7, 22, 0x3ff }, + { VDPU_REG_PRED_FLT7, 12, 0x3ff }, + { VDPU_REG_PRED_FLT7, 2, 0x3ff }, + { 0, 0, 0}, + }, { + { VDPU_REG_PRED_FLT10, 2, 0x3 }, + { VDPU_REG_PRED_FLT8, 22, 0x3ff }, + { VDPU_REG_PRED_FLT8, 12, 0x3ff }, + { VDPU_REG_PRED_FLT8, 2, 0x3ff }, + { VDPU_REG_PRED_FLT9, 22, 0x3ff }, + { VDPU_REG_PRED_FLT10, 0, 0x3 }, + }, { + { 0, 0, 0}, + { VDPU_REG_PRED_FLT9, 12, 0x3ff }, + { VDPU_REG_PRED_FLT9, 2, 0x3ff }, + { VDPU_REG_PRED_FLT10, 22, 0x3ff }, + { VDPU_REG_PRED_FLT10, 12, 0x3ff }, + { 0, 0, 0}, + }, +}; + +static const struct hantro_reg vp8_dec_mb_start_bit = { + .base = VDPU_REG_VP8_CTRL0, + .shift = 18, + .mask = 0x3f +}; + +static const struct hantro_reg vp8_dec_mb_aligned_data_len = { + .base = VDPU_REG_VP8_DATA_VAL, + .shift = 0, + .mask = 0x3fffff +}; + +static const struct hantro_reg vp8_dec_num_dct_partitions = { + .base = VDPU_REG_VP8_DATA_VAL, + .shift = 24, + .mask = 0xf +}; + +static const struct hantro_reg vp8_dec_stream_len = { + .base = VDPU_REG_STREAM_LEN, + .shift = 0, + .mask = 0xffffff +}; + +static const struct hantro_reg vp8_dec_mb_width = { + .base = VDPU_REG_VP8_PIC_MB_SIZE, + .shift = 23, + .mask = 0x1ff +}; + +static const struct hantro_reg vp8_dec_mb_height = { + .base = VDPU_REG_VP8_PIC_MB_SIZE, + .shift = 11, + .mask = 0xff +}; + +static const struct hantro_reg vp8_dec_mb_width_ext = { + .base = VDPU_REG_VP8_PIC_MB_SIZE, + .shift = 3, + .mask = 0x7 +}; + +static const struct hantro_reg vp8_dec_mb_height_ext = { + .base = VDPU_REG_VP8_PIC_MB_SIZE, + .shift = 0, + .mask = 0x7 +}; + +static const struct hantro_reg vp8_dec_bool_range = { + .base = VDPU_REG_VP8_CTRL0, + .shift = 0, + .mask = 0xff +}; + +static const struct hantro_reg vp8_dec_bool_value = { + .base = VDPU_REG_VP8_CTRL0, + .shift = 8, + .mask = 0xff +}; + +static const struct hantro_reg vp8_dec_filter_disable = { + .base = VDPU_REG_DEC_CTRL0, + .shift = 8, + .mask = 1 +}; + +static const struct hantro_reg vp8_dec_skip_mode = { + .base = VDPU_REG_DEC_CTRL0, + .shift = 9, + .mask = 1 +}; + +static const struct hantro_reg vp8_dec_start_dec = { + .base = VDPU_REG_EN_FLAGS, + .shift = 0, + .mask = 1 +}; + +static void cfg_lf(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + const struct v4l2_vp8_segment *seg = &hdr->segment; + const struct v4l2_vp8_loop_filter *lf = &hdr->lf; + struct hantro_dev *vpu = ctx->dev; + unsigned int i; + u32 reg; + + if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) { + hantro_reg_write(vpu, &vp8_dec_lf_level[0], lf->level); + } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) { + for (i = 0; i < 4; i++) { + u32 lf_level = clamp(lf->level + seg->lf_update[i], + 0, 63); + + hantro_reg_write(vpu, &vp8_dec_lf_level[i], lf_level); + } + } else { + for (i = 0; i < 4; i++) + hantro_reg_write(vpu, &vp8_dec_lf_level[i], + seg->lf_update[i]); + } + + reg = VDPU_REG_REF_PIC_FILT_SHARPNESS(lf->sharpness_level); + if (lf->flags & V4L2_VP8_LF_FILTER_TYPE_SIMPLE) + reg |= VDPU_REG_REF_PIC_FILT_TYPE_E; + vdpu_write_relaxed(vpu, reg, VDPU_REG_FILTER_MB_ADJ); + + if (lf->flags & V4L2_VP8_LF_ADJ_ENABLE) { + for (i = 0; i < 4; i++) { + hantro_reg_write(vpu, &vp8_dec_mb_adj[i], + lf->mb_mode_delta[i]); + hantro_reg_write(vpu, &vp8_dec_ref_adj[i], + lf->ref_frm_delta[i]); + } + } +} + +static void cfg_qp(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + const struct v4l2_vp8_quantization *q = &hdr->quant; + const struct v4l2_vp8_segment *seg = &hdr->segment; + struct hantro_dev *vpu = ctx->dev; + unsigned int i; + + if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) { + hantro_reg_write(vpu, &vp8_dec_quant[0], q->y_ac_qi); + } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) { + for (i = 0; i < 4; i++) { + u32 quant = clamp(q->y_ac_qi + seg->quant_update[i], + 0, 127); + + hantro_reg_write(vpu, &vp8_dec_quant[i], quant); + } + } else { + for (i = 0; i < 4; i++) + hantro_reg_write(vpu, &vp8_dec_quant[i], + seg->quant_update[i]); + } + + hantro_reg_write(vpu, &vp8_dec_quant_delta[0], q->y_dc_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[1], q->y2_dc_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[2], q->y2_ac_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[3], q->uv_dc_delta); + hantro_reg_write(vpu, &vp8_dec_quant_delta[4], q->uv_ac_delta); +} + +static void cfg_parts(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_src; + u32 first_part_offset = V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) ? 10 : 3; + u32 mb_size, mb_offset_bytes, mb_offset_bits, mb_start_bits; + u32 dct_size_part_size, dct_part_offset; + dma_addr_t src_dma; + u32 dct_part_total_len = 0; + u32 count = 0; + unsigned int i; + + vb2_src = hantro_get_src_buf(ctx); + src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0); + + /* + * Calculate control partition mb data info + * @first_part_header_bits: bits offset of mb data from first + * part start pos + * @mb_offset_bits: bits offset of mb data from src_dma + * base addr + * @mb_offset_byte: bytes offset of mb data from src_dma + * base addr + * @mb_start_bits: bits offset of mb data from mb data + * 64bits alignment addr + */ + mb_offset_bits = first_part_offset * 8 + + hdr->first_part_header_bits + 8; + mb_offset_bytes = mb_offset_bits / 8; + mb_start_bits = mb_offset_bits - + (mb_offset_bytes & (~DEC_8190_ALIGN_MASK)) * 8; + mb_size = hdr->first_part_size - + (mb_offset_bytes - first_part_offset) + + (mb_offset_bytes & DEC_8190_ALIGN_MASK); + + /* Macroblock data aligned base addr */ + vdpu_write_relaxed(vpu, (mb_offset_bytes & (~DEC_8190_ALIGN_MASK)) + + src_dma, VDPU_REG_VP8_ADDR_CTRL_PART); + hantro_reg_write(vpu, &vp8_dec_mb_start_bit, mb_start_bits); + hantro_reg_write(vpu, &vp8_dec_mb_aligned_data_len, mb_size); + + /* + * Calculate DCT partition info + * @dct_size_part_size: Containing sizes of DCT part, every DCT part + * has 3 bytes to store its size, except the last + * DCT part + * @dct_part_offset: bytes offset of DCT parts from src_dma base addr + * @dct_part_total_len: total size of all DCT parts + */ + dct_size_part_size = (hdr->num_dct_parts - 1) * 3; + dct_part_offset = first_part_offset + hdr->first_part_size; + for (i = 0; i < hdr->num_dct_parts; i++) + dct_part_total_len += hdr->dct_part_sizes[i]; + dct_part_total_len += dct_size_part_size; + dct_part_total_len += (dct_part_offset & DEC_8190_ALIGN_MASK); + + /* Number of DCT partitions */ + hantro_reg_write(vpu, &vp8_dec_num_dct_partitions, + hdr->num_dct_parts - 1); + + /* DCT partition length */ + hantro_reg_write(vpu, &vp8_dec_stream_len, dct_part_total_len); + + /* DCT partitions base address */ + for (i = 0; i < hdr->num_dct_parts; i++) { + u32 byte_offset = dct_part_offset + dct_size_part_size + count; + u32 base_addr = byte_offset + src_dma; + + hantro_reg_write(vpu, &vp8_dec_dct_base[i], + base_addr & (~DEC_8190_ALIGN_MASK)); + + hantro_reg_write(vpu, &vp8_dec_dct_start_bits[i], + (byte_offset & DEC_8190_ALIGN_MASK) * 8); + + count += hdr->dct_part_sizes[i]; + } +} + +/* + * prediction filter taps + * normal 6-tap filters + */ +static void cfg_tap(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr) +{ + struct hantro_dev *vpu = ctx->dev; + int i, j; + + if ((hdr->version & 0x03) != 0) + return; /* Tap filter not used. */ + + for (i = 0; i < 8; i++) { + for (j = 0; j < 6; j++) { + if (vp8_dec_pred_bc_tap[i][j].base != 0) + hantro_reg_write(vpu, + &vp8_dec_pred_bc_tap[i][j], + hantro_vp8_dec_mc_filter[i][j]); + } + } +} + +static void cfg_ref(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) +{ + struct hantro_dev *vpu = ctx->dev; + dma_addr_t ref; + + ref = hantro_get_ref(ctx, hdr->last_frame_ts); + if (!ref) { + vpu_debug(0, "failed to find last frame ts=%llu\n", + hdr->last_frame_ts); + ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + } + vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF0); + + ref = hantro_get_ref(ctx, hdr->golden_frame_ts); + if (!ref && hdr->golden_frame_ts) + vpu_debug(0, "failed to find golden frame ts=%llu\n", + hdr->golden_frame_ts); + if (!ref) + ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN) + ref |= VDPU_REG_VP8_GREF_SIGN_BIAS; + vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF2_5(2)); + + ref = hantro_get_ref(ctx, hdr->alt_frame_ts); + if (!ref && hdr->alt_frame_ts) + vpu_debug(0, "failed to find alt frame ts=%llu\n", + hdr->alt_frame_ts); + if (!ref) + ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT) + ref |= VDPU_REG_VP8_AREF_SIGN_BIAS; + vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF2_5(3)); +} + +static void cfg_buffers(struct hantro_ctx *ctx, + const struct v4l2_ctrl_vp8_frame *hdr, + struct vb2_v4l2_buffer *vb2_dst) +{ + const struct v4l2_vp8_segment *seg = &hdr->segment; + struct hantro_dev *vpu = ctx->dev; + dma_addr_t dst_dma; + u32 reg; + + /* Set probability table buffer address */ + vdpu_write_relaxed(vpu, ctx->vp8_dec.prob_tbl.dma, + VDPU_REG_ADDR_QTABLE); + + /* Set segment map address */ + reg = VDPU_REG_FWD_PIC1_SEGMENT_BASE(ctx->vp8_dec.segment_map.dma); + if (seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED) { + reg |= VDPU_REG_FWD_PIC1_SEGMENT_E; + if (seg->flags & V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP) + reg |= VDPU_REG_FWD_PIC1_SEGMENT_UPD_E; + } + vdpu_write_relaxed(vpu, reg, VDPU_REG_VP8_SEGMENT_VAL); + + /* set output frame buffer address */ + dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0); + vdpu_write_relaxed(vpu, dst_dma, VDPU_REG_ADDR_DST); +} + +int rockchip_vpu2_vp8_dec_run(struct hantro_ctx *ctx) +{ + const struct v4l2_ctrl_vp8_frame *hdr; + struct hantro_dev *vpu = ctx->dev; + struct vb2_v4l2_buffer *vb2_dst; + size_t height = ctx->dst_fmt.height; + size_t width = ctx->dst_fmt.width; + u32 mb_width, mb_height; + u32 reg; + + hantro_start_prepare_run(ctx); + + hdr = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_VP8_FRAME); + if (WARN_ON(!hdr)) + return -EINVAL; + + /* Reset segment_map buffer in keyframe */ + if (V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) && ctx->vp8_dec.segment_map.cpu) + memset(ctx->vp8_dec.segment_map.cpu, 0, + ctx->vp8_dec.segment_map.size); + + hantro_vp8_prob_update(ctx, hdr); + + /* + * Extensive testing shows that the hardware does not properly + * clear the internal state from previous a decoding run. This + * causes corruption in decoded frames for multi-instance use cases. + * A soft reset before programming the registers has been found + * to resolve those problems. + */ + ctx->codec_ops->reset(ctx); + + reg = VDPU_REG_CONFIG_DEC_TIMEOUT_E + | VDPU_REG_CONFIG_DEC_CLK_GATE_E; + if (!V4L2_VP8_FRAME_IS_KEY_FRAME(hdr)) + reg |= VDPU_REG_DEC_CTRL0_PIC_INTER_E; + vdpu_write_relaxed(vpu, reg, VDPU_REG_EN_FLAGS); + + reg = VDPU_REG_CONFIG_DEC_STRENDIAN_E + | VDPU_REG_CONFIG_DEC_INSWAP32_E + | VDPU_REG_CONFIG_DEC_STRSWAP32_E + | VDPU_REG_CONFIG_DEC_OUTSWAP32_E + | VDPU_REG_CONFIG_DEC_IN_ENDIAN + | VDPU_REG_CONFIG_DEC_OUT_ENDIAN; + vdpu_write_relaxed(vpu, reg, VDPU_REG_DATA_ENDIAN); + + reg = VDPU_REG_CONFIG_DEC_MAX_BURST(16); + vdpu_write_relaxed(vpu, reg, VDPU_REG_AXI_CTRL); + + reg = VDPU_REG_DEC_CTRL0_DEC_MODE(10); + vdpu_write_relaxed(vpu, reg, VDPU_REG_DEC_FORMAT); + + if (!(hdr->flags & V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF)) + hantro_reg_write(vpu, &vp8_dec_skip_mode, 1); + if (hdr->lf.level == 0) + hantro_reg_write(vpu, &vp8_dec_filter_disable, 1); + + /* Frame dimensions */ + mb_width = MB_WIDTH(width); + mb_height = MB_HEIGHT(height); + + hantro_reg_write(vpu, &vp8_dec_mb_width, mb_width); + hantro_reg_write(vpu, &vp8_dec_mb_height, mb_height); + hantro_reg_write(vpu, &vp8_dec_mb_width_ext, mb_width >> 9); + hantro_reg_write(vpu, &vp8_dec_mb_height_ext, mb_height >> 8); + + /* Boolean decoder */ + hantro_reg_write(vpu, &vp8_dec_bool_range, hdr->coder_state.range); + hantro_reg_write(vpu, &vp8_dec_bool_value, hdr->coder_state.value); + + reg = vdpu_read(vpu, VDPU_REG_VP8_DCT_START_BIT); + if (hdr->version != 3) + reg |= VDPU_REG_DEC_CTRL4_VC1_HEIGHT_EXT; + if (hdr->version & 0x3) + reg |= VDPU_REG_DEC_CTRL4_BILIN_MC_E; + vdpu_write_relaxed(vpu, reg, VDPU_REG_VP8_DCT_START_BIT); + + cfg_lf(ctx, hdr); + cfg_qp(ctx, hdr); + cfg_parts(ctx, hdr); + cfg_tap(ctx, hdr); + + vb2_dst = hantro_get_dst_buf(ctx); + cfg_ref(ctx, hdr, vb2_dst); + cfg_buffers(ctx, hdr, vb2_dst); + + hantro_end_prepare_run(ctx); + + hantro_reg_write(vpu, &vp8_dec_start_dec, 1); + + return 0; +} diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu2_regs.h new file mode 100644 index 000000000..49e408895 --- /dev/null +++ b/drivers/media/platform/verisilicon/rockchip_vpu2_regs.h @@ -0,0 +1,600 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + * Alpha Lin <alpha.lin@rock-chips.com> + */ + +#ifndef ROCKCHIP_VPU2_REGS_H_ +#define ROCKCHIP_VPU2_REGS_H_ + +/* Encoder registers. */ +#define VEPU_REG_VP8_QUT_1ST(i) (0x000 + ((i) * 0x24)) +#define VEPU_REG_VP8_QUT_DC_Y2(x) (((x) & 0x3fff) << 16) +#define VEPU_REG_VP8_QUT_DC_Y1(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_QUT_2ND(i) (0x004 + ((i) * 0x24)) +#define VEPU_REG_VP8_QUT_AC_Y1(x) (((x) & 0x3fff) << 16) +#define VEPU_REG_VP8_QUT_DC_CHR(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_QUT_3RD(i) (0x008 + ((i) * 0x24)) +#define VEPU_REG_VP8_QUT_AC_CHR(x) (((x) & 0x3fff) << 16) +#define VEPU_REG_VP8_QUT_AC_Y2(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_QUT_4TH(i) (0x00c + ((i) * 0x24)) +#define VEPU_REG_VP8_QUT_ZB_DC_CHR(x) (((x) & 0x1ff) << 18) +#define VEPU_REG_VP8_QUT_ZB_DC_Y2(x) (((x) & 0x1ff) << 9) +#define VEPU_REG_VP8_QUT_ZB_DC_Y1(x) (((x) & 0x1ff) << 0) +#define VEPU_REG_VP8_QUT_5TH(i) (0x010 + ((i) * 0x24)) +#define VEPU_REG_VP8_QUT_ZB_AC_CHR(x) (((x) & 0x1ff) << 18) +#define VEPU_REG_VP8_QUT_ZB_AC_Y2(x) (((x) & 0x1ff) << 9) +#define VEPU_REG_VP8_QUT_ZB_AC_Y1(x) (((x) & 0x1ff) << 0) +#define VEPU_REG_VP8_QUT_6TH(i) (0x014 + ((i) * 0x24)) +#define VEPU_REG_VP8_QUT_RND_DC_CHR(x) (((x) & 0xff) << 16) +#define VEPU_REG_VP8_QUT_RND_DC_Y2(x) (((x) & 0xff) << 8) +#define VEPU_REG_VP8_QUT_RND_DC_Y1(x) (((x) & 0xff) << 0) +#define VEPU_REG_VP8_QUT_7TH(i) (0x018 + ((i) * 0x24)) +#define VEPU_REG_VP8_QUT_RND_AC_CHR(x) (((x) & 0xff) << 16) +#define VEPU_REG_VP8_QUT_RND_AC_Y2(x) (((x) & 0xff) << 8) +#define VEPU_REG_VP8_QUT_RND_AC_Y1(x) (((x) & 0xff) << 0) +#define VEPU_REG_VP8_QUT_8TH(i) (0x01c + ((i) * 0x24)) +#define VEPU_REG_VP8_SEG_FILTER_LEVEL(x) (((x) & 0x3f) << 25) +#define VEPU_REG_VP8_DEQUT_DC_CHR(x) (((x) & 0xff) << 17) +#define VEPU_REG_VP8_DEQUT_DC_Y2(x) (((x) & 0x1ff) << 8) +#define VEPU_REG_VP8_DEQUT_DC_Y1(x) (((x) & 0xff) << 0) +#define VEPU_REG_VP8_QUT_9TH(i) (0x020 + ((i) * 0x24)) +#define VEPU_REG_VP8_DEQUT_AC_CHR(x) (((x) & 0x1ff) << 18) +#define VEPU_REG_VP8_DEQUT_AC_Y2(x) (((x) & 0x1ff) << 9) +#define VEPU_REG_VP8_DEQUT_AC_Y1(x) (((x) & 0x1ff) << 0) +#define VEPU_REG_ADDR_VP8_SEG_MAP 0x06c +#define VEPU_REG_VP8_INTRA_4X4_PENALTY(i) (0x070 + ((i) * 0x4)) +#define VEPU_REG_VP8_INTRA_4X4_PENALTY_0(x) (((x) & 0xfff) << 0) +#define VEPU_REG_VP8_INTRA_4x4_PENALTY_1(x) (((x) & 0xfff) << 16) +#define VEPU_REG_VP8_INTRA_16X16_PENALTY(i) (0x084 + ((i) * 0x4)) +#define VEPU_REG_VP8_INTRA_16X16_PENALTY_0(x) (((x) & 0xfff) << 0) +#define VEPU_REG_VP8_INTRA_16X16_PENALTY_1(x) (((x) & 0xfff) << 16) +#define VEPU_REG_VP8_CONTROL 0x0a0 +#define VEPU_REG_VP8_LF_MODE_DELTA_BPRED(x) (((x) & 0x1f) << 24) +#define VEPU_REG_VP8_LF_REF_DELTA_INTRA_MB(x) (((x) & 0x7f) << 16) +#define VEPU_REG_VP8_INTER_TYPE_BIT_COST(x) (((x) & 0xfff) << 0) +#define VEPU_REG_VP8_REF_FRAME_VAL 0x0a4 +#define VEPU_REG_VP8_COEF_DMV_PENALTY(x) (((x) & 0xfff) << 16) +#define VEPU_REG_VP8_REF_FRAME(x) (((x) & 0xfff) << 0) +#define VEPU_REG_VP8_LOOP_FILTER_REF_DELTA 0x0a8 +#define VEPU_REG_VP8_LF_REF_DELTA_ALT_REF(x) (((x) & 0x7f) << 16) +#define VEPU_REG_VP8_LF_REF_DELTA_LAST_REF(x) (((x) & 0x7f) << 8) +#define VEPU_REG_VP8_LF_REF_DELTA_GOLDEN(x) (((x) & 0x7f) << 0) +#define VEPU_REG_VP8_LOOP_FILTER_MODE_DELTA 0x0ac +#define VEPU_REG_VP8_LF_MODE_DELTA_SPLITMV(x) (((x) & 0x7f) << 16) +#define VEPU_REG_VP8_LF_MODE_DELTA_ZEROMV(x) (((x) & 0x7f) << 8) +#define VEPU_REG_VP8_LF_MODE_DELTA_NEWMV(x) (((x) & 0x7f) << 0) +#define VEPU_REG_JPEG_LUMA_QUAT(i) (0x000 + ((i) * 0x4)) +#define VEPU_REG_JPEG_CHROMA_QUAT(i) (0x040 + ((i) * 0x4)) +#define VEPU_REG_INTRA_SLICE_BITMAP(i) (0x0b0 + ((i) * 0x4)) +#define VEPU_REG_ADDR_VP8_DCT_PART(i) (0x0b0 + ((i) * 0x4)) +#define VEPU_REG_INTRA_AREA_CTRL 0x0b8 +#define VEPU_REG_INTRA_AREA_TOP(x) (((x) & 0xff) << 24) +#define VEPU_REG_INTRA_AREA_BOTTOM(x) (((x) & 0xff) << 16) +#define VEPU_REG_INTRA_AREA_LEFT(x) (((x) & 0xff) << 8) +#define VEPU_REG_INTRA_AREA_RIGHT(x) (((x) & 0xff) << 0) +#define VEPU_REG_CIR_INTRA_CTRL 0x0bc +#define VEPU_REG_CIR_INTRA_FIRST_MB(x) (((x) & 0xffff) << 16) +#define VEPU_REG_CIR_INTRA_INTERVAL(x) (((x) & 0xffff) << 0) +#define VEPU_REG_ADDR_IN_PLANE_0 0x0c0 +#define VEPU_REG_ADDR_IN_PLANE_1 0x0c4 +#define VEPU_REG_ADDR_IN_PLANE_2 0x0c8 +#define VEPU_REG_STR_HDR_REM_MSB 0x0cc +#define VEPU_REG_STR_HDR_REM_LSB 0x0d0 +#define VEPU_REG_STR_BUF_LIMIT 0x0d4 +#define VEPU_REG_AXI_CTRL 0x0d8 +#define VEPU_REG_AXI_CTRL_READ_ID(x) (((x) & 0xff) << 24) +#define VEPU_REG_AXI_CTRL_WRITE_ID(x) (((x) & 0xff) << 16) +#define VEPU_REG_AXI_CTRL_BURST_LEN(x) (((x) & 0x3f) << 8) +#define VEPU_REG_AXI_CTRL_INCREMENT_MODE(x) (((x) & 0x01) << 2) +#define VEPU_REG_AXI_CTRL_BIRST_DISCARD(x) (((x) & 0x01) << 1) +#define VEPU_REG_AXI_CTRL_BIRST_DISABLE BIT(0) +#define VEPU_QP_ADJUST_MAD_DELTA_ROI 0x0dc +#define VEPU_REG_ROI_QP_DELTA_1 (((x) & 0xf) << 12) +#define VEPU_REG_ROI_QP_DELTA_2 (((x) & 0xf) << 8) +#define VEPU_REG_MAD_QP_ADJUSTMENT (((x) & 0xf) << 0) +#define VEPU_REG_ADDR_REF_LUMA 0x0e0 +#define VEPU_REG_ADDR_REF_CHROMA 0x0e4 +#define VEPU_REG_QP_SUM_DIV2 0x0e8 +#define VEPU_REG_QP_SUM(x) (((x) & 0x001fffff) * 2) +#define VEPU_REG_ENC_CTRL0 0x0ec +#define VEPU_REG_DISABLE_QUARTER_PIXEL_MV BIT(28) +#define VEPU_REG_DEBLOCKING_FILTER_MODE(x) (((x) & 0x3) << 24) +#define VEPU_REG_CABAC_INIT_IDC(x) (((x) & 0x3) << 21) +#define VEPU_REG_ENTROPY_CODING_MODE BIT(20) +#define VEPU_REG_H264_TRANS8X8_MODE BIT(17) +#define VEPU_REG_H264_INTER4X4_MODE BIT(16) +#define VEPU_REG_H264_STREAM_MODE BIT(15) +#define VEPU_REG_H264_SLICE_SIZE(x) (((x) & 0x7f) << 8) +#define VEPU_REG_ENC_OVER_FILL_STRM_OFFSET 0x0f0 +#define VEPU_REG_STREAM_START_OFFSET(x) (((x) & 0x3f) << 16) +#define VEPU_REG_SKIP_MACROBLOCK_PENALTY(x) (((x) & 0xff) << 8) +#define VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(x) (((x) & 0x3) << 4) +#define VEPU_REG_IN_IMG_CTRL_OVRFLB(x) (((x) & 0xf) << 0) +#define VEPU_REG_INPUT_LUMA_INFO 0x0f4 +#define VEPU_REG_IN_IMG_CHROMA_OFFSET(x) (((x) & 0x7) << 20) +#define VEPU_REG_IN_IMG_LUMA_OFFSET(x) (((x) & 0x7) << 16) +#define VEPU_REG_IN_IMG_CTRL_ROW_LEN(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_RLC_SUM 0x0f8 +#define VEPU_REG_RLC_SUM_OUT(x) (((x) & 0x007fffff) * 4) +#define VEPU_REG_SPLIT_PENALTY_4X4 0x0f8 +#define VEPU_REG_VP8_SPLIT_PENALTY_4X4 (((x) & 0x1ff) << 19) +#define VEPU_REG_ADDR_REC_LUMA 0x0fc +#define VEPU_REG_ADDR_REC_CHROMA 0x100 +#define VEPU_REG_CHECKPOINT(i) (0x104 + ((i) * 0x4)) +#define VEPU_REG_CHECKPOINT_CHECK0(x) (((x) & 0xffff)) +#define VEPU_REG_CHECKPOINT_CHECK1(x) (((x) & 0xffff) << 16) +#define VEPU_REG_CHECKPOINT_RESULT(x) \ + ((((x) >> (16 - 16 * ((i) & 1))) & 0xffff) * 32) +#define VEPU_REG_VP8_SEG0_QUANT_AC_Y1 0x104 +#define VEPU_REG_VP8_SEG0_RND_AC_Y1(x) (((x) & 0xff) << 23) +#define VEPU_REG_VP8_SEG0_ZBIN_AC_Y1(x) (((x) & 0x1ff) << 14) +#define VEPU_REG_VP8_SEG0_QUT_AC_Y1(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_SEG0_QUANT_DC_Y2 0x108 +#define VEPU_REG_VP8_SEG0_RND_DC_Y2(x) (((x) & 0xff) << 23) +#define VEPU_REG_VP8_SEG0_ZBIN_DC_Y2(x) (((x) & 0x1ff) << 14) +#define VEPU_REG_VP8_SEG0_QUT_DC_Y2(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_SEG0_QUANT_AC_Y2 0x10c +#define VEPU_REG_VP8_SEG0_RND_AC_Y2(x) (((x) & 0xff) << 23) +#define VEPU_REG_VP8_SEG0_ZBIN_AC_Y2(x) (((x) & 0x1ff) << 14) +#define VEPU_REG_VP8_SEG0_QUT_AC_Y2(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_SEG0_QUANT_DC_CHR 0x110 +#define VEPU_REG_VP8_SEG0_RND_DC_CHR(x) (((x) & 0xff) << 23) +#define VEPU_REG_VP8_SEG0_ZBIN_DC_CHR(x) (((x) & 0x1ff) << 14) +#define VEPU_REG_VP8_SEG0_QUT_DC_CHR(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_SEG0_QUANT_AC_CHR 0x114 +#define VEPU_REG_VP8_SEG0_RND_AC_CHR(x) (((x) & 0xff) << 23) +#define VEPU_REG_VP8_SEG0_ZBIN_AC_CHR(x) (((x) & 0x1ff) << 14) +#define VEPU_REG_VP8_SEG0_QUT_AC_CHR(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_VP8_SEG0_QUANT_DQUT 0x118 +#define VEPU_REG_VP8_MV_REF_IDX1(x) (((x) & 0x03) << 26) +#define VEPU_REG_VP8_SEG0_DQUT_DC_Y2(x) (((x) & 0x1ff) << 17) +#define VEPU_REG_VP8_SEG0_DQUT_AC_Y1(x) (((x) & 0x1ff) << 8) +#define VEPU_REG_VP8_SEG0_DQUT_DC_Y1(x) (((x) & 0xff) << 0) +#define VEPU_REG_CHKPT_WORD_ERR(i) (0x118 + ((i) * 0x4)) +#define VEPU_REG_CHKPT_WORD_ERR_CHK0(x) (((x) & 0xffff)) +#define VEPU_REG_CHKPT_WORD_ERR_CHK1(x) (((x) & 0xffff) << 16) +#define VEPU_REG_VP8_SEG0_QUANT_DQUT_1 0x11c +#define VEPU_REG_VP8_SEGMENT_MAP_UPDATE BIT(30) +#define VEPU_REG_VP8_SEGMENT_EN BIT(29) +#define VEPU_REG_VP8_MV_REF_IDX2_EN BIT(28) +#define VEPU_REG_VP8_MV_REF_IDX2(x) (((x) & 0x03) << 26) +#define VEPU_REG_VP8_SEG0_DQUT_AC_CHR(x) (((x) & 0x1ff) << 17) +#define VEPU_REG_VP8_SEG0_DQUT_DC_CHR(x) (((x) & 0xff) << 9) +#define VEPU_REG_VP8_SEG0_DQUT_AC_Y2(x) (((x) & 0x1ff) << 0) +#define VEPU_REG_VP8_BOOL_ENC_VALUE 0x120 +#define VEPU_REG_CHKPT_DELTA_QP 0x124 +#define VEPU_REG_CHKPT_DELTA_QP_CHK0(x) (((x) & 0x0f) << 0) +#define VEPU_REG_CHKPT_DELTA_QP_CHK1(x) (((x) & 0x0f) << 4) +#define VEPU_REG_CHKPT_DELTA_QP_CHK2(x) (((x) & 0x0f) << 8) +#define VEPU_REG_CHKPT_DELTA_QP_CHK3(x) (((x) & 0x0f) << 12) +#define VEPU_REG_CHKPT_DELTA_QP_CHK4(x) (((x) & 0x0f) << 16) +#define VEPU_REG_CHKPT_DELTA_QP_CHK5(x) (((x) & 0x0f) << 20) +#define VEPU_REG_CHKPT_DELTA_QP_CHK6(x) (((x) & 0x0f) << 24) +#define VEPU_REG_VP8_ENC_CTRL2 0x124 +#define VEPU_REG_VP8_ZERO_MV_PENALTY_FOR_REF2(x) (((x) & 0xff) << 24) +#define VEPU_REG_VP8_FILTER_SHARPNESS(x) (((x) & 0x07) << 21) +#define VEPU_REG_VP8_FILTER_LEVEL(x) (((x) & 0x3f) << 15) +#define VEPU_REG_VP8_DCT_PARTITION_CNT(x) (((x) & 0x03) << 13) +#define VEPU_REG_VP8_BOOL_ENC_VALUE_BITS(x) (((x) & 0x1f) << 8) +#define VEPU_REG_VP8_BOOL_ENC_RANGE(x) (((x) & 0xff) << 0) +#define VEPU_REG_ENC_CTRL1 0x128 +#define VEPU_REG_MAD_THRESHOLD(x) (((x) & 0x3f) << 24) +#define VEPU_REG_COMPLETED_SLICES(x) (((x) & 0xff) << 16) +#define VEPU_REG_IN_IMG_CTRL_FMT(x) (((x) & 0xf) << 4) +#define VEPU_REG_IN_IMG_ROTATE_MODE(x) (((x) & 0x3) << 2) +#define VEPU_REG_SIZE_TABLE_PRESENT BIT(0) +#define VEPU_REG_INTRA_INTER_MODE 0x12c +#define VEPU_REG_INTRA16X16_MODE(x) (((x) & 0xffff) << 16) +#define VEPU_REG_INTER_MODE(x) (((x) & 0xffff) << 0) +#define VEPU_REG_ENC_CTRL2 0x130 +#define VEPU_REG_PPS_INIT_QP(x) (((x) & 0x3f) << 26) +#define VEPU_REG_SLICE_FILTER_ALPHA(x) (((x) & 0xf) << 22) +#define VEPU_REG_SLICE_FILTER_BETA(x) (((x) & 0xf) << 18) +#define VEPU_REG_CHROMA_QP_OFFSET(x) (((x) & 0x1f) << 13) +#define VEPU_REG_FILTER_DISABLE BIT(5) +#define VEPU_REG_IDR_PIC_ID(x) (((x) & 0xf) << 1) +#define VEPU_REG_CONSTRAINED_INTRA_PREDICTION BIT(0) +#define VEPU_REG_ADDR_OUTPUT_STREAM 0x134 +#define VEPU_REG_ADDR_OUTPUT_CTRL 0x138 +#define VEPU_REG_ADDR_NEXT_PIC 0x13c +#define VEPU_REG_ADDR_MV_OUT 0x140 +#define VEPU_REG_ADDR_CABAC_TBL 0x144 +#define VEPU_REG_ROI1 0x148 +#define VEPU_REG_ROI1_TOP_MB(x) (((x) & 0xff) << 24) +#define VEPU_REG_ROI1_BOTTOM_MB(x) (((x) & 0xff) << 16) +#define VEPU_REG_ROI1_LEFT_MB(x) (((x) & 0xff) << 8) +#define VEPU_REG_ROI1_RIGHT_MB(x) (((x) & 0xff) << 0) +#define VEPU_REG_ROI2 0x14c +#define VEPU_REG_ROI2_TOP_MB(x) (((x) & 0xff) << 24) +#define VEPU_REG_ROI2_BOTTOM_MB(x) (((x) & 0xff) << 16) +#define VEPU_REG_ROI2_LEFT_MB(x) (((x) & 0xff) << 8) +#define VEPU_REG_ROI2_RIGHT_MB(x) (((x) & 0xff) << 0) +#define VEPU_REG_STABLE_MATRIX(i) (0x150 + ((i) * 0x4)) +#define VEPU_REG_STABLE_MOTION_SUM 0x174 +#define VEPU_REG_STABILIZATION_OUTPUT 0x178 +#define VEPU_REG_STABLE_MIN_VALUE(x) (((x) & 0xffffff) << 8) +#define VEPU_REG_STABLE_MODE_SEL(x) (((x) & 0x3) << 6) +#define VEPU_REG_STABLE_HOR_GMV(x) (((x) & 0x3f) << 0) +#define VEPU_REG_RGB2YUV_CONVERSION_COEF1 0x17c +#define VEPU_REG_RGB2YUV_CONVERSION_COEFB(x) (((x) & 0xffff) << 16) +#define VEPU_REG_RGB2YUV_CONVERSION_COEFA(x) (((x) & 0xffff) << 0) +#define VEPU_REG_RGB2YUV_CONVERSION_COEF2 0x180 +#define VEPU_REG_RGB2YUV_CONVERSION_COEFE(x) (((x) & 0xffff) << 16) +#define VEPU_REG_RGB2YUV_CONVERSION_COEFC(x) (((x) & 0xffff) << 0) +#define VEPU_REG_RGB2YUV_CONVERSION_COEF3 0x184 +#define VEPU_REG_RGB2YUV_CONVERSION_COEFF(x) (((x) & 0xffff) << 0) +#define VEPU_REG_RGB_MASK_MSB 0x188 +#define VEPU_REG_RGB_MASK_B_MSB(x) (((x) & 0x1f) << 16) +#define VEPU_REG_RGB_MASK_G_MSB(x) (((x) & 0x1f) << 8) +#define VEPU_REG_RGB_MASK_R_MSB(x) (((x) & 0x1f) << 0) +#define VEPU_REG_MV_PENALTY 0x18c +#define VEPU_REG_1MV_PENALTY(x) (((x) & 0x3ff) << 21) +#define VEPU_REG_QMV_PENALTY(x) (((x) & 0x3ff) << 11) +#define VEPU_REG_4MV_PENALTY(x) (((x) & 0x3ff) << 1) +#define VEPU_REG_SPLIT_MV_MODE_EN BIT(0) +#define VEPU_REG_QP_VAL 0x190 +#define VEPU_REG_H264_LUMA_INIT_QP(x) (((x) & 0x3f) << 26) +#define VEPU_REG_H264_QP_MAX(x) (((x) & 0x3f) << 20) +#define VEPU_REG_H264_QP_MIN(x) (((x) & 0x3f) << 14) +#define VEPU_REG_H264_CHKPT_DISTANCE(x) (((x) & 0xfff) << 0) +#define VEPU_REG_VP8_SEG0_QUANT_DC_Y1 0x190 +#define VEPU_REG_VP8_SEG0_RND_DC_Y1(x) (((x) & 0xff) << 23) +#define VEPU_REG_VP8_SEG0_ZBIN_DC_Y1(x) (((x) & 0x1ff) << 14) +#define VEPU_REG_VP8_SEG0_QUT_DC_Y1(x) (((x) & 0x3fff) << 0) +#define VEPU_REG_MVC_RELATE 0x198 +#define VEPU_REG_ZERO_MV_FAVOR_D2(x) (((x) & 0xf) << 20) +#define VEPU_REG_PENALTY_4X4MV(x) (((x) & 0x1ff) << 11) +#define VEPU_REG_MVC_VIEW_ID(x) (((x) & 0x7) << 8) +#define VEPU_REG_MVC_ANCHOR_PIC_FLAG BIT(7) +#define VEPU_REG_MVC_PRIORITY_ID(x) (((x) & 0x7) << 4) +#define VEPU_REG_MVC_TEMPORAL_ID(x) (((x) & 0x7) << 1) +#define VEPU_REG_MVC_INTER_VIEW_FLAG BIT(0) +#define VEPU_REG_ENCODE_START 0x19c +#define VEPU_REG_MB_HEIGHT(x) (((x) & 0x1ff) << 20) +#define VEPU_REG_MB_WIDTH(x) (((x) & 0x1ff) << 8) +#define VEPU_REG_FRAME_TYPE_INTER (0x0 << 6) +#define VEPU_REG_FRAME_TYPE_INTRA (0x1 << 6) +#define VEPU_REG_FRAME_TYPE_MVCINTER (0x2 << 6) +#define VEPU_REG_ENCODE_FORMAT_JPEG (0x2 << 4) +#define VEPU_REG_ENCODE_FORMAT_H264 (0x3 << 4) +#define VEPU_REG_ENCODE_ENABLE BIT(0) +#define VEPU_REG_MB_CTRL 0x1a0 +#define VEPU_REG_MB_CNT_OUT(x) (((x) & 0xffff) << 16) +#define VEPU_REG_MB_CNT_SET(x) (((x) & 0xffff) << 0) +#define VEPU_REG_DATA_ENDIAN 0x1a4 +#define VEPU_REG_INPUT_SWAP8 BIT(31) +#define VEPU_REG_INPUT_SWAP16 BIT(30) +#define VEPU_REG_INPUT_SWAP32 BIT(29) +#define VEPU_REG_OUTPUT_SWAP8 BIT(28) +#define VEPU_REG_OUTPUT_SWAP16 BIT(27) +#define VEPU_REG_OUTPUT_SWAP32 BIT(26) +#define VEPU_REG_TEST_IRQ BIT(24) +#define VEPU_REG_TEST_COUNTER(x) (((x) & 0xf) << 20) +#define VEPU_REG_TEST_REG BIT(19) +#define VEPU_REG_TEST_MEMORY BIT(18) +#define VEPU_REG_TEST_LEN(x) (((x) & 0x3ffff) << 0) +#define VEPU_REG_ENC_CTRL3 0x1a8 +#define VEPU_REG_PPS_ID(x) (((x) & 0xff) << 24) +#define VEPU_REG_INTRA_PRED_MODE(x) (((x) & 0xff) << 16) +#define VEPU_REG_FRAME_NUM(x) (((x) & 0xffff) << 0) +#define VEPU_REG_ENC_CTRL4 0x1ac +#define VEPU_REG_MV_PENALTY_16X8_8X16(x) (((x) & 0x3ff) << 20) +#define VEPU_REG_MV_PENALTY_8X8(x) (((x) & 0x3ff) << 10) +#define VEPU_REG_MV_PENALTY_8X4_4X8(x) (((x) & 0x3ff) << 0) +#define VEPU_REG_ADDR_VP8_PROB_CNT 0x1b0 +#define VEPU_REG_INTERRUPT 0x1b4 +#define VEPU_REG_INTERRUPT_NON BIT(28) +#define VEPU_REG_MV_WRITE_EN BIT(24) +#define VEPU_REG_RECON_WRITE_DIS BIT(20) +#define VEPU_REG_INTERRUPT_SLICE_READY_EN BIT(16) +#define VEPU_REG_CLK_GATING_EN BIT(12) +#define VEPU_REG_INTERRUPT_TIMEOUT_EN BIT(10) +#define VEPU_REG_INTERRUPT_RESET BIT(9) +#define VEPU_REG_INTERRUPT_DIS_BIT BIT(8) +#define VEPU_REG_INTERRUPT_TIMEOUT BIT(6) +#define VEPU_REG_INTERRUPT_BUFFER_FULL BIT(5) +#define VEPU_REG_INTERRUPT_BUS_ERROR BIT(4) +#define VEPU_REG_INTERRUPT_FUSE BIT(3) +#define VEPU_REG_INTERRUPT_SLICE_READY BIT(2) +#define VEPU_REG_INTERRUPT_FRAME_READY BIT(1) +#define VEPU_REG_INTERRUPT_BIT BIT(0) +#define VEPU_REG_DMV_PENALTY_TBL(i) (0x1E0 + ((i) * 0x4)) +#define VEPU_REG_DMV_PENALTY_TABLE_BIT(x, i) ((x) << (i) * 8) +#define VEPU_REG_DMV_Q_PIXEL_PENALTY_TBL(i) (0x260 + ((i) * 0x4)) +#define VEPU_REG_DMV_Q_PIXEL_PENALTY_TABLE_BIT(x, i) ((x) << (i) * 8) + +/* vpu decoder register */ +#define VDPU_REG_DEC_CTRL0 0x0c8 // 50 +#define VDPU_REG_REF_BUF_CTRL2_REFBU2_PICID(x) (((x) & 0x1f) << 25) +#define VDPU_REG_REF_BUF_CTRL2_REFBU2_THR(x) (((x) & 0xfff) << 13) +#define VDPU_REG_CONFIG_TILED_MODE_LSB BIT(12) +#define VDPU_REG_CONFIG_DEC_ADV_PRE_DIS BIT(11) +#define VDPU_REG_CONFIG_DEC_SCMD_DIS BIT(10) +#define VDPU_REG_DEC_CTRL0_SKIP_MODE BIT(9) +#define VDPU_REG_DEC_CTRL0_FILTERING_DIS BIT(8) +#define VDPU_REG_DEC_CTRL0_PIC_FIXED_QUANT BIT(7) +#define VDPU_REG_CONFIG_DEC_LATENCY(x) (((x) & 0x3f) << 1) +#define VDPU_REG_CONFIG_TILED_MODE_MSB(x) BIT(0) +#define VDPU_REG_CONFIG_DEC_OUT_TILED_E BIT(0) +#define VDPU_REG_STREAM_LEN 0x0cc +#define VDPU_REG_DEC_CTRL3_INIT_QP(x) (((x) & 0x3f) << 25) +#define VDPU_REG_DEC_STREAM_LEN_HI BIT(24) +#define VDPU_REG_DEC_CTRL3_STREAM_LEN(x) (((x) & 0xffffff) << 0) +#define VDPU_REG_ERROR_CONCEALMENT 0x0d0 +#define VDPU_REG_REF_BUF_CTRL2_APF_THRESHOLD(x) (((x) & 0x3fff) << 17) +#define VDPU_REG_ERR_CONC_STARTMB_X(x) (((x) & 0x1ff) << 8) +#define VDPU_REG_ERR_CONC_STARTMB_Y(x) (((x) & 0xff) << 0) +#define VDPU_REG_DEC_FORMAT 0x0d4 +#define VDPU_REG_DEC_CTRL0_DEC_MODE(x) (((x) & 0xf) << 0) +#define VDPU_REG_DATA_ENDIAN 0x0d8 +#define VDPU_REG_CONFIG_DEC_STRENDIAN_E BIT(5) +#define VDPU_REG_CONFIG_DEC_STRSWAP32_E BIT(4) +#define VDPU_REG_CONFIG_DEC_OUTSWAP32_E BIT(3) +#define VDPU_REG_CONFIG_DEC_INSWAP32_E BIT(2) +#define VDPU_REG_CONFIG_DEC_OUT_ENDIAN BIT(1) +#define VDPU_REG_CONFIG_DEC_IN_ENDIAN BIT(0) +#define VDPU_REG_INTERRUPT 0x0dc +#define VDPU_REG_INTERRUPT_DEC_TIMEOUT BIT(13) +#define VDPU_REG_INTERRUPT_DEC_ERROR_INT BIT(12) +#define VDPU_REG_INTERRUPT_DEC_PIC_INF BIT(10) +#define VDPU_REG_INTERRUPT_DEC_SLICE_INT BIT(9) +#define VDPU_REG_INTERRUPT_DEC_ASO_INT BIT(8) +#define VDPU_REG_INTERRUPT_DEC_BUFFER_INT BIT(6) +#define VDPU_REG_INTERRUPT_DEC_BUS_INT BIT(5) +#define VDPU_REG_INTERRUPT_DEC_RDY_INT BIT(4) +#define VDPU_REG_INTERRUPT_DEC_IRQ_DIS BIT(1) +#define VDPU_REG_INTERRUPT_DEC_IRQ BIT(0) +#define VDPU_REG_AXI_CTRL 0x0e0 +#define VDPU_REG_AXI_DEC_SEL BIT(23) +#define VDPU_REG_CONFIG_DEC_DATA_DISC_E BIT(22) +#define VDPU_REG_PARAL_BUS_E(x) BIT(21) +#define VDPU_REG_CONFIG_DEC_MAX_BURST(x) (((x) & 0x1f) << 16) +#define VDPU_REG_DEC_CTRL0_DEC_AXI_WR_ID(x) (((x) & 0xff) << 8) +#define VDPU_REG_CONFIG_DEC_AXI_RD_ID(x) (((x) & 0xff) << 0) +#define VDPU_REG_EN_FLAGS 0x0e4 +#define VDPU_REG_AHB_HLOCK_E BIT(31) +#define VDPU_REG_CACHE_E BIT(29) +#define VDPU_REG_PREFETCH_SINGLE_CHANNEL_E BIT(28) +#define VDPU_REG_INTRA_3_CYCLE_ENHANCE BIT(27) +#define VDPU_REG_INTRA_DOUBLE_SPEED BIT(26) +#define VDPU_REG_INTER_DOUBLE_SPEED BIT(25) +#define VDPU_REG_DEC_CTRL3_START_CODE_E BIT(22) +#define VDPU_REG_DEC_CTRL3_CH_8PIX_ILEAV_E BIT(21) +#define VDPU_REG_DEC_CTRL0_RLC_MODE_E BIT(20) +#define VDPU_REG_DEC_CTRL0_DIVX3_E BIT(19) +#define VDPU_REG_DEC_CTRL0_PJPEG_E BIT(18) +#define VDPU_REG_DEC_CTRL0_PIC_INTERLACE_E BIT(17) +#define VDPU_REG_DEC_CTRL0_PIC_FIELDMODE_E BIT(16) +#define VDPU_REG_DEC_CTRL0_PIC_B_E BIT(15) +#define VDPU_REG_DEC_CTRL0_PIC_INTER_E BIT(14) +#define VDPU_REG_DEC_CTRL0_PIC_TOPFIELD_E BIT(13) +#define VDPU_REG_DEC_CTRL0_FWD_INTERLACE_E BIT(12) +#define VDPU_REG_DEC_CTRL0_SORENSON_E BIT(11) +#define VDPU_REG_DEC_CTRL0_WRITE_MVS_E BIT(10) +#define VDPU_REG_DEC_CTRL0_REF_TOPFIELD_E BIT(9) +#define VDPU_REG_DEC_CTRL0_REFTOPFIRST_E BIT(8) +#define VDPU_REG_DEC_CTRL0_SEQ_MBAFF_E BIT(7) +#define VDPU_REG_DEC_CTRL0_PICORD_COUNT_E BIT(6) +#define VDPU_REG_CONFIG_DEC_TIMEOUT_E BIT(5) +#define VDPU_REG_CONFIG_DEC_CLK_GATE_E BIT(4) +#define VDPU_REG_DEC_CTRL0_DEC_OUT_DIS BIT(2) +#define VDPU_REG_REF_BUF_CTRL2_REFBU2_BUF_E BIT(1) +#define VDPU_REG_INTERRUPT_DEC_E BIT(0) +#define VDPU_REG_SOFT_RESET 0x0e8 +#define VDPU_REG_PRED_FLT 0x0ec +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_0(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_1(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_2(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_ADDITIONAL_CHROMA_ADDRESS 0x0f0 +#define VDPU_REG_ADDR_QTABLE 0x0f4 +#define VDPU_REG_DIRECT_MV_ADDR 0x0f8 +#define VDPU_REG_ADDR_DST 0x0fc +#define VDPU_REG_ADDR_STR 0x100 +#define VDPU_REG_REFBUF_RELATED 0x104 +#define VDPU_REG_FWD_PIC(i) (0x128 + ((i) * 0x4)) +#define VDPU_REG_FWD_PIC_PINIT_RLIST_F5(x) (((x) & 0x1f) << 25) +#define VDPU_REG_FWD_PIC_PINIT_RLIST_F4(x) (((x) & 0x1f) << 20) +#define VDPU_REG_FWD_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 15) +#define VDPU_REG_FWD_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 10) +#define VDPU_REG_FWD_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 5) +#define VDPU_REG_FWD_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 0) +#define VDPU_REG_REF_PIC(i) (0x130 + ((i) * 0x4)) +#define VDPU_REG_REF_PIC_REFER1_NBR(x) (((x) & 0xffff) << 16) +#define VDPU_REG_REF_PIC_REFER0_NBR(x) (((x) & 0xffff) << 0) +#define VDPU_REG_H264_ADDR_REF(i) (0x150 + ((i) * 0x4)) +#define VDPU_REG_ADDR_REF_FIELD_E BIT(1) +#define VDPU_REG_ADDR_REF_TOPC_E BIT(0) +#define VDPU_REG_INITIAL_REF_PIC_LIST0 0x190 +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F5(x) (((x) & 0x1f) << 25) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F4(x) (((x) & 0x1f) << 20) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F3(x) (((x) & 0x1f) << 15) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F2(x) (((x) & 0x1f) << 10) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F1(x) (((x) & 0x1f) << 5) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F0(x) (((x) & 0x1f) << 0) +#define VDPU_REG_INITIAL_REF_PIC_LIST1 0x194 +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F11(x) (((x) & 0x1f) << 25) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F10(x) (((x) & 0x1f) << 20) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F9(x) (((x) & 0x1f) << 15) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F8(x) (((x) & 0x1f) << 10) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F7(x) (((x) & 0x1f) << 5) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F6(x) (((x) & 0x1f) << 0) +#define VDPU_REG_INITIAL_REF_PIC_LIST2 0x198 +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F15(x) (((x) & 0x1f) << 15) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F14(x) (((x) & 0x1f) << 10) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F13(x) (((x) & 0x1f) << 5) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F12(x) (((x) & 0x1f) << 0) +#define VDPU_REG_INITIAL_REF_PIC_LIST3 0x19c +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B5(x) (((x) & 0x1f) << 25) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B4(x) (((x) & 0x1f) << 20) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B3(x) (((x) & 0x1f) << 15) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B2(x) (((x) & 0x1f) << 10) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B1(x) (((x) & 0x1f) << 5) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B0(x) (((x) & 0x1f) << 0) +#define VDPU_REG_INITIAL_REF_PIC_LIST4 0x1a0 +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B11(x) (((x) & 0x1f) << 25) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B10(x) (((x) & 0x1f) << 20) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B9(x) (((x) & 0x1f) << 15) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B8(x) (((x) & 0x1f) << 10) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B7(x) (((x) & 0x1f) << 5) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B6(x) (((x) & 0x1f) << 0) +#define VDPU_REG_INITIAL_REF_PIC_LIST5 0x1a4 +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B15(x) (((x) & 0x1f) << 15) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B14(x) (((x) & 0x1f) << 10) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B13(x) (((x) & 0x1f) << 5) +#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B12(x) (((x) & 0x1f) << 0) +#define VDPU_REG_INITIAL_REF_PIC_LIST6 0x1a8 +#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 15) +#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 10) +#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 5) +#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 0) +#define VDPU_REG_LT_REF 0x1ac +#define VDPU_REG_VALID_REF 0x1b0 +#define VDPU_REG_H264_PIC_MB_SIZE 0x1b8 +#define VDPU_REG_DEC_CTRL2_CH_QP_OFFSET2(x) (((x) & 0x1f) << 22) +#define VDPU_REG_DEC_CTRL2_CH_QP_OFFSET(x) (((x) & 0x1f) << 17) +#define VDPU_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(x) (((x) & 0xff) << 9) +#define VDPU_REG_DEC_CTRL1_PIC_MB_WIDTH(x) (((x) & 0x1ff) << 0) +#define VDPU_REG_H264_CTRL 0x1bc +#define VDPU_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(x) (((x) & 0x3) << 16) +#define VDPU_REG_DEC_CTRL1_REF_FRAMES(x) (((x) & 0x1f) << 0) +#define VDPU_REG_CURRENT_FRAME 0x1c0 +#define VDPU_REG_DEC_CTRL5_FILT_CTRL_PRES BIT(31) +#define VDPU_REG_DEC_CTRL5_RDPIC_CNT_PRES BIT(30) +#define VDPU_REG_DEC_CTRL4_FRAMENUM_LEN(x) (((x) & 0x1f) << 16) +#define VDPU_REG_DEC_CTRL4_FRAMENUM(x) (((x) & 0xffff) << 0) +#define VDPU_REG_REF_FRAME 0x1c4 +#define VDPU_REG_DEC_CTRL5_REFPIC_MK_LEN(x) (((x) & 0x7ff) << 16) +#define VDPU_REG_DEC_CTRL5_IDR_PIC_ID(x) (((x) & 0xffff) << 0) +#define VDPU_REG_DEC_CTRL6 0x1c8 +#define VDPU_REG_DEC_CTRL6_PPS_ID(x) (((x) & 0xff) << 24) +#define VDPU_REG_DEC_CTRL6_REFIDX1_ACTIVE(x) (((x) & 0x1f) << 19) +#define VDPU_REG_DEC_CTRL6_REFIDX0_ACTIVE(x) (((x) & 0x1f) << 14) +#define VDPU_REG_DEC_CTRL6_POC_LENGTH(x) (((x) & 0xff) << 0) +#define VDPU_REG_ENABLE_FLAG 0x1cc +#define VDPU_REG_DEC_CTRL5_IDR_PIC_E BIT(8) +#define VDPU_REG_DEC_CTRL4_DIR_8X8_INFER_E BIT(7) +#define VDPU_REG_DEC_CTRL4_BLACKWHITE_E BIT(6) +#define VDPU_REG_DEC_CTRL4_CABAC_E BIT(5) +#define VDPU_REG_DEC_CTRL4_WEIGHT_PRED_E BIT(4) +#define VDPU_REG_DEC_CTRL5_CONST_INTRA_E BIT(3) +#define VDPU_REG_DEC_CTRL5_8X8TRANS_FLAG_E BIT(2) +#define VDPU_REG_DEC_CTRL2_TYPE1_QUANT_E BIT(1) +#define VDPU_REG_DEC_CTRL2_FIELDPIC_FLAG_E BIT(0) +#define VDPU_REG_VP8_PIC_MB_SIZE 0x1e0 +#define VDPU_REG_DEC_PIC_MB_WIDTH(x) (((x) & 0x1ff) << 23) +#define VDPU_REG_DEC_MB_WIDTH_OFF(x) (((x) & 0xf) << 19) +#define VDPU_REG_DEC_PIC_MB_HEIGHT_P(x) (((x) & 0xff) << 11) +#define VDPU_REG_DEC_MB_HEIGHT_OFF(x) (((x) & 0xf) << 7) +#define VDPU_REG_DEC_CTRL1_PIC_MB_W_EXT(x) (((x) & 0x7) << 3) +#define VDPU_REG_DEC_CTRL1_PIC_MB_H_EXT(x) (((x) & 0x7) << 0) +#define VDPU_REG_VP8_DCT_START_BIT 0x1e4 +#define VDPU_REG_DEC_CTRL4_DCT1_START_BIT(x) (((x) & 0x3f) << 26) +#define VDPU_REG_DEC_CTRL4_DCT2_START_BIT(x) (((x) & 0x3f) << 20) +#define VDPU_REG_DEC_CTRL4_VC1_HEIGHT_EXT BIT(13) +#define VDPU_REG_DEC_CTRL4_BILIN_MC_E BIT(12) +#define VDPU_REG_VP8_CTRL0 0x1e8 +#define VDPU_REG_DEC_CTRL2_STRM_START_BIT(x) (((x) & 0x3f) << 26) +#define VDPU_REG_DEC_CTRL2_STRM1_START_BIT(x) (((x) & 0x3f) << 18) +#define VDPU_REG_DEC_CTRL2_BOOLEAN_VALUE(x) (((x) & 0xff) << 8) +#define VDPU_REG_DEC_CTRL2_BOOLEAN_RANGE(x) (((x) & 0xff) << 0) +#define VDPU_REG_VP8_DATA_VAL 0x1f0 +#define VDPU_REG_DEC_CTRL6_COEFFS_PART_AM(x) (((x) & 0xf) << 24) +#define VDPU_REG_DEC_CTRL6_STREAM1_LEN(x) (((x) & 0xffffff) << 0) +#define VDPU_REG_PRED_FLT7 0x1f4 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_1(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_2(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_3(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT8 0x1f8 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_0(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_1(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_2(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT9 0x1fc +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_3(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_0(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_1(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT10 0x200 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_2(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_3(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_BD_REF_PIC_PRED_TAP_2_M1(x) (((x) & 0x3) << 10) +#define VDPU_REG_BD_REF_PIC_PRED_TAP_2_4(x) (((x) & 0x3) << 8) +#define VDPU_REG_BD_REF_PIC_PRED_TAP_4_M1(x) (((x) & 0x3) << 6) +#define VDPU_REG_BD_REF_PIC_PRED_TAP_4_4(x) (((x) & 0x3) << 4) +#define VDPU_REG_BD_REF_PIC_PRED_TAP_6_M1(x) (((x) & 0x3) << 2) +#define VDPU_REG_BD_REF_PIC_PRED_TAP_6_4(x) (((x) & 0x3) << 0) +#define VDPU_REG_FILTER_LEVEL 0x204 +#define VDPU_REG_REF_PIC_LF_LEVEL_0(x) (((x) & 0x3f) << 18) +#define VDPU_REG_REF_PIC_LF_LEVEL_1(x) (((x) & 0x3f) << 12) +#define VDPU_REG_REF_PIC_LF_LEVEL_2(x) (((x) & 0x3f) << 6) +#define VDPU_REG_REF_PIC_LF_LEVEL_3(x) (((x) & 0x3f) << 0) +#define VDPU_REG_VP8_QUANTER0 0x208 +#define VDPU_REG_REF_PIC_QUANT_DELTA_0(x) (((x) & 0x1f) << 27) +#define VDPU_REG_REF_PIC_QUANT_DELTA_1(x) (((x) & 0x1f) << 22) +#define VDPU_REG_REF_PIC_QUANT_0(x) (((x) & 0x7ff) << 11) +#define VDPU_REG_REF_PIC_QUANT_1(x) (((x) & 0x7ff) << 0) +#define VDPU_REG_VP8_ADDR_REF0 0x20c +#define VDPU_REG_FILTER_MB_ADJ 0x210 +#define VDPU_REG_REF_PIC_FILT_TYPE_E BIT(31) +#define VDPU_REG_REF_PIC_FILT_SHARPNESS(x) (((x) & 0x7) << 28) +#define VDPU_REG_FILT_MB_ADJ_0(x) (((x) & 0x7f) << 21) +#define VDPU_REG_FILT_MB_ADJ_1(x) (((x) & 0x7f) << 14) +#define VDPU_REG_FILT_MB_ADJ_2(x) (((x) & 0x7f) << 7) +#define VDPU_REG_FILT_MB_ADJ_3(x) (((x) & 0x7f) << 0) +#define VDPU_REG_FILTER_REF_ADJ 0x214 +#define VDPU_REG_REF_PIC_ADJ_0(x) (((x) & 0x7f) << 21) +#define VDPU_REG_REF_PIC_ADJ_1(x) (((x) & 0x7f) << 14) +#define VDPU_REG_REF_PIC_ADJ_2(x) (((x) & 0x7f) << 7) +#define VDPU_REG_REF_PIC_ADJ_3(x) (((x) & 0x7f) << 0) +#define VDPU_REG_VP8_ADDR_REF2_5(i) (0x218 + ((i) * 0x4)) +#define VDPU_REG_VP8_GREF_SIGN_BIAS BIT(0) +#define VDPU_REG_VP8_AREF_SIGN_BIAS BIT(0) +#define VDPU_REG_VP8_DCT_BASE(i) (0x230 + ((i) * 0x4)) +#define VDPU_REG_VP8_ADDR_CTRL_PART 0x244 +#define VDPU_REG_VP8_ADDR_REF1 0x250 +#define VDPU_REG_VP8_SEGMENT_VAL 0x254 +#define VDPU_REG_FWD_PIC1_SEGMENT_BASE(x) ((x) << 0) +#define VDPU_REG_FWD_PIC1_SEGMENT_UPD_E BIT(1) +#define VDPU_REG_FWD_PIC1_SEGMENT_E BIT(0) +#define VDPU_REG_VP8_DCT_START_BIT2 0x258 +#define VDPU_REG_DEC_CTRL7_DCT3_START_BIT(x) (((x) & 0x3f) << 24) +#define VDPU_REG_DEC_CTRL7_DCT4_START_BIT(x) (((x) & 0x3f) << 18) +#define VDPU_REG_DEC_CTRL7_DCT5_START_BIT(x) (((x) & 0x3f) << 12) +#define VDPU_REG_DEC_CTRL7_DCT6_START_BIT(x) (((x) & 0x3f) << 6) +#define VDPU_REG_DEC_CTRL7_DCT7_START_BIT(x) (((x) & 0x3f) << 0) +#define VDPU_REG_VP8_QUANTER1 0x25c +#define VDPU_REG_REF_PIC_QUANT_DELTA_2(x) (((x) & 0x1f) << 27) +#define VDPU_REG_REF_PIC_QUANT_DELTA_3(x) (((x) & 0x1f) << 22) +#define VDPU_REG_REF_PIC_QUANT_2(x) (((x) & 0x7ff) << 11) +#define VDPU_REG_REF_PIC_QUANT_3(x) (((x) & 0x7ff) << 0) +#define VDPU_REG_VP8_QUANTER2 0x260 +#define VDPU_REG_REF_PIC_QUANT_DELTA_4(x) (((x) & 0x1f) << 27) +#define VDPU_REG_REF_PIC_QUANT_4(x) (((x) & 0x7ff) << 11) +#define VDPU_REG_REF_PIC_QUANT_5(x) (((x) & 0x7ff) << 0) +#define VDPU_REG_PRED_FLT1 0x264 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_3(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_0(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_1(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT2 0x268 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_2(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_3(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_0(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT3 0x26c +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_1(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_2(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_3(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT4 0x270 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_0(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_1(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_2(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT5 0x274 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_3(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_0(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_1(x) (((x) & 0x3ff) << 2) +#define VDPU_REG_PRED_FLT6 0x278 +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_2(x) (((x) & 0x3ff) << 22) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_3(x) (((x) & 0x3ff) << 12) +#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_0(x) (((x) & 0x3ff) << 2) + +#endif /* ROCKCHIP_VPU2_REGS_H_ */ diff --git a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c new file mode 100644 index 000000000..8de6fd2e8 --- /dev/null +++ b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c @@ -0,0 +1,680 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VPU codec driver + * + * Copyright (C) 2018 Rockchip Electronics Co., Ltd. + * Jeffy Chen <jeffy.chen@rock-chips.com> + */ + +#include <linux/clk.h> + +#include "hantro.h" +#include "hantro_jpeg.h" +#include "hantro_g1_regs.h" +#include "hantro_h1_regs.h" +#include "rockchip_vpu2_regs.h" + +#define RK3066_ACLK_MAX_FREQ (300 * 1000 * 1000) +#define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000) + +/* + * Supported formats. + */ + +static const struct hantro_fmt rockchip_vpu_enc_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_YUV420M, + .codec_mode = HANTRO_MODE_NONE, + .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420P, + }, + { + .fourcc = V4L2_PIX_FMT_NV12M, + .codec_mode = HANTRO_MODE_NONE, + .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420SP, + }, + { + .fourcc = V4L2_PIX_FMT_YUYV, + .codec_mode = HANTRO_MODE_NONE, + .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUYV422, + }, + { + .fourcc = V4L2_PIX_FMT_UYVY, + .codec_mode = HANTRO_MODE_NONE, + .enc_fmt = ROCKCHIP_VPU_ENC_FMT_UYVY422, + }, + { + .fourcc = V4L2_PIX_FMT_JPEG, + .codec_mode = HANTRO_MODE_JPEG_ENC, + .max_depth = 2, + .header_size = JPEG_HEADER_SIZE, + .frmsize = { + .min_width = 96, + .max_width = 8192, + .step_width = MB_DIM, + .min_height = 32, + .max_height = 8192, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_YUYV, + .codec_mode = HANTRO_MODE_NONE, + .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt rk3066_vpu_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, + .codec_mode = HANTRO_MODE_MPEG2_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8_FRAME, + .codec_mode = HANTRO_MODE_VP8_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt rk3288_vpu_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_4K_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_4K_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_4K_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_4K_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, + .codec_mode = HANTRO_MODE_MPEG2_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8_FRAME, + .codec_mode = HANTRO_MODE_VP8_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt rockchip_vdpu2_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, + .codec_mode = HANTRO_MODE_MPEG2_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8_FRAME, + .codec_mode = HANTRO_MODE_VP8_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, + .codec_mode = HANTRO_MODE_MPEG2_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8_FRAME, + .codec_mode = HANTRO_MODE_VP8_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static irqreturn_t rockchip_vpu1_vepu_irq(int irq, void *dev_id) +{ + struct hantro_dev *vpu = dev_id; + enum vb2_buffer_state state; + u32 status; + + status = vepu_read(vpu, H1_REG_INTERRUPT); + state = (status & H1_REG_INTERRUPT_FRAME_RDY) ? + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; + + vepu_write(vpu, 0, H1_REG_INTERRUPT); + vepu_write(vpu, 0, H1_REG_AXI_CTRL); + + hantro_irq_done(vpu, state); + + return IRQ_HANDLED; +} + +static irqreturn_t rockchip_vpu2_vdpu_irq(int irq, void *dev_id) +{ + struct hantro_dev *vpu = dev_id; + enum vb2_buffer_state state; + u32 status; + + status = vdpu_read(vpu, VDPU_REG_INTERRUPT); + state = (status & VDPU_REG_INTERRUPT_DEC_IRQ) ? + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; + + vdpu_write(vpu, 0, VDPU_REG_INTERRUPT); + vdpu_write(vpu, 0, VDPU_REG_AXI_CTRL); + + hantro_irq_done(vpu, state); + + return IRQ_HANDLED; +} + +static irqreturn_t rockchip_vpu2_vepu_irq(int irq, void *dev_id) +{ + struct hantro_dev *vpu = dev_id; + enum vb2_buffer_state state; + u32 status; + + status = vepu_read(vpu, VEPU_REG_INTERRUPT); + state = (status & VEPU_REG_INTERRUPT_FRAME_READY) ? + VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; + + vepu_write(vpu, 0, VEPU_REG_INTERRUPT); + vepu_write(vpu, 0, VEPU_REG_AXI_CTRL); + + hantro_irq_done(vpu, state); + + return IRQ_HANDLED; +} + +static int rk3036_vpu_hw_init(struct hantro_dev *vpu) +{ + /* Bump ACLK to max. possible freq. to improve performance. */ + clk_set_rate(vpu->clocks[0].clk, RK3066_ACLK_MAX_FREQ); + return 0; +} + +static int rk3066_vpu_hw_init(struct hantro_dev *vpu) +{ + /* Bump ACLKs to max. possible freq. to improve performance. */ + clk_set_rate(vpu->clocks[0].clk, RK3066_ACLK_MAX_FREQ); + clk_set_rate(vpu->clocks[2].clk, RK3066_ACLK_MAX_FREQ); + return 0; +} + +static int rockchip_vpu_hw_init(struct hantro_dev *vpu) +{ + /* Bump ACLK to max. possible freq. to improve performance. */ + clk_set_rate(vpu->clocks[0].clk, RK3288_ACLK_MAX_FREQ); + return 0; +} + +static void rk3066_vpu_dec_reset(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + vdpu_write(vpu, G1_REG_INTERRUPT_DEC_IRQ_DIS, G1_REG_INTERRUPT); + vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG); +} + +static void rockchip_vpu1_enc_reset(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + vepu_write(vpu, H1_REG_INTERRUPT_DIS_BIT, H1_REG_INTERRUPT); + vepu_write(vpu, 0, H1_REG_ENC_CTRL); + vepu_write(vpu, 0, H1_REG_AXI_CTRL); +} + +static void rockchip_vpu2_dec_reset(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + vdpu_write(vpu, VDPU_REG_INTERRUPT_DEC_IRQ_DIS, VDPU_REG_INTERRUPT); + vdpu_write(vpu, 0, VDPU_REG_EN_FLAGS); + vdpu_write(vpu, 1, VDPU_REG_SOFT_RESET); +} + +static void rockchip_vpu2_enc_reset(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + vepu_write(vpu, VEPU_REG_INTERRUPT_DIS_BIT, VEPU_REG_INTERRUPT); + vepu_write(vpu, 0, VEPU_REG_ENCODE_START); + vepu_write(vpu, 0, VEPU_REG_AXI_CTRL); +} + +/* + * Supported codec ops. + */ +static const struct hantro_codec_ops rk3036_vpu_codec_ops[] = { + [HANTRO_MODE_H264_DEC] = { + .run = hantro_g1_h264_dec_run, + .reset = hantro_g1_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, + [HANTRO_MODE_MPEG2_DEC] = { + .run = hantro_g1_mpeg2_dec_run, + .reset = hantro_g1_reset, + .init = hantro_mpeg2_dec_init, + .exit = hantro_mpeg2_dec_exit, + }, + [HANTRO_MODE_VP8_DEC] = { + .run = hantro_g1_vp8_dec_run, + .reset = hantro_g1_reset, + .init = hantro_vp8_dec_init, + .exit = hantro_vp8_dec_exit, + }, +}; + +static const struct hantro_codec_ops rk3066_vpu_codec_ops[] = { + [HANTRO_MODE_JPEG_ENC] = { + .run = hantro_h1_jpeg_enc_run, + .reset = rockchip_vpu1_enc_reset, + .done = hantro_h1_jpeg_enc_done, + }, + [HANTRO_MODE_H264_DEC] = { + .run = hantro_g1_h264_dec_run, + .reset = rk3066_vpu_dec_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, + [HANTRO_MODE_MPEG2_DEC] = { + .run = hantro_g1_mpeg2_dec_run, + .reset = rk3066_vpu_dec_reset, + .init = hantro_mpeg2_dec_init, + .exit = hantro_mpeg2_dec_exit, + }, + [HANTRO_MODE_VP8_DEC] = { + .run = hantro_g1_vp8_dec_run, + .reset = rk3066_vpu_dec_reset, + .init = hantro_vp8_dec_init, + .exit = hantro_vp8_dec_exit, + }, +}; + +static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = { + [HANTRO_MODE_JPEG_ENC] = { + .run = hantro_h1_jpeg_enc_run, + .reset = rockchip_vpu1_enc_reset, + .done = hantro_h1_jpeg_enc_done, + }, + [HANTRO_MODE_H264_DEC] = { + .run = hantro_g1_h264_dec_run, + .reset = hantro_g1_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, + [HANTRO_MODE_MPEG2_DEC] = { + .run = hantro_g1_mpeg2_dec_run, + .reset = hantro_g1_reset, + .init = hantro_mpeg2_dec_init, + .exit = hantro_mpeg2_dec_exit, + }, + [HANTRO_MODE_VP8_DEC] = { + .run = hantro_g1_vp8_dec_run, + .reset = hantro_g1_reset, + .init = hantro_vp8_dec_init, + .exit = hantro_vp8_dec_exit, + }, +}; + +static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = { + [HANTRO_MODE_JPEG_ENC] = { + .run = rockchip_vpu2_jpeg_enc_run, + .reset = rockchip_vpu2_enc_reset, + .done = rockchip_vpu2_jpeg_enc_done, + }, + [HANTRO_MODE_H264_DEC] = { + .run = rockchip_vpu2_h264_dec_run, + .reset = rockchip_vpu2_dec_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, + [HANTRO_MODE_MPEG2_DEC] = { + .run = rockchip_vpu2_mpeg2_dec_run, + .reset = rockchip_vpu2_dec_reset, + .init = hantro_mpeg2_dec_init, + .exit = hantro_mpeg2_dec_exit, + }, + [HANTRO_MODE_VP8_DEC] = { + .run = rockchip_vpu2_vp8_dec_run, + .reset = rockchip_vpu2_dec_reset, + .init = hantro_vp8_dec_init, + .exit = hantro_vp8_dec_exit, + }, +}; + +static const struct hantro_codec_ops rk3568_vepu_codec_ops[] = { + [HANTRO_MODE_JPEG_ENC] = { + .run = rockchip_vpu2_jpeg_enc_run, + .reset = rockchip_vpu2_enc_reset, + .done = rockchip_vpu2_jpeg_enc_done, + }, +}; + +/* + * VPU variant. + */ + +static const struct hantro_irq rockchip_vdpu1_irqs[] = { + { "vdpu", hantro_g1_irq }, +}; + +static const struct hantro_irq rockchip_vpu1_irqs[] = { + { "vepu", rockchip_vpu1_vepu_irq }, + { "vdpu", hantro_g1_irq }, +}; + +static const struct hantro_irq rockchip_vdpu2_irqs[] = { + { "vdpu", rockchip_vpu2_vdpu_irq }, +}; + +static const struct hantro_irq rockchip_vpu2_irqs[] = { + { "vepu", rockchip_vpu2_vepu_irq }, + { "vdpu", rockchip_vpu2_vdpu_irq }, +}; + +static const struct hantro_irq rk3568_vepu_irqs[] = { + { "vepu", rockchip_vpu2_vepu_irq }, +}; + +static const char * const rk3066_vpu_clk_names[] = { + "aclk_vdpu", "hclk_vdpu", + "aclk_vepu", "hclk_vepu" +}; + +static const char * const rockchip_vpu_clk_names[] = { + "aclk", "hclk" +}; + +/* VDPU1/VEPU1 */ + +const struct hantro_variant rk3036_vpu_variant = { + .dec_offset = 0x400, + .dec_fmts = rk3066_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rk3066_vpu_dec_fmts), + .postproc_fmts = rockchip_vpu1_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts), + .postproc_ops = &hantro_g1_postproc_ops, + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | + HANTRO_H264_DECODER, + .codec_ops = rk3036_vpu_codec_ops, + .irqs = rockchip_vdpu1_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vdpu1_irqs), + .init = rk3036_vpu_hw_init, + .clk_names = rockchip_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) +}; + +/* + * Despite this variant has separate clocks for decoder and encoder, + * it's still required to enable all four of them for either decoding + * or encoding and we can't split it in separate g1/h1 variants. + */ +const struct hantro_variant rk3066_vpu_variant = { + .enc_offset = 0x0, + .enc_fmts = rockchip_vpu_enc_fmts, + .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), + .dec_offset = 0x400, + .dec_fmts = rk3066_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rk3066_vpu_dec_fmts), + .postproc_fmts = rockchip_vpu1_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts), + .postproc_ops = &hantro_g1_postproc_ops, + .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | + HANTRO_VP8_DECODER | HANTRO_H264_DECODER, + .codec_ops = rk3066_vpu_codec_ops, + .irqs = rockchip_vpu1_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vpu1_irqs), + .init = rk3066_vpu_hw_init, + .clk_names = rk3066_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rk3066_vpu_clk_names) +}; + +const struct hantro_variant rk3288_vpu_variant = { + .enc_offset = 0x0, + .enc_fmts = rockchip_vpu_enc_fmts, + .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), + .dec_offset = 0x400, + .dec_fmts = rk3288_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rk3288_vpu_dec_fmts), + .postproc_fmts = rockchip_vpu1_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts), + .postproc_ops = &hantro_g1_postproc_ops, + .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | + HANTRO_VP8_DECODER | HANTRO_H264_DECODER, + .codec_ops = rk3288_vpu_codec_ops, + .irqs = rockchip_vpu1_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vpu1_irqs), + .init = rockchip_vpu_hw_init, + .clk_names = rockchip_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) +}; + +/* VDPU2/VEPU2 */ + +const struct hantro_variant rk3328_vpu_variant = { + .dec_offset = 0x400, + .dec_fmts = rockchip_vdpu2_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts), + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | + HANTRO_H264_DECODER, + .codec_ops = rk3399_vpu_codec_ops, + .irqs = rockchip_vdpu2_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vdpu2_irqs), + .init = rockchip_vpu_hw_init, + .clk_names = rockchip_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names), +}; + +/* + * H.264 decoding explicitly disabled in RK3399. + * This ensures userspace applications use the Rockchip VDEC core, + * which has better performance. + */ +const struct hantro_variant rk3399_vpu_variant = { + .enc_offset = 0x0, + .enc_fmts = rockchip_vpu_enc_fmts, + .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), + .dec_offset = 0x400, + .dec_fmts = rk3399_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), + .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | + HANTRO_VP8_DECODER, + .codec_ops = rk3399_vpu_codec_ops, + .irqs = rockchip_vpu2_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vpu2_irqs), + .init = rockchip_vpu_hw_init, + .clk_names = rockchip_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) +}; + +const struct hantro_variant rk3568_vepu_variant = { + .enc_offset = 0x0, + .enc_fmts = rockchip_vpu_enc_fmts, + .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), + .codec = HANTRO_JPEG_ENCODER, + .codec_ops = rk3568_vepu_codec_ops, + .irqs = rk3568_vepu_irqs, + .num_irqs = ARRAY_SIZE(rk3568_vepu_irqs), + .init = rockchip_vpu_hw_init, + .clk_names = rockchip_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) +}; + +const struct hantro_variant rk3568_vpu_variant = { + .dec_offset = 0x400, + .dec_fmts = rockchip_vdpu2_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts), + .codec = HANTRO_MPEG2_DECODER | + HANTRO_VP8_DECODER | HANTRO_H264_DECODER, + .codec_ops = rk3399_vpu_codec_ops, + .irqs = rockchip_vdpu2_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vdpu2_irqs), + .init = rockchip_vpu_hw_init, + .clk_names = rockchip_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) +}; + +const struct hantro_variant px30_vpu_variant = { + .enc_offset = 0x0, + .enc_fmts = rockchip_vpu_enc_fmts, + .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), + .dec_offset = 0x400, + .dec_fmts = rockchip_vdpu2_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts), + .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | + HANTRO_VP8_DECODER | HANTRO_H264_DECODER, + .codec_ops = rk3399_vpu_codec_ops, + .irqs = rockchip_vpu2_irqs, + .num_irqs = ARRAY_SIZE(rockchip_vpu2_irqs), + .init = rk3036_vpu_hw_init, + .clk_names = rockchip_vpu_clk_names, + .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names) +}; diff --git a/drivers/media/platform/verisilicon/sama5d4_vdec_hw.c b/drivers/media/platform/verisilicon/sama5d4_vdec_hw.c new file mode 100644 index 000000000..b205e2db5 --- /dev/null +++ b/drivers/media/platform/verisilicon/sama5d4_vdec_hw.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hantro VDEC driver + * + * Copyright (C) 2021 Collabora Ltd, Emil Velikov <emil.velikov@collabora.com> + */ + +#include "hantro.h" + +/* + * Supported formats. + */ + +static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_YUYV, + .codec_mode = HANTRO_MODE_NONE, + .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt sama5d4_vdec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, + .codec_mode = HANTRO_MODE_MPEG2_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8_FRAME, + .codec_mode = HANTRO_MODE_VP8_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_H264_SLICE, + .codec_mode = HANTRO_MODE_H264_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +/* + * Supported codec ops. + */ + +static const struct hantro_codec_ops sama5d4_vdec_codec_ops[] = { + [HANTRO_MODE_MPEG2_DEC] = { + .run = hantro_g1_mpeg2_dec_run, + .reset = hantro_g1_reset, + .init = hantro_mpeg2_dec_init, + .exit = hantro_mpeg2_dec_exit, + }, + [HANTRO_MODE_VP8_DEC] = { + .run = hantro_g1_vp8_dec_run, + .reset = hantro_g1_reset, + .init = hantro_vp8_dec_init, + .exit = hantro_vp8_dec_exit, + }, + [HANTRO_MODE_H264_DEC] = { + .run = hantro_g1_h264_dec_run, + .reset = hantro_g1_reset, + .init = hantro_h264_dec_init, + .exit = hantro_h264_dec_exit, + }, +}; + +static const struct hantro_irq sama5d4_irqs[] = { + { "vdec", hantro_g1_irq }, +}; + +static const char * const sama5d4_clk_names[] = { "vdec_clk" }; + +const struct hantro_variant sama5d4_vdec_variant = { + .dec_fmts = sama5d4_vdec_fmts, + .num_dec_fmts = ARRAY_SIZE(sama5d4_vdec_fmts), + .postproc_fmts = sama5d4_vdec_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(sama5d4_vdec_postproc_fmts), + .postproc_ops = &hantro_g1_postproc_ops, + .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | + HANTRO_H264_DECODER, + .codec_ops = sama5d4_vdec_codec_ops, + .irqs = sama5d4_irqs, + .num_irqs = ARRAY_SIZE(sama5d4_irqs), + .clk_names = sama5d4_clk_names, + .num_clocks = ARRAY_SIZE(sama5d4_clk_names), +}; diff --git a/drivers/media/platform/verisilicon/sunxi_vpu_hw.c b/drivers/media/platform/verisilicon/sunxi_vpu_hw.c new file mode 100644 index 000000000..02ce8b064 --- /dev/null +++ b/drivers/media/platform/verisilicon/sunxi_vpu_hw.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Allwinner Hantro G2 VPU codec driver + * + * Copyright (C) 2021 Jernej Skrabec <jernej.skrabec@gmail.com> + */ + +#include <linux/clk.h> + +#include "hantro.h" + +static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = 32, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = 32, + }, + }, + { + .fourcc = V4L2_PIX_FMT_P010, + .codec_mode = HANTRO_MODE_NONE, + .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = 32, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = 32, + }, + }, +}; + +static const struct hantro_fmt sunxi_vpu_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12_4L4, + .codec_mode = HANTRO_MODE_NONE, + .match_depth = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = 32, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = 32, + }, + }, + { + .fourcc = V4L2_PIX_FMT_P010_4L4, + .codec_mode = HANTRO_MODE_NONE, + .match_depth = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = 32, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = 32, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP9_FRAME, + .codec_mode = HANTRO_MODE_VP9_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = 32, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = 32, + }, + }, +}; + +static int sunxi_vpu_hw_init(struct hantro_dev *vpu) +{ + clk_set_rate(vpu->clocks[0].clk, 300000000); + + return 0; +} + +static void sunxi_vpu_reset(struct hantro_ctx *ctx) +{ + struct hantro_dev *vpu = ctx->dev; + + reset_control_reset(vpu->resets); +} + +static const struct hantro_codec_ops sunxi_vpu_codec_ops[] = { + [HANTRO_MODE_VP9_DEC] = { + .run = hantro_g2_vp9_dec_run, + .done = hantro_g2_vp9_dec_done, + .reset = sunxi_vpu_reset, + .init = hantro_vp9_dec_init, + .exit = hantro_vp9_dec_exit, + }, +}; + +static const struct hantro_irq sunxi_irqs[] = { + { NULL, hantro_g2_irq }, +}; + +static const char * const sunxi_clk_names[] = { "mod", "bus" }; + +const struct hantro_variant sunxi_vpu_variant = { + .dec_fmts = sunxi_vpu_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(sunxi_vpu_dec_fmts), + .postproc_fmts = sunxi_vpu_postproc_fmts, + .num_postproc_fmts = ARRAY_SIZE(sunxi_vpu_postproc_fmts), + .postproc_ops = &hantro_g2_postproc_ops, + .codec = HANTRO_VP9_DECODER, + .codec_ops = sunxi_vpu_codec_ops, + .init = sunxi_vpu_hw_init, + .irqs = sunxi_irqs, + .num_irqs = ARRAY_SIZE(sunxi_irqs), + .clk_names = sunxi_clk_names, + .num_clocks = ARRAY_SIZE(sunxi_clk_names), + .double_buffer = 1, + .legacy_regs = 1, + .late_postproc = 1, +}; |