summaryrefslogtreecommitdiffstats
path: root/drivers/media/platform/verisilicon
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/media/platform/verisilicon')
-rw-r--r--drivers/media/platform/verisilicon/Kconfig54
-rw-r--r--drivers/media/platform/verisilicon/Makefile38
-rw-r--r--drivers/media/platform/verisilicon/hantro.h485
-rw-r--r--drivers/media/platform/verisilicon/hantro_drv.c1153
-rw-r--r--drivers/media/platform/verisilicon/hantro_g1.c39
-rw-r--r--drivers/media/platform/verisilicon/hantro_g1_h264_dec.c284
-rw-r--r--drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c240
-rw-r--r--drivers/media/platform/verisilicon/hantro_g1_regs.h356
-rw-r--r--drivers/media/platform/verisilicon/hantro_g1_vp8_dec.c511
-rw-r--r--drivers/media/platform/verisilicon/hantro_g2.c44
-rw-r--r--drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c627
-rw-r--r--drivers/media/platform/verisilicon/hantro_g2_regs.h325
-rw-r--r--drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c1014
-rw-r--r--drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c166
-rw-r--r--drivers/media/platform/verisilicon/hantro_h1_regs.h154
-rw-r--r--drivers/media/platform/verisilicon/hantro_h264.c521
-rw-r--r--drivers/media/platform/verisilicon/hantro_hevc.c284
-rw-r--r--drivers/media/platform/verisilicon/hantro_hw.h441
-rw-r--r--drivers/media/platform/verisilicon/hantro_jpeg.c348
-rw-r--r--drivers/media/platform/verisilicon/hantro_jpeg.h15
-rw-r--r--drivers/media/platform/verisilicon/hantro_mpeg2.c61
-rw-r--r--drivers/media/platform/verisilicon/hantro_postproc.c284
-rw-r--r--drivers/media/platform/verisilicon/hantro_v4l2.c996
-rw-r--r--drivers/media/platform/verisilicon/hantro_v4l2.h29
-rw-r--r--drivers/media/platform/verisilicon/hantro_vp8.c201
-rw-r--r--drivers/media/platform/verisilicon/hantro_vp9.c240
-rw-r--r--drivers/media/platform/verisilicon/hantro_vp9.h102
-rw-r--r--drivers/media/platform/verisilicon/imx8m_vpu_hw.c400
-rw-r--r--drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c491
-rw-r--r--drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c197
-rw-r--r--drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c248
-rw-r--r--drivers/media/platform/verisilicon/rockchip_vpu2_hw_vp8_dec.c600
-rw-r--r--drivers/media/platform/verisilicon/rockchip_vpu2_regs.h600
-rw-r--r--drivers/media/platform/verisilicon/rockchip_vpu_hw.c680
-rw-r--r--drivers/media/platform/verisilicon/sama5d4_vdec_hw.c128
-rw-r--r--drivers/media/platform/verisilicon/sunxi_vpu_hw.c129
36 files changed, 12485 insertions, 0 deletions
diff --git a/drivers/media/platform/verisilicon/Kconfig b/drivers/media/platform/verisilicon/Kconfig
new file mode 100644
index 000000000..e65b836b9
--- /dev/null
+++ b/drivers/media/platform/verisilicon/Kconfig
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+comment "Verisilicon media platform drivers"
+
+config VIDEO_HANTRO
+ tristate "Hantro VPU driver"
+ depends on ARCH_MXC || ARCH_ROCKCHIP || ARCH_AT91 || ARCH_SUNXI || COMPILE_TEST
+ depends on V4L_MEM2MEM_DRIVERS
+ depends on VIDEO_DEV
+ select MEDIA_CONTROLLER
+ select MEDIA_CONTROLLER_REQUEST_API
+ select VIDEOBUF2_DMA_CONTIG
+ select VIDEOBUF2_VMALLOC
+ select V4L2_MEM2MEM_DEV
+ select V4L2_H264
+ select V4L2_VP9
+ help
+ Support for the Hantro IP based Video Processing Units present on
+ Rockchip and NXP i.MX8M SoCs, which accelerate video and image
+ encoding and decoding.
+ To compile this driver as a module, choose M here: the module
+ will be called hantro-vpu.
+
+config VIDEO_HANTRO_IMX8M
+ bool "Hantro VPU i.MX8M support"
+ depends on VIDEO_HANTRO
+ depends on ARCH_MXC || COMPILE_TEST
+ default y
+ help
+ Enable support for i.MX8M SoCs.
+
+config VIDEO_HANTRO_SAMA5D4
+ bool "Hantro VDEC SAMA5D4 support"
+ depends on VIDEO_HANTRO
+ depends on ARCH_AT91 || COMPILE_TEST
+ default y
+ help
+ Enable support for Microchip SAMA5D4 SoCs.
+
+config VIDEO_HANTRO_ROCKCHIP
+ bool "Hantro VPU Rockchip support"
+ depends on VIDEO_HANTRO
+ depends on ARCH_ROCKCHIP || COMPILE_TEST
+ default y
+ help
+ Enable support for RK3288, RK3328, and RK3399 SoCs.
+
+config VIDEO_HANTRO_SUNXI
+ bool "Hantro VPU Allwinner support"
+ depends on VIDEO_HANTRO
+ depends on ARCH_SUNXI || COMPILE_TEST
+ default y
+ help
+ Enable support for H6 SoC.
diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
new file mode 100644
index 000000000..ebd5ede7b
--- /dev/null
+++ b/drivers/media/platform/verisilicon/Makefile
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_VIDEO_HANTRO) += hantro-vpu.o
+
+hantro-vpu-y += \
+ hantro_drv.o \
+ hantro_v4l2.o \
+ hantro_postproc.o \
+ hantro_h1_jpeg_enc.o \
+ hantro_g1.o \
+ hantro_g1_h264_dec.o \
+ hantro_g1_mpeg2_dec.o \
+ hantro_g1_vp8_dec.o \
+ hantro_g2.o \
+ hantro_g2_hevc_dec.o \
+ hantro_g2_vp9_dec.o \
+ rockchip_vpu2_hw_jpeg_enc.o \
+ rockchip_vpu2_hw_h264_dec.o \
+ rockchip_vpu2_hw_mpeg2_dec.o \
+ rockchip_vpu2_hw_vp8_dec.o \
+ hantro_jpeg.o \
+ hantro_h264.o \
+ hantro_hevc.o \
+ hantro_mpeg2.o \
+ hantro_vp8.o \
+ hantro_vp9.o
+
+hantro-vpu-$(CONFIG_VIDEO_HANTRO_IMX8M) += \
+ imx8m_vpu_hw.o
+
+hantro-vpu-$(CONFIG_VIDEO_HANTRO_SAMA5D4) += \
+ sama5d4_vdec_hw.o
+
+hantro-vpu-$(CONFIG_VIDEO_HANTRO_ROCKCHIP) += \
+ rockchip_vpu_hw.o
+
+hantro-vpu-$(CONFIG_VIDEO_HANTRO_SUNXI) += \
+ sunxi_vpu_hw.o
diff --git a/drivers/media/platform/verisilicon/hantro.h b/drivers/media/platform/verisilicon/hantro.h
new file mode 100644
index 000000000..2989ebc63
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro.h
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright 2018 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ *
+ * Based on s5p-mfc driver by Samsung Electronics Co., Ltd.
+ * Copyright (C) 2011 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef HANTRO_H_
+#define HANTRO_H_
+
+#include <linux/platform_device.h>
+#include <linux/videodev2.h>
+#include <linux/wait.h>
+#include <linux/clk.h>
+#include <linux/reset.h>
+
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-core.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "hantro_hw.h"
+
+struct hantro_ctx;
+struct hantro_codec_ops;
+struct hantro_postproc_ops;
+
+#define HANTRO_JPEG_ENCODER BIT(0)
+#define HANTRO_ENCODERS 0x0000ffff
+#define HANTRO_MPEG2_DECODER BIT(16)
+#define HANTRO_VP8_DECODER BIT(17)
+#define HANTRO_H264_DECODER BIT(18)
+#define HANTRO_HEVC_DECODER BIT(19)
+#define HANTRO_VP9_DECODER BIT(20)
+#define HANTRO_DECODERS 0xffff0000
+
+/**
+ * struct hantro_irq - irq handler and name
+ *
+ * @name: irq name for device tree lookup
+ * @handler: interrupt handler
+ */
+struct hantro_irq {
+ const char *name;
+ irqreturn_t (*handler)(int irq, void *priv);
+};
+
+/**
+ * struct hantro_variant - information about VPU hardware variant
+ *
+ * @enc_offset: Offset from VPU base to encoder registers.
+ * @dec_offset: Offset from VPU base to decoder registers.
+ * @enc_fmts: Encoder formats.
+ * @num_enc_fmts: Number of encoder formats.
+ * @dec_fmts: Decoder formats.
+ * @num_dec_fmts: Number of decoder formats.
+ * @postproc_fmts: Post-processor formats.
+ * @num_postproc_fmts: Number of post-processor formats.
+ * @postproc_ops: Post-processor ops.
+ * @codec: Supported codecs
+ * @codec_ops: Codec ops.
+ * @init: Initialize hardware, optional.
+ * @runtime_resume: reenable hardware after power gating, optional.
+ * @irqs: array of irq names and interrupt handlers
+ * @num_irqs: number of irqs in the array
+ * @clk_names: array of clock names
+ * @num_clocks: number of clocks in the array
+ * @reg_names: array of register range names
+ * @num_regs: number of register range names in the array
+ * @double_buffer: core needs double buffering
+ * @legacy_regs: core uses legacy register set
+ * @late_postproc: postproc must be set up at the end of the job
+ */
+struct hantro_variant {
+ unsigned int enc_offset;
+ unsigned int dec_offset;
+ const struct hantro_fmt *enc_fmts;
+ unsigned int num_enc_fmts;
+ const struct hantro_fmt *dec_fmts;
+ unsigned int num_dec_fmts;
+ const struct hantro_fmt *postproc_fmts;
+ unsigned int num_postproc_fmts;
+ const struct hantro_postproc_ops *postproc_ops;
+ unsigned int codec;
+ const struct hantro_codec_ops *codec_ops;
+ int (*init)(struct hantro_dev *vpu);
+ int (*runtime_resume)(struct hantro_dev *vpu);
+ const struct hantro_irq *irqs;
+ int num_irqs;
+ const char * const *clk_names;
+ int num_clocks;
+ const char * const *reg_names;
+ int num_regs;
+ unsigned int double_buffer : 1;
+ unsigned int legacy_regs : 1;
+ unsigned int late_postproc : 1;
+};
+
+/**
+ * enum hantro_codec_mode - codec operating mode.
+ * @HANTRO_MODE_NONE: No operating mode. Used for RAW video formats.
+ * @HANTRO_MODE_JPEG_ENC: JPEG encoder.
+ * @HANTRO_MODE_H264_DEC: H264 decoder.
+ * @HANTRO_MODE_MPEG2_DEC: MPEG-2 decoder.
+ * @HANTRO_MODE_VP8_DEC: VP8 decoder.
+ * @HANTRO_MODE_HEVC_DEC: HEVC decoder.
+ * @HANTRO_MODE_VP9_DEC: VP9 decoder.
+ */
+enum hantro_codec_mode {
+ HANTRO_MODE_NONE = -1,
+ HANTRO_MODE_JPEG_ENC,
+ HANTRO_MODE_H264_DEC,
+ HANTRO_MODE_MPEG2_DEC,
+ HANTRO_MODE_VP8_DEC,
+ HANTRO_MODE_HEVC_DEC,
+ HANTRO_MODE_VP9_DEC,
+};
+
+/*
+ * struct hantro_ctrl - helper type to declare supported controls
+ * @codec: codec id this control belong to (HANTRO_JPEG_ENCODER, etc.)
+ * @cfg: control configuration
+ */
+struct hantro_ctrl {
+ unsigned int codec;
+ struct v4l2_ctrl_config cfg;
+};
+
+/*
+ * struct hantro_func - Hantro VPU functionality
+ *
+ * @id: processing functionality ID (can be
+ * %MEDIA_ENT_F_PROC_VIDEO_ENCODER or
+ * %MEDIA_ENT_F_PROC_VIDEO_DECODER)
+ * @vdev: &struct video_device that exposes the encoder or
+ * decoder functionality
+ * @source_pad: &struct media_pad with the source pad.
+ * @sink: &struct media_entity pointer with the sink entity
+ * @sink_pad: &struct media_pad with the sink pad.
+ * @proc: &struct media_entity pointer with the M2M device itself.
+ * @proc_pads: &struct media_pad with the @proc pads.
+ * @intf_devnode: &struct media_intf devnode pointer with the interface
+ * with controls the M2M device.
+ *
+ * Contains everything needed to attach the video device to the media device.
+ */
+struct hantro_func {
+ unsigned int id;
+ struct video_device vdev;
+ struct media_pad source_pad;
+ struct media_entity sink;
+ struct media_pad sink_pad;
+ struct media_entity proc;
+ struct media_pad proc_pads[2];
+ struct media_intf_devnode *intf_devnode;
+};
+
+static inline struct hantro_func *
+hantro_vdev_to_func(struct video_device *vdev)
+{
+ return container_of(vdev, struct hantro_func, vdev);
+}
+
+/**
+ * struct hantro_dev - driver data
+ * @v4l2_dev: V4L2 device to register video devices for.
+ * @m2m_dev: mem2mem device associated to this device.
+ * @mdev: media device associated to this device.
+ * @encoder: encoder functionality.
+ * @decoder: decoder functionality.
+ * @pdev: Pointer to VPU platform device.
+ * @dev: Pointer to device for convenient logging using
+ * dev_ macros.
+ * @clocks: Array of clock handles.
+ * @resets: Array of reset handles.
+ * @reg_bases: Mapped addresses of VPU registers.
+ * @enc_base: Mapped address of VPU encoder register for convenience.
+ * @dec_base: Mapped address of VPU decoder register for convenience.
+ * @ctrl_base: Mapped address of VPU control block.
+ * @vpu_mutex: Mutex to synchronize V4L2 calls.
+ * @irqlock: Spinlock to synchronize access to data structures
+ * shared with interrupt handlers.
+ * @variant: Hardware variant-specific parameters.
+ * @watchdog_work: Delayed work for hardware timeout handling.
+ */
+struct hantro_dev {
+ struct v4l2_device v4l2_dev;
+ struct v4l2_m2m_dev *m2m_dev;
+ struct media_device mdev;
+ struct hantro_func *encoder;
+ struct hantro_func *decoder;
+ struct platform_device *pdev;
+ struct device *dev;
+ struct clk_bulk_data *clocks;
+ struct reset_control *resets;
+ void __iomem **reg_bases;
+ void __iomem *enc_base;
+ void __iomem *dec_base;
+ void __iomem *ctrl_base;
+
+ struct mutex vpu_mutex; /* video_device lock */
+ spinlock_t irqlock;
+ const struct hantro_variant *variant;
+ struct delayed_work watchdog_work;
+};
+
+/**
+ * struct hantro_ctx - Context (instance) private data.
+ *
+ * @dev: VPU driver data to which the context belongs.
+ * @fh: V4L2 file handler.
+ * @is_encoder: Decoder or encoder context?
+ *
+ * @sequence_cap: Sequence counter for capture queue
+ * @sequence_out: Sequence counter for output queue
+ *
+ * @vpu_src_fmt: Descriptor of active source format.
+ * @src_fmt: V4L2 pixel format of active source format.
+ * @vpu_dst_fmt: Descriptor of active destination format.
+ * @dst_fmt: V4L2 pixel format of active destination format.
+ *
+ * @ctrl_handler: Control handler used to register controls.
+ * @jpeg_quality: User-specified JPEG compression quality.
+ * @bit_depth: Bit depth of current frame
+ *
+ * @codec_ops: Set of operations related to codec mode.
+ * @postproc: Post-processing context.
+ * @h264_dec: H.264-decoding context.
+ * @jpeg_enc: JPEG-encoding context.
+ * @mpeg2_dec: MPEG-2-decoding context.
+ * @vp8_dec: VP8-decoding context.
+ * @hevc_dec: HEVC-decoding context.
+ * @vp9_dec: VP9-decoding context.
+ */
+struct hantro_ctx {
+ struct hantro_dev *dev;
+ struct v4l2_fh fh;
+ bool is_encoder;
+
+ u32 sequence_cap;
+ u32 sequence_out;
+
+ const struct hantro_fmt *vpu_src_fmt;
+ struct v4l2_pix_format_mplane src_fmt;
+ const struct hantro_fmt *vpu_dst_fmt;
+ struct v4l2_pix_format_mplane dst_fmt;
+
+ struct v4l2_ctrl_handler ctrl_handler;
+ int jpeg_quality;
+ int bit_depth;
+
+ const struct hantro_codec_ops *codec_ops;
+ struct hantro_postproc_ctx postproc;
+
+ /* Specific for particular codec modes. */
+ union {
+ struct hantro_h264_dec_hw_ctx h264_dec;
+ struct hantro_mpeg2_dec_hw_ctx mpeg2_dec;
+ struct hantro_vp8_dec_hw_ctx vp8_dec;
+ struct hantro_hevc_dec_hw_ctx hevc_dec;
+ struct hantro_vp9_dec_hw_ctx vp9_dec;
+ };
+};
+
+/**
+ * struct hantro_fmt - information about supported video formats.
+ * @name: Human readable name of the format.
+ * @fourcc: FourCC code of the format. See V4L2_PIX_FMT_*.
+ * @codec_mode: Codec mode related to this format. See
+ * enum hantro_codec_mode.
+ * @header_size: Optional header size. Currently used by JPEG encoder.
+ * @max_depth: Maximum depth, for bitstream formats
+ * @enc_fmt: Format identifier for encoder registers.
+ * @frmsize: Supported range of frame sizes (only for bitstream formats).
+ * @postprocessed: Indicates if this format needs the post-processor.
+ * @match_depth: Indicates if format bit depth must match video bit depth
+ */
+struct hantro_fmt {
+ char *name;
+ u32 fourcc;
+ enum hantro_codec_mode codec_mode;
+ int header_size;
+ int max_depth;
+ enum hantro_enc_fmt enc_fmt;
+ struct v4l2_frmsize_stepwise frmsize;
+ bool postprocessed;
+ bool match_depth;
+};
+
+struct hantro_reg {
+ u32 base;
+ u32 shift;
+ u32 mask;
+};
+
+struct hantro_postproc_regs {
+ struct hantro_reg pipeline_en;
+ struct hantro_reg max_burst;
+ struct hantro_reg clk_gate;
+ struct hantro_reg out_swap32;
+ struct hantro_reg out_endian;
+ struct hantro_reg out_luma_base;
+ struct hantro_reg input_width;
+ struct hantro_reg input_height;
+ struct hantro_reg output_width;
+ struct hantro_reg output_height;
+ struct hantro_reg input_fmt;
+ struct hantro_reg output_fmt;
+ struct hantro_reg orig_width;
+ struct hantro_reg display_width;
+};
+
+struct hantro_vp9_decoded_buffer_info {
+ /* Info needed when the decoded frame serves as a reference frame. */
+ unsigned short width;
+ unsigned short height;
+ u32 bit_depth : 4;
+};
+
+struct hantro_decoded_buffer {
+ /* Must be the first field in this struct. */
+ struct v4l2_m2m_buffer base;
+
+ union {
+ struct hantro_vp9_decoded_buffer_info vp9;
+ };
+};
+
+/* Logging helpers */
+
+/**
+ * DOC: hantro_debug: Module parameter to control level of debugging messages.
+ *
+ * Level of debugging messages can be controlled by bits of
+ * module parameter called "debug". Meaning of particular
+ * bits is as follows:
+ *
+ * bit 0 - global information: mode, size, init, release
+ * bit 1 - each run start/result information
+ * bit 2 - contents of small controls from userspace
+ * bit 3 - contents of big controls from userspace
+ * bit 4 - detail fmt, ctrl, buffer q/dq information
+ * bit 5 - detail function enter/leave trace information
+ * bit 6 - register write/read information
+ */
+extern int hantro_debug;
+
+#define vpu_debug(level, fmt, args...) \
+ do { \
+ if (hantro_debug & BIT(level)) \
+ pr_info("%s:%d: " fmt, \
+ __func__, __LINE__, ##args); \
+ } while (0)
+
+#define vpu_err(fmt, args...) \
+ pr_err("%s:%d: " fmt, __func__, __LINE__, ##args)
+
+/* Structure access helpers. */
+static inline struct hantro_ctx *fh_to_ctx(struct v4l2_fh *fh)
+{
+ return container_of(fh, struct hantro_ctx, fh);
+}
+
+/* Register accessors. */
+static inline void vepu_write_relaxed(struct hantro_dev *vpu,
+ u32 val, u32 reg)
+{
+ vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
+ writel_relaxed(val, vpu->enc_base + reg);
+}
+
+static inline void vepu_write(struct hantro_dev *vpu, u32 val, u32 reg)
+{
+ vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
+ writel(val, vpu->enc_base + reg);
+}
+
+static inline u32 vepu_read(struct hantro_dev *vpu, u32 reg)
+{
+ u32 val = readl(vpu->enc_base + reg);
+
+ vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
+ return val;
+}
+
+static inline void vdpu_write_relaxed(struct hantro_dev *vpu,
+ u32 val, u32 reg)
+{
+ vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
+ writel_relaxed(val, vpu->dec_base + reg);
+}
+
+static inline void vdpu_write(struct hantro_dev *vpu, u32 val, u32 reg)
+{
+ vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
+ writel(val, vpu->dec_base + reg);
+}
+
+static inline void hantro_write_addr(struct hantro_dev *vpu,
+ unsigned long offset,
+ dma_addr_t addr)
+{
+ vdpu_write(vpu, addr & 0xffffffff, offset);
+}
+
+static inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg)
+{
+ u32 val = readl(vpu->dec_base + reg);
+
+ vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
+ return val;
+}
+
+static inline u32 vdpu_read_mask(struct hantro_dev *vpu,
+ const struct hantro_reg *reg,
+ u32 val)
+{
+ u32 v;
+
+ v = vdpu_read(vpu, reg->base);
+ v &= ~(reg->mask << reg->shift);
+ v |= ((val & reg->mask) << reg->shift);
+ return v;
+}
+
+static inline void hantro_reg_write(struct hantro_dev *vpu,
+ const struct hantro_reg *reg,
+ u32 val)
+{
+ vdpu_write_relaxed(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
+}
+
+static inline void hantro_reg_write_s(struct hantro_dev *vpu,
+ const struct hantro_reg *reg,
+ u32 val)
+{
+ vdpu_write(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
+}
+
+void *hantro_get_ctrl(struct hantro_ctx *ctx, u32 id);
+dma_addr_t hantro_get_ref(struct hantro_ctx *ctx, u64 ts);
+
+static inline struct vb2_v4l2_buffer *
+hantro_get_src_buf(struct hantro_ctx *ctx)
+{
+ return v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
+}
+
+static inline struct vb2_v4l2_buffer *
+hantro_get_dst_buf(struct hantro_ctx *ctx)
+{
+ return v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+}
+
+bool hantro_needs_postproc(const struct hantro_ctx *ctx,
+ const struct hantro_fmt *fmt);
+
+static inline dma_addr_t
+hantro_get_dec_buf_addr(struct hantro_ctx *ctx, struct vb2_buffer *vb)
+{
+ if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt))
+ return ctx->postproc.dec_q[vb->index].dma;
+ return vb2_dma_contig_plane_dma_addr(vb, 0);
+}
+
+static inline struct hantro_decoded_buffer *
+vb2_to_hantro_decoded_buf(struct vb2_buffer *buf)
+{
+ return container_of(buf, struct hantro_decoded_buffer, base.vb.vb2_buf);
+}
+
+void hantro_postproc_disable(struct hantro_ctx *ctx);
+void hantro_postproc_enable(struct hantro_ctx *ctx);
+void hantro_postproc_free(struct hantro_ctx *ctx);
+int hantro_postproc_alloc(struct hantro_ctx *ctx);
+int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx,
+ struct v4l2_frmsizeenum *fsize);
+
+#endif /* HANTRO_H_ */
diff --git a/drivers/media/platform/verisilicon/hantro_drv.c b/drivers/media/platform/verisilicon/hantro_drv.c
new file mode 100644
index 000000000..69a2442f3
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_drv.c
@@ -0,0 +1,1153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Collabora, Ltd.
+ * Copyright 2018 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ *
+ * Based on s5p-mfc driver by Samsung Electronics Co., Ltd.
+ * Copyright (C) 2011 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/videodev2.h>
+#include <linux/workqueue.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-core.h>
+#include <media/videobuf2-vmalloc.h>
+
+#include "hantro_v4l2.h"
+#include "hantro.h"
+#include "hantro_hw.h"
+
+#define DRIVER_NAME "hantro-vpu"
+
+int hantro_debug;
+module_param_named(debug, hantro_debug, int, 0644);
+MODULE_PARM_DESC(debug,
+ "Debug level - higher value produces more verbose messages");
+
+void *hantro_get_ctrl(struct hantro_ctx *ctx, u32 id)
+{
+ struct v4l2_ctrl *ctrl;
+
+ ctrl = v4l2_ctrl_find(&ctx->ctrl_handler, id);
+ return ctrl ? ctrl->p_cur.p : NULL;
+}
+
+dma_addr_t hantro_get_ref(struct hantro_ctx *ctx, u64 ts)
+{
+ struct vb2_queue *q = v4l2_m2m_get_dst_vq(ctx->fh.m2m_ctx);
+ struct vb2_buffer *buf;
+
+ buf = vb2_find_buffer(q, ts);
+ if (!buf)
+ return 0;
+ return hantro_get_dec_buf_addr(ctx, buf);
+}
+
+static const struct v4l2_event hantro_eos_event = {
+ .type = V4L2_EVENT_EOS
+};
+
+static void hantro_job_finish_no_pm(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx,
+ enum vb2_buffer_state result)
+{
+ struct vb2_v4l2_buffer *src, *dst;
+
+ src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
+ dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+
+ if (WARN_ON(!src))
+ return;
+ if (WARN_ON(!dst))
+ return;
+
+ src->sequence = ctx->sequence_out++;
+ dst->sequence = ctx->sequence_cap++;
+
+ if (v4l2_m2m_is_last_draining_src_buf(ctx->fh.m2m_ctx, src)) {
+ dst->flags |= V4L2_BUF_FLAG_LAST;
+ v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event);
+ v4l2_m2m_mark_stopped(ctx->fh.m2m_ctx);
+ }
+
+ v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx,
+ result);
+}
+
+static void hantro_job_finish(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx,
+ enum vb2_buffer_state result)
+{
+ pm_runtime_mark_last_busy(vpu->dev);
+ pm_runtime_put_autosuspend(vpu->dev);
+
+ clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks);
+
+ hantro_job_finish_no_pm(vpu, ctx, result);
+}
+
+void hantro_irq_done(struct hantro_dev *vpu,
+ enum vb2_buffer_state result)
+{
+ struct hantro_ctx *ctx =
+ v4l2_m2m_get_curr_priv(vpu->m2m_dev);
+
+ /*
+ * If cancel_delayed_work returns false
+ * the timeout expired. The watchdog is running,
+ * and will take care of finishing the job.
+ */
+ if (cancel_delayed_work(&vpu->watchdog_work)) {
+ if (result == VB2_BUF_STATE_DONE && ctx->codec_ops->done)
+ ctx->codec_ops->done(ctx);
+ hantro_job_finish(vpu, ctx, result);
+ }
+}
+
+void hantro_watchdog(struct work_struct *work)
+{
+ struct hantro_dev *vpu;
+ struct hantro_ctx *ctx;
+
+ vpu = container_of(to_delayed_work(work),
+ struct hantro_dev, watchdog_work);
+ ctx = v4l2_m2m_get_curr_priv(vpu->m2m_dev);
+ if (ctx) {
+ vpu_err("frame processing timed out!\n");
+ if (ctx->codec_ops->reset)
+ ctx->codec_ops->reset(ctx);
+ hantro_job_finish(vpu, ctx, VB2_BUF_STATE_ERROR);
+ }
+}
+
+void hantro_start_prepare_run(struct hantro_ctx *ctx)
+{
+ struct vb2_v4l2_buffer *src_buf;
+
+ src_buf = hantro_get_src_buf(ctx);
+ v4l2_ctrl_request_setup(src_buf->vb2_buf.req_obj.req,
+ &ctx->ctrl_handler);
+
+ if (!ctx->is_encoder && !ctx->dev->variant->late_postproc) {
+ if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt))
+ hantro_postproc_enable(ctx);
+ else
+ hantro_postproc_disable(ctx);
+ }
+}
+
+void hantro_end_prepare_run(struct hantro_ctx *ctx)
+{
+ struct vb2_v4l2_buffer *src_buf;
+
+ if (!ctx->is_encoder && ctx->dev->variant->late_postproc) {
+ if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt))
+ hantro_postproc_enable(ctx);
+ else
+ hantro_postproc_disable(ctx);
+ }
+
+ src_buf = hantro_get_src_buf(ctx);
+ v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req,
+ &ctx->ctrl_handler);
+
+ /* Kick the watchdog. */
+ schedule_delayed_work(&ctx->dev->watchdog_work,
+ msecs_to_jiffies(2000));
+}
+
+static void device_run(void *priv)
+{
+ struct hantro_ctx *ctx = priv;
+ struct vb2_v4l2_buffer *src, *dst;
+ int ret;
+
+ src = hantro_get_src_buf(ctx);
+ dst = hantro_get_dst_buf(ctx);
+
+ ret = pm_runtime_resume_and_get(ctx->dev->dev);
+ if (ret < 0)
+ goto err_cancel_job;
+
+ ret = clk_bulk_enable(ctx->dev->variant->num_clocks, ctx->dev->clocks);
+ if (ret)
+ goto err_cancel_job;
+
+ v4l2_m2m_buf_copy_metadata(src, dst, true);
+
+ if (ctx->codec_ops->run(ctx))
+ goto err_cancel_job;
+
+ return;
+
+err_cancel_job:
+ hantro_job_finish_no_pm(ctx->dev, ctx, VB2_BUF_STATE_ERROR);
+}
+
+static const struct v4l2_m2m_ops vpu_m2m_ops = {
+ .device_run = device_run,
+};
+
+static int
+queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
+{
+ struct hantro_ctx *ctx = priv;
+ int ret;
+
+ src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
+ src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+ src_vq->drv_priv = ctx;
+ src_vq->ops = &hantro_queue_ops;
+ src_vq->mem_ops = &vb2_dma_contig_memops;
+
+ /*
+ * Driver does mostly sequential access, so sacrifice TLB efficiency
+ * for faster allocation. Also, no CPU access on the source queue,
+ * so no kernel mapping needed.
+ */
+ src_vq->dma_attrs = DMA_ATTR_ALLOC_SINGLE_PAGES |
+ DMA_ATTR_NO_KERNEL_MAPPING;
+ src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+ src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+ src_vq->lock = &ctx->dev->vpu_mutex;
+ src_vq->dev = ctx->dev->v4l2_dev.dev;
+ src_vq->supports_requests = true;
+
+ ret = vb2_queue_init(src_vq);
+ if (ret)
+ return ret;
+
+ dst_vq->bidirectional = true;
+ dst_vq->mem_ops = &vb2_dma_contig_memops;
+ dst_vq->dma_attrs = DMA_ATTR_ALLOC_SINGLE_PAGES;
+ /*
+ * The Kernel needs access to the JPEG destination buffer for the
+ * JPEG encoder to fill in the JPEG headers.
+ */
+ if (!ctx->is_encoder)
+ dst_vq->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING;
+
+ dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+ dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+ dst_vq->drv_priv = ctx;
+ dst_vq->ops = &hantro_queue_ops;
+ dst_vq->buf_struct_size = sizeof(struct hantro_decoded_buffer);
+ dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+ dst_vq->lock = &ctx->dev->vpu_mutex;
+ dst_vq->dev = ctx->dev->v4l2_dev.dev;
+
+ return vb2_queue_init(dst_vq);
+}
+
+static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
+{
+ struct hantro_ctx *ctx;
+
+ ctx = container_of(ctrl->handler,
+ struct hantro_ctx, ctrl_handler);
+
+ if (ctrl->id == V4L2_CID_STATELESS_H264_SPS) {
+ const struct v4l2_ctrl_h264_sps *sps = ctrl->p_new.p_h264_sps;
+
+ if (sps->chroma_format_idc > 1)
+ /* Only 4:0:0 and 4:2:0 are supported */
+ return -EINVAL;
+ if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
+ /* Luma and chroma bit depth mismatch */
+ return -EINVAL;
+ if (sps->bit_depth_luma_minus8 != 0)
+ /* Only 8-bit is supported */
+ return -EINVAL;
+ } else if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) {
+ const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps;
+
+ if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2)
+ /* Only 8-bit and 10-bit are supported */
+ return -EINVAL;
+
+ ctx->bit_depth = sps->bit_depth_luma_minus8 + 8;
+ } else if (ctrl->id == V4L2_CID_STATELESS_VP9_FRAME) {
+ const struct v4l2_ctrl_vp9_frame *dec_params = ctrl->p_new.p_vp9_frame;
+
+ /* We only support profile 0 */
+ if (dec_params->profile != 0)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int hantro_jpeg_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+ struct hantro_ctx *ctx;
+
+ ctx = container_of(ctrl->handler,
+ struct hantro_ctx, ctrl_handler);
+
+ vpu_debug(1, "s_ctrl: id = %d, val = %d\n", ctrl->id, ctrl->val);
+
+ switch (ctrl->id) {
+ case V4L2_CID_JPEG_COMPRESSION_QUALITY:
+ ctx->jpeg_quality = ctrl->val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hantro_vp9_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+ struct hantro_ctx *ctx;
+
+ ctx = container_of(ctrl->handler,
+ struct hantro_ctx, ctrl_handler);
+
+ switch (ctrl->id) {
+ case V4L2_CID_STATELESS_VP9_FRAME:
+ ctx->bit_depth = ctrl->p_new.p_vp9_frame->bit_depth;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct v4l2_ctrl_ops hantro_ctrl_ops = {
+ .try_ctrl = hantro_try_ctrl,
+};
+
+static const struct v4l2_ctrl_ops hantro_jpeg_ctrl_ops = {
+ .s_ctrl = hantro_jpeg_s_ctrl,
+};
+
+static const struct v4l2_ctrl_ops hantro_vp9_ctrl_ops = {
+ .s_ctrl = hantro_vp9_s_ctrl,
+};
+
+#define HANTRO_JPEG_ACTIVE_MARKERS (V4L2_JPEG_ACTIVE_MARKER_APP0 | \
+ V4L2_JPEG_ACTIVE_MARKER_COM | \
+ V4L2_JPEG_ACTIVE_MARKER_DQT | \
+ V4L2_JPEG_ACTIVE_MARKER_DHT)
+
+static const struct hantro_ctrl controls[] = {
+ {
+ .codec = HANTRO_JPEG_ENCODER,
+ .cfg = {
+ .id = V4L2_CID_JPEG_COMPRESSION_QUALITY,
+ .min = 5,
+ .max = 100,
+ .step = 1,
+ .def = 50,
+ .ops = &hantro_jpeg_ctrl_ops,
+ },
+ }, {
+ .codec = HANTRO_JPEG_ENCODER,
+ .cfg = {
+ .id = V4L2_CID_JPEG_ACTIVE_MARKER,
+ .max = HANTRO_JPEG_ACTIVE_MARKERS,
+ .def = HANTRO_JPEG_ACTIVE_MARKERS,
+ /*
+ * Changing the set of active markers/segments also
+ * messes up the alignment of the JPEG header, which
+ * is needed to allow the hardware to write directly
+ * to the output buffer. Implementing this introduces
+ * a lot of complexity for little gain, as the markers
+ * enabled is already the minimum required set.
+ */
+ .flags = V4L2_CTRL_FLAG_READ_ONLY,
+ },
+ }, {
+ .codec = HANTRO_MPEG2_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_MPEG2_SEQUENCE,
+ },
+ }, {
+ .codec = HANTRO_MPEG2_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_MPEG2_PICTURE,
+ },
+ }, {
+ .codec = HANTRO_MPEG2_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_MPEG2_QUANTISATION,
+ },
+ }, {
+ .codec = HANTRO_VP8_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_VP8_FRAME,
+ },
+ }, {
+ .codec = HANTRO_H264_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS,
+ },
+ }, {
+ .codec = HANTRO_H264_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_H264_SPS,
+ .ops = &hantro_ctrl_ops,
+ },
+ }, {
+ .codec = HANTRO_H264_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_H264_PPS,
+ },
+ }, {
+ .codec = HANTRO_H264_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX,
+ },
+ }, {
+ .codec = HANTRO_H264_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_H264_DECODE_MODE,
+ .min = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+ .def = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+ .max = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+ },
+ }, {
+ .codec = HANTRO_H264_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_H264_START_CODE,
+ .min = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+ .def = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+ .max = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+ },
+ }, {
+ .codec = HANTRO_H264_DECODER,
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_H264_PROFILE,
+ .min = V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE,
+ .max = V4L2_MPEG_VIDEO_H264_PROFILE_HIGH,
+ .menu_skip_mask =
+ BIT(V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED),
+ .def = V4L2_MPEG_VIDEO_H264_PROFILE_MAIN,
+ }
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE,
+ .min = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
+ .max = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
+ .def = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
+ },
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_HEVC_START_CODE,
+ .min = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
+ .max = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
+ .def = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
+ },
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_PROFILE,
+ .min = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN,
+ .max = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10,
+ .def = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN,
+ },
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_LEVEL,
+ .min = V4L2_MPEG_VIDEO_HEVC_LEVEL_1,
+ .max = V4L2_MPEG_VIDEO_HEVC_LEVEL_5_1,
+ },
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_HEVC_SPS,
+ .ops = &hantro_ctrl_ops,
+ },
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_HEVC_PPS,
+ },
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
+ },
+ }, {
+ .codec = HANTRO_HEVC_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
+ },
+ }, {
+ .codec = HANTRO_VP9_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_VP9_FRAME,
+ .ops = &hantro_vp9_ctrl_ops,
+ },
+ }, {
+ .codec = HANTRO_VP9_DECODER,
+ .cfg = {
+ .id = V4L2_CID_STATELESS_VP9_COMPRESSED_HDR,
+ },
+ },
+};
+
+static int hantro_ctrls_setup(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx,
+ int allowed_codecs)
+{
+ int i, num_ctrls = ARRAY_SIZE(controls);
+
+ v4l2_ctrl_handler_init(&ctx->ctrl_handler, num_ctrls);
+
+ for (i = 0; i < num_ctrls; i++) {
+ if (!(allowed_codecs & controls[i].codec))
+ continue;
+
+ v4l2_ctrl_new_custom(&ctx->ctrl_handler,
+ &controls[i].cfg, NULL);
+ if (ctx->ctrl_handler.error) {
+ vpu_err("Adding control (%d) failed %d\n",
+ controls[i].cfg.id,
+ ctx->ctrl_handler.error);
+ v4l2_ctrl_handler_free(&ctx->ctrl_handler);
+ return ctx->ctrl_handler.error;
+ }
+ }
+ return v4l2_ctrl_handler_setup(&ctx->ctrl_handler);
+}
+
+/*
+ * V4L2 file operations.
+ */
+
+static int hantro_open(struct file *filp)
+{
+ struct hantro_dev *vpu = video_drvdata(filp);
+ struct video_device *vdev = video_devdata(filp);
+ struct hantro_func *func = hantro_vdev_to_func(vdev);
+ struct hantro_ctx *ctx;
+ int allowed_codecs, ret;
+
+ /*
+ * We do not need any extra locking here, because we operate only
+ * on local data here, except reading few fields from dev, which
+ * do not change through device's lifetime (which is guaranteed by
+ * reference on module from open()) and V4L2 internal objects (such
+ * as vdev and ctx->fh), which have proper locking done in respective
+ * helper functions used here.
+ */
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->dev = vpu;
+ if (func->id == MEDIA_ENT_F_PROC_VIDEO_ENCODER) {
+ allowed_codecs = vpu->variant->codec & HANTRO_ENCODERS;
+ ctx->is_encoder = true;
+ } else if (func->id == MEDIA_ENT_F_PROC_VIDEO_DECODER) {
+ allowed_codecs = vpu->variant->codec & HANTRO_DECODERS;
+ ctx->is_encoder = false;
+ } else {
+ ret = -ENODEV;
+ goto err_ctx_free;
+ }
+
+ ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(vpu->m2m_dev, ctx, queue_init);
+ if (IS_ERR(ctx->fh.m2m_ctx)) {
+ ret = PTR_ERR(ctx->fh.m2m_ctx);
+ goto err_ctx_free;
+ }
+
+ v4l2_fh_init(&ctx->fh, vdev);
+ filp->private_data = &ctx->fh;
+ v4l2_fh_add(&ctx->fh);
+
+ hantro_reset_fmts(ctx);
+
+ ret = hantro_ctrls_setup(vpu, ctx, allowed_codecs);
+ if (ret) {
+ vpu_err("Failed to set up controls\n");
+ goto err_fh_free;
+ }
+ ctx->fh.ctrl_handler = &ctx->ctrl_handler;
+
+ return 0;
+
+err_fh_free:
+ v4l2_fh_del(&ctx->fh);
+ v4l2_fh_exit(&ctx->fh);
+err_ctx_free:
+ kfree(ctx);
+ return ret;
+}
+
+static int hantro_release(struct file *filp)
+{
+ struct hantro_ctx *ctx =
+ container_of(filp->private_data, struct hantro_ctx, fh);
+
+ /*
+ * No need for extra locking because this was the last reference
+ * to this file.
+ */
+ v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+ v4l2_fh_del(&ctx->fh);
+ v4l2_fh_exit(&ctx->fh);
+ v4l2_ctrl_handler_free(&ctx->ctrl_handler);
+ kfree(ctx);
+
+ return 0;
+}
+
+static const struct v4l2_file_operations hantro_fops = {
+ .owner = THIS_MODULE,
+ .open = hantro_open,
+ .release = hantro_release,
+ .poll = v4l2_m2m_fop_poll,
+ .unlocked_ioctl = video_ioctl2,
+ .mmap = v4l2_m2m_fop_mmap,
+};
+
+static const struct of_device_id of_hantro_match[] = {
+#ifdef CONFIG_VIDEO_HANTRO_ROCKCHIP
+ { .compatible = "rockchip,px30-vpu", .data = &px30_vpu_variant, },
+ { .compatible = "rockchip,rk3036-vpu", .data = &rk3036_vpu_variant, },
+ { .compatible = "rockchip,rk3066-vpu", .data = &rk3066_vpu_variant, },
+ { .compatible = "rockchip,rk3288-vpu", .data = &rk3288_vpu_variant, },
+ { .compatible = "rockchip,rk3328-vpu", .data = &rk3328_vpu_variant, },
+ { .compatible = "rockchip,rk3399-vpu", .data = &rk3399_vpu_variant, },
+ { .compatible = "rockchip,rk3568-vepu", .data = &rk3568_vepu_variant, },
+ { .compatible = "rockchip,rk3568-vpu", .data = &rk3568_vpu_variant, },
+#endif
+#ifdef CONFIG_VIDEO_HANTRO_IMX8M
+ { .compatible = "nxp,imx8mm-vpu-g1", .data = &imx8mm_vpu_g1_variant, },
+ { .compatible = "nxp,imx8mq-vpu", .data = &imx8mq_vpu_variant, },
+ { .compatible = "nxp,imx8mq-vpu-g1", .data = &imx8mq_vpu_g1_variant },
+ { .compatible = "nxp,imx8mq-vpu-g2", .data = &imx8mq_vpu_g2_variant },
+#endif
+#ifdef CONFIG_VIDEO_HANTRO_SAMA5D4
+ { .compatible = "microchip,sama5d4-vdec", .data = &sama5d4_vdec_variant, },
+#endif
+#ifdef CONFIG_VIDEO_HANTRO_SUNXI
+ { .compatible = "allwinner,sun50i-h6-vpu-g2", .data = &sunxi_vpu_variant, },
+#endif
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, of_hantro_match);
+
+static int hantro_register_entity(struct media_device *mdev,
+ struct media_entity *entity,
+ const char *entity_name,
+ struct media_pad *pads, int num_pads,
+ int function, struct video_device *vdev)
+{
+ char *name;
+ int ret;
+
+ entity->obj_type = MEDIA_ENTITY_TYPE_BASE;
+ if (function == MEDIA_ENT_F_IO_V4L) {
+ entity->info.dev.major = VIDEO_MAJOR;
+ entity->info.dev.minor = vdev->minor;
+ }
+
+ name = devm_kasprintf(mdev->dev, GFP_KERNEL, "%s-%s", vdev->name,
+ entity_name);
+ if (!name)
+ return -ENOMEM;
+
+ entity->name = name;
+ entity->function = function;
+
+ ret = media_entity_pads_init(entity, num_pads, pads);
+ if (ret)
+ return ret;
+
+ ret = media_device_register_entity(mdev, entity);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int hantro_attach_func(struct hantro_dev *vpu,
+ struct hantro_func *func)
+{
+ struct media_device *mdev = &vpu->mdev;
+ struct media_link *link;
+ int ret;
+
+ /* Create the three encoder entities with their pads */
+ func->source_pad.flags = MEDIA_PAD_FL_SOURCE;
+ ret = hantro_register_entity(mdev, &func->vdev.entity, "source",
+ &func->source_pad, 1, MEDIA_ENT_F_IO_V4L,
+ &func->vdev);
+ if (ret)
+ return ret;
+
+ func->proc_pads[0].flags = MEDIA_PAD_FL_SINK;
+ func->proc_pads[1].flags = MEDIA_PAD_FL_SOURCE;
+ ret = hantro_register_entity(mdev, &func->proc, "proc",
+ func->proc_pads, 2, func->id,
+ &func->vdev);
+ if (ret)
+ goto err_rel_entity0;
+
+ func->sink_pad.flags = MEDIA_PAD_FL_SINK;
+ ret = hantro_register_entity(mdev, &func->sink, "sink",
+ &func->sink_pad, 1, MEDIA_ENT_F_IO_V4L,
+ &func->vdev);
+ if (ret)
+ goto err_rel_entity1;
+
+ /* Connect the three entities */
+ ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 0,
+ MEDIA_LNK_FL_IMMUTABLE |
+ MEDIA_LNK_FL_ENABLED);
+ if (ret)
+ goto err_rel_entity2;
+
+ ret = media_create_pad_link(&func->proc, 1, &func->sink, 0,
+ MEDIA_LNK_FL_IMMUTABLE |
+ MEDIA_LNK_FL_ENABLED);
+ if (ret)
+ goto err_rm_links0;
+
+ /* Create video interface */
+ func->intf_devnode = media_devnode_create(mdev, MEDIA_INTF_T_V4L_VIDEO,
+ 0, VIDEO_MAJOR,
+ func->vdev.minor);
+ if (!func->intf_devnode) {
+ ret = -ENOMEM;
+ goto err_rm_links1;
+ }
+
+ /* Connect the two DMA engines to the interface */
+ link = media_create_intf_link(&func->vdev.entity,
+ &func->intf_devnode->intf,
+ MEDIA_LNK_FL_IMMUTABLE |
+ MEDIA_LNK_FL_ENABLED);
+ if (!link) {
+ ret = -ENOMEM;
+ goto err_rm_devnode;
+ }
+
+ link = media_create_intf_link(&func->sink, &func->intf_devnode->intf,
+ MEDIA_LNK_FL_IMMUTABLE |
+ MEDIA_LNK_FL_ENABLED);
+ if (!link) {
+ ret = -ENOMEM;
+ goto err_rm_devnode;
+ }
+ return 0;
+
+err_rm_devnode:
+ media_devnode_remove(func->intf_devnode);
+
+err_rm_links1:
+ media_entity_remove_links(&func->sink);
+
+err_rm_links0:
+ media_entity_remove_links(&func->proc);
+ media_entity_remove_links(&func->vdev.entity);
+
+err_rel_entity2:
+ media_device_unregister_entity(&func->sink);
+
+err_rel_entity1:
+ media_device_unregister_entity(&func->proc);
+
+err_rel_entity0:
+ media_device_unregister_entity(&func->vdev.entity);
+ return ret;
+}
+
+static void hantro_detach_func(struct hantro_func *func)
+{
+ media_devnode_remove(func->intf_devnode);
+ media_entity_remove_links(&func->sink);
+ media_entity_remove_links(&func->proc);
+ media_entity_remove_links(&func->vdev.entity);
+ media_device_unregister_entity(&func->sink);
+ media_device_unregister_entity(&func->proc);
+ media_device_unregister_entity(&func->vdev.entity);
+}
+
+static int hantro_add_func(struct hantro_dev *vpu, unsigned int funcid)
+{
+ const struct of_device_id *match;
+ struct hantro_func *func;
+ struct video_device *vfd;
+ int ret;
+
+ match = of_match_node(of_hantro_match, vpu->dev->of_node);
+ func = devm_kzalloc(vpu->dev, sizeof(*func), GFP_KERNEL);
+ if (!func) {
+ v4l2_err(&vpu->v4l2_dev, "Failed to allocate video device\n");
+ return -ENOMEM;
+ }
+
+ func->id = funcid;
+
+ vfd = &func->vdev;
+ vfd->fops = &hantro_fops;
+ vfd->release = video_device_release_empty;
+ vfd->lock = &vpu->vpu_mutex;
+ vfd->v4l2_dev = &vpu->v4l2_dev;
+ vfd->vfl_dir = VFL_DIR_M2M;
+ vfd->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_M2M_MPLANE;
+ vfd->ioctl_ops = &hantro_ioctl_ops;
+ snprintf(vfd->name, sizeof(vfd->name), "%s-%s", match->compatible,
+ funcid == MEDIA_ENT_F_PROC_VIDEO_ENCODER ? "enc" : "dec");
+
+ if (funcid == MEDIA_ENT_F_PROC_VIDEO_ENCODER) {
+ vpu->encoder = func;
+ v4l2_disable_ioctl(vfd, VIDIOC_TRY_DECODER_CMD);
+ v4l2_disable_ioctl(vfd, VIDIOC_DECODER_CMD);
+ } else {
+ vpu->decoder = func;
+ v4l2_disable_ioctl(vfd, VIDIOC_TRY_ENCODER_CMD);
+ v4l2_disable_ioctl(vfd, VIDIOC_ENCODER_CMD);
+ }
+
+ video_set_drvdata(vfd, vpu);
+
+ ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1);
+ if (ret) {
+ v4l2_err(&vpu->v4l2_dev, "Failed to register video device\n");
+ return ret;
+ }
+
+ ret = hantro_attach_func(vpu, func);
+ if (ret) {
+ v4l2_err(&vpu->v4l2_dev,
+ "Failed to attach functionality to the media device\n");
+ goto err_unreg_dev;
+ }
+
+ v4l2_info(&vpu->v4l2_dev, "registered %s as /dev/video%d\n", vfd->name,
+ vfd->num);
+
+ return 0;
+
+err_unreg_dev:
+ video_unregister_device(vfd);
+ return ret;
+}
+
+static int hantro_add_enc_func(struct hantro_dev *vpu)
+{
+ if (!vpu->variant->enc_fmts)
+ return 0;
+
+ return hantro_add_func(vpu, MEDIA_ENT_F_PROC_VIDEO_ENCODER);
+}
+
+static int hantro_add_dec_func(struct hantro_dev *vpu)
+{
+ if (!vpu->variant->dec_fmts)
+ return 0;
+
+ return hantro_add_func(vpu, MEDIA_ENT_F_PROC_VIDEO_DECODER);
+}
+
+static void hantro_remove_func(struct hantro_dev *vpu,
+ unsigned int funcid)
+{
+ struct hantro_func *func;
+
+ if (funcid == MEDIA_ENT_F_PROC_VIDEO_ENCODER)
+ func = vpu->encoder;
+ else
+ func = vpu->decoder;
+
+ if (!func)
+ return;
+
+ hantro_detach_func(func);
+ video_unregister_device(&func->vdev);
+}
+
+static void hantro_remove_enc_func(struct hantro_dev *vpu)
+{
+ hantro_remove_func(vpu, MEDIA_ENT_F_PROC_VIDEO_ENCODER);
+}
+
+static void hantro_remove_dec_func(struct hantro_dev *vpu)
+{
+ hantro_remove_func(vpu, MEDIA_ENT_F_PROC_VIDEO_DECODER);
+}
+
+static const struct media_device_ops hantro_m2m_media_ops = {
+ .req_validate = vb2_request_validate,
+ .req_queue = v4l2_m2m_request_queue,
+};
+
+static int hantro_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *match;
+ struct hantro_dev *vpu;
+ struct resource *res;
+ int num_bases;
+ int i, ret;
+
+ vpu = devm_kzalloc(&pdev->dev, sizeof(*vpu), GFP_KERNEL);
+ if (!vpu)
+ return -ENOMEM;
+
+ vpu->dev = &pdev->dev;
+ vpu->pdev = pdev;
+ mutex_init(&vpu->vpu_mutex);
+ spin_lock_init(&vpu->irqlock);
+
+ match = of_match_node(of_hantro_match, pdev->dev.of_node);
+ vpu->variant = match->data;
+
+ /*
+ * Support for nxp,imx8mq-vpu is kept for backwards compatibility
+ * but it's deprecated. Please update your DTS file to use
+ * nxp,imx8mq-vpu-g1 or nxp,imx8mq-vpu-g2 instead.
+ */
+ if (of_device_is_compatible(pdev->dev.of_node, "nxp,imx8mq-vpu"))
+ dev_warn(&pdev->dev, "%s compatible is deprecated\n",
+ match->compatible);
+
+ INIT_DELAYED_WORK(&vpu->watchdog_work, hantro_watchdog);
+
+ vpu->clocks = devm_kcalloc(&pdev->dev, vpu->variant->num_clocks,
+ sizeof(*vpu->clocks), GFP_KERNEL);
+ if (!vpu->clocks)
+ return -ENOMEM;
+
+ if (vpu->variant->num_clocks > 1) {
+ for (i = 0; i < vpu->variant->num_clocks; i++)
+ vpu->clocks[i].id = vpu->variant->clk_names[i];
+
+ ret = devm_clk_bulk_get(&pdev->dev, vpu->variant->num_clocks,
+ vpu->clocks);
+ if (ret)
+ return ret;
+ } else {
+ /*
+ * If the driver has a single clk, chances are there will be no
+ * actual name in the DT bindings.
+ */
+ vpu->clocks[0].clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(vpu->clocks[0].clk))
+ return PTR_ERR(vpu->clocks[0].clk);
+ }
+
+ vpu->resets = devm_reset_control_array_get(&pdev->dev, false, true);
+ if (IS_ERR(vpu->resets))
+ return PTR_ERR(vpu->resets);
+
+ num_bases = vpu->variant->num_regs ?: 1;
+ vpu->reg_bases = devm_kcalloc(&pdev->dev, num_bases,
+ sizeof(*vpu->reg_bases), GFP_KERNEL);
+ if (!vpu->reg_bases)
+ return -ENOMEM;
+
+ for (i = 0; i < num_bases; i++) {
+ res = vpu->variant->reg_names ?
+ platform_get_resource_byname(vpu->pdev, IORESOURCE_MEM,
+ vpu->variant->reg_names[i]) :
+ platform_get_resource(vpu->pdev, IORESOURCE_MEM, 0);
+ vpu->reg_bases[i] = devm_ioremap_resource(vpu->dev, res);
+ if (IS_ERR(vpu->reg_bases[i]))
+ return PTR_ERR(vpu->reg_bases[i]);
+ }
+ vpu->enc_base = vpu->reg_bases[0] + vpu->variant->enc_offset;
+ vpu->dec_base = vpu->reg_bases[0] + vpu->variant->dec_offset;
+
+ /**
+ * TODO: Eventually allow taking advantage of full 64-bit address space.
+ * Until then we assume the MSB portion of buffers' base addresses is
+ * always 0 due to this masking operation.
+ */
+ ret = dma_set_coherent_mask(vpu->dev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(vpu->dev, "Could not set DMA coherent mask.\n");
+ return ret;
+ }
+ vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32));
+
+ for (i = 0; i < vpu->variant->num_irqs; i++) {
+ const char *irq_name;
+ int irq;
+
+ if (!vpu->variant->irqs[i].handler)
+ continue;
+
+ if (vpu->variant->num_irqs > 1) {
+ irq_name = vpu->variant->irqs[i].name;
+ irq = platform_get_irq_byname(vpu->pdev, irq_name);
+ } else {
+ /*
+ * If the driver has a single IRQ, chances are there
+ * will be no actual name in the DT bindings.
+ */
+ irq_name = "default";
+ irq = platform_get_irq(vpu->pdev, 0);
+ }
+ if (irq <= 0)
+ return -ENXIO;
+
+ ret = devm_request_irq(vpu->dev, irq,
+ vpu->variant->irqs[i].handler, 0,
+ dev_name(vpu->dev), vpu);
+ if (ret) {
+ dev_err(vpu->dev, "Could not request %s IRQ.\n",
+ irq_name);
+ return ret;
+ }
+ }
+
+ if (vpu->variant->init) {
+ ret = vpu->variant->init(vpu);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to init VPU hardware\n");
+ return ret;
+ }
+ }
+
+ pm_runtime_set_autosuspend_delay(vpu->dev, 100);
+ pm_runtime_use_autosuspend(vpu->dev);
+ pm_runtime_enable(vpu->dev);
+
+ ret = reset_control_deassert(vpu->resets);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to deassert resets\n");
+ goto err_pm_disable;
+ }
+
+ ret = clk_bulk_prepare(vpu->variant->num_clocks, vpu->clocks);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to prepare clocks\n");
+ goto err_rst_assert;
+ }
+
+ ret = v4l2_device_register(&pdev->dev, &vpu->v4l2_dev);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register v4l2 device\n");
+ goto err_clk_unprepare;
+ }
+ platform_set_drvdata(pdev, vpu);
+
+ vpu->m2m_dev = v4l2_m2m_init(&vpu_m2m_ops);
+ if (IS_ERR(vpu->m2m_dev)) {
+ v4l2_err(&vpu->v4l2_dev, "Failed to init mem2mem device\n");
+ ret = PTR_ERR(vpu->m2m_dev);
+ goto err_v4l2_unreg;
+ }
+
+ vpu->mdev.dev = vpu->dev;
+ strscpy(vpu->mdev.model, DRIVER_NAME, sizeof(vpu->mdev.model));
+ strscpy(vpu->mdev.bus_info, "platform: " DRIVER_NAME,
+ sizeof(vpu->mdev.bus_info));
+ media_device_init(&vpu->mdev);
+ vpu->mdev.ops = &hantro_m2m_media_ops;
+ vpu->v4l2_dev.mdev = &vpu->mdev;
+
+ ret = hantro_add_enc_func(vpu);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register encoder\n");
+ goto err_m2m_rel;
+ }
+
+ ret = hantro_add_dec_func(vpu);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register decoder\n");
+ goto err_rm_enc_func;
+ }
+
+ ret = media_device_register(&vpu->mdev);
+ if (ret) {
+ v4l2_err(&vpu->v4l2_dev, "Failed to register mem2mem media device\n");
+ goto err_rm_dec_func;
+ }
+
+ return 0;
+
+err_rm_dec_func:
+ hantro_remove_dec_func(vpu);
+err_rm_enc_func:
+ hantro_remove_enc_func(vpu);
+err_m2m_rel:
+ media_device_cleanup(&vpu->mdev);
+ v4l2_m2m_release(vpu->m2m_dev);
+err_v4l2_unreg:
+ v4l2_device_unregister(&vpu->v4l2_dev);
+err_clk_unprepare:
+ clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks);
+err_rst_assert:
+ reset_control_assert(vpu->resets);
+err_pm_disable:
+ pm_runtime_dont_use_autosuspend(vpu->dev);
+ pm_runtime_disable(vpu->dev);
+ return ret;
+}
+
+static int hantro_remove(struct platform_device *pdev)
+{
+ struct hantro_dev *vpu = platform_get_drvdata(pdev);
+
+ v4l2_info(&vpu->v4l2_dev, "Removing %s\n", pdev->name);
+
+ media_device_unregister(&vpu->mdev);
+ hantro_remove_dec_func(vpu);
+ hantro_remove_enc_func(vpu);
+ media_device_cleanup(&vpu->mdev);
+ v4l2_m2m_release(vpu->m2m_dev);
+ v4l2_device_unregister(&vpu->v4l2_dev);
+ clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks);
+ reset_control_assert(vpu->resets);
+ pm_runtime_dont_use_autosuspend(vpu->dev);
+ pm_runtime_disable(vpu->dev);
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int hantro_runtime_resume(struct device *dev)
+{
+ struct hantro_dev *vpu = dev_get_drvdata(dev);
+
+ if (vpu->variant->runtime_resume)
+ return vpu->variant->runtime_resume(vpu);
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops hantro_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(NULL, hantro_runtime_resume, NULL)
+};
+
+static struct platform_driver hantro_driver = {
+ .probe = hantro_probe,
+ .remove = hantro_remove,
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = of_match_ptr(of_hantro_match),
+ .pm = &hantro_pm_ops,
+ },
+};
+module_platform_driver(hantro_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Alpha Lin <Alpha.Lin@Rock-Chips.com>");
+MODULE_AUTHOR("Tomasz Figa <tfiga@chromium.org>");
+MODULE_AUTHOR("Ezequiel Garcia <ezequiel@collabora.com>");
+MODULE_DESCRIPTION("Hantro VPU codec driver");
diff --git a/drivers/media/platform/verisilicon/hantro_g1.c b/drivers/media/platform/verisilicon/hantro_g1.c
new file mode 100644
index 000000000..0ab1cee62
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g1.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ * Jeffy Chen <jeffy.chen@rock-chips.com>
+ * Copyright (C) 2019 Pengutronix, Philipp Zabel <kernel@pengutronix.de>
+ * Copyright (C) 2021 Collabora Ltd, Emil Velikov <emil.velikov@collabora.com>
+ */
+
+#include "hantro.h"
+#include "hantro_g1_regs.h"
+
+irqreturn_t hantro_g1_irq(int irq, void *dev_id)
+{
+ struct hantro_dev *vpu = dev_id;
+ enum vb2_buffer_state state;
+ u32 status;
+
+ status = vdpu_read(vpu, G1_REG_INTERRUPT);
+ state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ?
+ VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+ vdpu_write(vpu, 0, G1_REG_INTERRUPT);
+ vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+
+ hantro_irq_done(vpu, state);
+
+ return IRQ_HANDLED;
+}
+
+void hantro_g1_reset(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ vdpu_write(vpu, G1_REG_INTERRUPT_DEC_IRQ_DIS, G1_REG_INTERRUPT);
+ vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+ vdpu_write(vpu, 1, G1_REG_SOFT_RESET);
+}
diff --git a/drivers/media/platform/verisilicon/hantro_g1_h264_dec.c b/drivers/media/platform/verisilicon/hantro_g1_h264_dec.c
new file mode 100644
index 000000000..9de7f05ef
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g1_h264_dec.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Rockchip RK3288 VPU codec driver
+ *
+ * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
+ * Hertz Wong <hertz.wong@rock-chips.com>
+ * Herman Chen <herman.chen@rock-chips.com>
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * Tomasz Figa <tfiga@chromium.org>
+ */
+
+#include <linux/types.h>
+#include <linux/sort.h>
+
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro_g1_regs.h"
+#include "hantro_hw.h"
+#include "hantro_v4l2.h"
+
+static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
+{
+ const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
+ const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
+ const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
+ const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
+ struct hantro_dev *vpu = ctx->dev;
+ u32 reg;
+
+ /* Decoder control register 0. */
+ reg = G1_REG_DEC_CTRL0_DEC_AXI_AUTO;
+ if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
+ reg |= G1_REG_DEC_CTRL0_SEQ_MBAFF_E;
+ if (sps->profile_idc > 66) {
+ reg |= G1_REG_DEC_CTRL0_PICORD_COUNT_E;
+ if (dec_param->nal_ref_idc)
+ reg |= G1_REG_DEC_CTRL0_WRITE_MVS_E;
+ }
+
+ if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) &&
+ (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD ||
+ dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC))
+ reg |= G1_REG_DEC_CTRL0_PIC_INTERLACE_E;
+ if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
+ reg |= G1_REG_DEC_CTRL0_PIC_FIELDMODE_E;
+ if (!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD))
+ reg |= G1_REG_DEC_CTRL0_PIC_TOPFIELD_E;
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL0);
+
+ /* Decoder control register 1. */
+ reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) |
+ G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) |
+ G1_REG_DEC_CTRL1_REF_FRAMES(sps->max_num_ref_frames);
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL1);
+
+ /* Decoder control register 2. */
+ reg = G1_REG_DEC_CTRL2_CH_QP_OFFSET(pps->chroma_qp_index_offset) |
+ G1_REG_DEC_CTRL2_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset);
+
+ if (pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)
+ reg |= G1_REG_DEC_CTRL2_TYPE1_QUANT_E;
+ if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
+ reg |= G1_REG_DEC_CTRL2_FIELDPIC_FLAG_E;
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL2);
+
+ /* Decoder control register 3. */
+ reg = G1_REG_DEC_CTRL3_START_CODE_E |
+ G1_REG_DEC_CTRL3_INIT_QP(pps->pic_init_qp_minus26 + 26) |
+ G1_REG_DEC_CTRL3_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL3);
+
+ /* Decoder control register 4. */
+ reg = G1_REG_DEC_CTRL4_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) |
+ G1_REG_DEC_CTRL4_FRAMENUM(dec_param->frame_num) |
+ G1_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc);
+ if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
+ reg |= G1_REG_DEC_CTRL4_CABAC_E;
+ if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
+ reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E;
+ if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0)
+ reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E;
+ if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
+ reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E;
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL4);
+
+ /* Decoder control register 5. */
+ reg = G1_REG_DEC_CTRL5_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) |
+ G1_REG_DEC_CTRL5_IDR_PIC_ID(dec_param->idr_pic_id);
+ if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
+ reg |= G1_REG_DEC_CTRL5_CONST_INTRA_E;
+ if (pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)
+ reg |= G1_REG_DEC_CTRL5_FILT_CTRL_PRES;
+ if (pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT)
+ reg |= G1_REG_DEC_CTRL5_RDPIC_CNT_PRES;
+ if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
+ reg |= G1_REG_DEC_CTRL5_8X8TRANS_FLAG_E;
+ if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC)
+ reg |= G1_REG_DEC_CTRL5_IDR_PIC_E;
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL5);
+
+ /* Decoder control register 6. */
+ reg = G1_REG_DEC_CTRL6_PPS_ID(pps->pic_parameter_set_id) |
+ G1_REG_DEC_CTRL6_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) |
+ G1_REG_DEC_CTRL6_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) |
+ G1_REG_DEC_CTRL6_POC_LENGTH(dec_param->pic_order_cnt_bit_size);
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL6);
+
+ /* Error concealment register. */
+ vdpu_write_relaxed(vpu, 0, G1_REG_ERR_CONC);
+
+ /* Prediction filter tap register. */
+ vdpu_write_relaxed(vpu,
+ G1_REG_PRED_FLT_PRED_BC_TAP_0_0(1) |
+ G1_REG_PRED_FLT_PRED_BC_TAP_0_1(-5 & 0x3ff) |
+ G1_REG_PRED_FLT_PRED_BC_TAP_0_2(20),
+ G1_REG_PRED_FLT);
+
+ /* Reference picture buffer control register. */
+ vdpu_write_relaxed(vpu, 0, G1_REG_REF_BUF_CTRL);
+
+ /* Reference picture buffer control register 2. */
+ vdpu_write_relaxed(vpu, G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(8),
+ G1_REG_REF_BUF_CTRL2);
+}
+
+static void set_ref(struct hantro_ctx *ctx)
+{
+ const struct v4l2_h264_reference *b0_reflist, *b1_reflist, *p_reflist;
+ struct hantro_dev *vpu = ctx->dev;
+ int reg_num;
+ u32 reg;
+ int i;
+
+ vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_valid, G1_REG_VALID_REF);
+ vdpu_write_relaxed(vpu, ctx->h264_dec.dpb_longterm, G1_REG_LT_REF);
+
+ /*
+ * Set up reference frame picture numbers.
+ *
+ * Each G1_REG_REF_PIC(x) register contains numbers of two
+ * subsequential reference pictures.
+ */
+ for (i = 0; i < HANTRO_H264_DPB_SIZE; i += 2) {
+ reg = G1_REG_REF_PIC_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, i)) |
+ G1_REG_REF_PIC_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, i + 1));
+ vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(i / 2));
+ }
+
+ b0_reflist = ctx->h264_dec.reflists.b0;
+ b1_reflist = ctx->h264_dec.reflists.b1;
+ p_reflist = ctx->h264_dec.reflists.p;
+
+ /*
+ * Each G1_REG_BD_REF_PIC(x) register contains three entries
+ * of each forward and backward picture list.
+ */
+ reg_num = 0;
+ for (i = 0; i < 15; i += 3) {
+ reg = G1_REG_BD_REF_PIC_BINIT_RLIST_F0(b0_reflist[i].index) |
+ G1_REG_BD_REF_PIC_BINIT_RLIST_F1(b0_reflist[i + 1].index) |
+ G1_REG_BD_REF_PIC_BINIT_RLIST_F2(b0_reflist[i + 2].index) |
+ G1_REG_BD_REF_PIC_BINIT_RLIST_B0(b1_reflist[i].index) |
+ G1_REG_BD_REF_PIC_BINIT_RLIST_B1(b1_reflist[i + 1].index) |
+ G1_REG_BD_REF_PIC_BINIT_RLIST_B2(b1_reflist[i + 2].index);
+ vdpu_write_relaxed(vpu, reg, G1_REG_BD_REF_PIC(reg_num++));
+ }
+
+ /*
+ * G1_REG_BD_P_REF_PIC register contains last entries (index 15)
+ * of forward and backward reference picture lists and first 4 entries
+ * of P forward picture list.
+ */
+ reg = G1_REG_BD_P_REF_PIC_BINIT_RLIST_F15(b0_reflist[15].index) |
+ G1_REG_BD_P_REF_PIC_BINIT_RLIST_B15(b1_reflist[15].index) |
+ G1_REG_BD_P_REF_PIC_PINIT_RLIST_F0(p_reflist[0].index) |
+ G1_REG_BD_P_REF_PIC_PINIT_RLIST_F1(p_reflist[1].index) |
+ G1_REG_BD_P_REF_PIC_PINIT_RLIST_F2(p_reflist[2].index) |
+ G1_REG_BD_P_REF_PIC_PINIT_RLIST_F3(p_reflist[3].index);
+ vdpu_write_relaxed(vpu, reg, G1_REG_BD_P_REF_PIC);
+
+ /*
+ * Each G1_REG_FWD_PIC(x) register contains six consecutive
+ * entries of P forward picture list, starting from index 4.
+ */
+ reg_num = 0;
+ for (i = 4; i < HANTRO_H264_DPB_SIZE; i += 6) {
+ reg = G1_REG_FWD_PIC_PINIT_RLIST_F0(p_reflist[i].index) |
+ G1_REG_FWD_PIC_PINIT_RLIST_F1(p_reflist[i + 1].index) |
+ G1_REG_FWD_PIC_PINIT_RLIST_F2(p_reflist[i + 2].index) |
+ G1_REG_FWD_PIC_PINIT_RLIST_F3(p_reflist[i + 3].index) |
+ G1_REG_FWD_PIC_PINIT_RLIST_F4(p_reflist[i + 4].index) |
+ G1_REG_FWD_PIC_PINIT_RLIST_F5(p_reflist[i + 5].index);
+ vdpu_write_relaxed(vpu, reg, G1_REG_FWD_PIC(reg_num++));
+ }
+
+ /* Set up addresses of DPB buffers. */
+ for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) {
+ dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, i);
+
+ vdpu_write_relaxed(vpu, dma_addr, G1_REG_ADDR_REF(i));
+ }
+}
+
+static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
+{
+ const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
+ struct vb2_v4l2_buffer *dst_buf;
+ struct hantro_dev *vpu = ctx->dev;
+ dma_addr_t src_dma, dst_dma;
+ size_t offset = 0;
+
+ /* Source (stream) buffer. */
+ src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
+ vdpu_write_relaxed(vpu, src_dma, G1_REG_ADDR_STR);
+
+ /* Destination (decoded frame) buffer. */
+ dst_buf = hantro_get_dst_buf(ctx);
+ dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf);
+ /* Adjust dma addr to start at second line for bottom field */
+ if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
+ offset = ALIGN(ctx->src_fmt.width, MB_DIM);
+ vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DST);
+
+ /* Higher profiles require DMV buffer appended to reference frames. */
+ if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) {
+ unsigned int bytes_per_mb = 384;
+
+ /* DMV buffer for monochrome start directly after Y-plane */
+ if (ctrls->sps->profile_idc >= 100 &&
+ ctrls->sps->chroma_format_idc == 0)
+ bytes_per_mb = 256;
+ offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) *
+ MB_HEIGHT(ctx->src_fmt.height);
+
+ /*
+ * DMV buffer is split in two for field encoded frames,
+ * adjust offset for bottom field
+ */
+ if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
+ offset += 32 * MB_WIDTH(ctx->src_fmt.width) *
+ MB_HEIGHT(ctx->src_fmt.height);
+ vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DIR_MV);
+ }
+
+ /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */
+ vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE);
+}
+
+int hantro_g1_h264_dec_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *src_buf;
+ int ret;
+
+ /* Prepare the H264 decoder context. */
+ ret = hantro_h264_dec_prepare_run(ctx);
+ if (ret)
+ return ret;
+
+ /* Configure hardware registers. */
+ src_buf = hantro_get_src_buf(ctx);
+ set_params(ctx, src_buf);
+ set_ref(ctx);
+ set_buffers(ctx, src_buf);
+
+ hantro_end_prepare_run(ctx);
+
+ /* Start decoding! */
+ vdpu_write_relaxed(vpu,
+ G1_REG_CONFIG_DEC_AXI_RD_ID(0xffu) |
+ G1_REG_CONFIG_DEC_TIMEOUT_E |
+ G1_REG_CONFIG_DEC_OUT_ENDIAN |
+ G1_REG_CONFIG_DEC_STRENDIAN_E |
+ G1_REG_CONFIG_DEC_MAX_BURST(16) |
+ G1_REG_CONFIG_DEC_OUTSWAP32_E |
+ G1_REG_CONFIG_DEC_INSWAP32_E |
+ G1_REG_CONFIG_DEC_STRSWAP32_E |
+ G1_REG_CONFIG_DEC_CLK_GATE_E,
+ G1_REG_CONFIG);
+ vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c b/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c
new file mode 100644
index 000000000..9aea331e1
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g1_mpeg2_dec.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ */
+
+#include <asm/unaligned.h>
+#include <linux/bitfield.h>
+#include <media/v4l2-mem2mem.h>
+#include "hantro.h"
+#include "hantro_hw.h"
+#include "hantro_g1_regs.h"
+
+#define G1_SWREG(nr) ((nr) * 4)
+
+#define G1_REG_RLC_VLC_BASE G1_SWREG(12)
+#define G1_REG_DEC_OUT_BASE G1_SWREG(13)
+#define G1_REG_REFER0_BASE G1_SWREG(14)
+#define G1_REG_REFER1_BASE G1_SWREG(15)
+#define G1_REG_REFER2_BASE G1_SWREG(16)
+#define G1_REG_REFER3_BASE G1_SWREG(17)
+#define G1_REG_QTABLE_BASE G1_SWREG(40)
+
+#define G1_REG_DEC_AXI_RD_ID(v) (((v) << 24) & GENMASK(31, 24))
+#define G1_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(23) : 0)
+#define G1_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(22) : 0)
+#define G1_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(21) : 0)
+#define G1_REG_DEC_INSWAP32_E(v) ((v) ? BIT(20) : 0)
+#define G1_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(19) : 0)
+#define G1_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(18) : 0)
+#define G1_REG_DEC_LATENCY(v) (((v) << 11) & GENMASK(16, 11))
+#define G1_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(10) : 0)
+#define G1_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(9) : 0)
+#define G1_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(8) : 0)
+#define G1_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(6) : 0)
+#define G1_REG_DEC_SCMD_DIS(v) ((v) ? BIT(5) : 0)
+#define G1_REG_DEC_MAX_BURST(v) (((v) << 0) & GENMASK(4, 0))
+
+#define G1_REG_DEC_MODE(v) (((v) << 28) & GENMASK(31, 28))
+#define G1_REG_RLC_MODE_E(v) ((v) ? BIT(27) : 0)
+#define G1_REG_PIC_INTERLACE_E(v) ((v) ? BIT(23) : 0)
+#define G1_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(22) : 0)
+#define G1_REG_PIC_B_E(v) ((v) ? BIT(21) : 0)
+#define G1_REG_PIC_INTER_E(v) ((v) ? BIT(20) : 0)
+#define G1_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(19) : 0)
+#define G1_REG_FWD_INTERLACE_E(v) ((v) ? BIT(18) : 0)
+#define G1_REG_FILTERING_DIS(v) ((v) ? BIT(14) : 0)
+#define G1_REG_WRITE_MVS_E(v) ((v) ? BIT(12) : 0)
+#define G1_REG_DEC_AXI_WR_ID(v) (((v) << 0) & GENMASK(7, 0))
+
+#define G1_REG_PIC_MB_WIDTH(v) (((v) << 23) & GENMASK(31, 23))
+#define G1_REG_PIC_MB_HEIGHT_P(v) (((v) << 11) & GENMASK(18, 11))
+#define G1_REG_ALT_SCAN_E(v) ((v) ? BIT(6) : 0)
+#define G1_REG_TOPFIELDFIRST_E(v) ((v) ? BIT(5) : 0)
+
+#define G1_REG_STRM_START_BIT(v) (((v) << 26) & GENMASK(31, 26))
+#define G1_REG_QSCALE_TYPE(v) ((v) ? BIT(24) : 0)
+#define G1_REG_CON_MV_E(v) ((v) ? BIT(4) : 0)
+#define G1_REG_INTRA_DC_PREC(v) (((v) << 2) & GENMASK(3, 2))
+#define G1_REG_INTRA_VLC_TAB(v) ((v) ? BIT(1) : 0)
+#define G1_REG_FRAME_PRED_DCT(v) ((v) ? BIT(0) : 0)
+
+#define G1_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25))
+#define G1_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0))
+
+#define G1_REG_ALT_SCAN_FLAG_E(v) ((v) ? BIT(19) : 0)
+#define G1_REG_FCODE_FWD_HOR(v) (((v) << 15) & GENMASK(18, 15))
+#define G1_REG_FCODE_FWD_VER(v) (((v) << 11) & GENMASK(14, 11))
+#define G1_REG_FCODE_BWD_HOR(v) (((v) << 7) & GENMASK(10, 7))
+#define G1_REG_FCODE_BWD_VER(v) (((v) << 3) & GENMASK(6, 3))
+#define G1_REG_MV_ACCURACY_FWD(v) ((v) ? BIT(2) : 0)
+#define G1_REG_MV_ACCURACY_BWD(v) ((v) ? BIT(1) : 0)
+
+#define G1_REG_STARTMB_X(v) (((v) << 23) & GENMASK(31, 23))
+#define G1_REG_STARTMB_Y(v) (((v) << 15) & GENMASK(22, 15))
+
+#define G1_REG_APF_THRESHOLD(v) (((v) << 0) & GENMASK(13, 0))
+
+static void
+hantro_g1_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx)
+{
+ struct v4l2_ctrl_mpeg2_quantisation *q;
+
+ q = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_MPEG2_QUANTISATION);
+ hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q);
+ vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, G1_REG_QTABLE_BASE);
+}
+
+static void
+hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
+ struct vb2_buffer *src_buf,
+ struct vb2_buffer *dst_buf,
+ const struct v4l2_ctrl_mpeg2_sequence *seq,
+ const struct v4l2_ctrl_mpeg2_picture *pic)
+{
+ dma_addr_t forward_addr = 0, backward_addr = 0;
+ dma_addr_t current_addr, addr;
+
+ switch (pic->picture_coding_type) {
+ case V4L2_MPEG2_PIC_CODING_TYPE_B:
+ backward_addr = hantro_get_ref(ctx, pic->backward_ref_ts);
+ fallthrough;
+ case V4L2_MPEG2_PIC_CODING_TYPE_P:
+ forward_addr = hantro_get_ref(ctx, pic->forward_ref_ts);
+ }
+
+ /* Source bitstream buffer */
+ addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ vdpu_write_relaxed(vpu, addr, G1_REG_RLC_VLC_BASE);
+
+ /* Destination frame buffer */
+ addr = hantro_get_dec_buf_addr(ctx, dst_buf);
+ current_addr = addr;
+
+ if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD)
+ addr += ALIGN(ctx->dst_fmt.width, 16);
+ vdpu_write_relaxed(vpu, addr, G1_REG_DEC_OUT_BASE);
+
+ if (!forward_addr)
+ forward_addr = current_addr;
+ if (!backward_addr)
+ backward_addr = current_addr;
+
+ /* Set forward ref frame (top/bottom field) */
+ if (pic->picture_structure == V4L2_MPEG2_PIC_FRAME ||
+ pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B ||
+ (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD &&
+ pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST) ||
+ (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD &&
+ !(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST))) {
+ vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER0_BASE);
+ vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER1_BASE);
+ } else if (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) {
+ vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER0_BASE);
+ vdpu_write_relaxed(vpu, current_addr, G1_REG_REFER1_BASE);
+ } else if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) {
+ vdpu_write_relaxed(vpu, current_addr, G1_REG_REFER0_BASE);
+ vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER1_BASE);
+ }
+
+ /* Set backward ref frame (top/bottom field) */
+ vdpu_write_relaxed(vpu, backward_addr, G1_REG_REFER2_BASE);
+ vdpu_write_relaxed(vpu, backward_addr, G1_REG_REFER3_BASE);
+}
+
+int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *src_buf, *dst_buf;
+ const struct v4l2_ctrl_mpeg2_sequence *seq;
+ const struct v4l2_ctrl_mpeg2_picture *pic;
+ u32 reg;
+
+ src_buf = hantro_get_src_buf(ctx);
+ dst_buf = hantro_get_dst_buf(ctx);
+
+ /* Apply request controls if any */
+ hantro_start_prepare_run(ctx);
+
+ seq = hantro_get_ctrl(ctx,
+ V4L2_CID_STATELESS_MPEG2_SEQUENCE);
+ pic = hantro_get_ctrl(ctx,
+ V4L2_CID_STATELESS_MPEG2_PICTURE);
+
+ reg = G1_REG_DEC_AXI_RD_ID(0) |
+ G1_REG_DEC_TIMEOUT_E(1) |
+ G1_REG_DEC_STRSWAP32_E(1) |
+ G1_REG_DEC_STRENDIAN_E(1) |
+ G1_REG_DEC_INSWAP32_E(1) |
+ G1_REG_DEC_OUTSWAP32_E(1) |
+ G1_REG_DEC_DATA_DISC_E(0) |
+ G1_REG_DEC_LATENCY(0) |
+ G1_REG_DEC_CLK_GATE_E(1) |
+ G1_REG_DEC_IN_ENDIAN(1) |
+ G1_REG_DEC_OUT_ENDIAN(1) |
+ G1_REG_DEC_ADV_PRE_DIS(0) |
+ G1_REG_DEC_SCMD_DIS(0) |
+ G1_REG_DEC_MAX_BURST(16);
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(2));
+
+ reg = G1_REG_DEC_MODE(5) |
+ G1_REG_RLC_MODE_E(0) |
+ G1_REG_PIC_INTERLACE_E(!(seq->flags & V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE)) |
+ G1_REG_PIC_FIELDMODE_E(pic->picture_structure != V4L2_MPEG2_PIC_FRAME) |
+ G1_REG_PIC_B_E(pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B) |
+ G1_REG_PIC_INTER_E(pic->picture_coding_type != V4L2_MPEG2_PIC_CODING_TYPE_I) |
+ G1_REG_PIC_TOPFIELD_E(pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) |
+ G1_REG_FWD_INTERLACE_E(0) |
+ G1_REG_FILTERING_DIS(1) |
+ G1_REG_WRITE_MVS_E(0) |
+ G1_REG_DEC_AXI_WR_ID(0);
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(3));
+
+ reg = G1_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) |
+ G1_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) |
+ G1_REG_ALT_SCAN_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+ G1_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST);
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(4));
+
+ reg = G1_REG_STRM_START_BIT(0) |
+ G1_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) |
+ G1_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) |
+ G1_REG_INTRA_DC_PREC(pic->intra_dc_precision) |
+ G1_REG_INTRA_VLC_TAB(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC) |
+ G1_REG_FRAME_PRED_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT);
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(5));
+
+ reg = G1_REG_INIT_QP(1) |
+ G1_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(6));
+
+ reg = G1_REG_ALT_SCAN_FLAG_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+ G1_REG_FCODE_FWD_HOR(pic->f_code[0][0]) |
+ G1_REG_FCODE_FWD_VER(pic->f_code[0][1]) |
+ G1_REG_FCODE_BWD_HOR(pic->f_code[1][0]) |
+ G1_REG_FCODE_BWD_VER(pic->f_code[1][1]) |
+ G1_REG_MV_ACCURACY_FWD(1) |
+ G1_REG_MV_ACCURACY_BWD(1);
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(18));
+
+ reg = G1_REG_STARTMB_X(0) |
+ G1_REG_STARTMB_Y(0);
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(48));
+
+ reg = G1_REG_APF_THRESHOLD(8);
+ vdpu_write_relaxed(vpu, reg, G1_SWREG(55));
+
+ hantro_g1_mpeg2_dec_set_quantisation(vpu, ctx);
+ hantro_g1_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
+ &dst_buf->vb2_buf,
+ seq, pic);
+
+ hantro_end_prepare_run(ctx);
+
+ vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_g1_regs.h b/drivers/media/platform/verisilicon/hantro_g1_regs.h
new file mode 100644
index 000000000..c623b3b0b
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g1_regs.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright 2018 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ */
+
+#ifndef HANTRO_G1_REGS_H_
+#define HANTRO_G1_REGS_H_
+
+#define G1_SWREG(nr) ((nr) * 4)
+
+/* Decoder registers. */
+#define G1_REG_INTERRUPT 0x004
+#define G1_REG_INTERRUPT_DEC_PIC_INF BIT(24)
+#define G1_REG_INTERRUPT_DEC_TIMEOUT BIT(18)
+#define G1_REG_INTERRUPT_DEC_SLICE_INT BIT(17)
+#define G1_REG_INTERRUPT_DEC_ERROR_INT BIT(16)
+#define G1_REG_INTERRUPT_DEC_ASO_INT BIT(15)
+#define G1_REG_INTERRUPT_DEC_BUFFER_INT BIT(14)
+#define G1_REG_INTERRUPT_DEC_BUS_INT BIT(13)
+#define G1_REG_INTERRUPT_DEC_RDY_INT BIT(12)
+#define G1_REG_INTERRUPT_DEC_IRQ BIT(8)
+#define G1_REG_INTERRUPT_DEC_IRQ_DIS BIT(4)
+#define G1_REG_INTERRUPT_DEC_E BIT(0)
+#define G1_REG_CONFIG 0x008
+#define G1_REG_CONFIG_DEC_AXI_RD_ID(x) (((x) & 0xff) << 24)
+#define G1_REG_CONFIG_DEC_TIMEOUT_E BIT(23)
+#define G1_REG_CONFIG_DEC_STRSWAP32_E BIT(22)
+#define G1_REG_CONFIG_DEC_STRENDIAN_E BIT(21)
+#define G1_REG_CONFIG_DEC_INSWAP32_E BIT(20)
+#define G1_REG_CONFIG_DEC_OUTSWAP32_E BIT(19)
+#define G1_REG_CONFIG_DEC_DATA_DISC_E BIT(18)
+#define G1_REG_CONFIG_TILED_MODE_MSB BIT(17)
+#define G1_REG_CONFIG_DEC_OUT_TILED_E BIT(17)
+#define G1_REG_CONFIG_DEC_LATENCY(x) (((x) & 0x3f) << 11)
+#define G1_REG_CONFIG_DEC_CLK_GATE_E BIT(10)
+#define G1_REG_CONFIG_DEC_IN_ENDIAN BIT(9)
+#define G1_REG_CONFIG_DEC_OUT_ENDIAN BIT(8)
+#define G1_REG_CONFIG_PRIORITY_MODE(x) (((x) & 0x7) << 5)
+#define G1_REG_CONFIG_TILED_MODE_LSB BIT(7)
+#define G1_REG_CONFIG_DEC_ADV_PRE_DIS BIT(6)
+#define G1_REG_CONFIG_DEC_SCMD_DIS BIT(5)
+#define G1_REG_CONFIG_DEC_MAX_BURST(x) (((x) & 0x1f) << 0)
+#define G1_REG_DEC_CTRL0 0x00c
+#define G1_REG_DEC_CTRL0_DEC_MODE(x) (((x) & 0xf) << 28)
+#define G1_REG_DEC_CTRL0_RLC_MODE_E BIT(27)
+#define G1_REG_DEC_CTRL0_SKIP_MODE BIT(26)
+#define G1_REG_DEC_CTRL0_DIVX3_E BIT(25)
+#define G1_REG_DEC_CTRL0_PJPEG_E BIT(24)
+#define G1_REG_DEC_CTRL0_PIC_INTERLACE_E BIT(23)
+#define G1_REG_DEC_CTRL0_PIC_FIELDMODE_E BIT(22)
+#define G1_REG_DEC_CTRL0_PIC_B_E BIT(21)
+#define G1_REG_DEC_CTRL0_PIC_INTER_E BIT(20)
+#define G1_REG_DEC_CTRL0_PIC_TOPFIELD_E BIT(19)
+#define G1_REG_DEC_CTRL0_FWD_INTERLACE_E BIT(18)
+#define G1_REG_DEC_CTRL0_SORENSON_E BIT(17)
+#define G1_REG_DEC_CTRL0_REF_TOPFIELD_E BIT(16)
+#define G1_REG_DEC_CTRL0_DEC_OUT_DIS BIT(15)
+#define G1_REG_DEC_CTRL0_FILTERING_DIS BIT(14)
+#define G1_REG_DEC_CTRL0_WEBP_E BIT(13)
+#define G1_REG_DEC_CTRL0_MVC_E BIT(13)
+#define G1_REG_DEC_CTRL0_PIC_FIXED_QUANT BIT(13)
+#define G1_REG_DEC_CTRL0_WRITE_MVS_E BIT(12)
+#define G1_REG_DEC_CTRL0_REFTOPFIRST_E BIT(11)
+#define G1_REG_DEC_CTRL0_SEQ_MBAFF_E BIT(10)
+#define G1_REG_DEC_CTRL0_PICORD_COUNT_E BIT(9)
+#define G1_REG_DEC_CTRL0_DEC_AHB_HLOCK_E BIT(8)
+#define G1_REG_DEC_CTRL0_DEC_AXI_WR_ID(x) (((x) & 0xff) << 0)
+/* Setting AXI ID to 0xff to get auto generated ID to avoid possible conflicts */
+#define G1_REG_DEC_CTRL0_DEC_AXI_AUTO G1_REG_DEC_CTRL0_DEC_AXI_WR_ID(0xff)
+#define G1_REG_DEC_CTRL1 0x010
+#define G1_REG_DEC_CTRL1_PIC_MB_WIDTH(x) (((x) & 0x1ff) << 23)
+#define G1_REG_DEC_CTRL1_MB_WIDTH_OFF(x) (((x) & 0xf) << 19)
+#define G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(x) (((x) & 0xff) << 11)
+#define G1_REG_DEC_CTRL1_MB_HEIGHT_OFF(x) (((x) & 0xf) << 7)
+#define G1_REG_DEC_CTRL1_ALT_SCAN_E BIT(6)
+#define G1_REG_DEC_CTRL1_TOPFIELDFIRST_E BIT(5)
+#define G1_REG_DEC_CTRL1_REF_FRAMES(x) (((x) & 0x1f) << 0)
+#define G1_REG_DEC_CTRL1_PIC_MB_W_EXT(x) (((x) & 0x7) << 3)
+#define G1_REG_DEC_CTRL1_PIC_MB_H_EXT(x) (((x) & 0x7) << 0)
+#define G1_REG_DEC_CTRL1_PIC_REFER_FLAG BIT(0)
+#define G1_REG_DEC_CTRL2 0x014
+#define G1_REG_DEC_CTRL2_STRM_START_BIT(x) (((x) & 0x3f) << 26)
+#define G1_REG_DEC_CTRL2_SYNC_MARKER_E BIT(25)
+#define G1_REG_DEC_CTRL2_TYPE1_QUANT_E BIT(24)
+#define G1_REG_DEC_CTRL2_CH_QP_OFFSET(x) (((x) & 0x1f) << 19)
+#define G1_REG_DEC_CTRL2_CH_QP_OFFSET2(x) (((x) & 0x1f) << 14)
+#define G1_REG_DEC_CTRL2_FIELDPIC_FLAG_E BIT(0)
+#define G1_REG_DEC_CTRL2_INTRADC_VLC_THR(x) (((x) & 0x7) << 16)
+#define G1_REG_DEC_CTRL2_VOP_TIME_INCR(x) (((x) & 0xffff) << 0)
+#define G1_REG_DEC_CTRL2_DQ_PROFILE BIT(24)
+#define G1_REG_DEC_CTRL2_DQBI_LEVEL BIT(23)
+#define G1_REG_DEC_CTRL2_RANGE_RED_FRM_E BIT(22)
+#define G1_REG_DEC_CTRL2_FAST_UVMC_E BIT(20)
+#define G1_REG_DEC_CTRL2_TRANSDCTAB BIT(17)
+#define G1_REG_DEC_CTRL2_TRANSACFRM(x) (((x) & 0x3) << 15)
+#define G1_REG_DEC_CTRL2_TRANSACFRM2(x) (((x) & 0x3) << 13)
+#define G1_REG_DEC_CTRL2_MB_MODE_TAB(x) (((x) & 0x7) << 10)
+#define G1_REG_DEC_CTRL2_MVTAB(x) (((x) & 0x7) << 7)
+#define G1_REG_DEC_CTRL2_CBPTAB(x) (((x) & 0x7) << 4)
+#define G1_REG_DEC_CTRL2_2MV_BLK_PAT_TAB(x) (((x) & 0x3) << 2)
+#define G1_REG_DEC_CTRL2_4MV_BLK_PAT_TAB(x) (((x) & 0x3) << 0)
+#define G1_REG_DEC_CTRL2_QSCALE_TYPE BIT(24)
+#define G1_REG_DEC_CTRL2_CON_MV_E BIT(4)
+#define G1_REG_DEC_CTRL2_INTRA_DC_PREC(x) (((x) & 0x3) << 2)
+#define G1_REG_DEC_CTRL2_INTRA_VLC_TAB BIT(1)
+#define G1_REG_DEC_CTRL2_FRAME_PRED_DCT BIT(0)
+#define G1_REG_DEC_CTRL2_JPEG_QTABLES(x) (((x) & 0x3) << 11)
+#define G1_REG_DEC_CTRL2_JPEG_MODE(x) (((x) & 0x7) << 8)
+#define G1_REG_DEC_CTRL2_JPEG_FILRIGHT_E BIT(7)
+#define G1_REG_DEC_CTRL2_JPEG_STREAM_ALL BIT(6)
+#define G1_REG_DEC_CTRL2_CR_AC_VLCTABLE BIT(5)
+#define G1_REG_DEC_CTRL2_CB_AC_VLCTABLE BIT(4)
+#define G1_REG_DEC_CTRL2_CR_DC_VLCTABLE BIT(3)
+#define G1_REG_DEC_CTRL2_CB_DC_VLCTABLE BIT(2)
+#define G1_REG_DEC_CTRL2_CR_DC_VLCTABLE3 BIT(1)
+#define G1_REG_DEC_CTRL2_CB_DC_VLCTABLE3 BIT(0)
+#define G1_REG_DEC_CTRL2_STRM1_START_BIT(x) (((x) & 0x3f) << 18)
+#define G1_REG_DEC_CTRL2_HUFFMAN_E BIT(17)
+#define G1_REG_DEC_CTRL2_MULTISTREAM_E BIT(16)
+#define G1_REG_DEC_CTRL2_BOOLEAN_VALUE(x) (((x) & 0xff) << 8)
+#define G1_REG_DEC_CTRL2_BOOLEAN_RANGE(x) (((x) & 0xff) << 0)
+#define G1_REG_DEC_CTRL2_ALPHA_OFFSET(x) (((x) & 0x1f) << 5)
+#define G1_REG_DEC_CTRL2_BETA_OFFSET(x) (((x) & 0x1f) << 0)
+#define G1_REG_DEC_CTRL3 0x018
+#define G1_REG_DEC_CTRL3_START_CODE_E BIT(31)
+#define G1_REG_DEC_CTRL3_INIT_QP(x) (((x) & 0x3f) << 25)
+#define G1_REG_DEC_CTRL3_CH_8PIX_ILEAV_E BIT(24)
+#define G1_REG_DEC_CTRL3_STREAM_LEN_EXT(x) (((x) & 0xff) << 24)
+#define G1_REG_DEC_CTRL3_STREAM_LEN(x) (((x) & 0xffffff) << 0)
+#define G1_REG_DEC_CTRL4 0x01c
+#define G1_REG_DEC_CTRL4_CABAC_E BIT(31)
+#define G1_REG_DEC_CTRL4_BLACKWHITE_E BIT(30)
+#define G1_REG_DEC_CTRL4_DIR_8X8_INFER_E BIT(29)
+#define G1_REG_DEC_CTRL4_WEIGHT_PRED_E BIT(28)
+#define G1_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(x) (((x) & 0x3) << 26)
+#define G1_REG_DEC_CTRL4_AVS_H264_H_EXT BIT(25)
+#define G1_REG_DEC_CTRL4_FRAMENUM_LEN(x) (((x) & 0x1f) << 16)
+#define G1_REG_DEC_CTRL4_FRAMENUM(x) (((x) & 0xffff) << 0)
+#define G1_REG_DEC_CTRL4_BITPLANE0_E BIT(31)
+#define G1_REG_DEC_CTRL4_BITPLANE1_E BIT(30)
+#define G1_REG_DEC_CTRL4_BITPLANE2_E BIT(29)
+#define G1_REG_DEC_CTRL4_ALT_PQUANT(x) (((x) & 0x1f) << 24)
+#define G1_REG_DEC_CTRL4_DQ_EDGES(x) (((x) & 0xf) << 20)
+#define G1_REG_DEC_CTRL4_TTMBF BIT(19)
+#define G1_REG_DEC_CTRL4_PQINDEX(x) (((x) & 0x1f) << 14)
+#define G1_REG_DEC_CTRL4_VC1_HEIGHT_EXT BIT(13)
+#define G1_REG_DEC_CTRL4_BILIN_MC_E BIT(12)
+#define G1_REG_DEC_CTRL4_UNIQP_E BIT(11)
+#define G1_REG_DEC_CTRL4_HALFQP_E BIT(10)
+#define G1_REG_DEC_CTRL4_TTFRM(x) (((x) & 0x3) << 8)
+#define G1_REG_DEC_CTRL4_2ND_BYTE_EMUL_E BIT(7)
+#define G1_REG_DEC_CTRL4_DQUANT_E BIT(6)
+#define G1_REG_DEC_CTRL4_VC1_ADV_E BIT(5)
+#define G1_REG_DEC_CTRL4_PJPEG_FILDOWN_E BIT(26)
+#define G1_REG_DEC_CTRL4_PJPEG_WDIV8 BIT(25)
+#define G1_REG_DEC_CTRL4_PJPEG_HDIV8 BIT(24)
+#define G1_REG_DEC_CTRL4_PJPEG_AH(x) (((x) & 0xf) << 20)
+#define G1_REG_DEC_CTRL4_PJPEG_AL(x) (((x) & 0xf) << 16)
+#define G1_REG_DEC_CTRL4_PJPEG_SS(x) (((x) & 0xff) << 8)
+#define G1_REG_DEC_CTRL4_PJPEG_SE(x) (((x) & 0xff) << 0)
+#define G1_REG_DEC_CTRL4_DCT1_START_BIT(x) (((x) & 0x3f) << 26)
+#define G1_REG_DEC_CTRL4_DCT2_START_BIT(x) (((x) & 0x3f) << 20)
+#define G1_REG_DEC_CTRL4_CH_MV_RES BIT(13)
+#define G1_REG_DEC_CTRL4_INIT_DC_MATCH0(x) (((x) & 0x7) << 9)
+#define G1_REG_DEC_CTRL4_INIT_DC_MATCH1(x) (((x) & 0x7) << 6)
+#define G1_REG_DEC_CTRL4_VP7_VERSION BIT(5)
+#define G1_REG_DEC_CTRL5 0x020
+#define G1_REG_DEC_CTRL5_CONST_INTRA_E BIT(31)
+#define G1_REG_DEC_CTRL5_FILT_CTRL_PRES BIT(30)
+#define G1_REG_DEC_CTRL5_RDPIC_CNT_PRES BIT(29)
+#define G1_REG_DEC_CTRL5_8X8TRANS_FLAG_E BIT(28)
+#define G1_REG_DEC_CTRL5_REFPIC_MK_LEN(x) (((x) & 0x7ff) << 17)
+#define G1_REG_DEC_CTRL5_IDR_PIC_E BIT(16)
+#define G1_REG_DEC_CTRL5_IDR_PIC_ID(x) (((x) & 0xffff) << 0)
+#define G1_REG_DEC_CTRL5_MV_SCALEFACTOR(x) (((x) & 0xff) << 24)
+#define G1_REG_DEC_CTRL5_REF_DIST_FWD(x) (((x) & 0x1f) << 19)
+#define G1_REG_DEC_CTRL5_REF_DIST_BWD(x) (((x) & 0x1f) << 14)
+#define G1_REG_DEC_CTRL5_LOOP_FILT_LIMIT(x) (((x) & 0xf) << 14)
+#define G1_REG_DEC_CTRL5_VARIANCE_TEST_E BIT(13)
+#define G1_REG_DEC_CTRL5_MV_THRESHOLD(x) (((x) & 0x7) << 10)
+#define G1_REG_DEC_CTRL5_VAR_THRESHOLD(x) (((x) & 0x3ff) << 0)
+#define G1_REG_DEC_CTRL5_DIVX_IDCT_E BIT(8)
+#define G1_REG_DEC_CTRL5_DIVX3_SLICE_SIZE(x) (((x) & 0xff) << 0)
+#define G1_REG_DEC_CTRL5_PJPEG_REST_FREQ(x) (((x) & 0xffff) << 0)
+#define G1_REG_DEC_CTRL5_RV_PROFILE(x) (((x) & 0x3) << 30)
+#define G1_REG_DEC_CTRL5_RV_OSV_QUANT(x) (((x) & 0x3) << 28)
+#define G1_REG_DEC_CTRL5_RV_FWD_SCALE(x) (((x) & 0x3fff) << 14)
+#define G1_REG_DEC_CTRL5_RV_BWD_SCALE(x) (((x) & 0x3fff) << 0)
+#define G1_REG_DEC_CTRL5_INIT_DC_COMP0(x) (((x) & 0xffff) << 16)
+#define G1_REG_DEC_CTRL5_INIT_DC_COMP1(x) (((x) & 0xffff) << 0)
+#define G1_REG_DEC_CTRL6 0x024
+#define G1_REG_DEC_CTRL6_PPS_ID(x) (((x) & 0xff) << 24)
+#define G1_REG_DEC_CTRL6_REFIDX1_ACTIVE(x) (((x) & 0x1f) << 19)
+#define G1_REG_DEC_CTRL6_REFIDX0_ACTIVE(x) (((x) & 0x1f) << 14)
+#define G1_REG_DEC_CTRL6_POC_LENGTH(x) (((x) & 0xff) << 0)
+#define G1_REG_DEC_CTRL6_ICOMP0_E BIT(24)
+#define G1_REG_DEC_CTRL6_ISCALE0(x) (((x) & 0xff) << 16)
+#define G1_REG_DEC_CTRL6_ISHIFT0(x) (((x) & 0xffff) << 0)
+#define G1_REG_DEC_CTRL6_STREAM1_LEN(x) (((x) & 0xffffff) << 0)
+#define G1_REG_DEC_CTRL6_PIC_SLICE_AM(x) (((x) & 0x1fff) << 0)
+#define G1_REG_DEC_CTRL6_COEFFS_PART_AM(x) (((x) & 0xf) << 24)
+#define G1_REG_FWD_PIC(i) (0x028 + ((i) * 0x4))
+#define G1_REG_FWD_PIC_PINIT_RLIST_F5(x) (((x) & 0x1f) << 25)
+#define G1_REG_FWD_PIC_PINIT_RLIST_F4(x) (((x) & 0x1f) << 20)
+#define G1_REG_FWD_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 15)
+#define G1_REG_FWD_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 10)
+#define G1_REG_FWD_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 5)
+#define G1_REG_FWD_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 0)
+#define G1_REG_FWD_PIC1_ICOMP1_E BIT(24)
+#define G1_REG_FWD_PIC1_ISCALE1(x) (((x) & 0xff) << 16)
+#define G1_REG_FWD_PIC1_ISHIFT1(x) (((x) & 0xffff) << 0)
+#define G1_REG_FWD_PIC1_SEGMENT_BASE(x) ((x) << 0)
+#define G1_REG_FWD_PIC1_SEGMENT_UPD_E BIT(1)
+#define G1_REG_FWD_PIC1_SEGMENT_E BIT(0)
+#define G1_REG_DEC_CTRL7 0x02c
+#define G1_REG_DEC_CTRL7_PINIT_RLIST_F15(x) (((x) & 0x1f) << 25)
+#define G1_REG_DEC_CTRL7_PINIT_RLIST_F14(x) (((x) & 0x1f) << 20)
+#define G1_REG_DEC_CTRL7_PINIT_RLIST_F13(x) (((x) & 0x1f) << 15)
+#define G1_REG_DEC_CTRL7_PINIT_RLIST_F12(x) (((x) & 0x1f) << 10)
+#define G1_REG_DEC_CTRL7_PINIT_RLIST_F11(x) (((x) & 0x1f) << 5)
+#define G1_REG_DEC_CTRL7_PINIT_RLIST_F10(x) (((x) & 0x1f) << 0)
+#define G1_REG_DEC_CTRL7_ICOMP2_E BIT(24)
+#define G1_REG_DEC_CTRL7_ISCALE2(x) (((x) & 0xff) << 16)
+#define G1_REG_DEC_CTRL7_ISHIFT2(x) (((x) & 0xffff) << 0)
+#define G1_REG_DEC_CTRL7_DCT3_START_BIT(x) (((x) & 0x3f) << 24)
+#define G1_REG_DEC_CTRL7_DCT4_START_BIT(x) (((x) & 0x3f) << 18)
+#define G1_REG_DEC_CTRL7_DCT5_START_BIT(x) (((x) & 0x3f) << 12)
+#define G1_REG_DEC_CTRL7_DCT6_START_BIT(x) (((x) & 0x3f) << 6)
+#define G1_REG_DEC_CTRL7_DCT7_START_BIT(x) (((x) & 0x3f) << 0)
+#define G1_REG_ADDR_STR 0x030
+#define G1_REG_ADDR_DST 0x034
+#define G1_REG_ADDR_REF(i) (0x038 + ((i) * 0x4))
+#define G1_REG_ADDR_REF_FIELD_E BIT(1)
+#define G1_REG_ADDR_REF_TOPC_E BIT(0)
+#define G1_REG_REF_PIC(i) (0x078 + ((i) * 0x4))
+#define G1_REG_REF_PIC_FILT_TYPE_E BIT(31)
+#define G1_REG_REF_PIC_FILT_SHARPNESS(x) (((x) & 0x7) << 28)
+#define G1_REG_REF_PIC_MB_ADJ_0(x) (((x) & 0x7f) << 21)
+#define G1_REG_REF_PIC_MB_ADJ_1(x) (((x) & 0x7f) << 14)
+#define G1_REG_REF_PIC_MB_ADJ_2(x) (((x) & 0x7f) << 7)
+#define G1_REG_REF_PIC_MB_ADJ_3(x) (((x) & 0x7f) << 0)
+#define G1_REG_REF_PIC_REFER1_NBR(x) (((x) & 0xffff) << 16)
+#define G1_REG_REF_PIC_REFER0_NBR(x) (((x) & 0xffff) << 0)
+#define G1_REG_REF_PIC_LF_LEVEL_0(x) (((x) & 0x3f) << 18)
+#define G1_REG_REF_PIC_LF_LEVEL_1(x) (((x) & 0x3f) << 12)
+#define G1_REG_REF_PIC_LF_LEVEL_2(x) (((x) & 0x3f) << 6)
+#define G1_REG_REF_PIC_LF_LEVEL_3(x) (((x) & 0x3f) << 0)
+#define G1_REG_REF_PIC_QUANT_DELTA_0(x) (((x) & 0x1f) << 27)
+#define G1_REG_REF_PIC_QUANT_DELTA_1(x) (((x) & 0x1f) << 22)
+#define G1_REG_REF_PIC_QUANT_0(x) (((x) & 0x7ff) << 11)
+#define G1_REG_REF_PIC_QUANT_1(x) (((x) & 0x7ff) << 0)
+#define G1_REG_LT_REF 0x098
+#define G1_REG_VALID_REF 0x09c
+#define G1_REG_ADDR_QTABLE 0x0a0
+#define G1_REG_ADDR_DIR_MV 0x0a4
+#define G1_REG_BD_REF_PIC(i) (0x0a8 + ((i) * 0x4))
+#define G1_REG_BD_REF_PIC_BINIT_RLIST_B2(x) (((x) & 0x1f) << 25)
+#define G1_REG_BD_REF_PIC_BINIT_RLIST_F2(x) (((x) & 0x1f) << 20)
+#define G1_REG_BD_REF_PIC_BINIT_RLIST_B1(x) (((x) & 0x1f) << 15)
+#define G1_REG_BD_REF_PIC_BINIT_RLIST_F1(x) (((x) & 0x1f) << 10)
+#define G1_REG_BD_REF_PIC_BINIT_RLIST_B0(x) (((x) & 0x1f) << 5)
+#define G1_REG_BD_REF_PIC_BINIT_RLIST_F0(x) (((x) & 0x1f) << 0)
+#define G1_REG_BD_REF_PIC_PRED_TAP_2_M1(x) (((x) & 0x3) << 10)
+#define G1_REG_BD_REF_PIC_PRED_TAP_2_4(x) (((x) & 0x3) << 8)
+#define G1_REG_BD_REF_PIC_PRED_TAP_4_M1(x) (((x) & 0x3) << 6)
+#define G1_REG_BD_REF_PIC_PRED_TAP_4_4(x) (((x) & 0x3) << 4)
+#define G1_REG_BD_REF_PIC_PRED_TAP_6_M1(x) (((x) & 0x3) << 2)
+#define G1_REG_BD_REF_PIC_PRED_TAP_6_4(x) (((x) & 0x3) << 0)
+#define G1_REG_BD_REF_PIC_QUANT_DELTA_2(x) (((x) & 0x1f) << 27)
+#define G1_REG_BD_REF_PIC_QUANT_DELTA_3(x) (((x) & 0x1f) << 22)
+#define G1_REG_BD_REF_PIC_QUANT_2(x) (((x) & 0x7ff) << 11)
+#define G1_REG_BD_REF_PIC_QUANT_3(x) (((x) & 0x7ff) << 0)
+#define G1_REG_BD_P_REF_PIC 0x0bc
+#define G1_REG_BD_P_REF_PIC_QUANT_DELTA_4(x) (((x) & 0x1f) << 27)
+#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 25)
+#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 20)
+#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 15)
+#define G1_REG_BD_P_REF_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 10)
+#define G1_REG_BD_P_REF_PIC_BINIT_RLIST_B15(x) (((x) & 0x1f) << 5)
+#define G1_REG_BD_P_REF_PIC_BINIT_RLIST_F15(x) (((x) & 0x1f) << 0)
+#define G1_REG_ERR_CONC 0x0c0
+#define G1_REG_ERR_CONC_STARTMB_X(x) (((x) & 0x1ff) << 23)
+#define G1_REG_ERR_CONC_STARTMB_Y(x) (((x) & 0xff) << 15)
+#define G1_REG_PRED_FLT 0x0c4
+#define G1_REG_PRED_FLT_PRED_BC_TAP_0_0(x) (((x) & 0x3ff) << 22)
+#define G1_REG_PRED_FLT_PRED_BC_TAP_0_1(x) (((x) & 0x3ff) << 12)
+#define G1_REG_PRED_FLT_PRED_BC_TAP_0_2(x) (((x) & 0x3ff) << 2)
+#define G1_REG_REF_BUF_CTRL 0x0cc
+#define G1_REG_REF_BUF_CTRL_REFBU_E BIT(31)
+#define G1_REG_REF_BUF_CTRL_REFBU_THR(x) (((x) & 0xfff) << 19)
+#define G1_REG_REF_BUF_CTRL_REFBU_PICID(x) (((x) & 0x1f) << 14)
+#define G1_REG_REF_BUF_CTRL_REFBU_EVAL_E BIT(13)
+#define G1_REG_REF_BUF_CTRL_REFBU_FPARMOD_E BIT(12)
+#define G1_REG_REF_BUF_CTRL_REFBU_Y_OFFSET(x) (((x) & 0x1ff) << 0)
+#define G1_REG_REF_BUF_CTRL2 0x0dc
+#define G1_REG_REF_BUF_CTRL2_REFBU2_BUF_E BIT(31)
+#define G1_REG_REF_BUF_CTRL2_REFBU2_THR(x) (((x) & 0xfff) << 19)
+#define G1_REG_REF_BUF_CTRL2_REFBU2_PICID(x) (((x) & 0x1f) << 14)
+#define G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(x) (((x) & 0x3fff) << 0)
+#define G1_REG_SOFT_RESET 0x194
+
+/* Post-processor registers. */
+#define G1_REG_PP_INTERRUPT G1_SWREG(60)
+#define G1_REG_PP_READY_IRQ BIT(12)
+#define G1_REG_PP_IRQ BIT(8)
+#define G1_REG_PP_IRQ_DIS BIT(4)
+#define G1_REG_PP_PIPELINE_EN BIT(1)
+#define G1_REG_PP_EXTERNAL_TRIGGER BIT(0)
+#define G1_REG_PP_DEV_CONFIG G1_SWREG(61)
+#define G1_REG_PP_AXI_RD_ID(v) (((v) << 24) & GENMASK(31, 24))
+#define G1_REG_PP_AXI_WR_ID(v) (((v) << 16) & GENMASK(23, 16))
+#define G1_REG_PP_INSWAP32_E(v) ((v) ? BIT(10) : 0)
+#define G1_REG_PP_DATA_DISC_E(v) ((v) ? BIT(9) : 0)
+#define G1_REG_PP_CLK_GATE_E(v) ((v) ? BIT(8) : 0)
+#define G1_REG_PP_IN_ENDIAN(v) ((v) ? BIT(7) : 0)
+#define G1_REG_PP_OUT_ENDIAN(v) ((v) ? BIT(6) : 0)
+#define G1_REG_PP_OUTSWAP32_E(v) ((v) ? BIT(5) : 0)
+#define G1_REG_PP_MAX_BURST(v) (((v) << 0) & GENMASK(4, 0))
+#define G1_REG_PP_IN_LUMA_BASE G1_SWREG(63)
+#define G1_REG_PP_IN_CB_BASE G1_SWREG(64)
+#define G1_REG_PP_IN_CR_BASE G1_SWREG(65)
+#define G1_REG_PP_OUT_LUMA_BASE G1_SWREG(66)
+#define G1_REG_PP_OUT_CHROMA_BASE G1_SWREG(67)
+#define G1_REG_PP_CONTRAST_ADJUST G1_SWREG(68)
+#define G1_REG_PP_COLOR_CONVERSION G1_SWREG(69)
+#define G1_REG_PP_COLOR_CONVERSION0 G1_SWREG(70)
+#define G1_REG_PP_COLOR_CONVERSION1 G1_SWREG(71)
+#define G1_REG_PP_INPUT_SIZE G1_SWREG(72)
+#define G1_REG_PP_INPUT_SIZE_HEIGHT(v) (((v) << 9) & GENMASK(16, 9))
+#define G1_REG_PP_INPUT_SIZE_WIDTH(v) (((v) << 0) & GENMASK(8, 0))
+#define G1_REG_PP_SCALING0 G1_SWREG(79)
+#define G1_REG_PP_PADD_R(v) (((v) << 23) & GENMASK(27, 23))
+#define G1_REG_PP_PADD_G(v) (((v) << 18) & GENMASK(22, 18))
+#define G1_REG_PP_RANGEMAP_Y(v) ((v) ? BIT(31) : 0)
+#define G1_REG_PP_RANGEMAP_C(v) ((v) ? BIT(30) : 0)
+#define G1_REG_PP_YCBCR_RANGE(v) ((v) ? BIT(29) : 0)
+#define G1_REG_PP_RGB_16(v) ((v) ? BIT(28) : 0)
+#define G1_REG_PP_SCALING1 G1_SWREG(80)
+#define G1_REG_PP_PADD_B(v) (((v) << 18) & GENMASK(22, 18))
+#define G1_REG_PP_MASK_R G1_SWREG(82)
+#define G1_REG_PP_MASK_G G1_SWREG(83)
+#define G1_REG_PP_MASK_B G1_SWREG(84)
+#define G1_REG_PP_CONTROL G1_SWREG(85)
+#define G1_REG_PP_CONTROL_IN_FMT(v) (((v) << 29) & GENMASK(31, 29))
+#define G1_REG_PP_CONTROL_OUT_FMT(v) (((v) << 26) & GENMASK(28, 26))
+#define G1_REG_PP_CONTROL_OUT_HEIGHT(v) (((v) << 15) & GENMASK(25, 15))
+#define G1_REG_PP_CONTROL_OUT_WIDTH(v) (((v) << 4) & GENMASK(14, 4))
+#define G1_REG_PP_MASK1_ORIG_WIDTH G1_SWREG(88)
+#define G1_REG_PP_ORIG_WIDTH(v) (((v) << 23) & GENMASK(31, 23))
+#define G1_REG_PP_DISPLAY_WIDTH G1_SWREG(92)
+#define G1_REG_PP_FUSE G1_SWREG(99)
+
+#endif /* HANTRO_G1_REGS_H_ */
diff --git a/drivers/media/platform/verisilicon/hantro_g1_vp8_dec.c b/drivers/media/platform/verisilicon/hantro_g1_vp8_dec.c
new file mode 100644
index 000000000..851eb67f1
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g1_vp8_dec.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VP8 codec driver
+ *
+ * Copyright (C) 2019 Rockchip Electronics Co., Ltd.
+ * ZhiChao Yu <zhichao.yu@rock-chips.com>
+ *
+ * Copyright (C) 2019 Google, Inc.
+ * Tomasz Figa <tfiga@chromium.org>
+ */
+
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro_hw.h"
+#include "hantro.h"
+#include "hantro_g1_regs.h"
+
+/* DCT partition base address regs */
+static const struct hantro_reg vp8_dec_dct_base[8] = {
+ { G1_REG_ADDR_STR, 0, 0xffffffff },
+ { G1_REG_ADDR_REF(8), 0, 0xffffffff },
+ { G1_REG_ADDR_REF(9), 0, 0xffffffff },
+ { G1_REG_ADDR_REF(10), 0, 0xffffffff },
+ { G1_REG_ADDR_REF(11), 0, 0xffffffff },
+ { G1_REG_ADDR_REF(12), 0, 0xffffffff },
+ { G1_REG_ADDR_REF(14), 0, 0xffffffff },
+ { G1_REG_ADDR_REF(15), 0, 0xffffffff },
+};
+
+/* Loop filter level regs */
+static const struct hantro_reg vp8_dec_lf_level[4] = {
+ { G1_REG_REF_PIC(2), 18, 0x3f },
+ { G1_REG_REF_PIC(2), 12, 0x3f },
+ { G1_REG_REF_PIC(2), 6, 0x3f },
+ { G1_REG_REF_PIC(2), 0, 0x3f },
+};
+
+/* Macroblock loop filter level adjustment regs */
+static const struct hantro_reg vp8_dec_mb_adj[4] = {
+ { G1_REG_REF_PIC(0), 21, 0x7f },
+ { G1_REG_REF_PIC(0), 14, 0x7f },
+ { G1_REG_REF_PIC(0), 7, 0x7f },
+ { G1_REG_REF_PIC(0), 0, 0x7f },
+};
+
+/* Reference frame adjustment regs */
+static const struct hantro_reg vp8_dec_ref_adj[4] = {
+ { G1_REG_REF_PIC(1), 21, 0x7f },
+ { G1_REG_REF_PIC(1), 14, 0x7f },
+ { G1_REG_REF_PIC(1), 7, 0x7f },
+ { G1_REG_REF_PIC(1), 0, 0x7f },
+};
+
+/* Quantizer */
+static const struct hantro_reg vp8_dec_quant[4] = {
+ { G1_REG_REF_PIC(3), 11, 0x7ff },
+ { G1_REG_REF_PIC(3), 0, 0x7ff },
+ { G1_REG_BD_REF_PIC(4), 11, 0x7ff },
+ { G1_REG_BD_REF_PIC(4), 0, 0x7ff },
+};
+
+/* Quantizer delta regs */
+static const struct hantro_reg vp8_dec_quant_delta[5] = {
+ { G1_REG_REF_PIC(3), 27, 0x1f },
+ { G1_REG_REF_PIC(3), 22, 0x1f },
+ { G1_REG_BD_REF_PIC(4), 27, 0x1f },
+ { G1_REG_BD_REF_PIC(4), 22, 0x1f },
+ { G1_REG_BD_P_REF_PIC, 27, 0x1f },
+};
+
+/* DCT partition start bits regs */
+static const struct hantro_reg vp8_dec_dct_start_bits[8] = {
+ { G1_REG_DEC_CTRL2, 26, 0x3f }, { G1_REG_DEC_CTRL4, 26, 0x3f },
+ { G1_REG_DEC_CTRL4, 20, 0x3f }, { G1_REG_DEC_CTRL7, 24, 0x3f },
+ { G1_REG_DEC_CTRL7, 18, 0x3f }, { G1_REG_DEC_CTRL7, 12, 0x3f },
+ { G1_REG_DEC_CTRL7, 6, 0x3f }, { G1_REG_DEC_CTRL7, 0, 0x3f },
+};
+
+/* Precision filter tap regs */
+static const struct hantro_reg vp8_dec_pred_bc_tap[8][4] = {
+ {
+ { G1_REG_PRED_FLT, 22, 0x3ff },
+ { G1_REG_PRED_FLT, 12, 0x3ff },
+ { G1_REG_PRED_FLT, 2, 0x3ff },
+ { G1_REG_REF_PIC(4), 22, 0x3ff },
+ },
+ {
+ { G1_REG_REF_PIC(4), 12, 0x3ff },
+ { G1_REG_REF_PIC(4), 2, 0x3ff },
+ { G1_REG_REF_PIC(5), 22, 0x3ff },
+ { G1_REG_REF_PIC(5), 12, 0x3ff },
+ },
+ {
+ { G1_REG_REF_PIC(5), 2, 0x3ff },
+ { G1_REG_REF_PIC(6), 22, 0x3ff },
+ { G1_REG_REF_PIC(6), 12, 0x3ff },
+ { G1_REG_REF_PIC(6), 2, 0x3ff },
+ },
+ {
+ { G1_REG_REF_PIC(7), 22, 0x3ff },
+ { G1_REG_REF_PIC(7), 12, 0x3ff },
+ { G1_REG_REF_PIC(7), 2, 0x3ff },
+ { G1_REG_LT_REF, 22, 0x3ff },
+ },
+ {
+ { G1_REG_LT_REF, 12, 0x3ff },
+ { G1_REG_LT_REF, 2, 0x3ff },
+ { G1_REG_VALID_REF, 22, 0x3ff },
+ { G1_REG_VALID_REF, 12, 0x3ff },
+ },
+ {
+ { G1_REG_VALID_REF, 2, 0x3ff },
+ { G1_REG_BD_REF_PIC(0), 22, 0x3ff },
+ { G1_REG_BD_REF_PIC(0), 12, 0x3ff },
+ { G1_REG_BD_REF_PIC(0), 2, 0x3ff },
+ },
+ {
+ { G1_REG_BD_REF_PIC(1), 22, 0x3ff },
+ { G1_REG_BD_REF_PIC(1), 12, 0x3ff },
+ { G1_REG_BD_REF_PIC(1), 2, 0x3ff },
+ { G1_REG_BD_REF_PIC(2), 22, 0x3ff },
+ },
+ {
+ { G1_REG_BD_REF_PIC(2), 12, 0x3ff },
+ { G1_REG_BD_REF_PIC(2), 2, 0x3ff },
+ { G1_REG_BD_REF_PIC(3), 22, 0x3ff },
+ { G1_REG_BD_REF_PIC(3), 12, 0x3ff },
+ },
+};
+
+/*
+ * Set loop filters
+ */
+static void cfg_lf(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ const struct v4l2_vp8_segment *seg = &hdr->segment;
+ const struct v4l2_vp8_loop_filter *lf = &hdr->lf;
+ struct hantro_dev *vpu = ctx->dev;
+ unsigned int i;
+ u32 reg;
+
+ if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) {
+ hantro_reg_write(vpu, &vp8_dec_lf_level[0], lf->level);
+ } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) {
+ for (i = 0; i < 4; i++) {
+ u32 lf_level = clamp(lf->level + seg->lf_update[i],
+ 0, 63);
+
+ hantro_reg_write(vpu, &vp8_dec_lf_level[i], lf_level);
+ }
+ } else {
+ for (i = 0; i < 4; i++)
+ hantro_reg_write(vpu, &vp8_dec_lf_level[i],
+ seg->lf_update[i]);
+ }
+
+ reg = G1_REG_REF_PIC_FILT_SHARPNESS(lf->sharpness_level);
+ if (lf->flags & V4L2_VP8_LF_FILTER_TYPE_SIMPLE)
+ reg |= G1_REG_REF_PIC_FILT_TYPE_E;
+ vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(0));
+
+ if (lf->flags & V4L2_VP8_LF_ADJ_ENABLE) {
+ for (i = 0; i < 4; i++) {
+ hantro_reg_write(vpu, &vp8_dec_mb_adj[i],
+ lf->mb_mode_delta[i]);
+ hantro_reg_write(vpu, &vp8_dec_ref_adj[i],
+ lf->ref_frm_delta[i]);
+ }
+ }
+}
+
+/*
+ * Set quantization parameters
+ */
+static void cfg_qp(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ const struct v4l2_vp8_quantization *q = &hdr->quant;
+ const struct v4l2_vp8_segment *seg = &hdr->segment;
+ struct hantro_dev *vpu = ctx->dev;
+ unsigned int i;
+
+ if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) {
+ hantro_reg_write(vpu, &vp8_dec_quant[0], q->y_ac_qi);
+ } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) {
+ for (i = 0; i < 4; i++) {
+ u32 quant = clamp(q->y_ac_qi + seg->quant_update[i],
+ 0, 127);
+
+ hantro_reg_write(vpu, &vp8_dec_quant[i], quant);
+ }
+ } else {
+ for (i = 0; i < 4; i++)
+ hantro_reg_write(vpu, &vp8_dec_quant[i],
+ seg->quant_update[i]);
+ }
+
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[0], q->y_dc_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[1], q->y2_dc_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[2], q->y2_ac_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[3], q->uv_dc_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[4], q->uv_ac_delta);
+}
+
+/*
+ * set control partition and DCT partition regs
+ *
+ * VP8 frame stream data layout:
+ *
+ * first_part_size parttion_sizes[0]
+ * ^ ^
+ * src_dma | |
+ * ^ +--------+------+ +-----+-----+
+ * | | control part | | |
+ * +--------+----------------+------------------+-----------+-----+-----------+
+ * | tag 3B | extra 7B | hdr | mb_data | DCT sz | DCT part0 | ... | DCT partn |
+ * +--------+-----------------------------------+-----------+-----+-----------+
+ * | | | |
+ * v +----+---+ v
+ * mb_start | src_dma_end
+ * v
+ * DCT size part
+ * (num_dct-1)*3B
+ * Note:
+ * 1. only key-frames have extra 7-bytes
+ * 2. all offsets are base on src_dma
+ * 3. number of DCT parts is 1, 2, 4 or 8
+ * 4. the addresses set to the VPU must be 64-bits aligned
+ */
+static void cfg_parts(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *vb2_src;
+ u32 first_part_offset = V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) ? 10 : 3;
+ u32 mb_size, mb_offset_bytes, mb_offset_bits, mb_start_bits;
+ u32 dct_size_part_size, dct_part_offset;
+ struct hantro_reg reg;
+ dma_addr_t src_dma;
+ u32 dct_part_total_len = 0;
+ u32 count = 0;
+ unsigned int i;
+
+ vb2_src = hantro_get_src_buf(ctx);
+ src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
+
+ /*
+ * Calculate control partition mb data info
+ * @first_part_header_bits: bits offset of mb data from first
+ * part start pos
+ * @mb_offset_bits: bits offset of mb data from src_dma
+ * base addr
+ * @mb_offset_byte: bytes offset of mb data from src_dma
+ * base addr
+ * @mb_start_bits: bits offset of mb data from mb data
+ * 64bits alignment addr
+ */
+ mb_offset_bits = first_part_offset * 8 +
+ hdr->first_part_header_bits + 8;
+ mb_offset_bytes = mb_offset_bits / 8;
+ mb_start_bits = mb_offset_bits -
+ (mb_offset_bytes & (~DEC_8190_ALIGN_MASK)) * 8;
+ mb_size = hdr->first_part_size -
+ (mb_offset_bytes - first_part_offset) +
+ (mb_offset_bytes & DEC_8190_ALIGN_MASK);
+
+ /* Macroblock data aligned base addr */
+ vdpu_write_relaxed(vpu, (mb_offset_bytes & (~DEC_8190_ALIGN_MASK))
+ + src_dma, G1_REG_ADDR_REF(13));
+
+ /* Macroblock data start bits */
+ reg.base = G1_REG_DEC_CTRL2;
+ reg.mask = 0x3f;
+ reg.shift = 18;
+ hantro_reg_write(vpu, &reg, mb_start_bits);
+
+ /* Macroblock aligned data length */
+ reg.base = G1_REG_DEC_CTRL6;
+ reg.mask = 0x3fffff;
+ reg.shift = 0;
+ hantro_reg_write(vpu, &reg, mb_size + 1);
+
+ /*
+ * Calculate DCT partition info
+ * @dct_size_part_size: Containing sizes of DCT part, every DCT part
+ * has 3 bytes to store its size, except the last
+ * DCT part
+ * @dct_part_offset: bytes offset of DCT parts from src_dma base addr
+ * @dct_part_total_len: total size of all DCT parts
+ */
+ dct_size_part_size = (hdr->num_dct_parts - 1) * 3;
+ dct_part_offset = first_part_offset + hdr->first_part_size;
+ for (i = 0; i < hdr->num_dct_parts; i++)
+ dct_part_total_len += hdr->dct_part_sizes[i];
+ dct_part_total_len += dct_size_part_size;
+ dct_part_total_len += (dct_part_offset & DEC_8190_ALIGN_MASK);
+
+ /* Number of DCT partitions */
+ reg.base = G1_REG_DEC_CTRL6;
+ reg.mask = 0xf;
+ reg.shift = 24;
+ hantro_reg_write(vpu, &reg, hdr->num_dct_parts - 1);
+
+ /* DCT partition length */
+ vdpu_write_relaxed(vpu,
+ G1_REG_DEC_CTRL3_STREAM_LEN(dct_part_total_len),
+ G1_REG_DEC_CTRL3);
+
+ /* DCT partitions base address */
+ for (i = 0; i < hdr->num_dct_parts; i++) {
+ u32 byte_offset = dct_part_offset + dct_size_part_size + count;
+ u32 base_addr = byte_offset + src_dma;
+
+ hantro_reg_write(vpu, &vp8_dec_dct_base[i],
+ base_addr & (~DEC_8190_ALIGN_MASK));
+
+ hantro_reg_write(vpu, &vp8_dec_dct_start_bits[i],
+ (byte_offset & DEC_8190_ALIGN_MASK) * 8);
+
+ count += hdr->dct_part_sizes[i];
+ }
+}
+
+/*
+ * prediction filter taps
+ * normal 6-tap filters
+ */
+static void cfg_tap(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_reg reg;
+ u32 val = 0;
+ int i, j;
+
+ reg.base = G1_REG_BD_REF_PIC(3);
+ reg.mask = 0xf;
+
+ if ((hdr->version & 0x03) != 0)
+ return; /* Tap filter not used. */
+
+ for (i = 0; i < 8; i++) {
+ val = (hantro_vp8_dec_mc_filter[i][0] << 2) |
+ hantro_vp8_dec_mc_filter[i][5];
+
+ for (j = 0; j < 4; j++)
+ hantro_reg_write(vpu, &vp8_dec_pred_bc_tap[i][j],
+ hantro_vp8_dec_mc_filter[i][j + 1]);
+
+ switch (i) {
+ case 2:
+ reg.shift = 8;
+ break;
+ case 4:
+ reg.shift = 4;
+ break;
+ case 6:
+ reg.shift = 0;
+ break;
+ default:
+ continue;
+ }
+
+ hantro_reg_write(vpu, &reg, val);
+ }
+}
+
+static void cfg_ref(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr,
+ struct vb2_v4l2_buffer *vb2_dst)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ dma_addr_t ref;
+
+
+ ref = hantro_get_ref(ctx, hdr->last_frame_ts);
+ if (!ref) {
+ vpu_debug(0, "failed to find last frame ts=%llu\n",
+ hdr->last_frame_ts);
+ ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+ }
+ vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(0));
+
+ ref = hantro_get_ref(ctx, hdr->golden_frame_ts);
+ if (!ref && hdr->golden_frame_ts)
+ vpu_debug(0, "failed to find golden frame ts=%llu\n",
+ hdr->golden_frame_ts);
+ if (!ref)
+ ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+ if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN)
+ ref |= G1_REG_ADDR_REF_TOPC_E;
+ vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(4));
+
+ ref = hantro_get_ref(ctx, hdr->alt_frame_ts);
+ if (!ref && hdr->alt_frame_ts)
+ vpu_debug(0, "failed to find alt frame ts=%llu\n",
+ hdr->alt_frame_ts);
+ if (!ref)
+ ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+ if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT)
+ ref |= G1_REG_ADDR_REF_TOPC_E;
+ vdpu_write_relaxed(vpu, ref, G1_REG_ADDR_REF(5));
+}
+
+static void cfg_buffers(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr,
+ struct vb2_v4l2_buffer *vb2_dst)
+{
+ const struct v4l2_vp8_segment *seg = &hdr->segment;
+ struct hantro_dev *vpu = ctx->dev;
+ dma_addr_t dst_dma;
+ u32 reg;
+
+ /* Set probability table buffer address */
+ vdpu_write_relaxed(vpu, ctx->vp8_dec.prob_tbl.dma,
+ G1_REG_ADDR_QTABLE);
+
+ /* Set segment map address */
+ reg = G1_REG_FWD_PIC1_SEGMENT_BASE(ctx->vp8_dec.segment_map.dma);
+ if (seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED) {
+ reg |= G1_REG_FWD_PIC1_SEGMENT_E;
+ if (seg->flags & V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP)
+ reg |= G1_REG_FWD_PIC1_SEGMENT_UPD_E;
+ }
+ vdpu_write_relaxed(vpu, reg, G1_REG_FWD_PIC(0));
+
+ dst_dma = hantro_get_dec_buf_addr(ctx, &vb2_dst->vb2_buf);
+ vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DST);
+}
+
+int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx)
+{
+ const struct v4l2_ctrl_vp8_frame *hdr;
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *vb2_dst;
+ size_t height = ctx->dst_fmt.height;
+ size_t width = ctx->dst_fmt.width;
+ u32 mb_width, mb_height;
+ u32 reg;
+
+ hantro_start_prepare_run(ctx);
+
+ hdr = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_VP8_FRAME);
+ if (WARN_ON(!hdr))
+ return -EINVAL;
+
+ /* Reset segment_map buffer in keyframe */
+ if (V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) && ctx->vp8_dec.segment_map.cpu)
+ memset(ctx->vp8_dec.segment_map.cpu, 0,
+ ctx->vp8_dec.segment_map.size);
+
+ hantro_vp8_prob_update(ctx, hdr);
+
+ reg = G1_REG_CONFIG_DEC_TIMEOUT_E |
+ G1_REG_CONFIG_DEC_STRENDIAN_E |
+ G1_REG_CONFIG_DEC_INSWAP32_E |
+ G1_REG_CONFIG_DEC_STRSWAP32_E |
+ G1_REG_CONFIG_DEC_OUTSWAP32_E |
+ G1_REG_CONFIG_DEC_CLK_GATE_E |
+ G1_REG_CONFIG_DEC_IN_ENDIAN |
+ G1_REG_CONFIG_DEC_OUT_ENDIAN |
+ G1_REG_CONFIG_DEC_MAX_BURST(16);
+ vdpu_write_relaxed(vpu, reg, G1_REG_CONFIG);
+
+ reg = G1_REG_DEC_CTRL0_DEC_MODE(10) |
+ G1_REG_DEC_CTRL0_DEC_AXI_AUTO;
+ if (!V4L2_VP8_FRAME_IS_KEY_FRAME(hdr))
+ reg |= G1_REG_DEC_CTRL0_PIC_INTER_E;
+ if (!(hdr->flags & V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF))
+ reg |= G1_REG_DEC_CTRL0_SKIP_MODE;
+ if (hdr->lf.level == 0)
+ reg |= G1_REG_DEC_CTRL0_FILTERING_DIS;
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL0);
+
+ /* Frame dimensions */
+ mb_width = MB_WIDTH(width);
+ mb_height = MB_HEIGHT(height);
+ reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(mb_width) |
+ G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(mb_height) |
+ G1_REG_DEC_CTRL1_PIC_MB_W_EXT(mb_width >> 9) |
+ G1_REG_DEC_CTRL1_PIC_MB_H_EXT(mb_height >> 8);
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL1);
+
+ /* Boolean decoder */
+ reg = G1_REG_DEC_CTRL2_BOOLEAN_RANGE(hdr->coder_state.range)
+ | G1_REG_DEC_CTRL2_BOOLEAN_VALUE(hdr->coder_state.value);
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL2);
+
+ reg = 0;
+ if (hdr->version != 3)
+ reg |= G1_REG_DEC_CTRL4_VC1_HEIGHT_EXT;
+ if (hdr->version & 0x3)
+ reg |= G1_REG_DEC_CTRL4_BILIN_MC_E;
+ vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL4);
+
+ cfg_lf(ctx, hdr);
+ cfg_qp(ctx, hdr);
+ cfg_parts(ctx, hdr);
+ cfg_tap(ctx, hdr);
+
+ vb2_dst = hantro_get_dst_buf(ctx);
+ cfg_ref(ctx, hdr, vb2_dst);
+ cfg_buffers(ctx, hdr, vb2_dst);
+
+ hantro_end_prepare_run(ctx);
+
+ vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_g2.c b/drivers/media/platform/verisilicon/hantro_g2.c
new file mode 100644
index 000000000..ee5f14c5f
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g2.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2021 Collabora Ltd, Andrzej Pietrasiewicz <andrzej.p@collabora.com>
+ */
+
+#include "hantro_hw.h"
+#include "hantro_g2_regs.h"
+
+void hantro_g2_check_idle(struct hantro_dev *vpu)
+{
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ u32 status;
+
+ /* Make sure the VPU is idle */
+ status = vdpu_read(vpu, G2_REG_INTERRUPT);
+ if (status & G2_REG_INTERRUPT_DEC_E) {
+ dev_warn(vpu->dev, "device still running, aborting");
+ status |= G2_REG_INTERRUPT_DEC_ABORT_E | G2_REG_INTERRUPT_DEC_IRQ_DIS;
+ vdpu_write(vpu, status, G2_REG_INTERRUPT);
+ }
+ }
+}
+
+irqreturn_t hantro_g2_irq(int irq, void *dev_id)
+{
+ struct hantro_dev *vpu = dev_id;
+ enum vb2_buffer_state state;
+ u32 status;
+
+ status = vdpu_read(vpu, G2_REG_INTERRUPT);
+ state = (status & G2_REG_INTERRUPT_DEC_RDY_INT) ?
+ VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+ vdpu_write(vpu, 0, G2_REG_INTERRUPT);
+ vdpu_write(vpu, G2_REG_CONFIG_DEC_CLK_GATE_E, G2_REG_CONFIG);
+
+ hantro_irq_done(vpu, state);
+
+ return IRQ_HANDLED;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
new file mode 100644
index 000000000..a9d4ac84a
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU HEVC codec driver
+ *
+ * Copyright (C) 2020 Safran Passenger Innovations LLC
+ */
+
+#include "hantro_hw.h"
+#include "hantro_g2_regs.h"
+
+#define G2_ALIGN 16
+
+static size_t hantro_hevc_chroma_offset(struct hantro_ctx *ctx)
+{
+ return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
+}
+
+static size_t hantro_hevc_motion_vectors_offset(struct hantro_ctx *ctx)
+{
+ size_t cr_offset = hantro_hevc_chroma_offset(ctx);
+
+ return ALIGN((cr_offset * 3) / 2, G2_ALIGN);
+}
+
+static void prepare_tile_info_buffer(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+ const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+ u16 *p = (u16 *)((u8 *)ctx->hevc_dec.tile_sizes.cpu);
+ unsigned int num_tile_rows = pps->num_tile_rows_minus1 + 1;
+ unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
+ unsigned int pic_width_in_ctbs, pic_height_in_ctbs;
+ unsigned int max_log2_ctb_size, ctb_size;
+ bool tiles_enabled, uniform_spacing;
+ u32 no_chroma = 0;
+
+ tiles_enabled = !!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED);
+ uniform_spacing = !!(pps->flags & V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING);
+
+ hantro_reg_write(vpu, &g2_tile_e, tiles_enabled);
+
+ max_log2_ctb_size = sps->log2_min_luma_coding_block_size_minus3 + 3 +
+ sps->log2_diff_max_min_luma_coding_block_size;
+ pic_width_in_ctbs = (sps->pic_width_in_luma_samples +
+ (1 << max_log2_ctb_size) - 1) >> max_log2_ctb_size;
+ pic_height_in_ctbs = (sps->pic_height_in_luma_samples + (1 << max_log2_ctb_size) - 1)
+ >> max_log2_ctb_size;
+ ctb_size = 1 << max_log2_ctb_size;
+
+ vpu_debug(1, "Preparing tile sizes buffer for %dx%d CTBs (CTB size %d)\n",
+ pic_width_in_ctbs, pic_height_in_ctbs, ctb_size);
+
+ if (tiles_enabled) {
+ unsigned int i, j, h;
+
+ vpu_debug(1, "Tiles enabled! %dx%d\n", num_tile_cols, num_tile_rows);
+
+ hantro_reg_write(vpu, &g2_num_tile_rows, num_tile_rows);
+ hantro_reg_write(vpu, &g2_num_tile_cols, num_tile_cols);
+
+ /* write width + height for each tile in pic */
+ if (!uniform_spacing) {
+ u32 tmp_w = 0, tmp_h = 0;
+
+ for (i = 0; i < num_tile_rows; i++) {
+ if (i == num_tile_rows - 1)
+ h = pic_height_in_ctbs - tmp_h;
+ else
+ h = pps->row_height_minus1[i] + 1;
+ tmp_h += h;
+ if (i == 0 && h == 1 && ctb_size == 16)
+ no_chroma = 1;
+ for (j = 0, tmp_w = 0; j < num_tile_cols - 1; j++) {
+ tmp_w += pps->column_width_minus1[j] + 1;
+ *p++ = pps->column_width_minus1[j] + 1;
+ *p++ = h;
+ if (i == 0 && h == 1 && ctb_size == 16)
+ no_chroma = 1;
+ }
+ /* last column */
+ *p++ = pic_width_in_ctbs - tmp_w;
+ *p++ = h;
+ }
+ } else { /* uniform spacing */
+ u32 tmp, prev_h, prev_w;
+
+ for (i = 0, prev_h = 0; i < num_tile_rows; i++) {
+ tmp = (i + 1) * pic_height_in_ctbs / num_tile_rows;
+ h = tmp - prev_h;
+ prev_h = tmp;
+ if (i == 0 && h == 1 && ctb_size == 16)
+ no_chroma = 1;
+ for (j = 0, prev_w = 0; j < num_tile_cols; j++) {
+ tmp = (j + 1) * pic_width_in_ctbs / num_tile_cols;
+ *p++ = tmp - prev_w;
+ *p++ = h;
+ if (j == 0 &&
+ (pps->column_width_minus1[0] + 1) == 1 &&
+ ctb_size == 16)
+ no_chroma = 1;
+ prev_w = tmp;
+ }
+ }
+ }
+ } else {
+ hantro_reg_write(vpu, &g2_num_tile_rows, 1);
+ hantro_reg_write(vpu, &g2_num_tile_cols, 1);
+
+ /* There's one tile, with dimensions equal to pic size. */
+ p[0] = pic_width_in_ctbs;
+ p[1] = pic_height_in_ctbs;
+ }
+
+ if (no_chroma)
+ vpu_debug(1, "%s: no chroma!\n", __func__);
+}
+
+static int compute_header_skip_length(struct hantro_ctx *ctx)
+{
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+ const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+ const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+ int skip = 0;
+
+ if (pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT)
+ /* size of pic_output_flag */
+ skip++;
+
+ if (sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE)
+ /* size of pic_order_cnt_lsb */
+ skip += 2;
+
+ if (!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC)) {
+ /* size of pic_order_cnt_lsb */
+ skip += sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
+
+ /* size of short_term_ref_pic_set_sps_flag */
+ skip++;
+
+ if (decode_params->short_term_ref_pic_set_size)
+ /* size of st_ref_pic_set( num_short_term_ref_pic_sets ) */
+ skip += decode_params->short_term_ref_pic_set_size;
+ else if (sps->num_short_term_ref_pic_sets > 1)
+ skip += fls(sps->num_short_term_ref_pic_sets - 1);
+
+ skip += decode_params->long_term_ref_pic_set_size;
+ }
+
+ return skip;
+}
+
+static void set_params(struct hantro_ctx *ctx)
+{
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+ const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+ const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+ struct hantro_dev *vpu = ctx->dev;
+ u32 min_log2_cb_size, max_log2_ctb_size, min_cb_size, max_ctb_size;
+ u32 pic_width_in_min_cbs, pic_height_in_min_cbs;
+ u32 pic_width_aligned, pic_height_aligned;
+ u32 partial_ctb_x, partial_ctb_y;
+
+ hantro_reg_write(vpu, &g2_bit_depth_y_minus8, sps->bit_depth_luma_minus8);
+ hantro_reg_write(vpu, &g2_bit_depth_c_minus8, sps->bit_depth_chroma_minus8);
+
+ hantro_reg_write(vpu, &g2_hdr_skip_length, compute_header_skip_length(ctx));
+
+ min_log2_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3;
+ max_log2_ctb_size = min_log2_cb_size + sps->log2_diff_max_min_luma_coding_block_size;
+
+ hantro_reg_write(vpu, &g2_min_cb_size, min_log2_cb_size);
+ hantro_reg_write(vpu, &g2_max_cb_size, max_log2_ctb_size);
+
+ min_cb_size = 1 << min_log2_cb_size;
+ max_ctb_size = 1 << max_log2_ctb_size;
+
+ pic_width_in_min_cbs = sps->pic_width_in_luma_samples / min_cb_size;
+ pic_height_in_min_cbs = sps->pic_height_in_luma_samples / min_cb_size;
+ pic_width_aligned = ALIGN(sps->pic_width_in_luma_samples, max_ctb_size);
+ pic_height_aligned = ALIGN(sps->pic_height_in_luma_samples, max_ctb_size);
+
+ partial_ctb_x = !!(sps->pic_width_in_luma_samples != pic_width_aligned);
+ partial_ctb_y = !!(sps->pic_height_in_luma_samples != pic_height_aligned);
+
+ hantro_reg_write(vpu, &g2_partial_ctb_x, partial_ctb_x);
+ hantro_reg_write(vpu, &g2_partial_ctb_y, partial_ctb_y);
+
+ hantro_reg_write(vpu, &g2_pic_width_in_cbs, pic_width_in_min_cbs);
+ hantro_reg_write(vpu, &g2_pic_height_in_cbs, pic_height_in_min_cbs);
+
+ hantro_reg_write(vpu, &g2_pic_width_4x4,
+ (pic_width_in_min_cbs * min_cb_size) / 4);
+ hantro_reg_write(vpu, &g2_pic_height_4x4,
+ (pic_height_in_min_cbs * min_cb_size) / 4);
+
+ hantro_reg_write(vpu, &hevc_max_inter_hierdepth,
+ sps->max_transform_hierarchy_depth_inter);
+ hantro_reg_write(vpu, &hevc_max_intra_hierdepth,
+ sps->max_transform_hierarchy_depth_intra);
+ hantro_reg_write(vpu, &hevc_min_trb_size,
+ sps->log2_min_luma_transform_block_size_minus2 + 2);
+ hantro_reg_write(vpu, &hevc_max_trb_size,
+ sps->log2_min_luma_transform_block_size_minus2 + 2 +
+ sps->log2_diff_max_min_luma_transform_block_size);
+
+ hantro_reg_write(vpu, &g2_tempor_mvp_e,
+ !!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) &&
+ !(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC));
+ hantro_reg_write(vpu, &g2_strong_smooth_e,
+ !!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED));
+ hantro_reg_write(vpu, &g2_asym_pred_e,
+ !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED));
+ hantro_reg_write(vpu, &g2_sao_e,
+ !!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET));
+ hantro_reg_write(vpu, &g2_sign_data_hide,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED));
+
+ if (pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED) {
+ hantro_reg_write(vpu, &g2_cu_qpd_e, 1);
+ hantro_reg_write(vpu, &g2_max_cu_qpd_depth, pps->diff_cu_qp_delta_depth);
+ } else {
+ hantro_reg_write(vpu, &g2_cu_qpd_e, 0);
+ hantro_reg_write(vpu, &g2_max_cu_qpd_depth, 0);
+ }
+
+ hantro_reg_write(vpu, &g2_cb_qp_offset, pps->pps_cb_qp_offset);
+ hantro_reg_write(vpu, &g2_cr_qp_offset, pps->pps_cr_qp_offset);
+
+ hantro_reg_write(vpu, &g2_filt_offset_beta, pps->pps_beta_offset_div2);
+ hantro_reg_write(vpu, &g2_filt_offset_tc, pps->pps_tc_offset_div2);
+ hantro_reg_write(vpu, &g2_slice_hdr_ext_e,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT));
+ hantro_reg_write(vpu, &g2_slice_hdr_ext_bits, pps->num_extra_slice_header_bits);
+ hantro_reg_write(vpu, &g2_slice_chqp_present,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT));
+ hantro_reg_write(vpu, &g2_weight_bipr_idc,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED));
+ hantro_reg_write(vpu, &g2_transq_bypass,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED));
+ hantro_reg_write(vpu, &g2_list_mod_e,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT));
+ hantro_reg_write(vpu, &g2_entropy_sync_e,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED));
+ hantro_reg_write(vpu, &g2_cabac_init_present,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT));
+ hantro_reg_write(vpu, &g2_idr_pic_e,
+ !!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC));
+ hantro_reg_write(vpu, &hevc_parallel_merge,
+ pps->log2_parallel_merge_level_minus2 + 2);
+ hantro_reg_write(vpu, &g2_pcm_filt_d,
+ !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED));
+ hantro_reg_write(vpu, &g2_pcm_e,
+ !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED));
+ if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED) {
+ hantro_reg_write(vpu, &g2_max_pcm_size,
+ sps->log2_diff_max_min_pcm_luma_coding_block_size +
+ sps->log2_min_pcm_luma_coding_block_size_minus3 + 3);
+ hantro_reg_write(vpu, &g2_min_pcm_size,
+ sps->log2_min_pcm_luma_coding_block_size_minus3 + 3);
+ hantro_reg_write(vpu, &g2_bit_depth_pcm_y,
+ sps->pcm_sample_bit_depth_luma_minus1 + 1);
+ hantro_reg_write(vpu, &g2_bit_depth_pcm_c,
+ sps->pcm_sample_bit_depth_chroma_minus1 + 1);
+ } else {
+ hantro_reg_write(vpu, &g2_max_pcm_size, 0);
+ hantro_reg_write(vpu, &g2_min_pcm_size, 0);
+ hantro_reg_write(vpu, &g2_bit_depth_pcm_y, 0);
+ hantro_reg_write(vpu, &g2_bit_depth_pcm_c, 0);
+ }
+
+ hantro_reg_write(vpu, &g2_start_code_e, 1);
+ hantro_reg_write(vpu, &g2_init_qp, pps->init_qp_minus26 + 26);
+ hantro_reg_write(vpu, &g2_weight_pred_e,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED));
+ hantro_reg_write(vpu, &g2_cabac_init_present,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT));
+ hantro_reg_write(vpu, &g2_const_intra_e,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED));
+ hantro_reg_write(vpu, &g2_transform_skip,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED));
+ hantro_reg_write(vpu, &g2_out_filtering_dis,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER));
+ hantro_reg_write(vpu, &g2_filt_ctrl_pres,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT));
+ hantro_reg_write(vpu, &g2_dependent_slice,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED));
+ hantro_reg_write(vpu, &g2_filter_override,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED));
+ hantro_reg_write(vpu, &g2_refidx0_active,
+ pps->num_ref_idx_l0_default_active_minus1 + 1);
+ hantro_reg_write(vpu, &g2_refidx1_active,
+ pps->num_ref_idx_l1_default_active_minus1 + 1);
+ hantro_reg_write(vpu, &g2_apf_threshold, 8);
+}
+
+static void set_ref_pic_list(struct hantro_ctx *ctx)
+{
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ struct hantro_dev *vpu = ctx->dev;
+ const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+ u32 list0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {};
+ u32 list1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {};
+ static const struct hantro_reg ref_pic_regs0[] = {
+ hevc_rlist_f0,
+ hevc_rlist_f1,
+ hevc_rlist_f2,
+ hevc_rlist_f3,
+ hevc_rlist_f4,
+ hevc_rlist_f5,
+ hevc_rlist_f6,
+ hevc_rlist_f7,
+ hevc_rlist_f8,
+ hevc_rlist_f9,
+ hevc_rlist_f10,
+ hevc_rlist_f11,
+ hevc_rlist_f12,
+ hevc_rlist_f13,
+ hevc_rlist_f14,
+ hevc_rlist_f15,
+ };
+ static const struct hantro_reg ref_pic_regs1[] = {
+ hevc_rlist_b0,
+ hevc_rlist_b1,
+ hevc_rlist_b2,
+ hevc_rlist_b3,
+ hevc_rlist_b4,
+ hevc_rlist_b5,
+ hevc_rlist_b6,
+ hevc_rlist_b7,
+ hevc_rlist_b8,
+ hevc_rlist_b9,
+ hevc_rlist_b10,
+ hevc_rlist_b11,
+ hevc_rlist_b12,
+ hevc_rlist_b13,
+ hevc_rlist_b14,
+ hevc_rlist_b15,
+ };
+ unsigned int i, j;
+
+ /* List 0 contains: short term before, short term after and long term */
+ j = 0;
+ for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list0); i++)
+ list0[j++] = decode_params->poc_st_curr_before[i];
+ for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list0); i++)
+ list0[j++] = decode_params->poc_st_curr_after[i];
+ for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list0); i++)
+ list0[j++] = decode_params->poc_lt_curr[i];
+
+ /* Fill the list, copying over and over */
+ i = 0;
+ while (j < ARRAY_SIZE(list0))
+ list0[j++] = list0[i++];
+
+ j = 0;
+ for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list1); i++)
+ list1[j++] = decode_params->poc_st_curr_after[i];
+ for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list1); i++)
+ list1[j++] = decode_params->poc_st_curr_before[i];
+ for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list1); i++)
+ list1[j++] = decode_params->poc_lt_curr[i];
+
+ i = 0;
+ while (j < ARRAY_SIZE(list1))
+ list1[j++] = list1[i++];
+
+ for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) {
+ hantro_reg_write(vpu, &ref_pic_regs0[i], list0[i]);
+ hantro_reg_write(vpu, &ref_pic_regs1[i], list1[i]);
+ }
+}
+
+static int set_ref(struct hantro_ctx *ctx)
+{
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+ const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+ const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
+ dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *vb2_dst;
+ struct hantro_decoded_buffer *dst;
+ size_t cr_offset = hantro_hevc_chroma_offset(ctx);
+ size_t mv_offset = hantro_hevc_motion_vectors_offset(ctx);
+ u32 max_ref_frames;
+ u16 dpb_longterm_e;
+ static const struct hantro_reg cur_poc[] = {
+ hevc_cur_poc_00,
+ hevc_cur_poc_01,
+ hevc_cur_poc_02,
+ hevc_cur_poc_03,
+ hevc_cur_poc_04,
+ hevc_cur_poc_05,
+ hevc_cur_poc_06,
+ hevc_cur_poc_07,
+ hevc_cur_poc_08,
+ hevc_cur_poc_09,
+ hevc_cur_poc_10,
+ hevc_cur_poc_11,
+ hevc_cur_poc_12,
+ hevc_cur_poc_13,
+ hevc_cur_poc_14,
+ hevc_cur_poc_15,
+ };
+ unsigned int i;
+
+ max_ref_frames = decode_params->num_poc_lt_curr +
+ decode_params->num_poc_st_curr_before +
+ decode_params->num_poc_st_curr_after;
+ /*
+ * Set max_ref_frames to non-zero to avoid HW hang when decoding
+ * badly marked I-frames.
+ */
+ max_ref_frames = max_ref_frames ? max_ref_frames : 1;
+ hantro_reg_write(vpu, &g2_num_ref_frames, max_ref_frames);
+ hantro_reg_write(vpu, &g2_filter_over_slices,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED));
+ hantro_reg_write(vpu, &g2_filter_over_tiles,
+ !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED));
+
+ /*
+ * Write POC count diff from current pic.
+ */
+ for (i = 0; i < decode_params->num_active_dpb_entries && i < ARRAY_SIZE(cur_poc); i++) {
+ char poc_diff = decode_params->pic_order_cnt_val - dpb[i].pic_order_cnt_val;
+
+ hantro_reg_write(vpu, &cur_poc[i], poc_diff);
+ }
+
+ if (i < ARRAY_SIZE(cur_poc)) {
+ /*
+ * After the references, fill one entry pointing to itself,
+ * i.e. difference is zero.
+ */
+ hantro_reg_write(vpu, &cur_poc[i], 0);
+ i++;
+ }
+
+ /* Fill the rest with the current picture */
+ for (; i < ARRAY_SIZE(cur_poc); i++)
+ hantro_reg_write(vpu, &cur_poc[i], decode_params->pic_order_cnt_val);
+
+ set_ref_pic_list(ctx);
+
+ /* We will only keep the reference pictures that are still used */
+ hantro_hevc_ref_init(ctx);
+
+ /* Set up addresses of DPB buffers */
+ dpb_longterm_e = 0;
+ for (i = 0; i < decode_params->num_active_dpb_entries &&
+ i < (V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1); i++) {
+ luma_addr = hantro_hevc_get_ref_buf(ctx, dpb[i].pic_order_cnt_val);
+ if (!luma_addr)
+ return -ENOMEM;
+
+ chroma_addr = luma_addr + cr_offset;
+ mv_addr = luma_addr + mv_offset;
+
+ if (dpb[i].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE)
+ dpb_longterm_e |= BIT(V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1 - i);
+
+ hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr);
+ hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr);
+ hantro_write_addr(vpu, G2_REF_MV_ADDR(i), mv_addr);
+ }
+
+ vb2_dst = hantro_get_dst_buf(ctx);
+ dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
+ luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
+ if (!luma_addr)
+ return -ENOMEM;
+
+ if (hantro_hevc_add_ref_buf(ctx, decode_params->pic_order_cnt_val, luma_addr))
+ return -EINVAL;
+
+ chroma_addr = luma_addr + cr_offset;
+ mv_addr = luma_addr + mv_offset;
+
+ hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr);
+ hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr);
+ hantro_write_addr(vpu, G2_REF_MV_ADDR(i++), mv_addr);
+
+ hantro_write_addr(vpu, G2_OUT_LUMA_ADDR, luma_addr);
+ hantro_write_addr(vpu, G2_OUT_CHROMA_ADDR, chroma_addr);
+ hantro_write_addr(vpu, G2_OUT_MV_ADDR, mv_addr);
+
+ for (; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) {
+ hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), 0);
+ hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), 0);
+ hantro_write_addr(vpu, G2_REF_MV_ADDR(i), 0);
+ }
+
+ hantro_reg_write(vpu, &g2_refer_lterm_e, dpb_longterm_e);
+
+ return 0;
+}
+
+static void set_buffers(struct hantro_ctx *ctx)
+{
+ struct vb2_v4l2_buffer *src_buf;
+ struct hantro_dev *vpu = ctx->dev;
+ dma_addr_t src_dma;
+ u32 src_len, src_buf_len;
+
+ src_buf = hantro_get_src_buf(ctx);
+
+ /* Source (stream) buffer. */
+ src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
+ src_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
+ src_buf_len = vb2_plane_size(&src_buf->vb2_buf, 0);
+
+ hantro_write_addr(vpu, G2_STREAM_ADDR, src_dma);
+ hantro_reg_write(vpu, &g2_stream_len, src_len);
+ hantro_reg_write(vpu, &g2_strm_buffer_len, src_buf_len);
+ hantro_reg_write(vpu, &g2_strm_start_offset, 0);
+ hantro_reg_write(vpu, &g2_write_mvs_e, 1);
+
+ hantro_write_addr(vpu, G2_TILE_SIZES_ADDR, ctx->hevc_dec.tile_sizes.dma);
+ hantro_write_addr(vpu, G2_TILE_FILTER_ADDR, ctx->hevc_dec.tile_filter.dma);
+ hantro_write_addr(vpu, G2_TILE_SAO_ADDR, ctx->hevc_dec.tile_sao.dma);
+ hantro_write_addr(vpu, G2_TILE_BSD_ADDR, ctx->hevc_dec.tile_bsd.dma);
+}
+
+static void prepare_scaling_list_buffer(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ const struct v4l2_ctrl_hevc_scaling_matrix *sc = ctrls->scaling;
+ const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+ u8 *p = ((u8 *)ctx->hevc_dec.scaling_lists.cpu);
+ unsigned int scaling_list_enabled;
+ unsigned int i, j, k;
+
+ scaling_list_enabled = !!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED);
+ hantro_reg_write(vpu, &g2_scaling_list_e, scaling_list_enabled);
+
+ if (!scaling_list_enabled)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(sc->scaling_list_dc_coef_16x16); i++)
+ *p++ = sc->scaling_list_dc_coef_16x16[i];
+
+ for (i = 0; i < ARRAY_SIZE(sc->scaling_list_dc_coef_32x32); i++)
+ *p++ = sc->scaling_list_dc_coef_32x32[i];
+
+ /* 128-bit boundary */
+ p += 8;
+
+ /* write scaling lists column by column */
+
+ for (i = 0; i < 6; i++)
+ for (j = 0; j < 4; j++)
+ for (k = 0; k < 4; k++)
+ *p++ = sc->scaling_list_4x4[i][4 * k + j];
+
+ for (i = 0; i < 6; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 8; k++)
+ *p++ = sc->scaling_list_8x8[i][8 * k + j];
+
+ for (i = 0; i < 6; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 8; k++)
+ *p++ = sc->scaling_list_16x16[i][8 * k + j];
+
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 8; j++)
+ for (k = 0; k < 8; k++)
+ *p++ = sc->scaling_list_32x32[i][8 * k + j];
+
+ hantro_write_addr(vpu, G2_HEVC_SCALING_LIST_ADDR, ctx->hevc_dec.scaling_lists.dma);
+}
+
+int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ int ret;
+
+ hantro_g2_check_idle(vpu);
+
+ /* Prepare HEVC decoder context. */
+ ret = hantro_hevc_dec_prepare_run(ctx);
+ if (ret)
+ return ret;
+
+ /* Configure hardware registers. */
+ set_params(ctx);
+
+ /* set reference pictures */
+ ret = set_ref(ctx);
+ if (ret)
+ return ret;
+
+ set_buffers(ctx);
+ prepare_tile_info_buffer(ctx);
+
+ prepare_scaling_list_buffer(ctx);
+
+ hantro_end_prepare_run(ctx);
+
+ hantro_reg_write(vpu, &g2_mode, HEVC_DEC_MODE);
+ hantro_reg_write(vpu, &g2_clk_gate_e, 1);
+
+ /* Don't disable output */
+ hantro_reg_write(vpu, &g2_out_dis, 0);
+
+ /* Don't compress buffers */
+ hantro_reg_write(vpu, &g2_ref_compress_bypass, 1);
+
+ /* Bus width and max burst */
+ hantro_reg_write(vpu, &g2_buswidth, BUS_WIDTH_128);
+ hantro_reg_write(vpu, &g2_max_burst, 16);
+
+ /* Swap */
+ hantro_reg_write(vpu, &g2_strm_swap, 0xf);
+ hantro_reg_write(vpu, &g2_dirmv_swap, 0xf);
+ hantro_reg_write(vpu, &g2_compress_swap, 0xf);
+
+ /* Start decoding! */
+ vdpu_write(vpu, G2_REG_INTERRUPT_DEC_E, G2_REG_INTERRUPT);
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_g2_regs.h b/drivers/media/platform/verisilicon/hantro_g2_regs.h
new file mode 100644
index 000000000..826067835
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g2_regs.h
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, Collabora
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+ */
+
+#ifndef HANTRO_G2_REGS_H_
+#define HANTRO_G2_REGS_H_
+
+#include "hantro.h"
+
+#define G2_SWREG(nr) ((nr) * 4)
+
+#define G2_DEC_REG(b, s, m) \
+ ((const struct hantro_reg) { \
+ .base = G2_SWREG(b), \
+ .shift = s, \
+ .mask = m, \
+ })
+
+#define G2_REG_VERSION G2_SWREG(0)
+
+#define G2_REG_INTERRUPT G2_SWREG(1)
+#define G2_REG_INTERRUPT_DEC_RDY_INT BIT(12)
+#define G2_REG_INTERRUPT_DEC_ABORT_E BIT(5)
+#define G2_REG_INTERRUPT_DEC_IRQ_DIS BIT(4)
+#define G2_REG_INTERRUPT_DEC_E BIT(0)
+
+#define HEVC_DEC_MODE 0xc
+#define VP9_DEC_MODE 0xd
+
+#define BUS_WIDTH_32 0
+#define BUS_WIDTH_64 1
+#define BUS_WIDTH_128 2
+#define BUS_WIDTH_256 3
+
+#define g2_strm_swap G2_DEC_REG(2, 28, 0xf)
+#define g2_strm_swap_old G2_DEC_REG(2, 27, 0x1f)
+#define g2_pic_swap G2_DEC_REG(2, 22, 0x1f)
+#define g2_dirmv_swap G2_DEC_REG(2, 20, 0xf)
+#define g2_dirmv_swap_old G2_DEC_REG(2, 17, 0x1f)
+#define g2_tab0_swap_old G2_DEC_REG(2, 12, 0x1f)
+#define g2_tab1_swap_old G2_DEC_REG(2, 7, 0x1f)
+#define g2_tab2_swap_old G2_DEC_REG(2, 2, 0x1f)
+
+#define g2_mode G2_DEC_REG(3, 27, 0x1f)
+#define g2_compress_swap G2_DEC_REG(3, 20, 0xf)
+#define g2_ref_compress_bypass G2_DEC_REG(3, 17, 0x1)
+#define g2_out_rs_e G2_DEC_REG(3, 16, 0x1)
+#define g2_out_dis G2_DEC_REG(3, 15, 0x1)
+#define g2_out_filtering_dis G2_DEC_REG(3, 14, 0x1)
+#define g2_write_mvs_e G2_DEC_REG(3, 12, 0x1)
+#define g2_tab3_swap_old G2_DEC_REG(3, 7, 0x1f)
+#define g2_rscan_swap G2_DEC_REG(3, 2, 0x1f)
+
+#define g2_pic_width_in_cbs G2_DEC_REG(4, 19, 0x1fff)
+#define g2_pic_height_in_cbs G2_DEC_REG(4, 6, 0x1fff)
+#define g2_num_ref_frames G2_DEC_REG(4, 0, 0x1f)
+
+#define g2_start_bit G2_DEC_REG(5, 25, 0x7f)
+#define g2_scaling_list_e G2_DEC_REG(5, 24, 0x1)
+#define g2_cb_qp_offset G2_DEC_REG(5, 19, 0x1f)
+#define g2_cr_qp_offset G2_DEC_REG(5, 14, 0x1f)
+#define g2_sign_data_hide G2_DEC_REG(5, 12, 0x1)
+#define g2_tempor_mvp_e G2_DEC_REG(5, 11, 0x1)
+#define g2_max_cu_qpd_depth G2_DEC_REG(5, 5, 0x3f)
+#define g2_cu_qpd_e G2_DEC_REG(5, 4, 0x1)
+#define g2_pix_shift G2_DEC_REG(5, 0, 0xf)
+
+#define g2_stream_len G2_DEC_REG(6, 0, 0xffffffff)
+
+#define g2_cabac_init_present G2_DEC_REG(7, 31, 0x1)
+#define g2_weight_pred_e G2_DEC_REG(7, 28, 0x1)
+#define g2_weight_bipr_idc G2_DEC_REG(7, 26, 0x3)
+#define g2_filter_over_slices G2_DEC_REG(7, 25, 0x1)
+#define g2_filter_over_tiles G2_DEC_REG(7, 24, 0x1)
+#define g2_asym_pred_e G2_DEC_REG(7, 23, 0x1)
+#define g2_sao_e G2_DEC_REG(7, 22, 0x1)
+#define g2_pcm_filt_d G2_DEC_REG(7, 21, 0x1)
+#define g2_slice_chqp_present G2_DEC_REG(7, 20, 0x1)
+#define g2_dependent_slice G2_DEC_REG(7, 19, 0x1)
+#define g2_filter_override G2_DEC_REG(7, 18, 0x1)
+#define g2_strong_smooth_e G2_DEC_REG(7, 17, 0x1)
+#define g2_filt_offset_beta G2_DEC_REG(7, 12, 0x1f)
+#define g2_filt_offset_tc G2_DEC_REG(7, 7, 0x1f)
+#define g2_slice_hdr_ext_e G2_DEC_REG(7, 6, 0x1)
+#define g2_slice_hdr_ext_bits G2_DEC_REG(7, 3, 0x7)
+
+#define g2_const_intra_e G2_DEC_REG(8, 31, 0x1)
+#define g2_filt_ctrl_pres G2_DEC_REG(8, 30, 0x1)
+#define g2_bit_depth_y G2_DEC_REG(8, 21, 0xf)
+#define g2_bit_depth_c G2_DEC_REG(8, 17, 0xf)
+#define g2_idr_pic_e G2_DEC_REG(8, 16, 0x1)
+#define g2_bit_depth_pcm_y G2_DEC_REG(8, 12, 0xf)
+#define g2_bit_depth_pcm_c G2_DEC_REG(8, 8, 0xf)
+#define g2_bit_depth_y_minus8 G2_DEC_REG(8, 6, 0x3)
+#define g2_bit_depth_c_minus8 G2_DEC_REG(8, 4, 0x3)
+#define g2_rs_out_bit_depth G2_DEC_REG(8, 4, 0xf)
+#define g2_output_8_bits G2_DEC_REG(8, 3, 0x1)
+#define g2_output_format G2_DEC_REG(8, 0, 0x7)
+#define g2_pp_pix_shift G2_DEC_REG(8, 0, 0xf)
+
+#define g2_refidx1_active G2_DEC_REG(9, 19, 0x1f)
+#define g2_refidx0_active G2_DEC_REG(9, 14, 0x1f)
+#define g2_hdr_skip_length G2_DEC_REG(9, 0, 0x3fff)
+
+#define g2_start_code_e G2_DEC_REG(10, 31, 0x1)
+#define g2_init_qp_old G2_DEC_REG(10, 25, 0x3f)
+#define g2_init_qp G2_DEC_REG(10, 24, 0x7f)
+#define g2_num_tile_cols_old G2_DEC_REG(10, 20, 0x1f)
+#define g2_num_tile_cols G2_DEC_REG(10, 19, 0x1f)
+#define g2_num_tile_rows_old G2_DEC_REG(10, 15, 0x1f)
+#define g2_num_tile_rows G2_DEC_REG(10, 14, 0x1f)
+#define g2_tile_e G2_DEC_REG(10, 1, 0x1)
+#define g2_entropy_sync_e G2_DEC_REG(10, 0, 0x1)
+
+#define vp9_transform_mode G2_DEC_REG(11, 27, 0x7)
+#define vp9_filt_sharpness G2_DEC_REG(11, 21, 0x7)
+#define vp9_mcomp_filt_type G2_DEC_REG(11, 8, 0x7)
+#define vp9_high_prec_mv_e G2_DEC_REG(11, 7, 0x1)
+#define vp9_comp_pred_mode G2_DEC_REG(11, 4, 0x3)
+#define vp9_gref_sign_bias G2_DEC_REG(11, 2, 0x1)
+#define vp9_aref_sign_bias G2_DEC_REG(11, 0, 0x1)
+
+#define g2_refer_lterm_e G2_DEC_REG(12, 16, 0xffff)
+#define g2_min_cb_size G2_DEC_REG(12, 13, 0x7)
+#define g2_max_cb_size G2_DEC_REG(12, 10, 0x7)
+#define g2_min_pcm_size G2_DEC_REG(12, 7, 0x7)
+#define g2_max_pcm_size G2_DEC_REG(12, 4, 0x7)
+#define g2_pcm_e G2_DEC_REG(12, 3, 0x1)
+#define g2_transform_skip G2_DEC_REG(12, 2, 0x1)
+#define g2_transq_bypass G2_DEC_REG(12, 1, 0x1)
+#define g2_list_mod_e G2_DEC_REG(12, 0, 0x1)
+
+#define hevc_min_trb_size G2_DEC_REG(13, 13, 0x7)
+#define hevc_max_trb_size G2_DEC_REG(13, 10, 0x7)
+#define hevc_max_intra_hierdepth G2_DEC_REG(13, 7, 0x7)
+#define hevc_max_inter_hierdepth G2_DEC_REG(13, 4, 0x7)
+#define hevc_parallel_merge G2_DEC_REG(13, 0, 0xf)
+
+#define hevc_rlist_f0 G2_DEC_REG(14, 0, 0x1f)
+#define hevc_rlist_f1 G2_DEC_REG(14, 10, 0x1f)
+#define hevc_rlist_f2 G2_DEC_REG(14, 20, 0x1f)
+#define hevc_rlist_b0 G2_DEC_REG(14, 5, 0x1f)
+#define hevc_rlist_b1 G2_DEC_REG(14, 15, 0x1f)
+#define hevc_rlist_b2 G2_DEC_REG(14, 25, 0x1f)
+
+#define hevc_rlist_f3 G2_DEC_REG(15, 0, 0x1f)
+#define hevc_rlist_f4 G2_DEC_REG(15, 10, 0x1f)
+#define hevc_rlist_f5 G2_DEC_REG(15, 20, 0x1f)
+#define hevc_rlist_b3 G2_DEC_REG(15, 5, 0x1f)
+#define hevc_rlist_b4 G2_DEC_REG(15, 15, 0x1f)
+#define hevc_rlist_b5 G2_DEC_REG(15, 25, 0x1f)
+
+#define hevc_rlist_f6 G2_DEC_REG(16, 0, 0x1f)
+#define hevc_rlist_f7 G2_DEC_REG(16, 10, 0x1f)
+#define hevc_rlist_f8 G2_DEC_REG(16, 20, 0x1f)
+#define hevc_rlist_b6 G2_DEC_REG(16, 5, 0x1f)
+#define hevc_rlist_b7 G2_DEC_REG(16, 15, 0x1f)
+#define hevc_rlist_b8 G2_DEC_REG(16, 25, 0x1f)
+
+#define hevc_rlist_f9 G2_DEC_REG(17, 0, 0x1f)
+#define hevc_rlist_f10 G2_DEC_REG(17, 10, 0x1f)
+#define hevc_rlist_f11 G2_DEC_REG(17, 20, 0x1f)
+#define hevc_rlist_b9 G2_DEC_REG(17, 5, 0x1f)
+#define hevc_rlist_b10 G2_DEC_REG(17, 15, 0x1f)
+#define hevc_rlist_b11 G2_DEC_REG(17, 25, 0x1f)
+
+#define hevc_rlist_f12 G2_DEC_REG(18, 0, 0x1f)
+#define hevc_rlist_f13 G2_DEC_REG(18, 10, 0x1f)
+#define hevc_rlist_f14 G2_DEC_REG(18, 20, 0x1f)
+#define hevc_rlist_b12 G2_DEC_REG(18, 5, 0x1f)
+#define hevc_rlist_b13 G2_DEC_REG(18, 15, 0x1f)
+#define hevc_rlist_b14 G2_DEC_REG(18, 25, 0x1f)
+
+#define hevc_rlist_f15 G2_DEC_REG(19, 0, 0x1f)
+#define hevc_rlist_b15 G2_DEC_REG(19, 5, 0x1f)
+
+#define g2_partial_ctb_x G2_DEC_REG(20, 31, 0x1)
+#define g2_partial_ctb_y G2_DEC_REG(20, 30, 0x1)
+#define g2_pic_width_4x4 G2_DEC_REG(20, 16, 0xfff)
+#define g2_pic_height_4x4 G2_DEC_REG(20, 0, 0xfff)
+
+#define vp9_qp_delta_y_dc G2_DEC_REG(13, 23, 0x3f)
+#define vp9_qp_delta_ch_dc G2_DEC_REG(13, 17, 0x3f)
+#define vp9_qp_delta_ch_ac G2_DEC_REG(13, 11, 0x3f)
+#define vp9_last_sign_bias G2_DEC_REG(13, 10, 0x1)
+#define vp9_lossless_e G2_DEC_REG(13, 9, 0x1)
+#define vp9_comp_pred_var_ref1 G2_DEC_REG(13, 7, 0x3)
+#define vp9_comp_pred_var_ref0 G2_DEC_REG(13, 5, 0x3)
+#define vp9_comp_pred_fixed_ref G2_DEC_REG(13, 3, 0x3)
+#define vp9_segment_temp_upd_e G2_DEC_REG(13, 2, 0x1)
+#define vp9_segment_upd_e G2_DEC_REG(13, 1, 0x1)
+#define vp9_segment_e G2_DEC_REG(13, 0, 0x1)
+
+#define vp9_filt_level G2_DEC_REG(14, 18, 0x3f)
+#define vp9_refpic_seg0 G2_DEC_REG(14, 15, 0x7)
+#define vp9_skip_seg0 G2_DEC_REG(14, 14, 0x1)
+#define vp9_filt_level_seg0 G2_DEC_REG(14, 8, 0x3f)
+#define vp9_quant_seg0 G2_DEC_REG(14, 0, 0xff)
+
+#define vp9_refpic_seg1 G2_DEC_REG(15, 15, 0x7)
+#define vp9_skip_seg1 G2_DEC_REG(15, 14, 0x1)
+#define vp9_filt_level_seg1 G2_DEC_REG(15, 8, 0x3f)
+#define vp9_quant_seg1 G2_DEC_REG(15, 0, 0xff)
+
+#define vp9_refpic_seg2 G2_DEC_REG(16, 15, 0x7)
+#define vp9_skip_seg2 G2_DEC_REG(16, 14, 0x1)
+#define vp9_filt_level_seg2 G2_DEC_REG(16, 8, 0x3f)
+#define vp9_quant_seg2 G2_DEC_REG(16, 0, 0xff)
+
+#define vp9_refpic_seg3 G2_DEC_REG(17, 15, 0x7)
+#define vp9_skip_seg3 G2_DEC_REG(17, 14, 0x1)
+#define vp9_filt_level_seg3 G2_DEC_REG(17, 8, 0x3f)
+#define vp9_quant_seg3 G2_DEC_REG(17, 0, 0xff)
+
+#define vp9_refpic_seg4 G2_DEC_REG(18, 15, 0x7)
+#define vp9_skip_seg4 G2_DEC_REG(18, 14, 0x1)
+#define vp9_filt_level_seg4 G2_DEC_REG(18, 8, 0x3f)
+#define vp9_quant_seg4 G2_DEC_REG(18, 0, 0xff)
+
+#define vp9_refpic_seg5 G2_DEC_REG(19, 15, 0x7)
+#define vp9_skip_seg5 G2_DEC_REG(19, 14, 0x1)
+#define vp9_filt_level_seg5 G2_DEC_REG(19, 8, 0x3f)
+#define vp9_quant_seg5 G2_DEC_REG(19, 0, 0xff)
+
+#define hevc_cur_poc_00 G2_DEC_REG(46, 24, 0xff)
+#define hevc_cur_poc_01 G2_DEC_REG(46, 16, 0xff)
+#define hevc_cur_poc_02 G2_DEC_REG(46, 8, 0xff)
+#define hevc_cur_poc_03 G2_DEC_REG(46, 0, 0xff)
+
+#define hevc_cur_poc_04 G2_DEC_REG(47, 24, 0xff)
+#define hevc_cur_poc_05 G2_DEC_REG(47, 16, 0xff)
+#define hevc_cur_poc_06 G2_DEC_REG(47, 8, 0xff)
+#define hevc_cur_poc_07 G2_DEC_REG(47, 0, 0xff)
+
+#define hevc_cur_poc_08 G2_DEC_REG(48, 24, 0xff)
+#define hevc_cur_poc_09 G2_DEC_REG(48, 16, 0xff)
+#define hevc_cur_poc_10 G2_DEC_REG(48, 8, 0xff)
+#define hevc_cur_poc_11 G2_DEC_REG(48, 0, 0xff)
+
+#define hevc_cur_poc_12 G2_DEC_REG(49, 24, 0xff)
+#define hevc_cur_poc_13 G2_DEC_REG(49, 16, 0xff)
+#define hevc_cur_poc_14 G2_DEC_REG(49, 8, 0xff)
+#define hevc_cur_poc_15 G2_DEC_REG(49, 0, 0xff)
+
+#define vp9_refpic_seg6 G2_DEC_REG(31, 15, 0x7)
+#define vp9_skip_seg6 G2_DEC_REG(31, 14, 0x1)
+#define vp9_filt_level_seg6 G2_DEC_REG(31, 8, 0x3f)
+#define vp9_quant_seg6 G2_DEC_REG(31, 0, 0xff)
+
+#define vp9_refpic_seg7 G2_DEC_REG(32, 15, 0x7)
+#define vp9_skip_seg7 G2_DEC_REG(32, 14, 0x1)
+#define vp9_filt_level_seg7 G2_DEC_REG(32, 8, 0x3f)
+#define vp9_quant_seg7 G2_DEC_REG(32, 0, 0xff)
+
+#define vp9_lref_width G2_DEC_REG(33, 16, 0xffff)
+#define vp9_lref_height G2_DEC_REG(33, 0, 0xffff)
+
+#define vp9_gref_width G2_DEC_REG(34, 16, 0xffff)
+#define vp9_gref_height G2_DEC_REG(34, 0, 0xffff)
+
+#define vp9_aref_width G2_DEC_REG(35, 16, 0xffff)
+#define vp9_aref_height G2_DEC_REG(35, 0, 0xffff)
+
+#define vp9_lref_hor_scale G2_DEC_REG(36, 16, 0xffff)
+#define vp9_lref_ver_scale G2_DEC_REG(36, 0, 0xffff)
+
+#define vp9_gref_hor_scale G2_DEC_REG(37, 16, 0xffff)
+#define vp9_gref_ver_scale G2_DEC_REG(37, 0, 0xffff)
+
+#define vp9_aref_hor_scale G2_DEC_REG(38, 16, 0xffff)
+#define vp9_aref_ver_scale G2_DEC_REG(38, 0, 0xffff)
+
+#define vp9_filt_ref_adj_0 G2_DEC_REG(46, 24, 0x7f)
+#define vp9_filt_ref_adj_1 G2_DEC_REG(46, 16, 0x7f)
+#define vp9_filt_ref_adj_2 G2_DEC_REG(46, 8, 0x7f)
+#define vp9_filt_ref_adj_3 G2_DEC_REG(46, 0, 0x7f)
+
+#define vp9_filt_mb_adj_0 G2_DEC_REG(47, 24, 0x7f)
+#define vp9_filt_mb_adj_1 G2_DEC_REG(47, 16, 0x7f)
+#define vp9_filt_mb_adj_2 G2_DEC_REG(47, 8, 0x7f)
+#define vp9_filt_mb_adj_3 G2_DEC_REG(47, 0, 0x7f)
+
+#define g2_apf_threshold G2_DEC_REG(55, 0, 0xffff)
+
+#define g2_clk_gate_e G2_DEC_REG(58, 16, 0x1)
+#define g2_double_buffer_e G2_DEC_REG(58, 15, 0x1)
+#define g2_buswidth G2_DEC_REG(58, 8, 0x7)
+#define g2_max_burst G2_DEC_REG(58, 0, 0xff)
+
+#define g2_down_scale_e G2_DEC_REG(184, 7, 0x1)
+#define g2_down_scale_y G2_DEC_REG(184, 2, 0x3)
+#define g2_down_scale_x G2_DEC_REG(184, 0, 0x3)
+
+#define G2_REG_CONFIG G2_SWREG(58)
+#define G2_REG_CONFIG_DEC_CLK_GATE_E BIT(16)
+#define G2_REG_CONFIG_DEC_CLK_GATE_IDLE_E BIT(17)
+
+#define G2_OUT_LUMA_ADDR (G2_SWREG(65))
+#define G2_REF_LUMA_ADDR(i) (G2_SWREG(67) + ((i) * 0x8))
+#define G2_VP9_SEGMENT_WRITE_ADDR (G2_SWREG(79))
+#define G2_VP9_SEGMENT_READ_ADDR (G2_SWREG(81))
+#define G2_OUT_CHROMA_ADDR (G2_SWREG(99))
+#define G2_REF_CHROMA_ADDR(i) (G2_SWREG(101) + ((i) * 0x8))
+#define G2_OUT_MV_ADDR (G2_SWREG(133))
+#define G2_REF_MV_ADDR(i) (G2_SWREG(135) + ((i) * 0x8))
+#define G2_TILE_SIZES_ADDR (G2_SWREG(167))
+#define G2_STREAM_ADDR (G2_SWREG(169))
+#define G2_HEVC_SCALING_LIST_ADDR (G2_SWREG(171))
+#define G2_VP9_CTX_COUNT_ADDR (G2_SWREG(171))
+#define G2_VP9_PROBS_ADDR (G2_SWREG(173))
+#define G2_RS_OUT_LUMA_ADDR (G2_SWREG(175))
+#define G2_RS_OUT_CHROMA_ADDR (G2_SWREG(177))
+#define G2_TILE_FILTER_ADDR (G2_SWREG(179))
+#define G2_TILE_SAO_ADDR (G2_SWREG(181))
+#define G2_TILE_BSD_ADDR (G2_SWREG(183))
+#define G2_DS_DST (G2_SWREG(186))
+#define G2_DS_DST_CHR (G2_SWREG(188))
+
+#define g2_strm_buffer_len G2_DEC_REG(258, 0, 0xffffffff)
+#define g2_strm_start_offset G2_DEC_REG(259, 0, 0xffffffff)
+
+#endif
diff --git a/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
new file mode 100644
index 000000000..6fc4b5555
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
@@ -0,0 +1,1014 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VP9 codec driver
+ *
+ * Copyright (C) 2021 Collabora Ltd.
+ */
+#include "media/videobuf2-core.h"
+#include "media/videobuf2-dma-contig.h"
+#include "media/videobuf2-v4l2.h"
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/v4l2-vp9.h>
+
+#include "hantro.h"
+#include "hantro_vp9.h"
+#include "hantro_g2_regs.h"
+
+#define G2_ALIGN 16
+
+enum hantro_ref_frames {
+ INTRA_FRAME = 0,
+ LAST_FRAME = 1,
+ GOLDEN_FRAME = 2,
+ ALTREF_FRAME = 3,
+ MAX_REF_FRAMES = 4
+};
+
+static int start_prepare_run(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame **dec_params)
+{
+ const struct v4l2_ctrl_vp9_compressed_hdr *prob_updates;
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ struct v4l2_ctrl *ctrl;
+ unsigned int fctx_idx;
+
+ /* v4l2-specific stuff */
+ hantro_start_prepare_run(ctx);
+
+ ctrl = v4l2_ctrl_find(&ctx->ctrl_handler, V4L2_CID_STATELESS_VP9_FRAME);
+ if (WARN_ON(!ctrl))
+ return -EINVAL;
+ *dec_params = ctrl->p_cur.p;
+
+ ctrl = v4l2_ctrl_find(&ctx->ctrl_handler, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR);
+ if (WARN_ON(!ctrl))
+ return -EINVAL;
+ prob_updates = ctrl->p_cur.p;
+ vp9_ctx->cur.tx_mode = prob_updates->tx_mode;
+
+ /*
+ * vp9 stuff
+ *
+ * by this point the userspace has done all parts of 6.2 uncompressed_header()
+ * except this fragment:
+ * if ( FrameIsIntra || error_resilient_mode ) {
+ * setup_past_independence ( )
+ * if ( frame_type == KEY_FRAME || error_resilient_mode == 1 ||
+ * reset_frame_context == 3 ) {
+ * for ( i = 0; i < 4; i ++ ) {
+ * save_probs( i )
+ * }
+ * } else if ( reset_frame_context == 2 ) {
+ * save_probs( frame_context_idx )
+ * }
+ * frame_context_idx = 0
+ * }
+ */
+ fctx_idx = v4l2_vp9_reset_frame_ctx(*dec_params, vp9_ctx->frame_context);
+ vp9_ctx->cur.frame_context_idx = fctx_idx;
+
+ /* 6.1 frame(sz): load_probs() and load_probs2() */
+ vp9_ctx->probability_tables = vp9_ctx->frame_context[fctx_idx];
+
+ /*
+ * The userspace has also performed 6.3 compressed_header(), but handling the
+ * probs in a special way. All probs which need updating, except MV-related,
+ * have been read from the bitstream and translated through inv_map_table[],
+ * but no 6.3.6 inv_recenter_nonneg(v, m) has been performed. The values passed
+ * by userspace are either translated values (there are no 0 values in
+ * inv_map_table[]), or zero to indicate no update. All MV-related probs which need
+ * updating have been read from the bitstream and (mv_prob << 1) | 1 has been
+ * performed. The values passed by userspace are either new values
+ * to replace old ones (the above mentioned shift and bitwise or never result in
+ * a zero) or zero to indicate no update.
+ * fw_update_probs() performs actual probs updates or leaves probs as-is
+ * for values for which a zero was passed from userspace.
+ */
+ v4l2_vp9_fw_update_probs(&vp9_ctx->probability_tables, prob_updates, *dec_params);
+
+ return 0;
+}
+
+static size_t chroma_offset(const struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ int bytes_per_pixel = dec_params->bit_depth == 8 ? 1 : 2;
+
+ return ctx->src_fmt.width * ctx->src_fmt.height * bytes_per_pixel;
+}
+
+static size_t mv_offset(const struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ size_t cr_offset = chroma_offset(ctx, dec_params);
+
+ return ALIGN((cr_offset * 3) / 2, G2_ALIGN);
+}
+
+static struct hantro_decoded_buffer *
+get_ref_buf(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *dst, u64 timestamp)
+{
+ struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
+ struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q;
+ struct vb2_buffer *buf;
+
+ /*
+ * If a ref is unused or invalid, address of current destination
+ * buffer is returned.
+ */
+ buf = vb2_find_buffer(cap_q, timestamp);
+ if (!buf)
+ buf = &dst->vb2_buf;
+
+ return vb2_to_hantro_decoded_buf(buf);
+}
+
+static void update_dec_buf_info(struct hantro_decoded_buffer *buf,
+ const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ buf->vp9.width = dec_params->frame_width_minus_1 + 1;
+ buf->vp9.height = dec_params->frame_height_minus_1 + 1;
+ buf->vp9.bit_depth = dec_params->bit_depth;
+}
+
+static void update_ctx_cur_info(struct hantro_vp9_dec_hw_ctx *vp9_ctx,
+ struct hantro_decoded_buffer *buf,
+ const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ vp9_ctx->cur.valid = true;
+ vp9_ctx->cur.reference_mode = dec_params->reference_mode;
+ vp9_ctx->cur.interpolation_filter = dec_params->interpolation_filter;
+ vp9_ctx->cur.flags = dec_params->flags;
+ vp9_ctx->cur.timestamp = buf->base.vb.vb2_buf.timestamp;
+}
+
+static void config_output(struct hantro_ctx *ctx,
+ struct hantro_decoded_buffer *dst,
+ const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ dma_addr_t luma_addr, chroma_addr, mv_addr;
+
+ hantro_reg_write(ctx->dev, &g2_out_dis, 0);
+ if (!ctx->dev->variant->legacy_regs)
+ hantro_reg_write(ctx->dev, &g2_output_format, 0);
+
+ luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
+ hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);
+
+ chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
+ hantro_write_addr(ctx->dev, G2_OUT_CHROMA_ADDR, chroma_addr);
+
+ mv_addr = luma_addr + mv_offset(ctx, dec_params);
+ hantro_write_addr(ctx->dev, G2_OUT_MV_ADDR, mv_addr);
+}
+
+struct hantro_vp9_ref_reg {
+ const struct hantro_reg width;
+ const struct hantro_reg height;
+ const struct hantro_reg hor_scale;
+ const struct hantro_reg ver_scale;
+ u32 y_base;
+ u32 c_base;
+};
+
+static void config_ref(struct hantro_ctx *ctx,
+ struct hantro_decoded_buffer *dst,
+ const struct hantro_vp9_ref_reg *ref_reg,
+ const struct v4l2_ctrl_vp9_frame *dec_params,
+ u64 ref_ts)
+{
+ struct hantro_decoded_buffer *buf;
+ dma_addr_t luma_addr, chroma_addr;
+ u32 refw, refh;
+
+ buf = get_ref_buf(ctx, &dst->base.vb, ref_ts);
+ refw = buf->vp9.width;
+ refh = buf->vp9.height;
+
+ hantro_reg_write(ctx->dev, &ref_reg->width, refw);
+ hantro_reg_write(ctx->dev, &ref_reg->height, refh);
+
+ hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) / dst->vp9.width);
+ hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) / dst->vp9.height);
+
+ luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
+ hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);
+
+ chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
+ hantro_write_addr(ctx->dev, ref_reg->c_base, chroma_addr);
+}
+
+static void config_ref_registers(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp9_frame *dec_params,
+ struct hantro_decoded_buffer *dst,
+ struct hantro_decoded_buffer *mv_ref)
+{
+ static const struct hantro_vp9_ref_reg ref_regs[] = {
+ {
+ /* Last */
+ .width = vp9_lref_width,
+ .height = vp9_lref_height,
+ .hor_scale = vp9_lref_hor_scale,
+ .ver_scale = vp9_lref_ver_scale,
+ .y_base = G2_REF_LUMA_ADDR(0),
+ .c_base = G2_REF_CHROMA_ADDR(0),
+ }, {
+ /* Golden */
+ .width = vp9_gref_width,
+ .height = vp9_gref_height,
+ .hor_scale = vp9_gref_hor_scale,
+ .ver_scale = vp9_gref_ver_scale,
+ .y_base = G2_REF_LUMA_ADDR(4),
+ .c_base = G2_REF_CHROMA_ADDR(4),
+ }, {
+ /* Altref */
+ .width = vp9_aref_width,
+ .height = vp9_aref_height,
+ .hor_scale = vp9_aref_hor_scale,
+ .ver_scale = vp9_aref_ver_scale,
+ .y_base = G2_REF_LUMA_ADDR(5),
+ .c_base = G2_REF_CHROMA_ADDR(5),
+ },
+ };
+ dma_addr_t mv_addr;
+
+ config_ref(ctx, dst, &ref_regs[0], dec_params, dec_params->last_frame_ts);
+ config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params->golden_frame_ts);
+ config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params->alt_frame_ts);
+
+ mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
+ mv_offset(ctx, dec_params);
+ hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
+
+ hantro_reg_write(ctx->dev, &vp9_last_sign_bias,
+ dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_LAST ? 1 : 0);
+
+ hantro_reg_write(ctx->dev, &vp9_gref_sign_bias,
+ dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_GOLDEN ? 1 : 0);
+
+ hantro_reg_write(ctx->dev, &vp9_aref_sign_bias,
+ dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_ALT ? 1 : 0);
+}
+
+static void recompute_tile_info(unsigned short *tile_info, unsigned int tiles, unsigned int sbs)
+{
+ int i;
+ unsigned int accumulated = 0;
+ unsigned int next_accumulated;
+
+ for (i = 1; i <= tiles; ++i) {
+ next_accumulated = i * sbs / tiles;
+ *tile_info++ = next_accumulated - accumulated;
+ accumulated = next_accumulated;
+ }
+}
+
+static void
+recompute_tile_rc_info(struct hantro_ctx *ctx,
+ unsigned int tile_r, unsigned int tile_c,
+ unsigned int sbs_r, unsigned int sbs_c)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+
+ recompute_tile_info(vp9_ctx->tile_r_info, tile_r, sbs_r);
+ recompute_tile_info(vp9_ctx->tile_c_info, tile_c, sbs_c);
+
+ vp9_ctx->last_tile_r = tile_r;
+ vp9_ctx->last_tile_c = tile_c;
+ vp9_ctx->last_sbs_r = sbs_r;
+ vp9_ctx->last_sbs_c = sbs_c;
+}
+
+static inline unsigned int first_tile_row(unsigned int tile_r, unsigned int sbs_r)
+{
+ if (tile_r == sbs_r + 1)
+ return 1;
+
+ if (tile_r == sbs_r + 2)
+ return 2;
+
+ return 0;
+}
+
+static void
+fill_tile_info(struct hantro_ctx *ctx,
+ unsigned int tile_r, unsigned int tile_c,
+ unsigned int sbs_r, unsigned int sbs_c,
+ unsigned short *tile_mem)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ unsigned int i, j;
+ bool first = true;
+
+ for (i = first_tile_row(tile_r, sbs_r); i < tile_r; ++i) {
+ unsigned short r_info = vp9_ctx->tile_r_info[i];
+
+ if (first) {
+ if (i > 0)
+ r_info += vp9_ctx->tile_r_info[0];
+ if (i == 2)
+ r_info += vp9_ctx->tile_r_info[1];
+ first = false;
+ }
+ for (j = 0; j < tile_c; ++j) {
+ *tile_mem++ = vp9_ctx->tile_c_info[j];
+ *tile_mem++ = r_info;
+ }
+ }
+}
+
+static void
+config_tiles(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp9_frame *dec_params,
+ struct hantro_decoded_buffer *dst)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ struct hantro_aux_buf *misc = &vp9_ctx->misc;
+ struct hantro_aux_buf *tile_edge = &vp9_ctx->tile_edge;
+ dma_addr_t addr;
+ unsigned short *tile_mem;
+ unsigned int rows, cols;
+
+ addr = misc->dma + vp9_ctx->tile_info_offset;
+ hantro_write_addr(ctx->dev, G2_TILE_SIZES_ADDR, addr);
+
+ tile_mem = misc->cpu + vp9_ctx->tile_info_offset;
+ if (dec_params->tile_cols_log2 || dec_params->tile_rows_log2) {
+ unsigned int tile_r = (1 << dec_params->tile_rows_log2);
+ unsigned int tile_c = (1 << dec_params->tile_cols_log2);
+ unsigned int sbs_r = hantro_vp9_num_sbs(dst->vp9.height);
+ unsigned int sbs_c = hantro_vp9_num_sbs(dst->vp9.width);
+
+ if (tile_r != vp9_ctx->last_tile_r || tile_c != vp9_ctx->last_tile_c ||
+ sbs_r != vp9_ctx->last_sbs_r || sbs_c != vp9_ctx->last_sbs_c)
+ recompute_tile_rc_info(ctx, tile_r, tile_c, sbs_r, sbs_c);
+
+ fill_tile_info(ctx, tile_r, tile_c, sbs_r, sbs_c, tile_mem);
+
+ cols = tile_c;
+ rows = tile_r;
+ hantro_reg_write(ctx->dev, &g2_tile_e, 1);
+ } else {
+ tile_mem[0] = hantro_vp9_num_sbs(dst->vp9.width);
+ tile_mem[1] = hantro_vp9_num_sbs(dst->vp9.height);
+
+ cols = 1;
+ rows = 1;
+ hantro_reg_write(ctx->dev, &g2_tile_e, 0);
+ }
+
+ if (ctx->dev->variant->legacy_regs) {
+ hantro_reg_write(ctx->dev, &g2_num_tile_cols_old, cols);
+ hantro_reg_write(ctx->dev, &g2_num_tile_rows_old, rows);
+ } else {
+ hantro_reg_write(ctx->dev, &g2_num_tile_cols, cols);
+ hantro_reg_write(ctx->dev, &g2_num_tile_rows, rows);
+ }
+
+ /* provide aux buffers even if no tiles are used */
+ addr = tile_edge->dma;
+ hantro_write_addr(ctx->dev, G2_TILE_FILTER_ADDR, addr);
+
+ addr = tile_edge->dma + vp9_ctx->bsd_ctrl_offset;
+ hantro_write_addr(ctx->dev, G2_TILE_BSD_ADDR, addr);
+}
+
+static void
+update_feat_and_flag(struct hantro_vp9_dec_hw_ctx *vp9_ctx,
+ const struct v4l2_vp9_segmentation *seg,
+ unsigned int feature,
+ unsigned int segid)
+{
+ u8 mask = V4L2_VP9_SEGMENT_FEATURE_ENABLED(feature);
+
+ vp9_ctx->feature_data[segid][feature] = seg->feature_data[segid][feature];
+ vp9_ctx->feature_enabled[segid] &= ~mask;
+ vp9_ctx->feature_enabled[segid] |= (seg->feature_enabled[segid] & mask);
+}
+
+static inline s16 clip3(s16 x, s16 y, s16 z)
+{
+ return (z < x) ? x : (z > y) ? y : z;
+}
+
+static s16 feat_val_clip3(s16 feat_val, s16 feature_data, bool absolute, u8 clip)
+{
+ if (absolute)
+ return feature_data;
+
+ return clip3(0, 255, feat_val + feature_data);
+}
+
+static void config_segment(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ const struct v4l2_vp9_segmentation *seg;
+ s16 feat_val;
+ unsigned char feat_id;
+ unsigned int segid;
+ bool segment_enabled, absolute, update_data;
+
+ static const struct hantro_reg seg_regs[8][V4L2_VP9_SEG_LVL_MAX] = {
+ { vp9_quant_seg0, vp9_filt_level_seg0, vp9_refpic_seg0, vp9_skip_seg0 },
+ { vp9_quant_seg1, vp9_filt_level_seg1, vp9_refpic_seg1, vp9_skip_seg1 },
+ { vp9_quant_seg2, vp9_filt_level_seg2, vp9_refpic_seg2, vp9_skip_seg2 },
+ { vp9_quant_seg3, vp9_filt_level_seg3, vp9_refpic_seg3, vp9_skip_seg3 },
+ { vp9_quant_seg4, vp9_filt_level_seg4, vp9_refpic_seg4, vp9_skip_seg4 },
+ { vp9_quant_seg5, vp9_filt_level_seg5, vp9_refpic_seg5, vp9_skip_seg5 },
+ { vp9_quant_seg6, vp9_filt_level_seg6, vp9_refpic_seg6, vp9_skip_seg6 },
+ { vp9_quant_seg7, vp9_filt_level_seg7, vp9_refpic_seg7, vp9_skip_seg7 },
+ };
+
+ segment_enabled = !!(dec_params->seg.flags & V4L2_VP9_SEGMENTATION_FLAG_ENABLED);
+ hantro_reg_write(ctx->dev, &vp9_segment_e, segment_enabled);
+ hantro_reg_write(ctx->dev, &vp9_segment_upd_e,
+ !!(dec_params->seg.flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP));
+ hantro_reg_write(ctx->dev, &vp9_segment_temp_upd_e,
+ !!(dec_params->seg.flags & V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
+
+ seg = &dec_params->seg;
+ absolute = !!(seg->flags & V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE);
+ update_data = !!(seg->flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_DATA);
+
+ for (segid = 0; segid < 8; ++segid) {
+ /* Quantizer segment feature */
+ feat_id = V4L2_VP9_SEG_LVL_ALT_Q;
+ feat_val = dec_params->quant.base_q_idx;
+ if (segment_enabled) {
+ if (update_data)
+ update_feat_and_flag(vp9_ctx, seg, feat_id, segid);
+ if (v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled, feat_id, segid))
+ feat_val = feat_val_clip3(feat_val,
+ vp9_ctx->feature_data[segid][feat_id],
+ absolute, 255);
+ }
+ hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val);
+
+ /* Loop filter segment feature */
+ feat_id = V4L2_VP9_SEG_LVL_ALT_L;
+ feat_val = dec_params->lf.level;
+ if (segment_enabled) {
+ if (update_data)
+ update_feat_and_flag(vp9_ctx, seg, feat_id, segid);
+ if (v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled, feat_id, segid))
+ feat_val = feat_val_clip3(feat_val,
+ vp9_ctx->feature_data[segid][feat_id],
+ absolute, 63);
+ }
+ hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val);
+
+ /* Reference frame segment feature */
+ feat_id = V4L2_VP9_SEG_LVL_REF_FRAME;
+ feat_val = 0;
+ if (segment_enabled) {
+ if (update_data)
+ update_feat_and_flag(vp9_ctx, seg, feat_id, segid);
+ if (!(dec_params->flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME) &&
+ v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled, feat_id, segid))
+ feat_val = vp9_ctx->feature_data[segid][feat_id] + 1;
+ }
+ hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val);
+
+ /* Skip segment feature */
+ feat_id = V4L2_VP9_SEG_LVL_SKIP;
+ feat_val = 0;
+ if (segment_enabled) {
+ if (update_data)
+ update_feat_and_flag(vp9_ctx, seg, feat_id, segid);
+ feat_val = v4l2_vp9_seg_feat_enabled(vp9_ctx->feature_enabled,
+ feat_id, segid) ? 1 : 0;
+ }
+ hantro_reg_write(ctx->dev, &seg_regs[segid][feat_id], feat_val);
+ }
+}
+
+static void config_loop_filter(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ bool d = dec_params->lf.flags & V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED;
+
+ hantro_reg_write(ctx->dev, &vp9_filt_level, dec_params->lf.level);
+ hantro_reg_write(ctx->dev, &g2_out_filtering_dis, dec_params->lf.level == 0);
+ hantro_reg_write(ctx->dev, &vp9_filt_sharpness, dec_params->lf.sharpness);
+
+ hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_0, d ? dec_params->lf.ref_deltas[0] : 0);
+ hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_1, d ? dec_params->lf.ref_deltas[1] : 0);
+ hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_2, d ? dec_params->lf.ref_deltas[2] : 0);
+ hantro_reg_write(ctx->dev, &vp9_filt_ref_adj_3, d ? dec_params->lf.ref_deltas[3] : 0);
+ hantro_reg_write(ctx->dev, &vp9_filt_mb_adj_0, d ? dec_params->lf.mode_deltas[0] : 0);
+ hantro_reg_write(ctx->dev, &vp9_filt_mb_adj_1, d ? dec_params->lf.mode_deltas[1] : 0);
+}
+
+static void config_picture_dimensions(struct hantro_ctx *ctx, struct hantro_decoded_buffer *dst)
+{
+ u32 pic_w_4x4, pic_h_4x4;
+
+ hantro_reg_write(ctx->dev, &g2_pic_width_in_cbs, (dst->vp9.width + 7) / 8);
+ hantro_reg_write(ctx->dev, &g2_pic_height_in_cbs, (dst->vp9.height + 7) / 8);
+ pic_w_4x4 = roundup(dst->vp9.width, 8) >> 2;
+ pic_h_4x4 = roundup(dst->vp9.height, 8) >> 2;
+ hantro_reg_write(ctx->dev, &g2_pic_width_4x4, pic_w_4x4);
+ hantro_reg_write(ctx->dev, &g2_pic_height_4x4, pic_h_4x4);
+}
+
+static void
+config_bit_depth(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ if (ctx->dev->variant->legacy_regs) {
+ hantro_reg_write(ctx->dev, &g2_bit_depth_y, dec_params->bit_depth);
+ hantro_reg_write(ctx->dev, &g2_bit_depth_c, dec_params->bit_depth);
+ hantro_reg_write(ctx->dev, &g2_pix_shift, 0);
+ } else {
+ hantro_reg_write(ctx->dev, &g2_bit_depth_y_minus8, dec_params->bit_depth - 8);
+ hantro_reg_write(ctx->dev, &g2_bit_depth_c_minus8, dec_params->bit_depth - 8);
+ }
+}
+
+static inline bool is_lossless(const struct v4l2_vp9_quantization *quant)
+{
+ return quant->base_q_idx == 0 && quant->delta_q_uv_ac == 0 &&
+ quant->delta_q_uv_dc == 0 && quant->delta_q_y_dc == 0;
+}
+
+static void
+config_quant(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ hantro_reg_write(ctx->dev, &vp9_qp_delta_y_dc, dec_params->quant.delta_q_y_dc);
+ hantro_reg_write(ctx->dev, &vp9_qp_delta_ch_dc, dec_params->quant.delta_q_uv_dc);
+ hantro_reg_write(ctx->dev, &vp9_qp_delta_ch_ac, dec_params->quant.delta_q_uv_ac);
+ hantro_reg_write(ctx->dev, &vp9_lossless_e, is_lossless(&dec_params->quant));
+}
+
+static u32
+hantro_interp_filter_from_v4l2(unsigned int interpolation_filter)
+{
+ switch (interpolation_filter) {
+ case V4L2_VP9_INTERP_FILTER_EIGHTTAP:
+ return 0x1;
+ case V4L2_VP9_INTERP_FILTER_EIGHTTAP_SMOOTH:
+ return 0;
+ case V4L2_VP9_INTERP_FILTER_EIGHTTAP_SHARP:
+ return 0x2;
+ case V4L2_VP9_INTERP_FILTER_BILINEAR:
+ return 0x3;
+ case V4L2_VP9_INTERP_FILTER_SWITCHABLE:
+ return 0x4;
+ }
+
+ return 0;
+}
+
+static void
+config_others(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params,
+ bool intra_only, bool resolution_change)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+
+ hantro_reg_write(ctx->dev, &g2_idr_pic_e, intra_only);
+
+ hantro_reg_write(ctx->dev, &vp9_transform_mode, vp9_ctx->cur.tx_mode);
+
+ hantro_reg_write(ctx->dev, &vp9_mcomp_filt_type, intra_only ?
+ 0 : hantro_interp_filter_from_v4l2(dec_params->interpolation_filter));
+
+ hantro_reg_write(ctx->dev, &vp9_high_prec_mv_e,
+ !!(dec_params->flags & V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV));
+
+ hantro_reg_write(ctx->dev, &vp9_comp_pred_mode, dec_params->reference_mode);
+
+ hantro_reg_write(ctx->dev, &g2_tempor_mvp_e,
+ !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) &&
+ !(dec_params->flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME) &&
+ !(vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME) &&
+ !(dec_params->flags & V4L2_VP9_FRAME_FLAG_INTRA_ONLY) &&
+ !resolution_change &&
+ vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_SHOW_FRAME
+ );
+
+ hantro_reg_write(ctx->dev, &g2_write_mvs_e,
+ !(dec_params->flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME));
+}
+
+static void
+config_compound_reference(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ u32 comp_fixed_ref, comp_var_ref[2];
+ bool last_ref_frame_sign_bias;
+ bool golden_ref_frame_sign_bias;
+ bool alt_ref_frame_sign_bias;
+ bool comp_ref_allowed = 0;
+
+ comp_fixed_ref = 0;
+ comp_var_ref[0] = 0;
+ comp_var_ref[1] = 0;
+
+ last_ref_frame_sign_bias = dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_LAST;
+ golden_ref_frame_sign_bias = dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_GOLDEN;
+ alt_ref_frame_sign_bias = dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_ALT;
+
+ /* 6.3.12 Frame reference mode syntax */
+ comp_ref_allowed |= golden_ref_frame_sign_bias != last_ref_frame_sign_bias;
+ comp_ref_allowed |= alt_ref_frame_sign_bias != last_ref_frame_sign_bias;
+
+ if (comp_ref_allowed) {
+ if (last_ref_frame_sign_bias ==
+ golden_ref_frame_sign_bias) {
+ comp_fixed_ref = ALTREF_FRAME;
+ comp_var_ref[0] = LAST_FRAME;
+ comp_var_ref[1] = GOLDEN_FRAME;
+ } else if (last_ref_frame_sign_bias ==
+ alt_ref_frame_sign_bias) {
+ comp_fixed_ref = GOLDEN_FRAME;
+ comp_var_ref[0] = LAST_FRAME;
+ comp_var_ref[1] = ALTREF_FRAME;
+ } else {
+ comp_fixed_ref = LAST_FRAME;
+ comp_var_ref[0] = GOLDEN_FRAME;
+ comp_var_ref[1] = ALTREF_FRAME;
+ }
+ }
+
+ hantro_reg_write(ctx->dev, &vp9_comp_pred_fixed_ref, comp_fixed_ref);
+ hantro_reg_write(ctx->dev, &vp9_comp_pred_var_ref0, comp_var_ref[0]);
+ hantro_reg_write(ctx->dev, &vp9_comp_pred_var_ref1, comp_var_ref[1]);
+}
+
+#define INNER_LOOP \
+do { \
+ for (m = 0; m < ARRAY_SIZE(adaptive->coef[0][0][0][0]); ++m) { \
+ memcpy(adaptive->coef[i][j][k][l][m], \
+ probs->coef[i][j][k][l][m], \
+ sizeof(probs->coef[i][j][k][l][m])); \
+ \
+ adaptive->coef[i][j][k][l][m][3] = 0; \
+ } \
+} while (0)
+
+static void config_probs(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ struct hantro_aux_buf *misc = &vp9_ctx->misc;
+ struct hantro_g2_all_probs *all_probs = misc->cpu;
+ struct hantro_g2_probs *adaptive;
+ struct hantro_g2_mv_probs *mv;
+ const struct v4l2_vp9_segmentation *seg = &dec_params->seg;
+ const struct v4l2_vp9_frame_context *probs = &vp9_ctx->probability_tables;
+ int i, j, k, l, m;
+
+ for (i = 0; i < ARRAY_SIZE(all_probs->kf_y_mode_prob); ++i)
+ for (j = 0; j < ARRAY_SIZE(all_probs->kf_y_mode_prob[0]); ++j) {
+ memcpy(all_probs->kf_y_mode_prob[i][j],
+ v4l2_vp9_kf_y_mode_prob[i][j],
+ ARRAY_SIZE(all_probs->kf_y_mode_prob[i][j]));
+
+ all_probs->kf_y_mode_prob_tail[i][j][0] =
+ v4l2_vp9_kf_y_mode_prob[i][j][8];
+ }
+
+ memcpy(all_probs->mb_segment_tree_probs, seg->tree_probs,
+ sizeof(all_probs->mb_segment_tree_probs));
+
+ memcpy(all_probs->segment_pred_probs, seg->pred_probs,
+ sizeof(all_probs->segment_pred_probs));
+
+ for (i = 0; i < ARRAY_SIZE(all_probs->kf_uv_mode_prob); ++i) {
+ memcpy(all_probs->kf_uv_mode_prob[i], v4l2_vp9_kf_uv_mode_prob[i],
+ ARRAY_SIZE(all_probs->kf_uv_mode_prob[i]));
+
+ all_probs->kf_uv_mode_prob_tail[i][0] = v4l2_vp9_kf_uv_mode_prob[i][8];
+ }
+
+ adaptive = &all_probs->probs;
+
+ for (i = 0; i < ARRAY_SIZE(adaptive->inter_mode); ++i) {
+ memcpy(adaptive->inter_mode[i], probs->inter_mode[i],
+ ARRAY_SIZE(probs->inter_mode[i]));
+
+ adaptive->inter_mode[i][3] = 0;
+ }
+
+ memcpy(adaptive->is_inter, probs->is_inter, sizeof(adaptive->is_inter));
+
+ for (i = 0; i < ARRAY_SIZE(adaptive->uv_mode); ++i) {
+ memcpy(adaptive->uv_mode[i], probs->uv_mode[i],
+ sizeof(adaptive->uv_mode[i]));
+ adaptive->uv_mode_tail[i][0] = probs->uv_mode[i][8];
+ }
+
+ memcpy(adaptive->tx8, probs->tx8, sizeof(adaptive->tx8));
+ memcpy(adaptive->tx16, probs->tx16, sizeof(adaptive->tx16));
+ memcpy(adaptive->tx32, probs->tx32, sizeof(adaptive->tx32));
+
+ for (i = 0; i < ARRAY_SIZE(adaptive->y_mode); ++i) {
+ memcpy(adaptive->y_mode[i], probs->y_mode[i],
+ ARRAY_SIZE(adaptive->y_mode[i]));
+
+ adaptive->y_mode_tail[i][0] = probs->y_mode[i][8];
+ }
+
+ for (i = 0; i < ARRAY_SIZE(adaptive->partition[0]); ++i) {
+ memcpy(adaptive->partition[0][i], v4l2_vp9_kf_partition_probs[i],
+ sizeof(v4l2_vp9_kf_partition_probs[i]));
+
+ adaptive->partition[0][i][3] = 0;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(adaptive->partition[1]); ++i) {
+ memcpy(adaptive->partition[1][i], probs->partition[i],
+ sizeof(probs->partition[i]));
+
+ adaptive->partition[1][i][3] = 0;
+ }
+
+ memcpy(adaptive->interp_filter, probs->interp_filter,
+ sizeof(adaptive->interp_filter));
+
+ memcpy(adaptive->comp_mode, probs->comp_mode, sizeof(adaptive->comp_mode));
+
+ memcpy(adaptive->skip, probs->skip, sizeof(adaptive->skip));
+
+ mv = &adaptive->mv;
+
+ memcpy(mv->joint, probs->mv.joint, sizeof(mv->joint));
+ memcpy(mv->sign, probs->mv.sign, sizeof(mv->sign));
+ memcpy(mv->class0_bit, probs->mv.class0_bit, sizeof(mv->class0_bit));
+ memcpy(mv->fr, probs->mv.fr, sizeof(mv->fr));
+ memcpy(mv->class0_hp, probs->mv.class0_hp, sizeof(mv->class0_hp));
+ memcpy(mv->hp, probs->mv.hp, sizeof(mv->hp));
+ memcpy(mv->classes, probs->mv.classes, sizeof(mv->classes));
+ memcpy(mv->class0_fr, probs->mv.class0_fr, sizeof(mv->class0_fr));
+ memcpy(mv->bits, probs->mv.bits, sizeof(mv->bits));
+
+ memcpy(adaptive->single_ref, probs->single_ref, sizeof(adaptive->single_ref));
+
+ memcpy(adaptive->comp_ref, probs->comp_ref, sizeof(adaptive->comp_ref));
+
+ for (i = 0; i < ARRAY_SIZE(adaptive->coef); ++i)
+ for (j = 0; j < ARRAY_SIZE(adaptive->coef[0]); ++j)
+ for (k = 0; k < ARRAY_SIZE(adaptive->coef[0][0]); ++k)
+ for (l = 0; l < ARRAY_SIZE(adaptive->coef[0][0][0]); ++l)
+ INNER_LOOP;
+
+ hantro_write_addr(ctx->dev, G2_VP9_PROBS_ADDR, misc->dma);
+}
+
+static void config_counts(struct hantro_ctx *ctx)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
+ struct hantro_aux_buf *misc = &vp9_dec->misc;
+ dma_addr_t addr = misc->dma + vp9_dec->ctx_counters_offset;
+
+ hantro_write_addr(ctx->dev, G2_VP9_CTX_COUNT_ADDR, addr);
+}
+
+static void config_seg_map(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp9_frame *dec_params,
+ bool intra_only, bool update_map)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ struct hantro_aux_buf *segment_map = &vp9_ctx->segment_map;
+ dma_addr_t addr;
+
+ if (intra_only ||
+ (dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT)) {
+ memset(segment_map->cpu, 0, segment_map->size);
+ memset(vp9_ctx->feature_data, 0, sizeof(vp9_ctx->feature_data));
+ memset(vp9_ctx->feature_enabled, 0, sizeof(vp9_ctx->feature_enabled));
+ }
+
+ addr = segment_map->dma + vp9_ctx->active_segment * vp9_ctx->segment_map_size;
+ hantro_write_addr(ctx->dev, G2_VP9_SEGMENT_READ_ADDR, addr);
+
+ addr = segment_map->dma + (1 - vp9_ctx->active_segment) * vp9_ctx->segment_map_size;
+ hantro_write_addr(ctx->dev, G2_VP9_SEGMENT_WRITE_ADDR, addr);
+
+ if (update_map)
+ vp9_ctx->active_segment = 1 - vp9_ctx->active_segment;
+}
+
+static void
+config_source(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params,
+ struct vb2_v4l2_buffer *vb2_src)
+{
+ dma_addr_t stream_base, tmp_addr;
+ unsigned int headres_size;
+ u32 src_len, start_bit, src_buf_len;
+
+ headres_size = dec_params->uncompressed_header_size
+ + dec_params->compressed_header_size;
+
+ stream_base = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
+
+ tmp_addr = stream_base + headres_size;
+ if (ctx->dev->variant->legacy_regs)
+ hantro_write_addr(ctx->dev, G2_STREAM_ADDR, (tmp_addr & ~0xf));
+ else
+ hantro_write_addr(ctx->dev, G2_STREAM_ADDR, stream_base);
+
+ start_bit = (tmp_addr & 0xf) * 8;
+ hantro_reg_write(ctx->dev, &g2_start_bit, start_bit);
+
+ src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
+ src_len += start_bit / 8 - headres_size;
+ hantro_reg_write(ctx->dev, &g2_stream_len, src_len);
+
+ if (!ctx->dev->variant->legacy_regs) {
+ tmp_addr &= ~0xf;
+ hantro_reg_write(ctx->dev, &g2_strm_start_offset, tmp_addr - stream_base);
+ src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
+ hantro_reg_write(ctx->dev, &g2_strm_buffer_len, src_buf_len);
+ }
+}
+
+static void
+config_registers(struct hantro_ctx *ctx, const struct v4l2_ctrl_vp9_frame *dec_params,
+ struct vb2_v4l2_buffer *vb2_src, struct vb2_v4l2_buffer *vb2_dst)
+{
+ struct hantro_decoded_buffer *dst, *last, *mv_ref;
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ const struct v4l2_vp9_segmentation *seg;
+ bool intra_only, resolution_change;
+
+ /* vp9 stuff */
+ dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
+
+ if (vp9_ctx->last.valid)
+ last = get_ref_buf(ctx, &dst->base.vb, vp9_ctx->last.timestamp);
+ else
+ last = dst;
+
+ update_dec_buf_info(dst, dec_params);
+ update_ctx_cur_info(vp9_ctx, dst, dec_params);
+ seg = &dec_params->seg;
+
+ intra_only = !!(dec_params->flags &
+ (V4L2_VP9_FRAME_FLAG_KEY_FRAME |
+ V4L2_VP9_FRAME_FLAG_INTRA_ONLY));
+
+ if (!intra_only &&
+ !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) &&
+ vp9_ctx->last.valid)
+ mv_ref = last;
+ else
+ mv_ref = dst;
+
+ resolution_change = dst->vp9.width != last->vp9.width ||
+ dst->vp9.height != last->vp9.height;
+
+ /* configure basic registers */
+ hantro_reg_write(ctx->dev, &g2_mode, VP9_DEC_MODE);
+ if (!ctx->dev->variant->legacy_regs) {
+ hantro_reg_write(ctx->dev, &g2_strm_swap, 0xf);
+ hantro_reg_write(ctx->dev, &g2_dirmv_swap, 0xf);
+ hantro_reg_write(ctx->dev, &g2_compress_swap, 0xf);
+ hantro_reg_write(ctx->dev, &g2_ref_compress_bypass, 1);
+ } else {
+ hantro_reg_write(ctx->dev, &g2_strm_swap_old, 0x1f);
+ hantro_reg_write(ctx->dev, &g2_pic_swap, 0x10);
+ hantro_reg_write(ctx->dev, &g2_dirmv_swap_old, 0x10);
+ hantro_reg_write(ctx->dev, &g2_tab0_swap_old, 0x10);
+ hantro_reg_write(ctx->dev, &g2_tab1_swap_old, 0x10);
+ hantro_reg_write(ctx->dev, &g2_tab2_swap_old, 0x10);
+ hantro_reg_write(ctx->dev, &g2_tab3_swap_old, 0x10);
+ hantro_reg_write(ctx->dev, &g2_rscan_swap, 0x10);
+ }
+ hantro_reg_write(ctx->dev, &g2_buswidth, BUS_WIDTH_128);
+ hantro_reg_write(ctx->dev, &g2_max_burst, 16);
+ hantro_reg_write(ctx->dev, &g2_apf_threshold, 8);
+ hantro_reg_write(ctx->dev, &g2_clk_gate_e, 1);
+ hantro_reg_write(ctx->dev, &g2_max_cb_size, 6);
+ hantro_reg_write(ctx->dev, &g2_min_cb_size, 3);
+ if (ctx->dev->variant->double_buffer)
+ hantro_reg_write(ctx->dev, &g2_double_buffer_e, 1);
+
+ config_output(ctx, dst, dec_params);
+
+ if (!intra_only)
+ config_ref_registers(ctx, dec_params, dst, mv_ref);
+
+ config_tiles(ctx, dec_params, dst);
+ config_segment(ctx, dec_params);
+ config_loop_filter(ctx, dec_params);
+ config_picture_dimensions(ctx, dst);
+ config_bit_depth(ctx, dec_params);
+ config_quant(ctx, dec_params);
+ config_others(ctx, dec_params, intra_only, resolution_change);
+ config_compound_reference(ctx, dec_params);
+ config_probs(ctx, dec_params);
+ config_counts(ctx);
+ config_seg_map(ctx, dec_params, intra_only,
+ seg->flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP);
+ config_source(ctx, dec_params, vb2_src);
+}
+
+int hantro_g2_vp9_dec_run(struct hantro_ctx *ctx)
+{
+ const struct v4l2_ctrl_vp9_frame *decode_params;
+ struct vb2_v4l2_buffer *src;
+ struct vb2_v4l2_buffer *dst;
+ int ret;
+
+ hantro_g2_check_idle(ctx->dev);
+
+ ret = start_prepare_run(ctx, &decode_params);
+ if (ret) {
+ hantro_end_prepare_run(ctx);
+ return ret;
+ }
+
+ src = hantro_get_src_buf(ctx);
+ dst = hantro_get_dst_buf(ctx);
+
+ config_registers(ctx, decode_params, src, dst);
+
+ hantro_end_prepare_run(ctx);
+
+ vdpu_write(ctx->dev, G2_REG_INTERRUPT_DEC_E, G2_REG_INTERRUPT);
+
+ return 0;
+}
+
+#define copy_tx_and_skip(p1, p2) \
+do { \
+ memcpy((p1)->tx8, (p2)->tx8, sizeof((p1)->tx8)); \
+ memcpy((p1)->tx16, (p2)->tx16, sizeof((p1)->tx16)); \
+ memcpy((p1)->tx32, (p2)->tx32, sizeof((p1)->tx32)); \
+ memcpy((p1)->skip, (p2)->skip, sizeof((p1)->skip)); \
+} while (0)
+
+void hantro_g2_vp9_dec_done(struct hantro_ctx *ctx)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ unsigned int fctx_idx;
+
+ if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX))
+ goto out_update_last;
+
+ fctx_idx = vp9_ctx->cur.frame_context_idx;
+
+ if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE)) {
+ /* error_resilient_mode == 0 && frame_parallel_decoding_mode == 0 */
+ struct v4l2_vp9_frame_context *probs = &vp9_ctx->probability_tables;
+ bool frame_is_intra = vp9_ctx->cur.flags &
+ (V4L2_VP9_FRAME_FLAG_KEY_FRAME | V4L2_VP9_FRAME_FLAG_INTRA_ONLY);
+ struct tx_and_skip {
+ u8 tx8[2][1];
+ u8 tx16[2][2];
+ u8 tx32[2][3];
+ u8 skip[3];
+ } _tx_skip, *tx_skip = &_tx_skip;
+ struct v4l2_vp9_frame_symbol_counts *counts;
+ struct symbol_counts *hantro_cnts;
+ u32 tx16p[2][4];
+ int i;
+
+ /* buffer the forward-updated TX and skip probs */
+ if (frame_is_intra)
+ copy_tx_and_skip(tx_skip, probs);
+
+ /* 6.1.2 refresh_probs(): load_probs() and load_probs2() */
+ *probs = vp9_ctx->frame_context[fctx_idx];
+
+ /* if FrameIsIntra then undo the effect of load_probs2() */
+ if (frame_is_intra)
+ copy_tx_and_skip(probs, tx_skip);
+
+ counts = &vp9_ctx->cnts;
+ hantro_cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset;
+ for (i = 0; i < ARRAY_SIZE(tx16p); ++i) {
+ memcpy(tx16p[i],
+ hantro_cnts->tx16x16_count[i],
+ sizeof(hantro_cnts->tx16x16_count[0]));
+ tx16p[i][3] = 0;
+ }
+ counts->tx16p = &tx16p;
+
+ v4l2_vp9_adapt_coef_probs(probs, counts,
+ !vp9_ctx->last.valid ||
+ vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME,
+ frame_is_intra);
+
+ if (!frame_is_intra) {
+ /* load_probs2() already done */
+ u32 mv_mode[7][4];
+
+ for (i = 0; i < ARRAY_SIZE(mv_mode); ++i) {
+ mv_mode[i][0] = hantro_cnts->inter_mode_counts[i][1][0];
+ mv_mode[i][1] = hantro_cnts->inter_mode_counts[i][2][0];
+ mv_mode[i][2] = hantro_cnts->inter_mode_counts[i][0][0];
+ mv_mode[i][3] = hantro_cnts->inter_mode_counts[i][2][1];
+ }
+ counts->mv_mode = &mv_mode;
+ v4l2_vp9_adapt_noncoef_probs(&vp9_ctx->probability_tables, counts,
+ vp9_ctx->cur.reference_mode,
+ vp9_ctx->cur.interpolation_filter,
+ vp9_ctx->cur.tx_mode, vp9_ctx->cur.flags);
+ }
+ }
+
+ vp9_ctx->frame_context[fctx_idx] = vp9_ctx->probability_tables;
+
+out_update_last:
+ vp9_ctx->last = vp9_ctx->cur;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c b/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c
new file mode 100644
index 000000000..12d69503d
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_h1_jpeg_enc.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ */
+
+#include <asm/unaligned.h>
+#include <media/v4l2-mem2mem.h>
+#include "hantro_jpeg.h"
+#include "hantro.h"
+#include "hantro_v4l2.h"
+#include "hantro_hw.h"
+#include "hantro_h1_regs.h"
+
+#define H1_JPEG_QUANT_TABLE_COUNT 16
+
+static void hantro_h1_set_src_img_ctrl(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx)
+{
+ u32 overfill_r, overfill_b;
+ u32 reg;
+
+ /*
+ * The format width and height are already macroblock aligned
+ * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination
+ * format width and height can be further modified by
+ * .vidioc_s_selection(), and the width is 4-aligned.
+ */
+ overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width;
+ overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height;
+
+ reg = H1_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width)
+ | H1_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4)
+ | H1_REG_IN_IMG_CTRL_OVRFLB(overfill_b)
+ | H1_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
+ vepu_write_relaxed(vpu, reg, H1_REG_IN_IMG_CTRL);
+}
+
+static void hantro_h1_jpeg_enc_set_buffers(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx,
+ struct vb2_buffer *src_buf,
+ struct vb2_buffer *dst_buf)
+{
+ struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
+ dma_addr_t src[3];
+ u32 size_left;
+
+ size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size;
+ if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size))
+ size_left = 0;
+
+ WARN_ON(pix_fmt->num_planes > 3);
+
+ vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
+ ctx->vpu_dst_fmt->header_size,
+ H1_REG_ADDR_OUTPUT_STREAM);
+ vepu_write_relaxed(vpu, size_left, H1_REG_STR_BUF_LIMIT);
+
+ if (pix_fmt->num_planes == 1) {
+ src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ /* single plane formats we supported are all interlaced */
+ vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0);
+ } else if (pix_fmt->num_planes == 2) {
+ src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
+ vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0);
+ vepu_write_relaxed(vpu, src[1], H1_REG_ADDR_IN_PLANE_1);
+ } else {
+ src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
+ src[2] = vb2_dma_contig_plane_dma_addr(src_buf, 2);
+ vepu_write_relaxed(vpu, src[0], H1_REG_ADDR_IN_PLANE_0);
+ vepu_write_relaxed(vpu, src[1], H1_REG_ADDR_IN_PLANE_1);
+ vepu_write_relaxed(vpu, src[2], H1_REG_ADDR_IN_PLANE_2);
+ }
+}
+
+static void
+hantro_h1_jpeg_enc_set_qtable(struct hantro_dev *vpu,
+ unsigned char *luma_qtable,
+ unsigned char *chroma_qtable)
+{
+ u32 reg, i;
+ __be32 *luma_qtable_p;
+ __be32 *chroma_qtable_p;
+
+ luma_qtable_p = (__be32 *)luma_qtable;
+ chroma_qtable_p = (__be32 *)chroma_qtable;
+
+ /*
+ * Quantization table registers must be written in contiguous blocks.
+ * DO NOT collapse the below two "for" loops into one.
+ */
+ for (i = 0; i < H1_JPEG_QUANT_TABLE_COUNT; i++) {
+ reg = get_unaligned_be32(&luma_qtable_p[i]);
+ vepu_write_relaxed(vpu, reg, H1_REG_JPEG_LUMA_QUAT(i));
+ }
+
+ for (i = 0; i < H1_JPEG_QUANT_TABLE_COUNT; i++) {
+ reg = get_unaligned_be32(&chroma_qtable_p[i]);
+ vepu_write_relaxed(vpu, reg, H1_REG_JPEG_CHROMA_QUAT(i));
+ }
+}
+
+int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *src_buf, *dst_buf;
+ struct hantro_jpeg_ctx jpeg_ctx;
+ u32 reg;
+
+ src_buf = hantro_get_src_buf(ctx);
+ dst_buf = hantro_get_dst_buf(ctx);
+
+ hantro_start_prepare_run(ctx);
+
+ memset(&jpeg_ctx, 0, sizeof(jpeg_ctx));
+ jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
+ jpeg_ctx.width = ctx->dst_fmt.width;
+ jpeg_ctx.height = ctx->dst_fmt.height;
+ jpeg_ctx.quality = ctx->jpeg_quality;
+ hantro_jpeg_header_assemble(&jpeg_ctx);
+
+ /* Switch to JPEG encoder mode before writing registers */
+ vepu_write_relaxed(vpu, H1_REG_ENC_CTRL_ENC_MODE_JPEG,
+ H1_REG_ENC_CTRL);
+
+ hantro_h1_set_src_img_ctrl(vpu, ctx);
+ hantro_h1_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf,
+ &dst_buf->vb2_buf);
+ hantro_h1_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable,
+ jpeg_ctx.hw_chroma_qtable);
+
+ reg = H1_REG_AXI_CTRL_OUTPUT_SWAP16
+ | H1_REG_AXI_CTRL_INPUT_SWAP16
+ | H1_REG_AXI_CTRL_BURST_LEN(16)
+ | H1_REG_AXI_CTRL_OUTPUT_SWAP32
+ | H1_REG_AXI_CTRL_INPUT_SWAP32
+ | H1_REG_AXI_CTRL_OUTPUT_SWAP8
+ | H1_REG_AXI_CTRL_INPUT_SWAP8;
+ /* Make sure that all registers are written at this point. */
+ vepu_write(vpu, reg, H1_REG_AXI_CTRL);
+
+ reg = H1_REG_ENC_CTRL_WIDTH(MB_WIDTH(ctx->src_fmt.width))
+ | H1_REG_ENC_CTRL_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
+ | H1_REG_ENC_CTRL_ENC_MODE_JPEG
+ | H1_REG_ENC_PIC_INTRA
+ | H1_REG_ENC_CTRL_EN_BIT;
+
+ hantro_end_prepare_run(ctx);
+
+ vepu_write(vpu, reg, H1_REG_ENC_CTRL);
+
+ return 0;
+}
+
+void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ u32 bytesused = vepu_read(vpu, H1_REG_STR_BUF_LIMIT) / 8;
+ struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
+
+ vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
+ ctx->vpu_dst_fmt->header_size + bytesused);
+}
diff --git a/drivers/media/platform/verisilicon/hantro_h1_regs.h b/drivers/media/platform/verisilicon/hantro_h1_regs.h
new file mode 100644
index 000000000..30e7e7b92
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_h1_regs.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright 2018 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ */
+
+#ifndef HANTRO_H1_REGS_H_
+#define HANTRO_H1_REGS_H_
+
+/* Encoder registers. */
+#define H1_REG_INTERRUPT 0x004
+#define H1_REG_INTERRUPT_FRAME_RDY BIT(2)
+#define H1_REG_INTERRUPT_DIS_BIT BIT(1)
+#define H1_REG_INTERRUPT_BIT BIT(0)
+#define H1_REG_AXI_CTRL 0x008
+#define H1_REG_AXI_CTRL_OUTPUT_SWAP16 BIT(15)
+#define H1_REG_AXI_CTRL_INPUT_SWAP16 BIT(14)
+#define H1_REG_AXI_CTRL_BURST_LEN(x) ((x) << 8)
+#define H1_REG_AXI_CTRL_GATE_BIT BIT(4)
+#define H1_REG_AXI_CTRL_OUTPUT_SWAP32 BIT(3)
+#define H1_REG_AXI_CTRL_INPUT_SWAP32 BIT(2)
+#define H1_REG_AXI_CTRL_OUTPUT_SWAP8 BIT(1)
+#define H1_REG_AXI_CTRL_INPUT_SWAP8 BIT(0)
+#define H1_REG_ADDR_OUTPUT_STREAM 0x014
+#define H1_REG_ADDR_OUTPUT_CTRL 0x018
+#define H1_REG_ADDR_REF_LUMA 0x01c
+#define H1_REG_ADDR_REF_CHROMA 0x020
+#define H1_REG_ADDR_REC_LUMA 0x024
+#define H1_REG_ADDR_REC_CHROMA 0x028
+#define H1_REG_ADDR_IN_PLANE_0 0x02c
+#define H1_REG_ADDR_IN_PLANE_1 0x030
+#define H1_REG_ADDR_IN_PLANE_2 0x034
+#define H1_REG_ENC_CTRL 0x038
+#define H1_REG_ENC_CTRL_TIMEOUT_EN BIT(31)
+#define H1_REG_ENC_CTRL_NAL_MODE_BIT BIT(29)
+#define H1_REG_ENC_CTRL_WIDTH(w) ((w) << 19)
+#define H1_REG_ENC_CTRL_HEIGHT(h) ((h) << 10)
+#define H1_REG_ENC_PIC_INTER (0x0 << 3)
+#define H1_REG_ENC_PIC_INTRA (0x1 << 3)
+#define H1_REG_ENC_PIC_MVCINTER (0x2 << 3)
+#define H1_REG_ENC_CTRL_ENC_MODE_H264 (0x3 << 1)
+#define H1_REG_ENC_CTRL_ENC_MODE_JPEG (0x2 << 1)
+#define H1_REG_ENC_CTRL_ENC_MODE_VP8 (0x1 << 1)
+#define H1_REG_ENC_CTRL_EN_BIT BIT(0)
+#define H1_REG_IN_IMG_CTRL 0x03c
+#define H1_REG_IN_IMG_CTRL_ROW_LEN(x) ((x) << 12)
+#define H1_REG_IN_IMG_CTRL_OVRFLR_D4(x) ((x) << 10)
+#define H1_REG_IN_IMG_CTRL_OVRFLB(x) ((x) << 6)
+#define H1_REG_IN_IMG_CTRL_FMT(x) ((x) << 2)
+#define H1_REG_ENC_CTRL0 0x040
+#define H1_REG_ENC_CTRL0_INIT_QP(x) ((x) << 26)
+#define H1_REG_ENC_CTRL0_SLICE_ALPHA(x) ((x) << 22)
+#define H1_REG_ENC_CTRL0_SLICE_BETA(x) ((x) << 18)
+#define H1_REG_ENC_CTRL0_CHROMA_QP_OFFSET(x) ((x) << 13)
+#define H1_REG_ENC_CTRL0_FILTER_DIS(x) ((x) << 5)
+#define H1_REG_ENC_CTRL0_IDR_PICID(x) ((x) << 1)
+#define H1_REG_ENC_CTRL0_CONSTR_INTRA_PRED BIT(0)
+#define H1_REG_ENC_CTRL1 0x044
+#define H1_REG_ENC_CTRL1_PPS_ID(x) ((x) << 24)
+#define H1_REG_ENC_CTRL1_INTRA_PRED_MODE(x) ((x) << 16)
+#define H1_REG_ENC_CTRL1_FRAME_NUM(x) ((x))
+#define H1_REG_ENC_CTRL2 0x048
+#define H1_REG_ENC_CTRL2_DEBLOCKING_FILETER_MODE(x) ((x) << 30)
+#define H1_REG_ENC_CTRL2_H264_SLICE_SIZE(x) ((x) << 23)
+#define H1_REG_ENC_CTRL2_DISABLE_QUARTER_PIXMV BIT(22)
+#define H1_REG_ENC_CTRL2_TRANS8X8_MODE_EN BIT(21)
+#define H1_REG_ENC_CTRL2_CABAC_INIT_IDC(x) ((x) << 19)
+#define H1_REG_ENC_CTRL2_ENTROPY_CODING_MODE BIT(18)
+#define H1_REG_ENC_CTRL2_H264_INTER4X4_MODE BIT(17)
+#define H1_REG_ENC_CTRL2_H264_STREAM_MODE BIT(16)
+#define H1_REG_ENC_CTRL2_INTRA16X16_MODE(x) ((x))
+#define H1_REG_ENC_CTRL3 0x04c
+#define H1_REG_ENC_CTRL3_MUTIMV_EN BIT(30)
+#define H1_REG_ENC_CTRL3_MV_PENALTY_1_4P(x) ((x) << 20)
+#define H1_REG_ENC_CTRL3_MV_PENALTY_4P(x) ((x) << 10)
+#define H1_REG_ENC_CTRL3_MV_PENALTY_1P(x) ((x))
+#define H1_REG_ENC_CTRL4 0x050
+#define H1_REG_ENC_CTRL4_MV_PENALTY_16X8_8X16(x) ((x) << 20)
+#define H1_REG_ENC_CTRL4_MV_PENALTY_8X8(x) ((x) << 10)
+#define H1_REG_ENC_CTRL4_8X4_4X8(x) ((x))
+#define H1_REG_ENC_CTRL5 0x054
+#define H1_REG_ENC_CTRL5_MACROBLOCK_PENALTY(x) ((x) << 24)
+#define H1_REG_ENC_CTRL5_COMPLETE_SLICES(x) ((x) << 16)
+#define H1_REG_ENC_CTRL5_INTER_MODE(x) ((x))
+#define H1_REG_STR_HDR_REM_MSB 0x058
+#define H1_REG_STR_HDR_REM_LSB 0x05c
+#define H1_REG_STR_BUF_LIMIT 0x060
+#define H1_REG_MAD_CTRL 0x064
+#define H1_REG_MAD_CTRL_QP_ADJUST(x) ((x) << 28)
+#define H1_REG_MAD_CTRL_MAD_THREDHOLD(x) ((x) << 22)
+#define H1_REG_MAD_CTRL_QP_SUM_DIV2(x) ((x))
+#define H1_REG_ADDR_VP8_PROB_CNT 0x068
+#define H1_REG_QP_VAL 0x06c
+#define H1_REG_QP_VAL_LUM(x) ((x) << 26)
+#define H1_REG_QP_VAL_MAX(x) ((x) << 20)
+#define H1_REG_QP_VAL_MIN(x) ((x) << 14)
+#define H1_REG_QP_VAL_CHECKPOINT_DISTAN(x) ((x))
+#define H1_REG_VP8_QP_VAL(i) (0x06c + ((i) * 0x4))
+#define H1_REG_CHECKPOINT(i) (0x070 + ((i) * 0x4))
+#define H1_REG_CHECKPOINT_CHECK0(x) (((x) & 0xffff))
+#define H1_REG_CHECKPOINT_CHECK1(x) (((x) & 0xffff) << 16)
+#define H1_REG_CHECKPOINT_RESULT(x) ((((x) >> (16 - 16 \
+ * (i & 1))) & 0xffff) \
+ * 32)
+#define H1_REG_CHKPT_WORD_ERR(i) (0x084 + ((i) * 0x4))
+#define H1_REG_CHKPT_WORD_ERR_CHK0(x) (((x) & 0xffff))
+#define H1_REG_CHKPT_WORD_ERR_CHK1(x) (((x) & 0xffff) << 16)
+#define H1_REG_VP8_BOOL_ENC 0x08c
+#define H1_REG_CHKPT_DELTA_QP 0x090
+#define H1_REG_CHKPT_DELTA_QP_CHK0(x) (((x) & 0x0f) << 0)
+#define H1_REG_CHKPT_DELTA_QP_CHK1(x) (((x) & 0x0f) << 4)
+#define H1_REG_CHKPT_DELTA_QP_CHK2(x) (((x) & 0x0f) << 8)
+#define H1_REG_CHKPT_DELTA_QP_CHK3(x) (((x) & 0x0f) << 12)
+#define H1_REG_CHKPT_DELTA_QP_CHK4(x) (((x) & 0x0f) << 16)
+#define H1_REG_CHKPT_DELTA_QP_CHK5(x) (((x) & 0x0f) << 20)
+#define H1_REG_CHKPT_DELTA_QP_CHK6(x) (((x) & 0x0f) << 24)
+#define H1_REG_VP8_CTRL0 0x090
+#define H1_REG_RLC_CTRL 0x094
+#define H1_REG_RLC_CTRL_STR_OFFS_SHIFT 23
+#define H1_REG_RLC_CTRL_STR_OFFS_MASK (0x3f << 23)
+#define H1_REG_RLC_CTRL_RLC_SUM(x) ((x))
+#define H1_REG_MB_CTRL 0x098
+#define H1_REG_MB_CNT_OUT(x) (((x) & 0xffff))
+#define H1_REG_MB_CNT_SET(x) (((x) & 0xffff) << 16)
+#define H1_REG_ADDR_NEXT_PIC 0x09c
+#define H1_REG_JPEG_LUMA_QUAT(i) (0x100 + ((i) * 0x4))
+#define H1_REG_JPEG_CHROMA_QUAT(i) (0x140 + ((i) * 0x4))
+#define H1_REG_STABILIZATION_OUTPUT 0x0A0
+#define H1_REG_ADDR_CABAC_TBL 0x0cc
+#define H1_REG_ADDR_MV_OUT 0x0d0
+#define H1_REG_RGB_YUV_COEFF(i) (0x0d4 + ((i) * 0x4))
+#define H1_REG_RGB_MASK_MSB 0x0dc
+#define H1_REG_INTRA_AREA_CTRL 0x0e0
+#define H1_REG_CIR_INTRA_CTRL 0x0e4
+#define H1_REG_INTRA_SLICE_BITMAP(i) (0x0e8 + ((i) * 0x4))
+#define H1_REG_ADDR_VP8_DCT_PART(i) (0x0e8 + ((i) * 0x4))
+#define H1_REG_FIRST_ROI_AREA 0x0f0
+#define H1_REG_SECOND_ROI_AREA 0x0f4
+#define H1_REG_MVC_CTRL 0x0f8
+#define H1_REG_MVC_CTRL_MV16X16_FAVOR(x) ((x) << 28)
+#define H1_REG_VP8_INTRA_PENALTY(i) (0x100 + ((i) * 0x4))
+#define H1_REG_ADDR_VP8_SEG_MAP 0x11c
+#define H1_REG_VP8_SEG_QP(i) (0x120 + ((i) * 0x4))
+#define H1_REG_DMV_4P_1P_PENALTY(i) (0x180 + ((i) * 0x4))
+#define H1_REG_DMV_4P_1P_PENALTY_BIT(x, i) ((x) << (i) * 8)
+#define H1_REG_DMV_QPEL_PENALTY(i) (0x200 + ((i) * 0x4))
+#define H1_REG_DMV_QPEL_PENALTY_BIT(x, i) ((x) << (i) * 8)
+#define H1_REG_VP8_CTRL1 0x280
+#define H1_REG_VP8_BIT_COST_GOLDEN 0x284
+#define H1_REG_VP8_LOOP_FLT_DELTA(i) (0x288 + ((i) * 0x4))
+
+#endif /* HANTRO_H1_REGS_H_ */
diff --git a/drivers/media/platform/verisilicon/hantro_h264.c b/drivers/media/platform/verisilicon/hantro_h264.c
new file mode 100644
index 000000000..4e9a0ecf5
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_h264.c
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Rockchip RK3288 VPU codec driver
+ *
+ * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
+ * Hertz Wong <hertz.wong@rock-chips.com>
+ * Herman Chen <herman.chen@rock-chips.com>
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * Tomasz Figa <tfiga@chromium.org>
+ */
+
+#include <linux/types.h>
+#include <media/v4l2-h264.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro.h"
+#include "hantro_hw.h"
+
+/* Size with u32 units. */
+#define CABAC_INIT_BUFFER_SIZE (460 * 2)
+#define POC_BUFFER_SIZE 34
+#define SCALING_LIST_SIZE (6 * 16 + 2 * 64)
+
+/*
+ * For valid and long term reference marking, index are reversed, so bit 31
+ * indicates the status of the picture 0.
+ */
+#define REF_BIT(i) BIT(32 - 1 - (i))
+
+/* Data structure describing auxiliary buffer format. */
+struct hantro_h264_dec_priv_tbl {
+ u32 cabac_table[CABAC_INIT_BUFFER_SIZE];
+ u32 poc[POC_BUFFER_SIZE];
+ u8 scaling_list[SCALING_LIST_SIZE];
+};
+
+/*
+ * Constant CABAC table.
+ * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c
+ * in https://chromium.googlesource.com/chromiumos/third_party/kernel,
+ * chromeos-3.14 branch.
+ */
+static const u32 h264_cabac_table[] = {
+ 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137,
+ 0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72,
+ 0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a,
+ 0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d,
+ 0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e,
+ 0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13,
+ 0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357,
+ 0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47,
+ 0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09,
+ 0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e,
+ 0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37,
+ 0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4,
+ 0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8,
+ 0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47,
+ 0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27,
+ 0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41,
+ 0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360,
+ 0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261,
+ 0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a,
+ 0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424,
+ 0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955,
+ 0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f,
+ 0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37,
+ 0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17,
+ 0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32,
+ 0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0,
+ 0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00,
+ 0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d,
+ 0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259,
+ 0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1,
+ 0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37,
+ 0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256,
+ 0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03,
+ 0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968,
+ 0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541,
+ 0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68,
+ 0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637,
+ 0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a,
+ 0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948,
+ 0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053,
+ 0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39,
+ 0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45,
+ 0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f,
+ 0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24,
+ 0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038,
+ 0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f,
+ 0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e,
+ 0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e,
+ 0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24,
+ 0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000,
+ 0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b,
+ 0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940,
+ 0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852,
+ 0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a,
+ 0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f,
+ 0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228,
+ 0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e,
+ 0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943,
+ 0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e,
+ 0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f,
+ 0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28,
+ 0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe,
+ 0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6,
+ 0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00,
+ 0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f,
+ 0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728,
+ 0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947,
+ 0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41,
+ 0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923,
+ 0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951,
+ 0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3,
+ 0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1,
+ 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429,
+ 0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902,
+ 0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b,
+ 0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51,
+ 0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f,
+ 0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60,
+ 0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e,
+ 0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737,
+ 0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e,
+ 0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848,
+ 0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d,
+ 0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546,
+ 0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e,
+ 0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f,
+ 0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb,
+ 0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7,
+ 0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12,
+ 0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d,
+ 0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42,
+ 0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b,
+ 0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a,
+ 0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704,
+ 0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e,
+ 0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f,
+ 0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045,
+ 0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42,
+ 0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25,
+ 0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa,
+ 0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef,
+ 0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a,
+ 0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4,
+ 0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15,
+ 0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920,
+ 0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743,
+ 0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a,
+ 0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952,
+ 0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d,
+ 0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39,
+ 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10,
+ 0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39,
+ 0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539,
+ 0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f,
+ 0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c,
+ 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758,
+ 0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a,
+ 0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b,
+ 0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52,
+ 0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a,
+ 0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934,
+ 0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b,
+ 0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51,
+ 0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a,
+ 0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a,
+ 0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d,
+ 0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311,
+ 0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24,
+ 0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873,
+ 0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443,
+ 0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946,
+ 0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753,
+ 0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657,
+ 0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178,
+ 0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d,
+ 0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240,
+ 0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46,
+ 0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d,
+ 0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8,
+ 0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f,
+ 0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f,
+ 0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a,
+ 0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b,
+ 0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447,
+ 0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc,
+ 0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b,
+ 0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46,
+ 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107,
+ 0x1f0c2517, 0x1f261440
+};
+
+static void
+assemble_scaling_list(struct hantro_ctx *ctx)
+{
+ const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
+ const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling;
+ const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
+ const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4);
+ const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]);
+ const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]);
+ struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
+ u32 *dst = (u32 *)tbl->scaling_list;
+ const u32 *src;
+ int i, j;
+
+ if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
+ return;
+
+ for (i = 0; i < num_list_4x4; i++) {
+ src = (u32 *)&scaling->scaling_list_4x4[i];
+ for (j = 0; j < list_len_4x4 / 4; j++)
+ *dst++ = swab32(src[j]);
+ }
+
+ /* Only Intra/Inter Y lists */
+ for (i = 0; i < 2; i++) {
+ src = (u32 *)&scaling->scaling_list_8x8[i];
+ for (j = 0; j < list_len_8x8 / 4; j++)
+ *dst++ = swab32(src[j]);
+ }
+}
+
+static void prepare_table(struct hantro_ctx *ctx)
+{
+ const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
+ const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
+ const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
+ struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
+ const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
+ u32 dpb_longterm = 0;
+ u32 dpb_valid = 0;
+ int i;
+
+ for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
+ tbl->poc[i * 2] = dpb[i].top_field_order_cnt;
+ tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt;
+
+ if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
+ continue;
+
+ /*
+ * Set up bit maps of valid and long term DPBs.
+ * NOTE: The bits are reversed, i.e. MSb is DPB 0. For frame
+ * decoding, bit 31 to 15 are used, while for field decoding,
+ * all bits are used, with bit 31 being a top field, 30 a bottom
+ * field and so on.
+ */
+ if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) {
+ if (dpb[i].fields & V4L2_H264_TOP_FIELD_REF)
+ dpb_valid |= REF_BIT(i * 2);
+
+ if (dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)
+ dpb_valid |= REF_BIT(i * 2 + 1);
+
+ if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) {
+ dpb_longterm |= REF_BIT(i * 2);
+ dpb_longterm |= REF_BIT(i * 2 + 1);
+ }
+ } else {
+ dpb_valid |= REF_BIT(i);
+
+ if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
+ dpb_longterm |= REF_BIT(i);
+ }
+ }
+ ctx->h264_dec.dpb_valid = dpb_valid;
+ ctx->h264_dec.dpb_longterm = dpb_longterm;
+
+ if ((dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) ||
+ !(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) {
+ tbl->poc[32] = ctx->h264_dec.cur_poc;
+ tbl->poc[33] = 0;
+ } else {
+ tbl->poc[32] = dec_param->top_field_order_cnt;
+ tbl->poc[33] = dec_param->bottom_field_order_cnt;
+ }
+
+ assemble_scaling_list(ctx);
+}
+
+static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
+ const struct v4l2_h264_dpb_entry *b)
+{
+ return a->reference_ts == b->reference_ts;
+}
+
+static void update_dpb(struct hantro_ctx *ctx)
+{
+ const struct v4l2_ctrl_h264_decode_params *dec_param;
+ DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
+ DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
+ unsigned int i, j;
+
+ dec_param = ctx->h264_dec.ctrls.decode;
+
+ /* Disable all entries by default. */
+ for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
+ ctx->h264_dec.dpb[i].flags = 0;
+
+ /* Try to match new DPB entries with existing ones by their POCs. */
+ for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
+ const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
+
+ if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
+ continue;
+
+ /*
+ * To cut off some comparisons, iterate only on target DPB
+ * entries which are not used yet.
+ */
+ for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) {
+ struct v4l2_h264_dpb_entry *cdpb;
+
+ cdpb = &ctx->h264_dec.dpb[j];
+ if (!dpb_entry_match(cdpb, ndpb))
+ continue;
+
+ *cdpb = *ndpb;
+ set_bit(j, used);
+ break;
+ }
+
+ if (j == ARRAY_SIZE(ctx->h264_dec.dpb))
+ set_bit(i, new);
+ }
+
+ /* For entries that could not be matched, use remaining free slots. */
+ for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
+ const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
+ struct v4l2_h264_dpb_entry *cdpb;
+
+ /*
+ * Both arrays are of the same sizes, so there is no way
+ * we can end up with no space in target array, unless
+ * something is buggy.
+ */
+ j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb));
+ if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb)))
+ return;
+
+ cdpb = &ctx->h264_dec.dpb[j];
+ *cdpb = *ndpb;
+ set_bit(j, used);
+ }
+}
+
+dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
+ unsigned int dpb_idx)
+{
+ struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
+ dma_addr_t dma_addr = 0;
+ s32 cur_poc = ctx->h264_dec.cur_poc;
+ u32 flags;
+
+ if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
+ dma_addr = hantro_get_ref(ctx, dpb[dpb_idx].reference_ts);
+
+ if (!dma_addr) {
+ struct vb2_v4l2_buffer *dst_buf;
+ struct vb2_buffer *buf;
+
+ /*
+ * If a DPB entry is unused or invalid, address of current
+ * destination buffer is returned.
+ */
+ dst_buf = hantro_get_dst_buf(ctx);
+ buf = &dst_buf->vb2_buf;
+ dma_addr = hantro_get_dec_buf_addr(ctx, buf);
+ }
+
+ flags = dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD ? 0x2 : 0;
+ flags |= abs(dpb[dpb_idx].top_field_order_cnt - cur_poc) <
+ abs(dpb[dpb_idx].bottom_field_order_cnt - cur_poc) ?
+ 0x1 : 0;
+
+ return dma_addr | flags;
+}
+
+u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx)
+{
+ const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx];
+
+ if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
+ return 0;
+ return dpb->frame_num;
+}
+
+/*
+ * Removes all references with the same parity as the current picture from the
+ * reference list. The remaining list will have references with the opposite
+ * parity. This is effectively a deduplication of references since each buffer
+ * stores two fields. For this reason, each buffer is found twice in the
+ * reference list.
+ *
+ * This technique has been chosen through trial and error. This simple approach
+ * resulted in the highest conformance score. Note that this method may suffer
+ * worse quality in the case an opposite reference frame has been lost. If this
+ * becomes a problem in the future, it should be possible to add a preprocessing
+ * to identify un-paired fields and avoid removing them.
+ */
+static void deduplicate_reflist(struct v4l2_h264_reflist_builder *b,
+ struct v4l2_h264_reference *reflist)
+{
+ int write_idx = 0;
+ int i;
+
+ if (b->cur_pic_fields == V4L2_H264_FRAME_REF) {
+ write_idx = b->num_valid;
+ goto done;
+ }
+
+ for (i = 0; i < b->num_valid; i++) {
+ if (!(b->cur_pic_fields == reflist[i].fields)) {
+ reflist[write_idx++] = reflist[i];
+ continue;
+ }
+ }
+
+done:
+ /* Should not happen unless we have a bug in the reflist builder. */
+ if (WARN_ON(write_idx > 16))
+ write_idx = 16;
+
+ /* Clear the remaining, some streams fails otherwise */
+ for (; write_idx < 16; write_idx++)
+ reflist[write_idx].index = 15;
+}
+
+int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
+{
+ struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec;
+ struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls;
+ struct v4l2_h264_reflist_builder reflist_builder;
+
+ hantro_start_prepare_run(ctx);
+
+ ctrls->scaling =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
+ if (WARN_ON(!ctrls->scaling))
+ return -EINVAL;
+
+ ctrls->decode =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
+ if (WARN_ON(!ctrls->decode))
+ return -EINVAL;
+
+ ctrls->sps =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SPS);
+ if (WARN_ON(!ctrls->sps))
+ return -EINVAL;
+
+ ctrls->pps =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_PPS);
+ if (WARN_ON(!ctrls->pps))
+ return -EINVAL;
+
+ /* Update the DPB with new refs. */
+ update_dpb(ctx);
+
+ /* Build the P/B{0,1} ref lists. */
+ v4l2_h264_init_reflist_builder(&reflist_builder, ctrls->decode,
+ ctrls->sps, ctx->h264_dec.dpb);
+ h264_ctx->cur_poc = reflist_builder.cur_pic_order_count;
+
+ /* Prepare data in memory. */
+ prepare_table(ctx);
+
+ v4l2_h264_build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
+ v4l2_h264_build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
+ h264_ctx->reflists.b1);
+
+ /*
+ * Reduce ref lists to at most 16 entries, Hantro hardware will deduce
+ * the actual picture lists in field through the dpb_valid,
+ * dpb_longterm bitmap along with the current frame parity.
+ */
+ if (reflist_builder.cur_pic_fields != V4L2_H264_FRAME_REF) {
+ deduplicate_reflist(&reflist_builder, h264_ctx->reflists.p);
+ deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b0);
+ deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b1);
+ }
+
+ return 0;
+}
+
+void hantro_h264_dec_exit(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
+ struct hantro_aux_buf *priv = &h264_dec->priv;
+
+ dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma);
+}
+
+int hantro_h264_dec_init(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
+ struct hantro_aux_buf *priv = &h264_dec->priv;
+ struct hantro_h264_dec_priv_tbl *tbl;
+
+ priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma,
+ GFP_KERNEL);
+ if (!priv->cpu)
+ return -ENOMEM;
+
+ priv->size = sizeof(*tbl);
+ tbl = priv->cpu;
+ memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table));
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_hevc.c b/drivers/media/platform/verisilicon/hantro_hevc.c
new file mode 100644
index 000000000..9383fb708
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_hevc.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU HEVC codec driver
+ *
+ * Copyright (C) 2020 Safran Passenger Innovations LLC
+ */
+
+#include <linux/types.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro.h"
+#include "hantro_hw.h"
+
+#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
+/*
+ * BSD control data of current picture at tile border
+ * 128 bits per 4x4 tile = 128/(8*4) bytes per row
+ */
+#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */
+/* tile border coefficients of filter */
+#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */
+
+#define SCALING_LIST_SIZE (16 * 64)
+
+#define MAX_TILE_COLS 20
+#define MAX_TILE_ROWS 22
+
+void hantro_hevc_ref_init(struct hantro_ctx *ctx)
+{
+ struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+
+ hevc_dec->ref_bufs_used = 0;
+}
+
+dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
+ s32 poc)
+{
+ struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+ int i;
+
+ /* Find the reference buffer in already known ones */
+ for (i = 0; i < NUM_REF_PICTURES; i++) {
+ if (hevc_dec->ref_bufs_poc[i] == poc) {
+ hevc_dec->ref_bufs_used |= 1 << i;
+ return hevc_dec->ref_bufs[i].dma;
+ }
+ }
+
+ return 0;
+}
+
+int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr)
+{
+ struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+ int i;
+
+ /* Add a new reference buffer */
+ for (i = 0; i < NUM_REF_PICTURES; i++) {
+ if (!(hevc_dec->ref_bufs_used & 1 << i)) {
+ hevc_dec->ref_bufs_used |= 1 << i;
+ hevc_dec->ref_bufs_poc[i] = poc;
+ hevc_dec->ref_bufs[i].dma = addr;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int tile_buffer_reallocate(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+ const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+ unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
+ unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
+ unsigned int size;
+
+ if (num_tile_cols <= 1 ||
+ num_tile_cols <= hevc_dec->num_tile_cols_allocated)
+ return 0;
+
+ /* Need to reallocate due to tiles passed via PPS */
+ if (hevc_dec->tile_filter.cpu) {
+ dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
+ hevc_dec->tile_filter.cpu,
+ hevc_dec->tile_filter.dma);
+ hevc_dec->tile_filter.cpu = NULL;
+ }
+
+ if (hevc_dec->tile_sao.cpu) {
+ dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
+ hevc_dec->tile_sao.cpu,
+ hevc_dec->tile_sao.dma);
+ hevc_dec->tile_sao.cpu = NULL;
+ }
+
+ if (hevc_dec->tile_bsd.cpu) {
+ dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
+ hevc_dec->tile_bsd.cpu,
+ hevc_dec->tile_bsd.dma);
+ hevc_dec->tile_bsd.cpu = NULL;
+ }
+
+ size = (VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
+ hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
+ &hevc_dec->tile_filter.dma,
+ GFP_KERNEL);
+ if (!hevc_dec->tile_filter.cpu)
+ goto err_free_tile_buffers;
+ hevc_dec->tile_filter.size = size;
+
+ size = (VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1) * ctx->bit_depth) / 8;
+ hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
+ &hevc_dec->tile_sao.dma,
+ GFP_KERNEL);
+ if (!hevc_dec->tile_sao.cpu)
+ goto err_free_tile_buffers;
+ hevc_dec->tile_sao.size = size;
+
+ size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
+ hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
+ &hevc_dec->tile_bsd.dma,
+ GFP_KERNEL);
+ if (!hevc_dec->tile_bsd.cpu)
+ goto err_free_tile_buffers;
+ hevc_dec->tile_bsd.size = size;
+
+ hevc_dec->num_tile_cols_allocated = num_tile_cols;
+
+ return 0;
+
+err_free_tile_buffers:
+ if (hevc_dec->tile_filter.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
+ hevc_dec->tile_filter.cpu,
+ hevc_dec->tile_filter.dma);
+ hevc_dec->tile_filter.cpu = NULL;
+
+ if (hevc_dec->tile_sao.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
+ hevc_dec->tile_sao.cpu,
+ hevc_dec->tile_sao.dma);
+ hevc_dec->tile_sao.cpu = NULL;
+
+ if (hevc_dec->tile_bsd.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
+ hevc_dec->tile_bsd.cpu,
+ hevc_dec->tile_bsd.dma);
+ hevc_dec->tile_bsd.cpu = NULL;
+
+ return -ENOMEM;
+}
+
+static int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps)
+{
+ /*
+ * for tile pixel format check if the width and height match
+ * hardware constraints
+ */
+ if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) {
+ if (ctx->dst_fmt.width !=
+ ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width))
+ return -EINVAL;
+
+ if (ctx->dst_fmt.height !=
+ ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
+{
+ struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
+ struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
+ int ret;
+
+ hantro_start_prepare_run(ctx);
+
+ ctrls->decode_params =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_DECODE_PARAMS);
+ if (WARN_ON(!ctrls->decode_params))
+ return -EINVAL;
+
+ ctrls->scaling =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SCALING_MATRIX);
+ if (WARN_ON(!ctrls->scaling))
+ return -EINVAL;
+
+ ctrls->sps =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_SPS);
+ if (WARN_ON(!ctrls->sps))
+ return -EINVAL;
+
+ ret = hantro_hevc_validate_sps(ctx, ctrls->sps);
+ if (ret)
+ return ret;
+
+ ctrls->pps =
+ hantro_get_ctrl(ctx, V4L2_CID_STATELESS_HEVC_PPS);
+ if (WARN_ON(!ctrls->pps))
+ return -EINVAL;
+
+ ret = tile_buffer_reallocate(ctx);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+void hantro_hevc_dec_exit(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+
+ if (hevc_dec->tile_sizes.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
+ hevc_dec->tile_sizes.cpu,
+ hevc_dec->tile_sizes.dma);
+ hevc_dec->tile_sizes.cpu = NULL;
+
+ if (hevc_dec->scaling_lists.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->scaling_lists.size,
+ hevc_dec->scaling_lists.cpu,
+ hevc_dec->scaling_lists.dma);
+ hevc_dec->scaling_lists.cpu = NULL;
+
+ if (hevc_dec->tile_filter.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
+ hevc_dec->tile_filter.cpu,
+ hevc_dec->tile_filter.dma);
+ hevc_dec->tile_filter.cpu = NULL;
+
+ if (hevc_dec->tile_sao.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
+ hevc_dec->tile_sao.cpu,
+ hevc_dec->tile_sao.dma);
+ hevc_dec->tile_sao.cpu = NULL;
+
+ if (hevc_dec->tile_bsd.cpu)
+ dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
+ hevc_dec->tile_bsd.cpu,
+ hevc_dec->tile_bsd.dma);
+ hevc_dec->tile_bsd.cpu = NULL;
+}
+
+int hantro_hevc_dec_init(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+ unsigned int size;
+
+ memset(hevc_dec, 0, sizeof(*hevc_dec));
+
+ /*
+ * Maximum number of tiles times width and height (2 bytes each),
+ * rounding up to next 16 bytes boundary + one extra 16 byte
+ * chunk (HW guys wanted to have this).
+ */
+ size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
+ hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
+ &hevc_dec->tile_sizes.dma,
+ GFP_KERNEL);
+ if (!hevc_dec->tile_sizes.cpu)
+ return -ENOMEM;
+
+ hevc_dec->tile_sizes.size = size;
+
+ hevc_dec->scaling_lists.cpu = dma_alloc_coherent(vpu->dev, SCALING_LIST_SIZE,
+ &hevc_dec->scaling_lists.dma,
+ GFP_KERNEL);
+ if (!hevc_dec->scaling_lists.cpu)
+ return -ENOMEM;
+
+ hevc_dec->scaling_lists.size = SCALING_LIST_SIZE;
+
+ hantro_hevc_ref_init(ctx);
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
new file mode 100644
index 000000000..e83f0c523
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_hw.h
@@ -0,0 +1,441 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright 2018 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ */
+
+#ifndef HANTRO_HW_H_
+#define HANTRO_HW_H_
+
+#include <linux/interrupt.h>
+#include <linux/v4l2-controls.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-vp9.h>
+#include <media/videobuf2-core.h>
+
+#define DEC_8190_ALIGN_MASK 0x07U
+
+#define MB_DIM 16
+#define TILE_MB_DIM 4
+#define MB_WIDTH(w) DIV_ROUND_UP(w, MB_DIM)
+#define MB_HEIGHT(h) DIV_ROUND_UP(h, MB_DIM)
+
+#define FMT_MIN_WIDTH 48
+#define FMT_MIN_HEIGHT 48
+#define FMT_HD_WIDTH 1280
+#define FMT_HD_HEIGHT 720
+#define FMT_FHD_WIDTH 1920
+#define FMT_FHD_HEIGHT 1088
+#define FMT_UHD_WIDTH 3840
+#define FMT_UHD_HEIGHT 2160
+#define FMT_4K_WIDTH 4096
+#define FMT_4K_HEIGHT 2304
+
+#define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
+
+struct hantro_dev;
+struct hantro_ctx;
+struct hantro_buf;
+struct hantro_variant;
+
+/**
+ * struct hantro_aux_buf - auxiliary DMA buffer for hardware data
+ *
+ * @cpu: CPU pointer to the buffer.
+ * @dma: DMA address of the buffer.
+ * @size: Size of the buffer.
+ * @attrs: Attributes of the DMA mapping.
+ */
+struct hantro_aux_buf {
+ void *cpu;
+ dma_addr_t dma;
+ size_t size;
+ unsigned long attrs;
+};
+
+/* Max. number of entries in the DPB (HW limitation). */
+#define HANTRO_H264_DPB_SIZE 16
+
+/**
+ * struct hantro_h264_dec_ctrls
+ *
+ * @decode: Decode params
+ * @scaling: Scaling info
+ * @sps: SPS info
+ * @pps: PPS info
+ */
+struct hantro_h264_dec_ctrls {
+ const struct v4l2_ctrl_h264_decode_params *decode;
+ const struct v4l2_ctrl_h264_scaling_matrix *scaling;
+ const struct v4l2_ctrl_h264_sps *sps;
+ const struct v4l2_ctrl_h264_pps *pps;
+};
+
+/**
+ * struct hantro_h264_dec_reflists
+ *
+ * @p: P reflist
+ * @b0: B0 reflist
+ * @b1: B1 reflist
+ */
+struct hantro_h264_dec_reflists {
+ struct v4l2_h264_reference p[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference b0[V4L2_H264_REF_LIST_LEN];
+ struct v4l2_h264_reference b1[V4L2_H264_REF_LIST_LEN];
+};
+
+/**
+ * struct hantro_h264_dec_hw_ctx
+ *
+ * @priv: Private auxiliary buffer for hardware.
+ * @dpb: DPB
+ * @reflists: P/B0/B1 reflists
+ * @ctrls: V4L2 controls attached to a run
+ * @dpb_longterm: DPB long-term
+ * @dpb_valid: DPB valid
+ * @cur_poc: Current picture order count
+ */
+struct hantro_h264_dec_hw_ctx {
+ struct hantro_aux_buf priv;
+ struct v4l2_h264_dpb_entry dpb[HANTRO_H264_DPB_SIZE];
+ struct hantro_h264_dec_reflists reflists;
+ struct hantro_h264_dec_ctrls ctrls;
+ u32 dpb_longterm;
+ u32 dpb_valid;
+ s32 cur_poc;
+};
+
+/**
+ * struct hantro_hevc_dec_ctrls
+ * @decode_params: Decode params
+ * @scaling: Scaling matrix
+ * @sps: SPS info
+ * @pps: PPS info
+ * @hevc_hdr_skip_length: the number of data (in bits) to skip in the
+ * slice segment header syntax after 'slice type'
+ * token
+ */
+struct hantro_hevc_dec_ctrls {
+ const struct v4l2_ctrl_hevc_decode_params *decode_params;
+ const struct v4l2_ctrl_hevc_scaling_matrix *scaling;
+ const struct v4l2_ctrl_hevc_sps *sps;
+ const struct v4l2_ctrl_hevc_pps *pps;
+ u32 hevc_hdr_skip_length;
+};
+
+/**
+ * struct hantro_hevc_dec_hw_ctx
+ * @tile_sizes: Tile sizes buffer
+ * @tile_filter: Tile vertical filter buffer
+ * @tile_sao: Tile SAO buffer
+ * @tile_bsd: Tile BSD control buffer
+ * @ref_bufs: Internal reference buffers
+ * @scaling_lists: Scaling lists buffer
+ * @ref_bufs_poc: Internal reference buffers picture order count
+ * @ref_bufs_used: Bitfield of used reference buffers
+ * @ctrls: V4L2 controls attached to a run
+ * @num_tile_cols_allocated: number of allocated tiles
+ */
+struct hantro_hevc_dec_hw_ctx {
+ struct hantro_aux_buf tile_sizes;
+ struct hantro_aux_buf tile_filter;
+ struct hantro_aux_buf tile_sao;
+ struct hantro_aux_buf tile_bsd;
+ struct hantro_aux_buf ref_bufs[NUM_REF_PICTURES];
+ struct hantro_aux_buf scaling_lists;
+ s32 ref_bufs_poc[NUM_REF_PICTURES];
+ u32 ref_bufs_used;
+ struct hantro_hevc_dec_ctrls ctrls;
+ unsigned int num_tile_cols_allocated;
+};
+
+/**
+ * struct hantro_mpeg2_dec_hw_ctx
+ *
+ * @qtable: Quantization table
+ */
+struct hantro_mpeg2_dec_hw_ctx {
+ struct hantro_aux_buf qtable;
+};
+
+/**
+ * struct hantro_vp8_dec_hw_ctx
+ *
+ * @segment_map: Segment map buffer.
+ * @prob_tbl: Probability table buffer.
+ */
+struct hantro_vp8_dec_hw_ctx {
+ struct hantro_aux_buf segment_map;
+ struct hantro_aux_buf prob_tbl;
+};
+
+/**
+ * struct hantro_vp9_frame_info
+ *
+ * @valid: frame info valid flag
+ * @frame_context_idx: index of frame context
+ * @reference_mode: inter prediction type
+ * @tx_mode: transform mode
+ * @interpolation_filter: filter selection for inter prediction
+ * @flags: frame flags
+ * @timestamp: frame timestamp
+ */
+struct hantro_vp9_frame_info {
+ u32 valid : 1;
+ u32 frame_context_idx : 2;
+ u32 reference_mode : 2;
+ u32 tx_mode : 3;
+ u32 interpolation_filter : 3;
+ u32 flags;
+ u64 timestamp;
+};
+
+#define MAX_SB_COLS 64
+#define MAX_SB_ROWS 34
+
+/**
+ * struct hantro_vp9_dec_hw_ctx
+ *
+ * @tile_edge: auxiliary DMA buffer for tile edge processing
+ * @segment_map: auxiliary DMA buffer for segment map
+ * @misc: auxiliary DMA buffer for tile info, probabilities and hw counters
+ * @cnts: vp9 library struct for abstracting hw counters access
+ * @probability_tables: VP9 probability tables implied by the spec
+ * @frame_context: VP9 frame contexts
+ * @cur: current frame information
+ * @last: last frame information
+ * @bsd_ctrl_offset: bsd offset into tile_edge
+ * @segment_map_size: size of segment map
+ * @ctx_counters_offset: hw counters offset into misc
+ * @tile_info_offset: tile info offset into misc
+ * @tile_r_info: per-tile information array
+ * @tile_c_info: per-tile information array
+ * @last_tile_r: last number of tile rows
+ * @last_tile_c: last number of tile cols
+ * @last_sbs_r: last number of superblock rows
+ * @last_sbs_c: last number of superblock cols
+ * @active_segment: number of active segment (alternating between 0 and 1)
+ * @feature_enabled: segmentation feature enabled flags
+ * @feature_data: segmentation feature data
+ */
+struct hantro_vp9_dec_hw_ctx {
+ struct hantro_aux_buf tile_edge;
+ struct hantro_aux_buf segment_map;
+ struct hantro_aux_buf misc;
+ struct v4l2_vp9_frame_symbol_counts cnts;
+ struct v4l2_vp9_frame_context probability_tables;
+ struct v4l2_vp9_frame_context frame_context[4];
+ struct hantro_vp9_frame_info cur;
+ struct hantro_vp9_frame_info last;
+
+ unsigned int bsd_ctrl_offset;
+ unsigned int segment_map_size;
+ unsigned int ctx_counters_offset;
+ unsigned int tile_info_offset;
+
+ unsigned short tile_r_info[MAX_SB_ROWS];
+ unsigned short tile_c_info[MAX_SB_COLS];
+ unsigned int last_tile_r;
+ unsigned int last_tile_c;
+ unsigned int last_sbs_r;
+ unsigned int last_sbs_c;
+
+ unsigned int active_segment;
+ u8 feature_enabled[8];
+ s16 feature_data[8][4];
+};
+
+/**
+ * struct hantro_postproc_ctx
+ *
+ * @dec_q: References buffers, in decoder format.
+ */
+struct hantro_postproc_ctx {
+ struct hantro_aux_buf dec_q[VB2_MAX_FRAME];
+};
+
+/**
+ * struct hantro_postproc_ops - post-processor operations
+ *
+ * @enable: Enable the post-processor block. Optional.
+ * @disable: Disable the post-processor block. Optional.
+ * @enum_framesizes: Enumerate possible scaled output formats.
+ * Returns zero if OK, a negative value in error cases.
+ * Optional.
+ */
+struct hantro_postproc_ops {
+ void (*enable)(struct hantro_ctx *ctx);
+ void (*disable)(struct hantro_ctx *ctx);
+ int (*enum_framesizes)(struct hantro_ctx *ctx, struct v4l2_frmsizeenum *fsize);
+};
+
+/**
+ * struct hantro_codec_ops - codec mode specific operations
+ *
+ * @init: If needed, can be used for initialization.
+ * Optional and called from process context.
+ * @exit: If needed, can be used to undo the .init phase.
+ * Optional and called from process context.
+ * @run: Start single {en,de)coding job. Called from atomic context
+ * to indicate that a pair of buffers is ready and the hardware
+ * should be programmed and started. Returns zero if OK, a
+ * negative value in error cases.
+ * @done: Read back processing results and additional data from hardware.
+ * @reset: Reset the hardware in case of a timeout.
+ */
+struct hantro_codec_ops {
+ int (*init)(struct hantro_ctx *ctx);
+ void (*exit)(struct hantro_ctx *ctx);
+ int (*run)(struct hantro_ctx *ctx);
+ void (*done)(struct hantro_ctx *ctx);
+ void (*reset)(struct hantro_ctx *ctx);
+};
+
+/**
+ * enum hantro_enc_fmt - source format ID for hardware registers.
+ *
+ * @ROCKCHIP_VPU_ENC_FMT_YUV420P: Y/CbCr 4:2:0 planar format
+ * @ROCKCHIP_VPU_ENC_FMT_YUV420SP: Y/CbCr 4:2:0 semi-planar format
+ * @ROCKCHIP_VPU_ENC_FMT_YUYV422: YUV 4:2:2 packed format (YUYV)
+ * @ROCKCHIP_VPU_ENC_FMT_UYVY422: YUV 4:2:2 packed format (UYVY)
+ */
+enum hantro_enc_fmt {
+ ROCKCHIP_VPU_ENC_FMT_YUV420P = 0,
+ ROCKCHIP_VPU_ENC_FMT_YUV420SP = 1,
+ ROCKCHIP_VPU_ENC_FMT_YUYV422 = 2,
+ ROCKCHIP_VPU_ENC_FMT_UYVY422 = 3,
+};
+
+extern const struct hantro_variant imx8mm_vpu_g1_variant;
+extern const struct hantro_variant imx8mq_vpu_g1_variant;
+extern const struct hantro_variant imx8mq_vpu_g2_variant;
+extern const struct hantro_variant imx8mq_vpu_variant;
+extern const struct hantro_variant px30_vpu_variant;
+extern const struct hantro_variant rk3036_vpu_variant;
+extern const struct hantro_variant rk3066_vpu_variant;
+extern const struct hantro_variant rk3288_vpu_variant;
+extern const struct hantro_variant rk3328_vpu_variant;
+extern const struct hantro_variant rk3399_vpu_variant;
+extern const struct hantro_variant rk3568_vepu_variant;
+extern const struct hantro_variant rk3568_vpu_variant;
+extern const struct hantro_variant sama5d4_vdec_variant;
+extern const struct hantro_variant sunxi_vpu_variant;
+
+extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
+extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
+
+extern const u32 hantro_vp8_dec_mc_filter[8][6];
+
+void hantro_watchdog(struct work_struct *work);
+void hantro_run(struct hantro_ctx *ctx);
+void hantro_irq_done(struct hantro_dev *vpu,
+ enum vb2_buffer_state result);
+void hantro_start_prepare_run(struct hantro_ctx *ctx);
+void hantro_end_prepare_run(struct hantro_ctx *ctx);
+
+irqreturn_t hantro_g1_irq(int irq, void *dev_id);
+void hantro_g1_reset(struct hantro_ctx *ctx);
+
+int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx);
+int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx);
+void hantro_h1_jpeg_enc_done(struct hantro_ctx *ctx);
+void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx);
+
+dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
+ unsigned int dpb_idx);
+u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx,
+ unsigned int dpb_idx);
+int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx);
+int rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx);
+int hantro_g1_h264_dec_run(struct hantro_ctx *ctx);
+int hantro_h264_dec_init(struct hantro_ctx *ctx);
+void hantro_h264_dec_exit(struct hantro_ctx *ctx);
+
+int hantro_hevc_dec_init(struct hantro_ctx *ctx);
+void hantro_hevc_dec_exit(struct hantro_ctx *ctx);
+int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx);
+int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
+void hantro_hevc_ref_init(struct hantro_ctx *ctx);
+dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
+int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
+
+
+static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
+{
+ return (dimension + 63) / 64;
+}
+
+static inline size_t
+hantro_vp9_mv_size(unsigned int width, unsigned int height)
+{
+ int num_ctbs;
+
+ /*
+ * There can be up to (CTBs x 64) number of blocks,
+ * and the motion vector for each block needs 16 bytes.
+ */
+ num_ctbs = hantro_vp9_num_sbs(width) * hantro_vp9_num_sbs(height);
+ return (num_ctbs * 64) * 16;
+}
+
+static inline size_t
+hantro_h264_mv_size(unsigned int width, unsigned int height)
+{
+ /*
+ * A decoded 8-bit 4:2:0 NV12 frame may need memory for up to
+ * 448 bytes per macroblock with additional 32 bytes on
+ * multi-core variants.
+ *
+ * The H264 decoder needs extra space on the output buffers
+ * to store motion vectors. This is needed for reference
+ * frames and only if the format is non-post-processed NV12.
+ *
+ * Memory layout is as follow:
+ *
+ * +---------------------------+
+ * | Y-plane 256 bytes x MBs |
+ * +---------------------------+
+ * | UV-plane 128 bytes x MBs |
+ * +---------------------------+
+ * | MV buffer 64 bytes x MBs |
+ * +---------------------------+
+ * | MC sync 32 bytes |
+ * +---------------------------+
+ */
+ return 64 * MB_WIDTH(width) * MB_WIDTH(height) + 32;
+}
+
+static inline size_t
+hantro_hevc_mv_size(unsigned int width, unsigned int height)
+{
+ /*
+ * A CTB can be 64x64, 32x32 or 16x16.
+ * Allocated memory for the "worse" case: 16x16
+ */
+ return width * height / 16;
+}
+
+int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
+int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx);
+void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
+ const struct v4l2_ctrl_mpeg2_quantisation *ctrl);
+int hantro_mpeg2_dec_init(struct hantro_ctx *ctx);
+void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx);
+
+int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx);
+int rockchip_vpu2_vp8_dec_run(struct hantro_ctx *ctx);
+int hantro_vp8_dec_init(struct hantro_ctx *ctx);
+void hantro_vp8_dec_exit(struct hantro_ctx *ctx);
+void hantro_vp8_prob_update(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr);
+
+int hantro_g2_vp9_dec_run(struct hantro_ctx *ctx);
+void hantro_g2_vp9_dec_done(struct hantro_ctx *ctx);
+int hantro_vp9_dec_init(struct hantro_ctx *ctx);
+void hantro_vp9_dec_exit(struct hantro_ctx *ctx);
+void hantro_g2_check_idle(struct hantro_dev *vpu);
+irqreturn_t hantro_g2_irq(int irq, void *dev_id);
+
+#endif /* HANTRO_HW_H_ */
diff --git a/drivers/media/platform/verisilicon/hantro_jpeg.c b/drivers/media/platform/verisilicon/hantro_jpeg.c
new file mode 100644
index 000000000..d07b1b449
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_jpeg.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) Collabora, Ltd.
+ *
+ * Based on GSPCA and CODA drivers:
+ * Copyright (C) Jean-Francois Moine (http://moinejf.free.fr)
+ * Copyright (C) 2014 Philipp Zabel, Pengutronix
+ */
+
+#include <linux/align.h>
+#include <linux/build_bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include "hantro_jpeg.h"
+#include "hantro.h"
+
+#define LUMA_QUANT_OFF 25
+#define CHROMA_QUANT_OFF 90
+#define HEIGHT_OFF 159
+#define WIDTH_OFF 161
+
+#define HUFF_LUMA_DC_OFF 178
+#define HUFF_LUMA_AC_OFF 211
+#define HUFF_CHROMA_DC_OFF 394
+#define HUFF_CHROMA_AC_OFF 427
+
+/* Default tables from JPEG ITU-T.81
+ * (ISO/IEC 10918-1) Annex K, tables K.1 and K.2
+ */
+static const unsigned char luma_q_table[] = {
+ 0x10, 0x0b, 0x0a, 0x10, 0x18, 0x28, 0x33, 0x3d,
+ 0x0c, 0x0c, 0x0e, 0x13, 0x1a, 0x3a, 0x3c, 0x37,
+ 0x0e, 0x0d, 0x10, 0x18, 0x28, 0x39, 0x45, 0x38,
+ 0x0e, 0x11, 0x16, 0x1d, 0x33, 0x57, 0x50, 0x3e,
+ 0x12, 0x16, 0x25, 0x38, 0x44, 0x6d, 0x67, 0x4d,
+ 0x18, 0x23, 0x37, 0x40, 0x51, 0x68, 0x71, 0x5c,
+ 0x31, 0x40, 0x4e, 0x57, 0x67, 0x79, 0x78, 0x65,
+ 0x48, 0x5c, 0x5f, 0x62, 0x70, 0x64, 0x67, 0x63
+};
+
+static const unsigned char chroma_q_table[] = {
+ 0x11, 0x12, 0x18, 0x2f, 0x63, 0x63, 0x63, 0x63,
+ 0x12, 0x15, 0x1a, 0x42, 0x63, 0x63, 0x63, 0x63,
+ 0x18, 0x1a, 0x38, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x2f, 0x42, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63
+};
+
+static const unsigned char zigzag[] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static const u32 hw_reorder[] = {
+ 0, 8, 16, 24, 1, 9, 17, 25,
+ 32, 40, 48, 56, 33, 41, 49, 57,
+ 2, 10, 18, 26, 3, 11, 19, 27,
+ 34, 42, 50, 58, 35, 43, 51, 59,
+ 4, 12, 20, 28, 5, 13, 21, 29,
+ 36, 44, 52, 60, 37, 45, 53, 61,
+ 6, 14, 22, 30, 7, 15, 23, 31,
+ 38, 46, 54, 62, 39, 47, 55, 63
+};
+
+/* Huffman tables are shared with CODA */
+static const unsigned char luma_dc_table[] = {
+ 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b,
+};
+
+static const unsigned char chroma_dc_table[] = {
+ 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b,
+};
+
+static const unsigned char luma_ac_table[] = {
+ 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03,
+ 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d,
+ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+ 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+ 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+ 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+ 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+ 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+ 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+ 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+ 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+ 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+ 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+ 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+ 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+ 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+ 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+ 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+ 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+ 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+ 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+ 0xf9, 0xfa,
+};
+
+static const unsigned char chroma_ac_table[] = {
+ 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04,
+ 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77,
+ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+ 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+ 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+ 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+ 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+ 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+ 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+ 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+ 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+ 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+ 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+ 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+ 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+ 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+ 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+ 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+ 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+ 0xf9, 0xfa,
+};
+
+/* For simplicity, we keep a pre-formatted JPEG header,
+ * and we'll use fixed offsets to change the width, height
+ * quantization tables, etc.
+ */
+static const unsigned char hantro_jpeg_header[] = {
+ /* SOI */
+ 0xff, 0xd8,
+
+ /* JFIF-APP0 */
+ 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46,
+ 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01,
+ 0x00, 0x00,
+
+ /* DQT */
+ 0xff, 0xdb, 0x00, 0x84,
+
+ 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ /* SOF */
+ 0xff, 0xc0, 0x00, 0x11, 0x08, 0x00, 0xf0, 0x01,
+ 0x40, 0x03, 0x01, 0x22, 0x00, 0x02, 0x11, 0x01,
+ 0x03, 0x11, 0x01,
+
+ /* DHT */
+ 0xff, 0xc4, 0x00, 0x1f, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ /* DHT */
+ 0xff, 0xc4, 0x00, 0xb5, 0x10,
+
+ 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ /* DHT */
+ 0xff, 0xc4, 0x00, 0x1f, 0x01,
+
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ /* DHT */
+ 0xff, 0xc4, 0x00, 0xb5, 0x11,
+
+ 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ /* COM */
+ 0xff, 0xfe, 0x00, 0x03, 0x00,
+
+ /* SOS */
+ 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02,
+ 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00,
+};
+
+/*
+ * JPEG_HEADER_SIZE is used in other parts of the driver in lieu of
+ * "sizeof(hantro_jpeg_header)". The two must be equal.
+ */
+static_assert(sizeof(hantro_jpeg_header) == JPEG_HEADER_SIZE);
+
+/*
+ * hantro_jpeg_header is padded with a COM segment, so that the payload
+ * of the SOS segment (the entropy-encoded image scan), which should
+ * trail the whole header, is 8-byte aligned for the hardware to write
+ * to directly.
+ */
+static_assert(IS_ALIGNED(sizeof(hantro_jpeg_header), 8),
+ "Hantro JPEG header size needs to be 8-byte aligned.");
+
+static unsigned char jpeg_scale_qp(const unsigned char qp, int scale)
+{
+ unsigned int temp;
+
+ temp = DIV_ROUND_CLOSEST((unsigned int)qp * scale, 100);
+ if (temp <= 0)
+ temp = 1;
+ if (temp > 255)
+ temp = 255;
+
+ return (unsigned char)temp;
+}
+
+static void
+jpeg_scale_quant_table(unsigned char *file_q_tab,
+ unsigned char *reordered_q_tab,
+ const unsigned char *tab, int scale)
+{
+ int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(zigzag) != JPEG_QUANT_SIZE);
+ BUILD_BUG_ON(ARRAY_SIZE(hw_reorder) != JPEG_QUANT_SIZE);
+
+ for (i = 0; i < JPEG_QUANT_SIZE; i++) {
+ file_q_tab[i] = jpeg_scale_qp(tab[zigzag[i]], scale);
+ reordered_q_tab[i] = jpeg_scale_qp(tab[hw_reorder[i]], scale);
+ }
+}
+
+static void jpeg_set_quality(struct hantro_jpeg_ctx *ctx)
+{
+ int scale;
+
+ /*
+ * Non-linear scaling factor:
+ * [5,50] -> [1000..100], [51,100] -> [98..0]
+ */
+ if (ctx->quality < 50)
+ scale = 5000 / ctx->quality;
+ else
+ scale = 200 - 2 * ctx->quality;
+
+ BUILD_BUG_ON(ARRAY_SIZE(luma_q_table) != JPEG_QUANT_SIZE);
+ BUILD_BUG_ON(ARRAY_SIZE(chroma_q_table) != JPEG_QUANT_SIZE);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->hw_luma_qtable) != JPEG_QUANT_SIZE);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->hw_chroma_qtable) != JPEG_QUANT_SIZE);
+
+ jpeg_scale_quant_table(ctx->buffer + LUMA_QUANT_OFF,
+ ctx->hw_luma_qtable, luma_q_table, scale);
+ jpeg_scale_quant_table(ctx->buffer + CHROMA_QUANT_OFF,
+ ctx->hw_chroma_qtable, chroma_q_table, scale);
+}
+
+void hantro_jpeg_header_assemble(struct hantro_jpeg_ctx *ctx)
+{
+ char *buf = ctx->buffer;
+
+ memcpy(buf, hantro_jpeg_header,
+ sizeof(hantro_jpeg_header));
+
+ buf[HEIGHT_OFF + 0] = ctx->height >> 8;
+ buf[HEIGHT_OFF + 1] = ctx->height;
+ buf[WIDTH_OFF + 0] = ctx->width >> 8;
+ buf[WIDTH_OFF + 1] = ctx->width;
+
+ memcpy(buf + HUFF_LUMA_DC_OFF, luma_dc_table, sizeof(luma_dc_table));
+ memcpy(buf + HUFF_LUMA_AC_OFF, luma_ac_table, sizeof(luma_ac_table));
+ memcpy(buf + HUFF_CHROMA_DC_OFF, chroma_dc_table,
+ sizeof(chroma_dc_table));
+ memcpy(buf + HUFF_CHROMA_AC_OFF, chroma_ac_table,
+ sizeof(chroma_ac_table));
+
+ jpeg_set_quality(ctx);
+}
diff --git a/drivers/media/platform/verisilicon/hantro_jpeg.h b/drivers/media/platform/verisilicon/hantro_jpeg.h
new file mode 100644
index 000000000..0b49d0b82
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_jpeg.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#define JPEG_HEADER_SIZE 624
+#define JPEG_QUANT_SIZE 64
+
+struct hantro_jpeg_ctx {
+ int width;
+ int height;
+ int quality;
+ unsigned char *buffer;
+ unsigned char hw_luma_qtable[JPEG_QUANT_SIZE];
+ unsigned char hw_chroma_qtable[JPEG_QUANT_SIZE];
+};
+
+void hantro_jpeg_header_assemble(struct hantro_jpeg_ctx *ctx);
diff --git a/drivers/media/platform/verisilicon/hantro_mpeg2.c b/drivers/media/platform/verisilicon/hantro_mpeg2.c
new file mode 100644
index 000000000..04e545eb0
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_mpeg2.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ */
+
+#include "hantro.h"
+
+static const u8 zigzag[64] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
+ const struct v4l2_ctrl_mpeg2_quantisation *ctrl)
+{
+ int i, n;
+
+ if (!qtable || !ctrl)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(zigzag); i++) {
+ n = zigzag[i];
+ qtable[n + 0] = ctrl->intra_quantiser_matrix[i];
+ qtable[n + 64] = ctrl->non_intra_quantiser_matrix[i];
+ qtable[n + 128] = ctrl->chroma_intra_quantiser_matrix[i];
+ qtable[n + 192] = ctrl->chroma_non_intra_quantiser_matrix[i];
+ }
+}
+
+int hantro_mpeg2_dec_init(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ ctx->mpeg2_dec.qtable.size = ARRAY_SIZE(zigzag) * 4;
+ ctx->mpeg2_dec.qtable.cpu =
+ dma_alloc_coherent(vpu->dev,
+ ctx->mpeg2_dec.qtable.size,
+ &ctx->mpeg2_dec.qtable.dma,
+ GFP_KERNEL);
+ if (!ctx->mpeg2_dec.qtable.cpu)
+ return -ENOMEM;
+ return 0;
+}
+
+void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ dma_free_coherent(vpu->dev,
+ ctx->mpeg2_dec.qtable.size,
+ ctx->mpeg2_dec.qtable.cpu,
+ ctx->mpeg2_dec.qtable.dma);
+}
diff --git a/drivers/media/platform/verisilicon/hantro_postproc.c b/drivers/media/platform/verisilicon/hantro_postproc.c
new file mode 100644
index 000000000..708095cf0
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_postproc.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro G1 post-processor support
+ *
+ * Copyright (C) 2019 Collabora, Ltd.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/types.h>
+
+#include "hantro.h"
+#include "hantro_hw.h"
+#include "hantro_g1_regs.h"
+#include "hantro_g2_regs.h"
+#include "hantro_v4l2.h"
+
+#define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
+{ \
+ hantro_reg_write(vpu, \
+ &hantro_g1_postproc_regs.reg_name, \
+ val); \
+}
+
+#define HANTRO_PP_REG_WRITE_S(vpu, reg_name, val) \
+{ \
+ hantro_reg_write_s(vpu, \
+ &hantro_g1_postproc_regs.reg_name, \
+ val); \
+}
+
+#define VPU_PP_IN_YUYV 0x0
+#define VPU_PP_IN_NV12 0x1
+#define VPU_PP_IN_YUV420 0x2
+#define VPU_PP_IN_YUV240_TILED 0x5
+#define VPU_PP_OUT_RGB 0x0
+#define VPU_PP_OUT_YUYV 0x3
+
+static const struct hantro_postproc_regs hantro_g1_postproc_regs = {
+ .pipeline_en = {G1_REG_PP_INTERRUPT, 1, 0x1},
+ .max_burst = {G1_REG_PP_DEV_CONFIG, 0, 0x1f},
+ .clk_gate = {G1_REG_PP_DEV_CONFIG, 1, 0x1},
+ .out_swap32 = {G1_REG_PP_DEV_CONFIG, 5, 0x1},
+ .out_endian = {G1_REG_PP_DEV_CONFIG, 6, 0x1},
+ .out_luma_base = {G1_REG_PP_OUT_LUMA_BASE, 0, 0xffffffff},
+ .input_width = {G1_REG_PP_INPUT_SIZE, 0, 0x1ff},
+ .input_height = {G1_REG_PP_INPUT_SIZE, 9, 0x1ff},
+ .output_width = {G1_REG_PP_CONTROL, 4, 0x7ff},
+ .output_height = {G1_REG_PP_CONTROL, 15, 0x7ff},
+ .input_fmt = {G1_REG_PP_CONTROL, 29, 0x7},
+ .output_fmt = {G1_REG_PP_CONTROL, 26, 0x7},
+ .orig_width = {G1_REG_PP_MASK1_ORIG_WIDTH, 23, 0x1ff},
+ .display_width = {G1_REG_PP_DISPLAY_WIDTH, 0, 0xfff},
+};
+
+bool hantro_needs_postproc(const struct hantro_ctx *ctx,
+ const struct hantro_fmt *fmt)
+{
+ if (ctx->is_encoder)
+ return false;
+ return fmt->postprocessed;
+}
+
+static void hantro_postproc_g1_enable(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *dst_buf;
+ u32 src_pp_fmt, dst_pp_fmt;
+ dma_addr_t dst_dma;
+
+ /* Turn on pipeline mode. Must be done first. */
+ HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x1);
+
+ src_pp_fmt = VPU_PP_IN_NV12;
+
+ switch (ctx->vpu_dst_fmt->fourcc) {
+ case V4L2_PIX_FMT_YUYV:
+ dst_pp_fmt = VPU_PP_OUT_YUYV;
+ break;
+ default:
+ WARN(1, "output format %d not supported by the post-processor, this wasn't expected.",
+ ctx->vpu_dst_fmt->fourcc);
+ dst_pp_fmt = 0;
+ break;
+ }
+
+ dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+ dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+
+ HANTRO_PP_REG_WRITE(vpu, clk_gate, 0x1);
+ HANTRO_PP_REG_WRITE(vpu, out_endian, 0x1);
+ HANTRO_PP_REG_WRITE(vpu, out_swap32, 0x1);
+ HANTRO_PP_REG_WRITE(vpu, max_burst, 16);
+ HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma);
+ HANTRO_PP_REG_WRITE(vpu, input_width, MB_WIDTH(ctx->dst_fmt.width));
+ HANTRO_PP_REG_WRITE(vpu, input_height, MB_HEIGHT(ctx->dst_fmt.height));
+ HANTRO_PP_REG_WRITE(vpu, input_fmt, src_pp_fmt);
+ HANTRO_PP_REG_WRITE(vpu, output_fmt, dst_pp_fmt);
+ HANTRO_PP_REG_WRITE(vpu, output_width, ctx->dst_fmt.width);
+ HANTRO_PP_REG_WRITE(vpu, output_height, ctx->dst_fmt.height);
+ HANTRO_PP_REG_WRITE(vpu, orig_width, MB_WIDTH(ctx->dst_fmt.width));
+ HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
+}
+
+static int down_scale_factor(struct hantro_ctx *ctx)
+{
+ if (ctx->src_fmt.width <= ctx->dst_fmt.width)
+ return 0;
+
+ return DIV_ROUND_CLOSEST(ctx->src_fmt.width, ctx->dst_fmt.width);
+}
+
+static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *dst_buf;
+ int down_scale = down_scale_factor(ctx);
+ int out_depth;
+ size_t chroma_offset;
+ dma_addr_t dst_dma;
+
+ dst_buf = hantro_get_dst_buf(ctx);
+ dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+ chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
+ ctx->dst_fmt.height;
+
+ if (down_scale) {
+ hantro_reg_write(vpu, &g2_down_scale_e, 1);
+ hantro_reg_write(vpu, &g2_down_scale_y, down_scale >> 2);
+ hantro_reg_write(vpu, &g2_down_scale_x, down_scale >> 2);
+ hantro_write_addr(vpu, G2_DS_DST, dst_dma);
+ hantro_write_addr(vpu, G2_DS_DST_CHR, dst_dma + (chroma_offset >> down_scale));
+ } else {
+ hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
+ hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + chroma_offset);
+ }
+
+ out_depth = hantro_get_format_depth(ctx->dst_fmt.pixelformat);
+ if (ctx->dev->variant->legacy_regs) {
+ u8 pp_shift = 0;
+
+ if (out_depth > 8)
+ pp_shift = 16 - out_depth;
+
+ hantro_reg_write(ctx->dev, &g2_rs_out_bit_depth, out_depth);
+ hantro_reg_write(ctx->dev, &g2_pp_pix_shift, pp_shift);
+ } else {
+ hantro_reg_write(vpu, &g2_output_8_bits, out_depth > 8 ? 0 : 1);
+ hantro_reg_write(vpu, &g2_output_format, out_depth > 8 ? 1 : 0);
+ }
+ hantro_reg_write(vpu, &g2_out_rs_e, 1);
+}
+
+static int hantro_postproc_g2_enum_framesizes(struct hantro_ctx *ctx,
+ struct v4l2_frmsizeenum *fsize)
+{
+ /**
+ * G2 scaler can scale down by 0, 2, 4 or 8
+ * use fsize->index has power of 2 diviser
+ **/
+ if (fsize->index > 3)
+ return -EINVAL;
+
+ if (!ctx->src_fmt.width || !ctx->src_fmt.height)
+ return -EINVAL;
+
+ fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
+ fsize->discrete.width = ctx->src_fmt.width >> fsize->index;
+ fsize->discrete.height = ctx->src_fmt.height >> fsize->index;
+
+ return 0;
+}
+
+void hantro_postproc_free(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ unsigned int i;
+
+ for (i = 0; i < VB2_MAX_FRAME; ++i) {
+ struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
+
+ if (priv->cpu) {
+ dma_free_attrs(vpu->dev, priv->size, priv->cpu,
+ priv->dma, priv->attrs);
+ priv->cpu = NULL;
+ }
+ }
+}
+
+int hantro_postproc_alloc(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
+ struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q;
+ unsigned int num_buffers = cap_queue->num_buffers;
+ struct v4l2_pix_format_mplane pix_mp;
+ const struct hantro_fmt *fmt;
+ unsigned int i, buf_size;
+
+ /* this should always pick native format */
+ fmt = hantro_get_default_fmt(ctx, false);
+ if (!fmt)
+ return -EINVAL;
+ v4l2_fill_pixfmt_mp(&pix_mp, fmt->fourcc, ctx->src_fmt.width,
+ ctx->src_fmt.height);
+
+ buf_size = pix_mp.plane_fmt[0].sizeimage;
+ if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
+ buf_size += hantro_h264_mv_size(pix_mp.width,
+ pix_mp.height);
+ else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
+ buf_size += hantro_vp9_mv_size(pix_mp.width,
+ pix_mp.height);
+ else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
+ buf_size += hantro_hevc_mv_size(pix_mp.width,
+ pix_mp.height);
+
+ for (i = 0; i < num_buffers; ++i) {
+ struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
+
+ /*
+ * The buffers on this queue are meant as intermediate
+ * buffers for the decoder, so no mapping is needed.
+ */
+ priv->attrs = DMA_ATTR_NO_KERNEL_MAPPING;
+ priv->cpu = dma_alloc_attrs(vpu->dev, buf_size, &priv->dma,
+ GFP_KERNEL, priv->attrs);
+ if (!priv->cpu)
+ return -ENOMEM;
+ priv->size = buf_size;
+ }
+ return 0;
+}
+
+static void hantro_postproc_g1_disable(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
+}
+
+static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ hantro_reg_write(vpu, &g2_out_rs_e, 0);
+}
+
+void hantro_postproc_disable(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->disable)
+ vpu->variant->postproc_ops->disable(ctx);
+}
+
+void hantro_postproc_enable(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enable)
+ vpu->variant->postproc_ops->enable(ctx);
+}
+
+int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx,
+ struct v4l2_frmsizeenum *fsize)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enum_framesizes)
+ return vpu->variant->postproc_ops->enum_framesizes(ctx, fsize);
+
+ return -EINVAL;
+}
+
+const struct hantro_postproc_ops hantro_g1_postproc_ops = {
+ .enable = hantro_postproc_g1_enable,
+ .disable = hantro_postproc_g1_disable,
+};
+
+const struct hantro_postproc_ops hantro_g2_postproc_ops = {
+ .enable = hantro_postproc_g2_enable,
+ .disable = hantro_postproc_g2_disable,
+ .enum_framesizes = hantro_postproc_g2_enum_framesizes,
+};
diff --git a/drivers/media/platform/verisilicon/hantro_v4l2.c b/drivers/media/platform/verisilicon/hantro_v4l2.c
new file mode 100644
index 000000000..b2da48936
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_v4l2.c
@@ -0,0 +1,996 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Collabora, Ltd.
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ * Alpha Lin <Alpha.Lin@rock-chips.com>
+ * Jeffy Chen <jeffy.chen@rock-chips.com>
+ *
+ * Copyright 2018 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ *
+ * Based on s5p-mfc driver by Samsung Electronics Co., Ltd.
+ * Copyright (C) 2010-2011 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/videodev2.h>
+#include <linux/workqueue.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro.h"
+#include "hantro_hw.h"
+#include "hantro_v4l2.h"
+
+static int hantro_set_fmt_out(struct hantro_ctx *ctx,
+ struct v4l2_pix_format_mplane *pix_mp);
+static int hantro_set_fmt_cap(struct hantro_ctx *ctx,
+ struct v4l2_pix_format_mplane *pix_mp);
+
+static const struct hantro_fmt *
+hantro_get_formats(const struct hantro_ctx *ctx, unsigned int *num_fmts)
+{
+ const struct hantro_fmt *formats;
+
+ if (ctx->is_encoder) {
+ formats = ctx->dev->variant->enc_fmts;
+ *num_fmts = ctx->dev->variant->num_enc_fmts;
+ } else {
+ formats = ctx->dev->variant->dec_fmts;
+ *num_fmts = ctx->dev->variant->num_dec_fmts;
+ }
+
+ return formats;
+}
+
+static const struct hantro_fmt *
+hantro_get_postproc_formats(const struct hantro_ctx *ctx,
+ unsigned int *num_fmts)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ if (ctx->is_encoder || !vpu->variant->postproc_fmts) {
+ *num_fmts = 0;
+ return NULL;
+ }
+
+ *num_fmts = ctx->dev->variant->num_postproc_fmts;
+ return ctx->dev->variant->postproc_fmts;
+}
+
+int hantro_get_format_depth(u32 fourcc)
+{
+ switch (fourcc) {
+ case V4L2_PIX_FMT_P010:
+ case V4L2_PIX_FMT_P010_4L4:
+ return 10;
+ default:
+ return 8;
+ }
+}
+
+static bool
+hantro_check_depth_match(const struct hantro_ctx *ctx,
+ const struct hantro_fmt *fmt)
+{
+ int fmt_depth, ctx_depth = 8;
+
+ if (!fmt->match_depth && !fmt->postprocessed)
+ return true;
+
+ /* 0 means default depth, which is 8 */
+ if (ctx->bit_depth)
+ ctx_depth = ctx->bit_depth;
+
+ fmt_depth = hantro_get_format_depth(fmt->fourcc);
+
+ /*
+ * Allow only downconversion for postproc formats for now.
+ * It may be possible to relax that on some HW.
+ */
+ if (!fmt->match_depth)
+ return fmt_depth <= ctx_depth;
+
+ return fmt_depth == ctx_depth;
+}
+
+static const struct hantro_fmt *
+hantro_find_format(const struct hantro_ctx *ctx, u32 fourcc)
+{
+ const struct hantro_fmt *formats;
+ unsigned int i, num_fmts;
+
+ formats = hantro_get_formats(ctx, &num_fmts);
+ for (i = 0; i < num_fmts; i++)
+ if (formats[i].fourcc == fourcc)
+ return &formats[i];
+
+ formats = hantro_get_postproc_formats(ctx, &num_fmts);
+ for (i = 0; i < num_fmts; i++)
+ if (formats[i].fourcc == fourcc)
+ return &formats[i];
+ return NULL;
+}
+
+const struct hantro_fmt *
+hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream)
+{
+ const struct hantro_fmt *formats;
+ unsigned int i, num_fmts;
+
+ formats = hantro_get_formats(ctx, &num_fmts);
+ for (i = 0; i < num_fmts; i++) {
+ if (bitstream == (formats[i].codec_mode !=
+ HANTRO_MODE_NONE) &&
+ hantro_check_depth_match(ctx, &formats[i]))
+ return &formats[i];
+ }
+ return NULL;
+}
+
+static int vidioc_querycap(struct file *file, void *priv,
+ struct v4l2_capability *cap)
+{
+ struct hantro_dev *vpu = video_drvdata(file);
+ struct video_device *vdev = video_devdata(file);
+
+ strscpy(cap->driver, vpu->dev->driver->name, sizeof(cap->driver));
+ strscpy(cap->card, vdev->name, sizeof(cap->card));
+ snprintf(cap->bus_info, sizeof(cap->bus_info), "platform: %s",
+ vpu->dev->driver->name);
+ return 0;
+}
+
+static int vidioc_enum_framesizes(struct file *file, void *priv,
+ struct v4l2_frmsizeenum *fsize)
+{
+ struct hantro_ctx *ctx = fh_to_ctx(priv);
+ const struct hantro_fmt *fmt;
+
+ fmt = hantro_find_format(ctx, fsize->pixel_format);
+ if (!fmt) {
+ vpu_debug(0, "unsupported bitstream format (%08x)\n",
+ fsize->pixel_format);
+ return -EINVAL;
+ }
+
+ /* For non-coded formats check if postprocessing scaling is possible */
+ if (fmt->codec_mode == HANTRO_MODE_NONE) {
+ if (hantro_needs_postproc(ctx, fmt))
+ return hanto_postproc_enum_framesizes(ctx, fsize);
+ else
+ return -ENOTTY;
+ } else if (fsize->index != 0) {
+ vpu_debug(0, "invalid frame size index (expected 0, got %d)\n",
+ fsize->index);
+ return -EINVAL;
+ }
+
+ fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE;
+ fsize->stepwise = fmt->frmsize;
+
+ return 0;
+}
+
+static int vidioc_enum_fmt(struct file *file, void *priv,
+ struct v4l2_fmtdesc *f, bool capture)
+
+{
+ struct hantro_ctx *ctx = fh_to_ctx(priv);
+ const struct hantro_fmt *fmt, *formats;
+ unsigned int num_fmts, i, j = 0;
+ bool skip_mode_none;
+
+ /*
+ * When dealing with an encoder:
+ * - on the capture side we want to filter out all MODE_NONE formats.
+ * - on the output side we want to filter out all formats that are
+ * not MODE_NONE.
+ * When dealing with a decoder:
+ * - on the capture side we want to filter out all formats that are
+ * not MODE_NONE.
+ * - on the output side we want to filter out all MODE_NONE formats.
+ */
+ skip_mode_none = capture == ctx->is_encoder;
+
+ formats = hantro_get_formats(ctx, &num_fmts);
+ for (i = 0; i < num_fmts; i++) {
+ bool mode_none = formats[i].codec_mode == HANTRO_MODE_NONE;
+ fmt = &formats[i];
+
+ if (skip_mode_none == mode_none)
+ continue;
+ if (!hantro_check_depth_match(ctx, fmt))
+ continue;
+ if (j == f->index) {
+ f->pixelformat = fmt->fourcc;
+ return 0;
+ }
+ ++j;
+ }
+
+ /*
+ * Enumerate post-processed formats. As per the specification,
+ * we enumerated these formats after natively decoded formats
+ * as a hint for applications on what's the preferred fomat.
+ */
+ if (!capture)
+ return -EINVAL;
+ formats = hantro_get_postproc_formats(ctx, &num_fmts);
+ for (i = 0; i < num_fmts; i++) {
+ fmt = &formats[i];
+
+ if (!hantro_check_depth_match(ctx, fmt))
+ continue;
+ if (j == f->index) {
+ f->pixelformat = fmt->fourcc;
+ return 0;
+ }
+ ++j;
+ }
+
+ return -EINVAL;
+}
+
+static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
+ struct v4l2_fmtdesc *f)
+{
+ return vidioc_enum_fmt(file, priv, f, true);
+}
+
+static int vidioc_enum_fmt_vid_out(struct file *file, void *priv,
+ struct v4l2_fmtdesc *f)
+{
+ return vidioc_enum_fmt(file, priv, f, false);
+}
+
+static int vidioc_g_fmt_out_mplane(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
+ struct hantro_ctx *ctx = fh_to_ctx(priv);
+
+ vpu_debug(4, "f->type = %d\n", f->type);
+
+ *pix_mp = ctx->src_fmt;
+
+ return 0;
+}
+
+static int vidioc_g_fmt_cap_mplane(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
+ struct hantro_ctx *ctx = fh_to_ctx(priv);
+
+ vpu_debug(4, "f->type = %d\n", f->type);
+
+ *pix_mp = ctx->dst_fmt;
+
+ return 0;
+}
+
+static int hantro_try_fmt(const struct hantro_ctx *ctx,
+ struct v4l2_pix_format_mplane *pix_mp,
+ enum v4l2_buf_type type)
+{
+ const struct hantro_fmt *fmt, *vpu_fmt;
+ bool capture = V4L2_TYPE_IS_CAPTURE(type);
+ bool coded;
+
+ coded = capture == ctx->is_encoder;
+
+ vpu_debug(4, "trying format %c%c%c%c\n",
+ (pix_mp->pixelformat & 0x7f),
+ (pix_mp->pixelformat >> 8) & 0x7f,
+ (pix_mp->pixelformat >> 16) & 0x7f,
+ (pix_mp->pixelformat >> 24) & 0x7f);
+
+ fmt = hantro_find_format(ctx, pix_mp->pixelformat);
+ if (!fmt) {
+ fmt = hantro_get_default_fmt(ctx, coded);
+ pix_mp->pixelformat = fmt->fourcc;
+ }
+
+ if (coded) {
+ pix_mp->num_planes = 1;
+ vpu_fmt = fmt;
+ } else if (ctx->is_encoder) {
+ vpu_fmt = ctx->vpu_dst_fmt;
+ } else {
+ vpu_fmt = fmt;
+ /*
+ * Width/height on the CAPTURE end of a decoder are ignored and
+ * replaced by the OUTPUT ones.
+ */
+ pix_mp->width = ctx->src_fmt.width;
+ pix_mp->height = ctx->src_fmt.height;
+ }
+
+ pix_mp->field = V4L2_FIELD_NONE;
+
+ v4l2_apply_frmsize_constraints(&pix_mp->width, &pix_mp->height,
+ &vpu_fmt->frmsize);
+
+ if (!coded) {
+ /* Fill remaining fields */
+ v4l2_fill_pixfmt_mp(pix_mp, fmt->fourcc, pix_mp->width,
+ pix_mp->height);
+ if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE &&
+ !hantro_needs_postproc(ctx, fmt))
+ pix_mp->plane_fmt[0].sizeimage +=
+ hantro_h264_mv_size(pix_mp->width,
+ pix_mp->height);
+ else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME &&
+ !hantro_needs_postproc(ctx, fmt))
+ pix_mp->plane_fmt[0].sizeimage +=
+ hantro_vp9_mv_size(pix_mp->width,
+ pix_mp->height);
+ else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE &&
+ !hantro_needs_postproc(ctx, fmt))
+ pix_mp->plane_fmt[0].sizeimage +=
+ hantro_hevc_mv_size(pix_mp->width,
+ pix_mp->height);
+ } else if (!pix_mp->plane_fmt[0].sizeimage) {
+ /*
+ * For coded formats the application can specify
+ * sizeimage. If the application passes a zero sizeimage,
+ * let's default to the maximum frame size.
+ */
+ pix_mp->plane_fmt[0].sizeimage = fmt->header_size +
+ pix_mp->width * pix_mp->height * fmt->max_depth;
+ }
+
+ return 0;
+}
+
+static int vidioc_try_fmt_cap_mplane(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ return hantro_try_fmt(fh_to_ctx(priv), &f->fmt.pix_mp, f->type);
+}
+
+static int vidioc_try_fmt_out_mplane(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ return hantro_try_fmt(fh_to_ctx(priv), &f->fmt.pix_mp, f->type);
+}
+
+static void
+hantro_reset_fmt(struct v4l2_pix_format_mplane *fmt,
+ const struct hantro_fmt *vpu_fmt)
+{
+ memset(fmt, 0, sizeof(*fmt));
+
+ fmt->pixelformat = vpu_fmt->fourcc;
+ fmt->field = V4L2_FIELD_NONE;
+ fmt->colorspace = V4L2_COLORSPACE_JPEG;
+ fmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
+ fmt->quantization = V4L2_QUANTIZATION_DEFAULT;
+ fmt->xfer_func = V4L2_XFER_FUNC_DEFAULT;
+}
+
+static void
+hantro_reset_encoded_fmt(struct hantro_ctx *ctx)
+{
+ const struct hantro_fmt *vpu_fmt;
+ struct v4l2_pix_format_mplane *fmt;
+
+ vpu_fmt = hantro_get_default_fmt(ctx, true);
+
+ if (ctx->is_encoder) {
+ ctx->vpu_dst_fmt = vpu_fmt;
+ fmt = &ctx->dst_fmt;
+ } else {
+ ctx->vpu_src_fmt = vpu_fmt;
+ fmt = &ctx->src_fmt;
+ }
+
+ hantro_reset_fmt(fmt, vpu_fmt);
+ fmt->width = vpu_fmt->frmsize.min_width;
+ fmt->height = vpu_fmt->frmsize.min_height;
+ if (ctx->is_encoder)
+ hantro_set_fmt_cap(ctx, fmt);
+ else
+ hantro_set_fmt_out(ctx, fmt);
+}
+
+static void
+hantro_reset_raw_fmt(struct hantro_ctx *ctx)
+{
+ const struct hantro_fmt *raw_vpu_fmt;
+ struct v4l2_pix_format_mplane *raw_fmt, *encoded_fmt;
+
+ raw_vpu_fmt = hantro_get_default_fmt(ctx, false);
+
+ if (ctx->is_encoder) {
+ ctx->vpu_src_fmt = raw_vpu_fmt;
+ raw_fmt = &ctx->src_fmt;
+ encoded_fmt = &ctx->dst_fmt;
+ } else {
+ ctx->vpu_dst_fmt = raw_vpu_fmt;
+ raw_fmt = &ctx->dst_fmt;
+ encoded_fmt = &ctx->src_fmt;
+ }
+
+ hantro_reset_fmt(raw_fmt, raw_vpu_fmt);
+ raw_fmt->width = encoded_fmt->width;
+ raw_fmt->height = encoded_fmt->height;
+ if (ctx->is_encoder)
+ hantro_set_fmt_out(ctx, raw_fmt);
+ else
+ hantro_set_fmt_cap(ctx, raw_fmt);
+}
+
+void hantro_reset_fmts(struct hantro_ctx *ctx)
+{
+ hantro_reset_encoded_fmt(ctx);
+ hantro_reset_raw_fmt(ctx);
+}
+
+static void
+hantro_update_requires_request(struct hantro_ctx *ctx, u32 fourcc)
+{
+ switch (fourcc) {
+ case V4L2_PIX_FMT_JPEG:
+ ctx->fh.m2m_ctx->out_q_ctx.q.requires_requests = false;
+ break;
+ case V4L2_PIX_FMT_MPEG2_SLICE:
+ case V4L2_PIX_FMT_VP8_FRAME:
+ case V4L2_PIX_FMT_H264_SLICE:
+ case V4L2_PIX_FMT_HEVC_SLICE:
+ case V4L2_PIX_FMT_VP9_FRAME:
+ ctx->fh.m2m_ctx->out_q_ctx.q.requires_requests = true;
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+hantro_update_requires_hold_capture_buf(struct hantro_ctx *ctx, u32 fourcc)
+{
+ struct vb2_queue *vq;
+
+ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
+
+ switch (fourcc) {
+ case V4L2_PIX_FMT_JPEG:
+ case V4L2_PIX_FMT_MPEG2_SLICE:
+ case V4L2_PIX_FMT_VP8_FRAME:
+ case V4L2_PIX_FMT_HEVC_SLICE:
+ case V4L2_PIX_FMT_VP9_FRAME:
+ vq->subsystem_flags &= ~(VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF);
+ break;
+ case V4L2_PIX_FMT_H264_SLICE:
+ vq->subsystem_flags |= VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF;
+ break;
+ default:
+ break;
+ }
+}
+
+static int hantro_set_fmt_out(struct hantro_ctx *ctx,
+ struct v4l2_pix_format_mplane *pix_mp)
+{
+ struct vb2_queue *vq;
+ int ret;
+
+ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
+ ret = hantro_try_fmt(ctx, pix_mp, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
+ if (ret)
+ return ret;
+
+ if (!ctx->is_encoder) {
+ struct vb2_queue *peer_vq;
+
+ /*
+ * In order to support dynamic resolution change,
+ * the decoder admits a resolution change, as long
+ * as the pixelformat remains. Can't be done if streaming.
+ */
+ if (vb2_is_streaming(vq) || (vb2_is_busy(vq) &&
+ pix_mp->pixelformat != ctx->src_fmt.pixelformat))
+ return -EBUSY;
+ /*
+ * Since format change on the OUTPUT queue will reset
+ * the CAPTURE queue, we can't allow doing so
+ * when the CAPTURE queue has buffers allocated.
+ */
+ peer_vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+ V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ if (vb2_is_busy(peer_vq))
+ return -EBUSY;
+ } else {
+ /*
+ * The encoder doesn't admit a format change if
+ * there are OUTPUT buffers allocated.
+ */
+ if (vb2_is_busy(vq))
+ return -EBUSY;
+ }
+
+ ctx->vpu_src_fmt = hantro_find_format(ctx, pix_mp->pixelformat);
+ ctx->src_fmt = *pix_mp;
+
+ /*
+ * Current raw format might have become invalid with newly
+ * selected codec, so reset it to default just to be safe and
+ * keep internal driver state sane. User is mandated to set
+ * the raw format again after we return, so we don't need
+ * anything smarter.
+ * Note that hantro_reset_raw_fmt() also propagates size
+ * changes to the raw format.
+ */
+ if (!ctx->is_encoder)
+ hantro_reset_raw_fmt(ctx);
+
+ /* Colorimetry information are always propagated. */
+ ctx->dst_fmt.colorspace = pix_mp->colorspace;
+ ctx->dst_fmt.ycbcr_enc = pix_mp->ycbcr_enc;
+ ctx->dst_fmt.xfer_func = pix_mp->xfer_func;
+ ctx->dst_fmt.quantization = pix_mp->quantization;
+
+ hantro_update_requires_request(ctx, pix_mp->pixelformat);
+ hantro_update_requires_hold_capture_buf(ctx, pix_mp->pixelformat);
+
+ vpu_debug(0, "OUTPUT codec mode: %d\n", ctx->vpu_src_fmt->codec_mode);
+ vpu_debug(0, "fmt - w: %d, h: %d\n",
+ pix_mp->width, pix_mp->height);
+ return 0;
+}
+
+static int hantro_set_fmt_cap(struct hantro_ctx *ctx,
+ struct v4l2_pix_format_mplane *pix_mp)
+{
+ struct vb2_queue *vq;
+ int ret;
+
+ /* Change not allowed if queue is busy. */
+ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+ V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ if (vb2_is_busy(vq))
+ return -EBUSY;
+
+ if (ctx->is_encoder) {
+ struct vb2_queue *peer_vq;
+
+ /*
+ * Since format change on the CAPTURE queue will reset
+ * the OUTPUT queue, we can't allow doing so
+ * when the OUTPUT queue has buffers allocated.
+ */
+ peer_vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
+ if (vb2_is_busy(peer_vq) &&
+ (pix_mp->pixelformat != ctx->dst_fmt.pixelformat ||
+ pix_mp->height != ctx->dst_fmt.height ||
+ pix_mp->width != ctx->dst_fmt.width))
+ return -EBUSY;
+ }
+
+ ret = hantro_try_fmt(ctx, pix_mp, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+ if (ret)
+ return ret;
+
+ ctx->vpu_dst_fmt = hantro_find_format(ctx, pix_mp->pixelformat);
+ ctx->dst_fmt = *pix_mp;
+
+ /*
+ * Current raw format might have become invalid with newly
+ * selected codec, so reset it to default just to be safe and
+ * keep internal driver state sane. User is mandated to set
+ * the raw format again after we return, so we don't need
+ * anything smarter.
+ * Note that hantro_reset_raw_fmt() also propagates size
+ * changes to the raw format.
+ */
+ if (ctx->is_encoder)
+ hantro_reset_raw_fmt(ctx);
+
+ /* Colorimetry information are always propagated. */
+ ctx->src_fmt.colorspace = pix_mp->colorspace;
+ ctx->src_fmt.ycbcr_enc = pix_mp->ycbcr_enc;
+ ctx->src_fmt.xfer_func = pix_mp->xfer_func;
+ ctx->src_fmt.quantization = pix_mp->quantization;
+
+ vpu_debug(0, "CAPTURE codec mode: %d\n", ctx->vpu_dst_fmt->codec_mode);
+ vpu_debug(0, "fmt - w: %d, h: %d\n",
+ pix_mp->width, pix_mp->height);
+
+ hantro_update_requires_request(ctx, pix_mp->pixelformat);
+
+ return 0;
+}
+
+static int
+vidioc_s_fmt_out_mplane(struct file *file, void *priv, struct v4l2_format *f)
+{
+ return hantro_set_fmt_out(fh_to_ctx(priv), &f->fmt.pix_mp);
+}
+
+static int
+vidioc_s_fmt_cap_mplane(struct file *file, void *priv, struct v4l2_format *f)
+{
+ return hantro_set_fmt_cap(fh_to_ctx(priv), &f->fmt.pix_mp);
+}
+
+static int vidioc_g_selection(struct file *file, void *priv,
+ struct v4l2_selection *sel)
+{
+ struct hantro_ctx *ctx = fh_to_ctx(priv);
+
+ /* Crop only supported on source. */
+ if (!ctx->is_encoder ||
+ sel->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
+ return -EINVAL;
+
+ switch (sel->target) {
+ case V4L2_SEL_TGT_CROP_DEFAULT:
+ case V4L2_SEL_TGT_CROP_BOUNDS:
+ sel->r.top = 0;
+ sel->r.left = 0;
+ sel->r.width = ctx->src_fmt.width;
+ sel->r.height = ctx->src_fmt.height;
+ break;
+ case V4L2_SEL_TGT_CROP:
+ sel->r.top = 0;
+ sel->r.left = 0;
+ sel->r.width = ctx->dst_fmt.width;
+ sel->r.height = ctx->dst_fmt.height;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vidioc_s_selection(struct file *file, void *priv,
+ struct v4l2_selection *sel)
+{
+ struct hantro_ctx *ctx = fh_to_ctx(priv);
+ struct v4l2_rect *rect = &sel->r;
+ struct vb2_queue *vq;
+
+ /* Crop only supported on source. */
+ if (!ctx->is_encoder ||
+ sel->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
+ return -EINVAL;
+
+ /* Change not allowed if the queue is streaming. */
+ vq = v4l2_m2m_get_src_vq(ctx->fh.m2m_ctx);
+ if (vb2_is_streaming(vq))
+ return -EBUSY;
+
+ if (sel->target != V4L2_SEL_TGT_CROP)
+ return -EINVAL;
+
+ /*
+ * We do not support offsets, and we can crop only inside
+ * right-most or bottom-most macroblocks.
+ */
+ if (rect->left != 0 || rect->top != 0 ||
+ round_up(rect->width, MB_DIM) != ctx->src_fmt.width ||
+ round_up(rect->height, MB_DIM) != ctx->src_fmt.height) {
+ /* Default to full frame for incorrect settings. */
+ rect->left = 0;
+ rect->top = 0;
+ rect->width = ctx->src_fmt.width;
+ rect->height = ctx->src_fmt.height;
+ } else {
+ /* We support widths aligned to 4 pixels and arbitrary heights. */
+ rect->width = round_up(rect->width, 4);
+ }
+
+ ctx->dst_fmt.width = rect->width;
+ ctx->dst_fmt.height = rect->height;
+
+ return 0;
+}
+
+static const struct v4l2_event hantro_eos_event = {
+ .type = V4L2_EVENT_EOS
+};
+
+static int vidioc_encoder_cmd(struct file *file, void *priv,
+ struct v4l2_encoder_cmd *ec)
+{
+ struct hantro_ctx *ctx = fh_to_ctx(priv);
+ int ret;
+
+ ret = v4l2_m2m_ioctl_try_encoder_cmd(file, priv, ec);
+ if (ret < 0)
+ return ret;
+
+ if (!vb2_is_streaming(v4l2_m2m_get_src_vq(ctx->fh.m2m_ctx)) ||
+ !vb2_is_streaming(v4l2_m2m_get_dst_vq(ctx->fh.m2m_ctx)))
+ return 0;
+
+ ret = v4l2_m2m_ioctl_encoder_cmd(file, priv, ec);
+ if (ret < 0)
+ return ret;
+
+ if (ec->cmd == V4L2_ENC_CMD_STOP &&
+ v4l2_m2m_has_stopped(ctx->fh.m2m_ctx))
+ v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event);
+
+ if (ec->cmd == V4L2_ENC_CMD_START)
+ vb2_clear_last_buffer_dequeued(&ctx->fh.m2m_ctx->cap_q_ctx.q);
+
+ return 0;
+}
+
+const struct v4l2_ioctl_ops hantro_ioctl_ops = {
+ .vidioc_querycap = vidioc_querycap,
+ .vidioc_enum_framesizes = vidioc_enum_framesizes,
+
+ .vidioc_try_fmt_vid_cap_mplane = vidioc_try_fmt_cap_mplane,
+ .vidioc_try_fmt_vid_out_mplane = vidioc_try_fmt_out_mplane,
+ .vidioc_s_fmt_vid_out_mplane = vidioc_s_fmt_out_mplane,
+ .vidioc_s_fmt_vid_cap_mplane = vidioc_s_fmt_cap_mplane,
+ .vidioc_g_fmt_vid_out_mplane = vidioc_g_fmt_out_mplane,
+ .vidioc_g_fmt_vid_cap_mplane = vidioc_g_fmt_cap_mplane,
+ .vidioc_enum_fmt_vid_out = vidioc_enum_fmt_vid_out,
+ .vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap,
+
+ .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
+ .vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
+ .vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
+ .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
+ .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
+ .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
+ .vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
+
+ .vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+ .vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+
+ .vidioc_streamon = v4l2_m2m_ioctl_streamon,
+ .vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
+
+ .vidioc_g_selection = vidioc_g_selection,
+ .vidioc_s_selection = vidioc_s_selection,
+
+ .vidioc_decoder_cmd = v4l2_m2m_ioctl_stateless_decoder_cmd,
+ .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_stateless_try_decoder_cmd,
+
+ .vidioc_try_encoder_cmd = v4l2_m2m_ioctl_try_encoder_cmd,
+ .vidioc_encoder_cmd = vidioc_encoder_cmd,
+};
+
+static int
+hantro_queue_setup(struct vb2_queue *vq, unsigned int *num_buffers,
+ unsigned int *num_planes, unsigned int sizes[],
+ struct device *alloc_devs[])
+{
+ struct hantro_ctx *ctx = vb2_get_drv_priv(vq);
+ struct v4l2_pix_format_mplane *pixfmt;
+ int i;
+
+ switch (vq->type) {
+ case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
+ pixfmt = &ctx->dst_fmt;
+ break;
+ case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
+ pixfmt = &ctx->src_fmt;
+ break;
+ default:
+ vpu_err("invalid queue type: %d\n", vq->type);
+ return -EINVAL;
+ }
+
+ if (*num_planes) {
+ if (*num_planes != pixfmt->num_planes)
+ return -EINVAL;
+ for (i = 0; i < pixfmt->num_planes; ++i)
+ if (sizes[i] < pixfmt->plane_fmt[i].sizeimage)
+ return -EINVAL;
+ return 0;
+ }
+
+ *num_planes = pixfmt->num_planes;
+ for (i = 0; i < pixfmt->num_planes; ++i)
+ sizes[i] = pixfmt->plane_fmt[i].sizeimage;
+ return 0;
+}
+
+static int
+hantro_buf_plane_check(struct vb2_buffer *vb,
+ struct v4l2_pix_format_mplane *pixfmt)
+{
+ unsigned int sz;
+ int i;
+
+ for (i = 0; i < pixfmt->num_planes; ++i) {
+ sz = pixfmt->plane_fmt[i].sizeimage;
+ vpu_debug(4, "plane %d size: %ld, sizeimage: %u\n",
+ i, vb2_plane_size(vb, i), sz);
+ if (vb2_plane_size(vb, i) < sz) {
+ vpu_err("plane %d is too small for output\n", i);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int hantro_buf_prepare(struct vb2_buffer *vb)
+{
+ struct vb2_queue *vq = vb->vb2_queue;
+ struct hantro_ctx *ctx = vb2_get_drv_priv(vq);
+ struct v4l2_pix_format_mplane *pix_fmt;
+ int ret;
+
+ if (V4L2_TYPE_IS_OUTPUT(vq->type))
+ pix_fmt = &ctx->src_fmt;
+ else
+ pix_fmt = &ctx->dst_fmt;
+ ret = hantro_buf_plane_check(vb, pix_fmt);
+ if (ret)
+ return ret;
+ /*
+ * Buffer's bytesused must be written by driver for CAPTURE buffers.
+ * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets
+ * it to buffer length).
+ */
+ if (V4L2_TYPE_IS_CAPTURE(vq->type)) {
+ if (ctx->is_encoder)
+ vb2_set_plane_payload(vb, 0, 0);
+ else
+ vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage);
+ }
+
+ return 0;
+}
+
+static void hantro_buf_queue(struct vb2_buffer *vb)
+{
+ struct hantro_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+
+ if (V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type) &&
+ vb2_is_streaming(vb->vb2_queue) &&
+ v4l2_m2m_dst_buf_is_last(ctx->fh.m2m_ctx)) {
+ unsigned int i;
+
+ for (i = 0; i < vb->num_planes; i++)
+ vb2_set_plane_payload(vb, i, 0);
+
+ vbuf->field = V4L2_FIELD_NONE;
+ vbuf->sequence = ctx->sequence_cap++;
+
+ v4l2_m2m_last_buffer_done(ctx->fh.m2m_ctx, vbuf);
+ v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event);
+ return;
+ }
+
+ v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
+}
+
+static bool hantro_vq_is_coded(struct vb2_queue *q)
+{
+ struct hantro_ctx *ctx = vb2_get_drv_priv(q);
+
+ return ctx->is_encoder != V4L2_TYPE_IS_OUTPUT(q->type);
+}
+
+static int hantro_start_streaming(struct vb2_queue *q, unsigned int count)
+{
+ struct hantro_ctx *ctx = vb2_get_drv_priv(q);
+ int ret = 0;
+
+ v4l2_m2m_update_start_streaming_state(ctx->fh.m2m_ctx, q);
+
+ if (V4L2_TYPE_IS_OUTPUT(q->type))
+ ctx->sequence_out = 0;
+ else
+ ctx->sequence_cap = 0;
+
+ if (hantro_vq_is_coded(q)) {
+ enum hantro_codec_mode codec_mode;
+
+ if (V4L2_TYPE_IS_OUTPUT(q->type))
+ codec_mode = ctx->vpu_src_fmt->codec_mode;
+ else
+ codec_mode = ctx->vpu_dst_fmt->codec_mode;
+
+ vpu_debug(4, "Codec mode = %d\n", codec_mode);
+ ctx->codec_ops = &ctx->dev->variant->codec_ops[codec_mode];
+ if (ctx->codec_ops->init) {
+ ret = ctx->codec_ops->init(ctx);
+ if (ret)
+ return ret;
+ }
+
+ if (hantro_needs_postproc(ctx, ctx->vpu_dst_fmt)) {
+ ret = hantro_postproc_alloc(ctx);
+ if (ret)
+ goto err_codec_exit;
+ }
+ }
+ return ret;
+
+err_codec_exit:
+ if (ctx->codec_ops->exit)
+ ctx->codec_ops->exit(ctx);
+ return ret;
+}
+
+static void
+hantro_return_bufs(struct vb2_queue *q,
+ struct vb2_v4l2_buffer *(*buf_remove)(struct v4l2_m2m_ctx *))
+{
+ struct hantro_ctx *ctx = vb2_get_drv_priv(q);
+
+ for (;;) {
+ struct vb2_v4l2_buffer *vbuf;
+
+ vbuf = buf_remove(ctx->fh.m2m_ctx);
+ if (!vbuf)
+ break;
+ v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req,
+ &ctx->ctrl_handler);
+ v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
+ }
+}
+
+static void hantro_stop_streaming(struct vb2_queue *q)
+{
+ struct hantro_ctx *ctx = vb2_get_drv_priv(q);
+
+ if (hantro_vq_is_coded(q)) {
+ hantro_postproc_free(ctx);
+ if (ctx->codec_ops && ctx->codec_ops->exit)
+ ctx->codec_ops->exit(ctx);
+ }
+
+ /*
+ * The mem2mem framework calls v4l2_m2m_cancel_job before
+ * .stop_streaming, so there isn't any job running and
+ * it is safe to return all the buffers.
+ */
+ if (V4L2_TYPE_IS_OUTPUT(q->type))
+ hantro_return_bufs(q, v4l2_m2m_src_buf_remove);
+ else
+ hantro_return_bufs(q, v4l2_m2m_dst_buf_remove);
+
+ v4l2_m2m_update_stop_streaming_state(ctx->fh.m2m_ctx, q);
+
+ if (V4L2_TYPE_IS_OUTPUT(q->type) &&
+ v4l2_m2m_has_stopped(ctx->fh.m2m_ctx))
+ v4l2_event_queue_fh(&ctx->fh, &hantro_eos_event);
+}
+
+static void hantro_buf_request_complete(struct vb2_buffer *vb)
+{
+ struct hantro_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+
+ v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->ctrl_handler);
+}
+
+static int hantro_buf_out_validate(struct vb2_buffer *vb)
+{
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+
+ vbuf->field = V4L2_FIELD_NONE;
+ return 0;
+}
+
+const struct vb2_ops hantro_queue_ops = {
+ .queue_setup = hantro_queue_setup,
+ .buf_prepare = hantro_buf_prepare,
+ .buf_queue = hantro_buf_queue,
+ .buf_out_validate = hantro_buf_out_validate,
+ .buf_request_complete = hantro_buf_request_complete,
+ .start_streaming = hantro_start_streaming,
+ .stop_streaming = hantro_stop_streaming,
+ .wait_prepare = vb2_ops_wait_prepare,
+ .wait_finish = vb2_ops_wait_finish,
+};
diff --git a/drivers/media/platform/verisilicon/hantro_v4l2.h b/drivers/media/platform/verisilicon/hantro_v4l2.h
new file mode 100644
index 000000000..64f6f57e9
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_v4l2.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ * Alpha Lin <Alpha.Lin@rock-chips.com>
+ * Jeffy Chen <jeffy.chen@rock-chips.com>
+ *
+ * Copyright 2018 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ *
+ * Based on s5p-mfc driver by Samsung Electronics Co., Ltd.
+ * Copyright (C) 2011 Samsung Electronics Co., Ltd.
+ */
+
+#ifndef HANTRO_V4L2_H_
+#define HANTRO_V4L2_H_
+
+#include "hantro.h"
+
+extern const struct v4l2_ioctl_ops hantro_ioctl_ops;
+extern const struct vb2_ops hantro_queue_ops;
+
+void hantro_reset_fmts(struct hantro_ctx *ctx);
+int hantro_get_format_depth(u32 fourcc);
+const struct hantro_fmt *
+hantro_get_default_fmt(const struct hantro_ctx *ctx, bool bitstream);
+
+#endif /* HANTRO_V4L2_H_ */
diff --git a/drivers/media/platform/verisilicon/hantro_vp8.c b/drivers/media/platform/verisilicon/hantro_vp8.c
new file mode 100644
index 000000000..381bc1d3b
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_vp8.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ */
+
+#include "hantro.h"
+
+/*
+ * probs table with packed
+ */
+struct vp8_prob_tbl_packed {
+ u8 prob_mb_skip_false;
+ u8 prob_intra;
+ u8 prob_ref_last;
+ u8 prob_ref_golden;
+ u8 prob_segment[3];
+ u8 padding0;
+
+ u8 prob_luma_16x16_pred_mode[4];
+ u8 prob_chroma_pred_mode[3];
+ u8 padding1;
+
+ /* mv prob */
+ u8 prob_mv_context[2][V4L2_VP8_MV_PROB_CNT];
+ u8 padding2[2];
+
+ /* coeff probs */
+ u8 prob_coeffs[4][8][3][V4L2_VP8_COEFF_PROB_CNT];
+ u8 padding3[96];
+};
+
+/*
+ * filter taps taken to 7-bit precision,
+ * reference RFC6386#Page-16, filters[8][6]
+ */
+const u32 hantro_vp8_dec_mc_filter[8][6] = {
+ { 0, 0, 128, 0, 0, 0 },
+ { 0, -6, 123, 12, -1, 0 },
+ { 2, -11, 108, 36, -8, 1 },
+ { 0, -9, 93, 50, -6, 0 },
+ { 3, -16, 77, 77, -16, 3 },
+ { 0, -6, 50, 93, -9, 0 },
+ { 1, -8, 36, 108, -11, 2 },
+ { 0, -1, 12, 123, -6, 0 }
+};
+
+void hantro_vp8_prob_update(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ const struct v4l2_vp8_entropy *entropy = &hdr->entropy;
+ u32 i, j, k;
+ u8 *dst;
+
+ /* first probs */
+ dst = ctx->vp8_dec.prob_tbl.cpu;
+
+ dst[0] = hdr->prob_skip_false;
+ dst[1] = hdr->prob_intra;
+ dst[2] = hdr->prob_last;
+ dst[3] = hdr->prob_gf;
+ dst[4] = hdr->segment.segment_probs[0];
+ dst[5] = hdr->segment.segment_probs[1];
+ dst[6] = hdr->segment.segment_probs[2];
+ dst[7] = 0;
+
+ dst += 8;
+ dst[0] = entropy->y_mode_probs[0];
+ dst[1] = entropy->y_mode_probs[1];
+ dst[2] = entropy->y_mode_probs[2];
+ dst[3] = entropy->y_mode_probs[3];
+ dst[4] = entropy->uv_mode_probs[0];
+ dst[5] = entropy->uv_mode_probs[1];
+ dst[6] = entropy->uv_mode_probs[2];
+ dst[7] = 0; /*unused */
+
+ /* mv probs */
+ dst += 8;
+ dst[0] = entropy->mv_probs[0][0]; /* is short */
+ dst[1] = entropy->mv_probs[1][0];
+ dst[2] = entropy->mv_probs[0][1]; /* sign */
+ dst[3] = entropy->mv_probs[1][1];
+ dst[4] = entropy->mv_probs[0][8 + 9];
+ dst[5] = entropy->mv_probs[0][9 + 9];
+ dst[6] = entropy->mv_probs[1][8 + 9];
+ dst[7] = entropy->mv_probs[1][9 + 9];
+ dst += 8;
+ for (i = 0; i < 2; ++i) {
+ for (j = 0; j < 8; j += 4) {
+ dst[0] = entropy->mv_probs[i][j + 9 + 0];
+ dst[1] = entropy->mv_probs[i][j + 9 + 1];
+ dst[2] = entropy->mv_probs[i][j + 9 + 2];
+ dst[3] = entropy->mv_probs[i][j + 9 + 3];
+ dst += 4;
+ }
+ }
+ for (i = 0; i < 2; ++i) {
+ dst[0] = entropy->mv_probs[i][0 + 2];
+ dst[1] = entropy->mv_probs[i][1 + 2];
+ dst[2] = entropy->mv_probs[i][2 + 2];
+ dst[3] = entropy->mv_probs[i][3 + 2];
+ dst[4] = entropy->mv_probs[i][4 + 2];
+ dst[5] = entropy->mv_probs[i][5 + 2];
+ dst[6] = entropy->mv_probs[i][6 + 2];
+ dst[7] = 0; /*unused */
+ dst += 8;
+ }
+
+ /* coeff probs (header part) */
+ dst = ctx->vp8_dec.prob_tbl.cpu;
+ dst += (8 * 7);
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 8; ++j) {
+ for (k = 0; k < 3; ++k) {
+ dst[0] = entropy->coeff_probs[i][j][k][0];
+ dst[1] = entropy->coeff_probs[i][j][k][1];
+ dst[2] = entropy->coeff_probs[i][j][k][2];
+ dst[3] = entropy->coeff_probs[i][j][k][3];
+ dst += 4;
+ }
+ }
+ }
+
+ /* coeff probs (footer part) */
+ dst = ctx->vp8_dec.prob_tbl.cpu;
+ dst += (8 * 55);
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 8; ++j) {
+ for (k = 0; k < 3; ++k) {
+ dst[0] = entropy->coeff_probs[i][j][k][4];
+ dst[1] = entropy->coeff_probs[i][j][k][5];
+ dst[2] = entropy->coeff_probs[i][j][k][6];
+ dst[3] = entropy->coeff_probs[i][j][k][7];
+ dst[4] = entropy->coeff_probs[i][j][k][8];
+ dst[5] = entropy->coeff_probs[i][j][k][9];
+ dst[6] = entropy->coeff_probs[i][j][k][10];
+ dst[7] = 0; /*unused */
+ dst += 8;
+ }
+ }
+ }
+}
+
+int hantro_vp8_dec_init(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_aux_buf *aux_buf;
+ unsigned int mb_width, mb_height;
+ size_t segment_map_size;
+ int ret;
+
+ /* segment map table size calculation */
+ mb_width = DIV_ROUND_UP(ctx->dst_fmt.width, 16);
+ mb_height = DIV_ROUND_UP(ctx->dst_fmt.height, 16);
+ segment_map_size = round_up(DIV_ROUND_UP(mb_width * mb_height, 4), 64);
+
+ /*
+ * In context init the dma buffer for segment map must be allocated.
+ * And the data in segment map buffer must be set to all zero.
+ */
+ aux_buf = &ctx->vp8_dec.segment_map;
+ aux_buf->size = segment_map_size;
+ aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size,
+ &aux_buf->dma, GFP_KERNEL);
+ if (!aux_buf->cpu)
+ return -ENOMEM;
+
+ /*
+ * Allocate probability table buffer,
+ * total 1208 bytes, 4K page is far enough.
+ */
+ aux_buf = &ctx->vp8_dec.prob_tbl;
+ aux_buf->size = sizeof(struct vp8_prob_tbl_packed);
+ aux_buf->cpu = dma_alloc_coherent(vpu->dev, aux_buf->size,
+ &aux_buf->dma, GFP_KERNEL);
+ if (!aux_buf->cpu) {
+ ret = -ENOMEM;
+ goto err_free_seg_map;
+ }
+
+ return 0;
+
+err_free_seg_map:
+ dma_free_coherent(vpu->dev, ctx->vp8_dec.segment_map.size,
+ ctx->vp8_dec.segment_map.cpu,
+ ctx->vp8_dec.segment_map.dma);
+
+ return ret;
+}
+
+void hantro_vp8_dec_exit(struct hantro_ctx *ctx)
+{
+ struct hantro_vp8_dec_hw_ctx *vp8_dec = &ctx->vp8_dec;
+ struct hantro_dev *vpu = ctx->dev;
+
+ dma_free_coherent(vpu->dev, vp8_dec->segment_map.size,
+ vp8_dec->segment_map.cpu, vp8_dec->segment_map.dma);
+ dma_free_coherent(vpu->dev, vp8_dec->prob_tbl.size,
+ vp8_dec->prob_tbl.cpu, vp8_dec->prob_tbl.dma);
+}
diff --git a/drivers/media/platform/verisilicon/hantro_vp9.c b/drivers/media/platform/verisilicon/hantro_vp9.c
new file mode 100644
index 000000000..566cd376c
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_vp9.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VP9 codec driver
+ *
+ * Copyright (C) 2021 Collabora Ltd.
+ */
+
+#include <linux/types.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro.h"
+#include "hantro_hw.h"
+#include "hantro_vp9.h"
+
+#define POW2(x) (1 << (x))
+
+#define MAX_LOG2_TILE_COLUMNS 6
+#define MAX_NUM_TILE_COLS POW2(MAX_LOG2_TILE_COLUMNS)
+#define MAX_TILE_COLS 20
+#define MAX_TILE_ROWS 22
+
+static size_t hantro_vp9_tile_filter_size(unsigned int height)
+{
+ u32 h, height32, size;
+
+ h = roundup(height, 8);
+
+ height32 = roundup(h, 64);
+ size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */
+
+ return size;
+}
+
+static size_t hantro_vp9_bsd_control_size(unsigned int height)
+{
+ u32 h, height32;
+
+ h = roundup(height, 8);
+ height32 = roundup(h, 64);
+
+ return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1);
+}
+
+static size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height)
+{
+ u32 w, h;
+ int num_ctbs;
+
+ w = roundup(width, 8);
+ h = roundup(height, 8);
+ num_ctbs = ((w + 63) / 64) * ((h + 63) / 64);
+
+ return num_ctbs * 32;
+}
+
+static inline size_t hantro_vp9_prob_tab_size(void)
+{
+ return roundup(sizeof(struct hantro_g2_all_probs), 16);
+}
+
+static inline size_t hantro_vp9_count_tab_size(void)
+{
+ return roundup(sizeof(struct symbol_counts), 16);
+}
+
+static inline size_t hantro_vp9_tile_info_size(void)
+{
+ return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16);
+}
+
+static void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
+{
+ if (i == 0)
+ return &cnts->count_coeffs[j][k][l][m];
+
+ if (i == 1)
+ return &cnts->count_coeffs8x8[j][k][l][m];
+
+ if (i == 2)
+ return &cnts->count_coeffs16x16[j][k][l][m];
+
+ if (i == 3)
+ return &cnts->count_coeffs32x32[j][k][l][m];
+
+ return NULL;
+}
+
+static void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m)
+{
+ if (i == 0)
+ return &cnts->count_coeffs[j][k][l][m][3];
+
+ if (i == 1)
+ return &cnts->count_coeffs8x8[j][k][l][m][3];
+
+ if (i == 2)
+ return &cnts->count_coeffs16x16[j][k][l][m][3];
+
+ if (i == 3)
+ return &cnts->count_coeffs32x32[j][k][l][m][3];
+
+ return NULL;
+}
+
+#define INNER_LOOP \
+ do { \
+ for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); ++m) { \
+ vp9_ctx->cnts.coeff[i][j][k][l][m] = \
+ get_coeffs_arr(cnts, i, j, k, l, m); \
+ vp9_ctx->cnts.eob[i][j][k][l][m][0] = \
+ &cnts->count_eobs[i][j][k][l][m]; \
+ vp9_ctx->cnts.eob[i][j][k][l][m][1] = \
+ get_eobs1(cnts, i, j, k, l, m); \
+ } \
+ } while (0)
+
+static void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx)
+{
+ struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec;
+ struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset;
+ int i, j, k, l, m;
+
+ vp9_ctx->cnts.partition = &cnts->partition_counts;
+ vp9_ctx->cnts.skip = &cnts->mbskip_count;
+ vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count;
+ vp9_ctx->cnts.tx32p = &cnts->tx32x32_count;
+ /*
+ * g2 hardware uses tx16x16_count[2][3], while the api
+ * expects tx16p[2][4], so this must be explicitly copied
+ * into vp9_ctx->cnts.tx16p when passing the data to the
+ * vp9 library function
+ */
+ vp9_ctx->cnts.tx8p = &cnts->tx8x8_count;
+
+ vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts;
+ vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts;
+ vp9_ctx->cnts.comp = &cnts->comp_inter_count;
+ vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count;
+ vp9_ctx->cnts.single_ref = &cnts->single_ref_count;
+ vp9_ctx->cnts.filter = &cnts->switchable_interp_counts;
+ vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints;
+ vp9_ctx->cnts.sign = &cnts->mv_counts.sign;
+ vp9_ctx->cnts.classes = &cnts->mv_counts.classes;
+ vp9_ctx->cnts.class0 = &cnts->mv_counts.class0;
+ vp9_ctx->cnts.bits = &cnts->mv_counts.bits;
+ vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp;
+ vp9_ctx->cnts.fp = &cnts->mv_counts.fp;
+ vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp;
+ vp9_ctx->cnts.hp = &cnts->mv_counts.hp;
+
+ for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i)
+ for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j)
+ for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k)
+ for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l)
+ INNER_LOOP;
+}
+
+int hantro_vp9_dec_init(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ const struct hantro_variant *variant = vpu->variant;
+ struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
+ struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
+ struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
+ struct hantro_aux_buf *misc = &vp9_dec->misc;
+ u32 i, max_width, max_height, size;
+
+ if (variant->num_dec_fmts < 1)
+ return -EINVAL;
+
+ for (i = 0; i < variant->num_dec_fmts; ++i)
+ if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME)
+ break;
+
+ if (i == variant->num_dec_fmts)
+ return -EINVAL;
+
+ max_width = vpu->variant->dec_fmts[i].frmsize.max_width;
+ max_height = vpu->variant->dec_fmts[i].frmsize.max_height;
+
+ size = hantro_vp9_tile_filter_size(max_height);
+ vp9_dec->bsd_ctrl_offset = size;
+ size += hantro_vp9_bsd_control_size(max_height);
+
+ tile_edge->cpu = dma_alloc_coherent(vpu->dev, size, &tile_edge->dma, GFP_KERNEL);
+ if (!tile_edge->cpu)
+ return -ENOMEM;
+
+ tile_edge->size = size;
+ memset(tile_edge->cpu, 0, size);
+
+ size = hantro_vp9_segment_map_size(max_width, max_height);
+ vp9_dec->segment_map_size = size;
+ size *= 2; /* we need two areas of this size, used alternately */
+
+ segment_map->cpu = dma_alloc_coherent(vpu->dev, size, &segment_map->dma, GFP_KERNEL);
+ if (!segment_map->cpu)
+ goto err_segment_map;
+
+ segment_map->size = size;
+ memset(segment_map->cpu, 0, size);
+
+ size = hantro_vp9_prob_tab_size();
+ vp9_dec->ctx_counters_offset = size;
+ size += hantro_vp9_count_tab_size();
+ vp9_dec->tile_info_offset = size;
+ size += hantro_vp9_tile_info_size();
+
+ misc->cpu = dma_alloc_coherent(vpu->dev, size, &misc->dma, GFP_KERNEL);
+ if (!misc->cpu)
+ goto err_misc;
+
+ misc->size = size;
+ memset(misc->cpu, 0, size);
+
+ init_v4l2_vp9_count_tbl(ctx);
+
+ return 0;
+
+err_misc:
+ dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
+
+err_segment_map:
+ dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
+
+ return -ENOMEM;
+}
+
+void hantro_vp9_dec_exit(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec;
+ struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge;
+ struct hantro_aux_buf *segment_map = &vp9_dec->segment_map;
+ struct hantro_aux_buf *misc = &vp9_dec->misc;
+
+ dma_free_coherent(vpu->dev, misc->size, misc->cpu, misc->dma);
+ dma_free_coherent(vpu->dev, segment_map->size, segment_map->cpu, segment_map->dma);
+ dma_free_coherent(vpu->dev, tile_edge->size, tile_edge->cpu, tile_edge->dma);
+}
diff --git a/drivers/media/platform/verisilicon/hantro_vp9.h b/drivers/media/platform/verisilicon/hantro_vp9.h
new file mode 100644
index 000000000..26b69275f
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_vp9.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hantro VP9 codec driver
+ *
+ * Copyright (C) 2021 Collabora Ltd.
+ */
+
+struct hantro_g2_mv_probs {
+ u8 joint[3];
+ u8 sign[2];
+ u8 class0_bit[2][1];
+ u8 fr[2][3];
+ u8 class0_hp[2];
+ u8 hp[2];
+ u8 classes[2][10];
+ u8 class0_fr[2][2][3];
+ u8 bits[2][10];
+};
+
+struct hantro_g2_probs {
+ u8 inter_mode[7][4];
+ u8 is_inter[4];
+ u8 uv_mode[10][8];
+ u8 tx8[2][1];
+ u8 tx16[2][2];
+ u8 tx32[2][3];
+ u8 y_mode_tail[4][1];
+ u8 y_mode[4][8];
+ u8 partition[2][16][4]; /* [keyframe][][], [inter][][] */
+ u8 uv_mode_tail[10][1];
+ u8 interp_filter[4][2];
+ u8 comp_mode[5];
+ u8 skip[3];
+
+ u8 pad1[1];
+
+ struct hantro_g2_mv_probs mv;
+
+ u8 single_ref[5][2];
+ u8 comp_ref[5];
+
+ u8 pad2[17];
+
+ u8 coef[4][2][2][6][6][4];
+};
+
+struct hantro_g2_all_probs {
+ u8 kf_y_mode_prob[10][10][8];
+
+ u8 kf_y_mode_prob_tail[10][10][1];
+ u8 ref_pred_probs[3];
+ u8 mb_segment_tree_probs[7];
+ u8 segment_pred_probs[3];
+ u8 ref_scores[4];
+ u8 prob_comppred[2];
+
+ u8 pad1[9];
+
+ u8 kf_uv_mode_prob[10][8];
+ u8 kf_uv_mode_prob_tail[10][1];
+
+ u8 pad2[6];
+
+ struct hantro_g2_probs probs;
+};
+
+struct mv_counts {
+ u32 joints[4];
+ u32 sign[2][2];
+ u32 classes[2][11];
+ u32 class0[2][2];
+ u32 bits[2][10][2];
+ u32 class0_fp[2][2][4];
+ u32 fp[2][4];
+ u32 class0_hp[2][2];
+ u32 hp[2][2];
+};
+
+struct symbol_counts {
+ u32 inter_mode_counts[7][3][2];
+ u32 sb_ymode_counts[4][10];
+ u32 uv_mode_counts[10][10];
+ u32 partition_counts[16][4];
+ u32 switchable_interp_counts[4][3];
+ u32 intra_inter_count[4][2];
+ u32 comp_inter_count[5][2];
+ u32 single_ref_count[5][2][2];
+ u32 comp_ref_count[5][2];
+ u32 tx32x32_count[2][4];
+ u32 tx16x16_count[2][3];
+ u32 tx8x8_count[2][2];
+ u32 mbskip_count[3][2];
+
+ struct mv_counts mv_counts;
+
+ u32 count_coeffs[2][2][6][6][4];
+ u32 count_coeffs8x8[2][2][6][6][4];
+ u32 count_coeffs16x16[2][2][6][6][4];
+ u32 count_coeffs32x32[2][2][6][6][4];
+
+ u32 count_eobs[4][2][2][6][6];
+};
diff --git a/drivers/media/platform/verisilicon/imx8m_vpu_hw.c b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
new file mode 100644
index 000000000..b390228fd
--- /dev/null
+++ b/drivers/media/platform/verisilicon/imx8m_vpu_hw.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2019 Pengutronix, Philipp Zabel <kernel@pengutronix.de>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+
+#include "hantro.h"
+#include "hantro_jpeg.h"
+#include "hantro_g1_regs.h"
+#include "hantro_g2_regs.h"
+
+#define CTRL_SOFT_RESET 0x00
+#define RESET_G1 BIT(1)
+#define RESET_G2 BIT(0)
+
+#define CTRL_CLOCK_ENABLE 0x04
+#define CLOCK_G1 BIT(1)
+#define CLOCK_G2 BIT(0)
+
+#define CTRL_G1_DEC_FUSE 0x08
+#define CTRL_G1_PP_FUSE 0x0c
+#define CTRL_G2_DEC_FUSE 0x10
+
+static void imx8m_soft_reset(struct hantro_dev *vpu, u32 reset_bits)
+{
+ u32 val;
+
+ /* Assert */
+ val = readl(vpu->ctrl_base + CTRL_SOFT_RESET);
+ val &= ~reset_bits;
+ writel(val, vpu->ctrl_base + CTRL_SOFT_RESET);
+
+ udelay(2);
+
+ /* Release */
+ val = readl(vpu->ctrl_base + CTRL_SOFT_RESET);
+ val |= reset_bits;
+ writel(val, vpu->ctrl_base + CTRL_SOFT_RESET);
+}
+
+static void imx8m_clk_enable(struct hantro_dev *vpu, u32 clock_bits)
+{
+ u32 val;
+
+ val = readl(vpu->ctrl_base + CTRL_CLOCK_ENABLE);
+ val |= clock_bits;
+ writel(val, vpu->ctrl_base + CTRL_CLOCK_ENABLE);
+}
+
+static int imx8mq_runtime_resume(struct hantro_dev *vpu)
+{
+ int ret;
+
+ ret = clk_bulk_prepare_enable(vpu->variant->num_clocks, vpu->clocks);
+ if (ret) {
+ dev_err(vpu->dev, "Failed to enable clocks\n");
+ return ret;
+ }
+
+ imx8m_soft_reset(vpu, RESET_G1 | RESET_G2);
+ imx8m_clk_enable(vpu, CLOCK_G1 | CLOCK_G2);
+
+ /* Set values of the fuse registers */
+ writel(0xffffffff, vpu->ctrl_base + CTRL_G1_DEC_FUSE);
+ writel(0xffffffff, vpu->ctrl_base + CTRL_G1_PP_FUSE);
+ writel(0xffffffff, vpu->ctrl_base + CTRL_G2_DEC_FUSE);
+
+ clk_bulk_disable_unprepare(vpu->variant->num_clocks, vpu->clocks);
+
+ return 0;
+}
+
+/*
+ * Supported formats.
+ */
+
+static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_YUYV,
+ .codec_mode = HANTRO_MODE_NONE,
+ .postprocessed = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+ .codec_mode = HANTRO_MODE_MPEG2_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP8_FRAME,
+ .codec_mode = HANTRO_MODE_VP8_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_H264_SLICE,
+ .codec_mode = HANTRO_MODE_H264_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .postprocessed = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_P010,
+ .codec_mode = HANTRO_MODE_NONE,
+ .postprocessed = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12_4L4,
+ .codec_mode = HANTRO_MODE_NONE,
+ .match_depth = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = TILE_MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = TILE_MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_P010_4L4,
+ .codec_mode = HANTRO_MODE_NONE,
+ .match_depth = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = TILE_MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = TILE_MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_HEVC_SLICE,
+ .codec_mode = HANTRO_MODE_HEVC_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = TILE_MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = TILE_MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP9_FRAME,
+ .codec_mode = HANTRO_MODE_VP9_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = TILE_MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = TILE_MB_DIM,
+ },
+ },
+};
+
+static irqreturn_t imx8m_vpu_g1_irq(int irq, void *dev_id)
+{
+ struct hantro_dev *vpu = dev_id;
+ enum vb2_buffer_state state;
+ u32 status;
+
+ status = vdpu_read(vpu, G1_REG_INTERRUPT);
+ state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ?
+ VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+ vdpu_write(vpu, 0, G1_REG_INTERRUPT);
+ vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+
+ hantro_irq_done(vpu, state);
+
+ return IRQ_HANDLED;
+}
+
+static int imx8mq_vpu_hw_init(struct hantro_dev *vpu)
+{
+ vpu->ctrl_base = vpu->reg_bases[vpu->variant->num_regs - 1];
+
+ return 0;
+}
+
+static void imx8m_vpu_g1_reset(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ imx8m_soft_reset(vpu, RESET_G1);
+}
+
+/*
+ * Supported codec ops.
+ */
+
+static const struct hantro_codec_ops imx8mq_vpu_codec_ops[] = {
+ [HANTRO_MODE_MPEG2_DEC] = {
+ .run = hantro_g1_mpeg2_dec_run,
+ .reset = imx8m_vpu_g1_reset,
+ .init = hantro_mpeg2_dec_init,
+ .exit = hantro_mpeg2_dec_exit,
+ },
+ [HANTRO_MODE_VP8_DEC] = {
+ .run = hantro_g1_vp8_dec_run,
+ .reset = imx8m_vpu_g1_reset,
+ .init = hantro_vp8_dec_init,
+ .exit = hantro_vp8_dec_exit,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+ .run = hantro_g1_h264_dec_run,
+ .reset = imx8m_vpu_g1_reset,
+ .init = hantro_h264_dec_init,
+ .exit = hantro_h264_dec_exit,
+ },
+};
+
+static const struct hantro_codec_ops imx8mq_vpu_g1_codec_ops[] = {
+ [HANTRO_MODE_MPEG2_DEC] = {
+ .run = hantro_g1_mpeg2_dec_run,
+ .init = hantro_mpeg2_dec_init,
+ .exit = hantro_mpeg2_dec_exit,
+ },
+ [HANTRO_MODE_VP8_DEC] = {
+ .run = hantro_g1_vp8_dec_run,
+ .init = hantro_vp8_dec_init,
+ .exit = hantro_vp8_dec_exit,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+ .run = hantro_g1_h264_dec_run,
+ .init = hantro_h264_dec_init,
+ .exit = hantro_h264_dec_exit,
+ },
+};
+
+static const struct hantro_codec_ops imx8mq_vpu_g2_codec_ops[] = {
+ [HANTRO_MODE_HEVC_DEC] = {
+ .run = hantro_g2_hevc_dec_run,
+ .init = hantro_hevc_dec_init,
+ .exit = hantro_hevc_dec_exit,
+ },
+ [HANTRO_MODE_VP9_DEC] = {
+ .run = hantro_g2_vp9_dec_run,
+ .done = hantro_g2_vp9_dec_done,
+ .init = hantro_vp9_dec_init,
+ .exit = hantro_vp9_dec_exit,
+ },
+};
+
+/*
+ * VPU variants.
+ */
+
+static const struct hantro_irq imx8mq_irqs[] = {
+ { "g1", imx8m_vpu_g1_irq },
+};
+
+static const struct hantro_irq imx8mq_g2_irqs[] = {
+ { "g2", hantro_g2_irq },
+};
+
+static const char * const imx8mq_clk_names[] = { "g1", "g2", "bus" };
+static const char * const imx8mq_reg_names[] = { "g1", "g2", "ctrl" };
+static const char * const imx8mq_g1_clk_names[] = { "g1" };
+static const char * const imx8mq_g2_clk_names[] = { "g2" };
+
+const struct hantro_variant imx8mq_vpu_variant = {
+ .dec_fmts = imx8m_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_dec_fmts),
+ .postproc_fmts = imx8m_vpu_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_postproc_fmts),
+ .postproc_ops = &hantro_g1_postproc_ops,
+ .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+ HANTRO_H264_DECODER,
+ .codec_ops = imx8mq_vpu_codec_ops,
+ .init = imx8mq_vpu_hw_init,
+ .runtime_resume = imx8mq_runtime_resume,
+ .irqs = imx8mq_irqs,
+ .num_irqs = ARRAY_SIZE(imx8mq_irqs),
+ .clk_names = imx8mq_clk_names,
+ .num_clocks = ARRAY_SIZE(imx8mq_clk_names),
+ .reg_names = imx8mq_reg_names,
+ .num_regs = ARRAY_SIZE(imx8mq_reg_names)
+};
+
+const struct hantro_variant imx8mq_vpu_g1_variant = {
+ .dec_fmts = imx8m_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_dec_fmts),
+ .postproc_fmts = imx8m_vpu_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_postproc_fmts),
+ .postproc_ops = &hantro_g1_postproc_ops,
+ .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+ HANTRO_H264_DECODER,
+ .codec_ops = imx8mq_vpu_g1_codec_ops,
+ .irqs = imx8mq_irqs,
+ .num_irqs = ARRAY_SIZE(imx8mq_irqs),
+ .clk_names = imx8mq_g1_clk_names,
+ .num_clocks = ARRAY_SIZE(imx8mq_g1_clk_names),
+};
+
+const struct hantro_variant imx8mq_vpu_g2_variant = {
+ .dec_offset = 0x0,
+ .dec_fmts = imx8m_vpu_g2_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
+ .postproc_fmts = imx8m_vpu_g2_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
+ .postproc_ops = &hantro_g2_postproc_ops,
+ .codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
+ .codec_ops = imx8mq_vpu_g2_codec_ops,
+ .irqs = imx8mq_g2_irqs,
+ .num_irqs = ARRAY_SIZE(imx8mq_g2_irqs),
+ .clk_names = imx8mq_g2_clk_names,
+ .num_clocks = ARRAY_SIZE(imx8mq_g2_clk_names),
+};
+
+const struct hantro_variant imx8mm_vpu_g1_variant = {
+ .dec_fmts = imx8m_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_dec_fmts),
+ .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+ HANTRO_H264_DECODER,
+ .codec_ops = imx8mq_vpu_g1_codec_ops,
+ .irqs = imx8mq_irqs,
+ .num_irqs = ARRAY_SIZE(imx8mq_irqs),
+ .clk_names = imx8mq_g1_clk_names,
+ .num_clocks = ARRAY_SIZE(imx8mq_g1_clk_names),
+};
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c
new file mode 100644
index 000000000..46c1a83bc
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
+ * Hertz Wong <hertz.wong@rock-chips.com>
+ * Herman Chen <herman.chen@rock-chips.com>
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * Tomasz Figa <tfiga@chromium.org>
+ */
+
+#include <linux/types.h>
+#include <linux/sort.h>
+
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro_hw.h"
+#include "hantro_v4l2.h"
+
+#define VDPU_SWREG(nr) ((nr) * 4)
+
+#define VDPU_REG_DEC_OUT_BASE VDPU_SWREG(63)
+#define VDPU_REG_RLC_VLC_BASE VDPU_SWREG(64)
+#define VDPU_REG_QTABLE_BASE VDPU_SWREG(61)
+#define VDPU_REG_DIR_MV_BASE VDPU_SWREG(62)
+#define VDPU_REG_REFER_BASE(i) (VDPU_SWREG(84 + (i)))
+#define VDPU_REG_DEC_E(v) ((v) ? BIT(0) : 0)
+
+#define VDPU_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(11) : 0)
+#define VDPU_REG_DEC_SCMD_DIS(v) ((v) ? BIT(10) : 0)
+#define VDPU_REG_FILTERING_DIS(v) ((v) ? BIT(8) : 0)
+#define VDPU_REG_PIC_FIXED_QUANT(v) ((v) ? BIT(7) : 0)
+#define VDPU_REG_DEC_LATENCY(v) (((v) << 1) & GENMASK(6, 1))
+
+#define VDPU_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25))
+#define VDPU_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0))
+
+#define VDPU_REG_APF_THRESHOLD(v) (((v) << 17) & GENMASK(30, 17))
+#define VDPU_REG_STARTMB_X(v) (((v) << 8) & GENMASK(16, 8))
+#define VDPU_REG_STARTMB_Y(v) (((v) << 0) & GENMASK(7, 0))
+
+#define VDPU_REG_DEC_MODE(v) (((v) << 0) & GENMASK(3, 0))
+
+#define VDPU_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(5) : 0)
+#define VDPU_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(4) : 0)
+#define VDPU_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(3) : 0)
+#define VDPU_REG_DEC_INSWAP32_E(v) ((v) ? BIT(2) : 0)
+#define VDPU_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(1) : 0)
+#define VDPU_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(0) : 0)
+
+#define VDPU_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(22) : 0)
+#define VDPU_REG_DEC_MAX_BURST(v) (((v) << 16) & GENMASK(20, 16))
+#define VDPU_REG_DEC_AXI_WR_ID(v) (((v) << 8) & GENMASK(15, 8))
+#define VDPU_REG_DEC_AXI_RD_ID(v) (((v) << 0) & GENMASK(7, 0))
+
+#define VDPU_REG_START_CODE_E(v) ((v) ? BIT(22) : 0)
+#define VDPU_REG_CH_8PIX_ILEAV_E(v) ((v) ? BIT(21) : 0)
+#define VDPU_REG_RLC_MODE_E(v) ((v) ? BIT(20) : 0)
+#define VDPU_REG_PIC_INTERLACE_E(v) ((v) ? BIT(17) : 0)
+#define VDPU_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(16) : 0)
+#define VDPU_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(13) : 0)
+#define VDPU_REG_WRITE_MVS_E(v) ((v) ? BIT(10) : 0)
+#define VDPU_REG_SEQ_MBAFF_E(v) ((v) ? BIT(7) : 0)
+#define VDPU_REG_PICORD_COUNT_E(v) ((v) ? BIT(6) : 0)
+#define VDPU_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(5) : 0)
+#define VDPU_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(4) : 0)
+
+#define VDPU_REG_PRED_BC_TAP_0_0(v) (((v) << 22) & GENMASK(31, 22))
+#define VDPU_REG_PRED_BC_TAP_0_1(v) (((v) << 12) & GENMASK(21, 12))
+#define VDPU_REG_PRED_BC_TAP_0_2(v) (((v) << 2) & GENMASK(11, 2))
+
+#define VDPU_REG_REFBU_E(v) ((v) ? BIT(31) : 0)
+
+#define VDPU_REG_PINIT_RLIST_F9(v) (((v) << 25) & GENMASK(29, 25))
+#define VDPU_REG_PINIT_RLIST_F8(v) (((v) << 20) & GENMASK(24, 20))
+#define VDPU_REG_PINIT_RLIST_F7(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_PINIT_RLIST_F6(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_PINIT_RLIST_F5(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_PINIT_RLIST_F4(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_PINIT_RLIST_F15(v) (((v) << 25) & GENMASK(29, 25))
+#define VDPU_REG_PINIT_RLIST_F14(v) (((v) << 20) & GENMASK(24, 20))
+#define VDPU_REG_PINIT_RLIST_F13(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_PINIT_RLIST_F12(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_PINIT_RLIST_F11(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_PINIT_RLIST_F10(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_REFER1_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER0_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFER3_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER2_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFER5_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER4_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFER7_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER6_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFER9_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER8_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFER11_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER10_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFER13_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER12_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFER15_NBR(v) (((v) << 16) & GENMASK(31, 16))
+#define VDPU_REG_REFER14_NBR(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_BINIT_RLIST_F5(v) (((v) << 25) & GENMASK(29, 25))
+#define VDPU_REG_BINIT_RLIST_F4(v) (((v) << 20) & GENMASK(24, 20))
+#define VDPU_REG_BINIT_RLIST_F3(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_BINIT_RLIST_F2(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_BINIT_RLIST_F1(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_BINIT_RLIST_F0(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_BINIT_RLIST_F11(v) (((v) << 25) & GENMASK(29, 25))
+#define VDPU_REG_BINIT_RLIST_F10(v) (((v) << 20) & GENMASK(24, 20))
+#define VDPU_REG_BINIT_RLIST_F9(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_BINIT_RLIST_F8(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_BINIT_RLIST_F7(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_BINIT_RLIST_F6(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_BINIT_RLIST_F15(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_BINIT_RLIST_F14(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_BINIT_RLIST_F13(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_BINIT_RLIST_F12(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_BINIT_RLIST_B5(v) (((v) << 25) & GENMASK(29, 25))
+#define VDPU_REG_BINIT_RLIST_B4(v) (((v) << 20) & GENMASK(24, 20))
+#define VDPU_REG_BINIT_RLIST_B3(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_BINIT_RLIST_B2(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_BINIT_RLIST_B1(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_BINIT_RLIST_B0(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_BINIT_RLIST_B11(v) (((v) << 25) & GENMASK(29, 25))
+#define VDPU_REG_BINIT_RLIST_B10(v) (((v) << 20) & GENMASK(24, 20))
+#define VDPU_REG_BINIT_RLIST_B9(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_BINIT_RLIST_B8(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_BINIT_RLIST_B7(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_BINIT_RLIST_B6(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_BINIT_RLIST_B15(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_BINIT_RLIST_B14(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_BINIT_RLIST_B13(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_BINIT_RLIST_B12(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_PINIT_RLIST_F3(v) (((v) << 15) & GENMASK(19, 15))
+#define VDPU_REG_PINIT_RLIST_F2(v) (((v) << 10) & GENMASK(14, 10))
+#define VDPU_REG_PINIT_RLIST_F1(v) (((v) << 5) & GENMASK(9, 5))
+#define VDPU_REG_PINIT_RLIST_F0(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_REFER_LTERM_E(v) (((v) << 0) & GENMASK(31, 0))
+
+#define VDPU_REG_REFER_VALID_E(v) (((v) << 0) & GENMASK(31, 0))
+
+#define VDPU_REG_STRM_START_BIT(v) (((v) << 0) & GENMASK(5, 0))
+
+#define VDPU_REG_CH_QP_OFFSET2(v) (((v) << 22) & GENMASK(26, 22))
+#define VDPU_REG_CH_QP_OFFSET(v) (((v) << 17) & GENMASK(21, 17))
+#define VDPU_REG_PIC_MB_HEIGHT_P(v) (((v) << 9) & GENMASK(16, 9))
+#define VDPU_REG_PIC_MB_WIDTH(v) (((v) << 0) & GENMASK(8, 0))
+
+#define VDPU_REG_WEIGHT_BIPR_IDC(v) (((v) << 16) & GENMASK(17, 16))
+#define VDPU_REG_REF_FRAMES(v) (((v) << 0) & GENMASK(4, 0))
+
+#define VDPU_REG_FILT_CTRL_PRES(v) ((v) ? BIT(31) : 0)
+#define VDPU_REG_RDPIC_CNT_PRES(v) ((v) ? BIT(30) : 0)
+#define VDPU_REG_FRAMENUM_LEN(v) (((v) << 16) & GENMASK(20, 16))
+#define VDPU_REG_FRAMENUM(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_REFPIC_MK_LEN(v) (((v) << 16) & GENMASK(26, 16))
+#define VDPU_REG_IDR_PIC_ID(v) (((v) << 0) & GENMASK(15, 0))
+
+#define VDPU_REG_PPS_ID(v) (((v) << 24) & GENMASK(31, 24))
+#define VDPU_REG_REFIDX1_ACTIVE(v) (((v) << 19) & GENMASK(23, 19))
+#define VDPU_REG_REFIDX0_ACTIVE(v) (((v) << 14) & GENMASK(18, 14))
+#define VDPU_REG_POC_LENGTH(v) (((v) << 0) & GENMASK(7, 0))
+
+#define VDPU_REG_IDR_PIC_E(v) ((v) ? BIT(8) : 0)
+#define VDPU_REG_DIR_8X8_INFER_E(v) ((v) ? BIT(7) : 0)
+#define VDPU_REG_BLACKWHITE_E(v) ((v) ? BIT(6) : 0)
+#define VDPU_REG_CABAC_E(v) ((v) ? BIT(5) : 0)
+#define VDPU_REG_WEIGHT_PRED_E(v) ((v) ? BIT(4) : 0)
+#define VDPU_REG_CONST_INTRA_E(v) ((v) ? BIT(3) : 0)
+#define VDPU_REG_8X8TRANS_FLAG_E(v) ((v) ? BIT(2) : 0)
+#define VDPU_REG_TYPE1_QUANT_E(v) ((v) ? BIT(1) : 0)
+#define VDPU_REG_FIELDPIC_FLAG_E(v) ((v) ? BIT(0) : 0)
+
+static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
+{
+ const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
+ const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
+ const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
+ const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
+ struct hantro_dev *vpu = ctx->dev;
+ u32 reg;
+
+ reg = VDPU_REG_DEC_ADV_PRE_DIS(0) |
+ VDPU_REG_DEC_SCMD_DIS(0) |
+ VDPU_REG_FILTERING_DIS(0) |
+ VDPU_REG_PIC_FIXED_QUANT(0) |
+ VDPU_REG_DEC_LATENCY(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(50));
+
+ reg = VDPU_REG_INIT_QP(pps->pic_init_qp_minus26 + 26) |
+ VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(51));
+
+ reg = VDPU_REG_APF_THRESHOLD(8) |
+ VDPU_REG_STARTMB_X(0) |
+ VDPU_REG_STARTMB_Y(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(52));
+
+ reg = VDPU_REG_DEC_MODE(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(53));
+
+ reg = VDPU_REG_DEC_STRENDIAN_E(1) |
+ VDPU_REG_DEC_STRSWAP32_E(1) |
+ VDPU_REG_DEC_OUTSWAP32_E(1) |
+ VDPU_REG_DEC_INSWAP32_E(1) |
+ VDPU_REG_DEC_OUT_ENDIAN(1) |
+ VDPU_REG_DEC_IN_ENDIAN(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(54));
+
+ reg = VDPU_REG_DEC_DATA_DISC_E(0) |
+ VDPU_REG_DEC_MAX_BURST(16) |
+ VDPU_REG_DEC_AXI_WR_ID(0) |
+ VDPU_REG_DEC_AXI_RD_ID(0xff);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(56));
+
+ reg = VDPU_REG_START_CODE_E(1) |
+ VDPU_REG_CH_8PIX_ILEAV_E(0) |
+ VDPU_REG_RLC_MODE_E(0) |
+ VDPU_REG_PIC_INTERLACE_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) &&
+ (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD ||
+ dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)) |
+ VDPU_REG_PIC_FIELDMODE_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) |
+ VDPU_REG_PIC_TOPFIELD_E(!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)) |
+ VDPU_REG_WRITE_MVS_E((sps->profile_idc > 66) && dec_param->nal_ref_idc) |
+ VDPU_REG_SEQ_MBAFF_E(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) |
+ VDPU_REG_PICORD_COUNT_E(sps->profile_idc > 66) |
+ VDPU_REG_DEC_TIMEOUT_E(1) |
+ VDPU_REG_DEC_CLK_GATE_E(1);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(57));
+
+ reg = VDPU_REG_PRED_BC_TAP_0_0(1) |
+ VDPU_REG_PRED_BC_TAP_0_1((u32)-5) |
+ VDPU_REG_PRED_BC_TAP_0_2(20);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(59));
+
+ reg = VDPU_REG_REFBU_E(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(65));
+
+ reg = VDPU_REG_STRM_START_BIT(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(109));
+
+ reg = VDPU_REG_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset) |
+ VDPU_REG_CH_QP_OFFSET(pps->chroma_qp_index_offset) |
+ VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) |
+ VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(110));
+
+ reg = VDPU_REG_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc) |
+ VDPU_REG_REF_FRAMES(sps->max_num_ref_frames);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(111));
+
+ reg = VDPU_REG_FILT_CTRL_PRES(pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) |
+ VDPU_REG_RDPIC_CNT_PRES(pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) |
+ VDPU_REG_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) |
+ VDPU_REG_FRAMENUM(dec_param->frame_num);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(112));
+
+ reg = VDPU_REG_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) |
+ VDPU_REG_IDR_PIC_ID(dec_param->idr_pic_id);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(113));
+
+ reg = VDPU_REG_PPS_ID(pps->pic_parameter_set_id) |
+ VDPU_REG_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) |
+ VDPU_REG_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) |
+ VDPU_REG_POC_LENGTH(dec_param->pic_order_cnt_bit_size);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(114));
+
+ reg = VDPU_REG_IDR_PIC_E(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) |
+ VDPU_REG_DIR_8X8_INFER_E(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) |
+ VDPU_REG_BLACKWHITE_E(sps->profile_idc >= 100 && sps->chroma_format_idc == 0) |
+ VDPU_REG_CABAC_E(pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) |
+ VDPU_REG_WEIGHT_PRED_E(pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) |
+ VDPU_REG_CONST_INTRA_E(pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) |
+ VDPU_REG_8X8TRANS_FLAG_E(pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) |
+ VDPU_REG_TYPE1_QUANT_E(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT) |
+ VDPU_REG_FIELDPIC_FLAG_E(!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(115));
+}
+
+static void set_ref(struct hantro_ctx *ctx)
+{
+ const struct v4l2_h264_reference *b0_reflist, *b1_reflist, *p_reflist;
+ struct hantro_dev *vpu = ctx->dev;
+ u32 reg;
+ int i;
+
+ b0_reflist = ctx->h264_dec.reflists.b0;
+ b1_reflist = ctx->h264_dec.reflists.b1;
+ p_reflist = ctx->h264_dec.reflists.p;
+
+ reg = VDPU_REG_PINIT_RLIST_F9(p_reflist[9].index) |
+ VDPU_REG_PINIT_RLIST_F8(p_reflist[8].index) |
+ VDPU_REG_PINIT_RLIST_F7(p_reflist[7].index) |
+ VDPU_REG_PINIT_RLIST_F6(p_reflist[6].index) |
+ VDPU_REG_PINIT_RLIST_F5(p_reflist[5].index) |
+ VDPU_REG_PINIT_RLIST_F4(p_reflist[4].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(74));
+
+ reg = VDPU_REG_PINIT_RLIST_F15(p_reflist[15].index) |
+ VDPU_REG_PINIT_RLIST_F14(p_reflist[14].index) |
+ VDPU_REG_PINIT_RLIST_F13(p_reflist[13].index) |
+ VDPU_REG_PINIT_RLIST_F12(p_reflist[12].index) |
+ VDPU_REG_PINIT_RLIST_F11(p_reflist[11].index) |
+ VDPU_REG_PINIT_RLIST_F10(p_reflist[10].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(75));
+
+ reg = VDPU_REG_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, 1)) |
+ VDPU_REG_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, 0));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(76));
+
+ reg = VDPU_REG_REFER3_NBR(hantro_h264_get_ref_nbr(ctx, 3)) |
+ VDPU_REG_REFER2_NBR(hantro_h264_get_ref_nbr(ctx, 2));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(77));
+
+ reg = VDPU_REG_REFER5_NBR(hantro_h264_get_ref_nbr(ctx, 5)) |
+ VDPU_REG_REFER4_NBR(hantro_h264_get_ref_nbr(ctx, 4));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(78));
+
+ reg = VDPU_REG_REFER7_NBR(hantro_h264_get_ref_nbr(ctx, 7)) |
+ VDPU_REG_REFER6_NBR(hantro_h264_get_ref_nbr(ctx, 6));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(79));
+
+ reg = VDPU_REG_REFER9_NBR(hantro_h264_get_ref_nbr(ctx, 9)) |
+ VDPU_REG_REFER8_NBR(hantro_h264_get_ref_nbr(ctx, 8));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(80));
+
+ reg = VDPU_REG_REFER11_NBR(hantro_h264_get_ref_nbr(ctx, 11)) |
+ VDPU_REG_REFER10_NBR(hantro_h264_get_ref_nbr(ctx, 10));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(81));
+
+ reg = VDPU_REG_REFER13_NBR(hantro_h264_get_ref_nbr(ctx, 13)) |
+ VDPU_REG_REFER12_NBR(hantro_h264_get_ref_nbr(ctx, 12));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(82));
+
+ reg = VDPU_REG_REFER15_NBR(hantro_h264_get_ref_nbr(ctx, 15)) |
+ VDPU_REG_REFER14_NBR(hantro_h264_get_ref_nbr(ctx, 14));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(83));
+
+ reg = VDPU_REG_BINIT_RLIST_F5(b0_reflist[5].index) |
+ VDPU_REG_BINIT_RLIST_F4(b0_reflist[4].index) |
+ VDPU_REG_BINIT_RLIST_F3(b0_reflist[3].index) |
+ VDPU_REG_BINIT_RLIST_F2(b0_reflist[2].index) |
+ VDPU_REG_BINIT_RLIST_F1(b0_reflist[1].index) |
+ VDPU_REG_BINIT_RLIST_F0(b0_reflist[0].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(100));
+
+ reg = VDPU_REG_BINIT_RLIST_F11(b0_reflist[11].index) |
+ VDPU_REG_BINIT_RLIST_F10(b0_reflist[10].index) |
+ VDPU_REG_BINIT_RLIST_F9(b0_reflist[9].index) |
+ VDPU_REG_BINIT_RLIST_F8(b0_reflist[8].index) |
+ VDPU_REG_BINIT_RLIST_F7(b0_reflist[7].index) |
+ VDPU_REG_BINIT_RLIST_F6(b0_reflist[6].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(101));
+
+ reg = VDPU_REG_BINIT_RLIST_F15(b0_reflist[15].index) |
+ VDPU_REG_BINIT_RLIST_F14(b0_reflist[14].index) |
+ VDPU_REG_BINIT_RLIST_F13(b0_reflist[13].index) |
+ VDPU_REG_BINIT_RLIST_F12(b0_reflist[12].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(102));
+
+ reg = VDPU_REG_BINIT_RLIST_B5(b1_reflist[5].index) |
+ VDPU_REG_BINIT_RLIST_B4(b1_reflist[4].index) |
+ VDPU_REG_BINIT_RLIST_B3(b1_reflist[3].index) |
+ VDPU_REG_BINIT_RLIST_B2(b1_reflist[2].index) |
+ VDPU_REG_BINIT_RLIST_B1(b1_reflist[1].index) |
+ VDPU_REG_BINIT_RLIST_B0(b1_reflist[0].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(103));
+
+ reg = VDPU_REG_BINIT_RLIST_B11(b1_reflist[11].index) |
+ VDPU_REG_BINIT_RLIST_B10(b1_reflist[10].index) |
+ VDPU_REG_BINIT_RLIST_B9(b1_reflist[9].index) |
+ VDPU_REG_BINIT_RLIST_B8(b1_reflist[8].index) |
+ VDPU_REG_BINIT_RLIST_B7(b1_reflist[7].index) |
+ VDPU_REG_BINIT_RLIST_B6(b1_reflist[6].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(104));
+
+ reg = VDPU_REG_BINIT_RLIST_B15(b1_reflist[15].index) |
+ VDPU_REG_BINIT_RLIST_B14(b1_reflist[14].index) |
+ VDPU_REG_BINIT_RLIST_B13(b1_reflist[13].index) |
+ VDPU_REG_BINIT_RLIST_B12(b1_reflist[12].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(105));
+
+ reg = VDPU_REG_PINIT_RLIST_F3(p_reflist[3].index) |
+ VDPU_REG_PINIT_RLIST_F2(p_reflist[2].index) |
+ VDPU_REG_PINIT_RLIST_F1(p_reflist[1].index) |
+ VDPU_REG_PINIT_RLIST_F0(p_reflist[0].index);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(106));
+
+ reg = VDPU_REG_REFER_LTERM_E(ctx->h264_dec.dpb_longterm);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(107));
+
+ reg = VDPU_REG_REFER_VALID_E(ctx->h264_dec.dpb_valid);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(108));
+
+ /* Set up addresses of DPB buffers. */
+ for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) {
+ dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, i);
+
+ vdpu_write_relaxed(vpu, dma_addr, VDPU_REG_REFER_BASE(i));
+ }
+}
+
+static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
+{
+ const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
+ struct vb2_v4l2_buffer *dst_buf;
+ struct hantro_dev *vpu = ctx->dev;
+ dma_addr_t src_dma, dst_dma;
+ size_t offset = 0;
+
+ /* Source (stream) buffer. */
+ src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
+ vdpu_write_relaxed(vpu, src_dma, VDPU_REG_RLC_VLC_BASE);
+
+ /* Destination (decoded frame) buffer. */
+ dst_buf = hantro_get_dst_buf(ctx);
+ dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf);
+ /* Adjust dma addr to start at second line for bottom field */
+ if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
+ offset = ALIGN(ctx->src_fmt.width, MB_DIM);
+ vdpu_write_relaxed(vpu, dst_dma + offset, VDPU_REG_DEC_OUT_BASE);
+
+ /* Higher profiles require DMV buffer appended to reference frames. */
+ if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) {
+ unsigned int bytes_per_mb = 384;
+
+ /* DMV buffer for monochrome start directly after Y-plane */
+ if (ctrls->sps->profile_idc >= 100 &&
+ ctrls->sps->chroma_format_idc == 0)
+ bytes_per_mb = 256;
+ offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) *
+ MB_HEIGHT(ctx->src_fmt.height);
+
+ /*
+ * DMV buffer is split in two for field encoded frames,
+ * adjust offset for bottom field
+ */
+ if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
+ offset += 32 * MB_WIDTH(ctx->src_fmt.width) *
+ MB_HEIGHT(ctx->src_fmt.height);
+ vdpu_write_relaxed(vpu, dst_dma + offset, VDPU_REG_DIR_MV_BASE);
+ }
+
+ /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */
+ vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, VDPU_REG_QTABLE_BASE);
+}
+
+int rockchip_vpu2_h264_dec_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *src_buf;
+ u32 reg;
+ int ret;
+
+ /* Prepare the H264 decoder context. */
+ ret = hantro_h264_dec_prepare_run(ctx);
+ if (ret)
+ return ret;
+
+ src_buf = hantro_get_src_buf(ctx);
+ set_params(ctx, src_buf);
+ set_ref(ctx);
+ set_buffers(ctx, src_buf);
+
+ hantro_end_prepare_run(ctx);
+
+ /* Start decoding! */
+ reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1);
+ vdpu_write(vpu, reg, VDPU_SWREG(57));
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c
new file mode 100644
index 000000000..8395c4d48
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ *
+ * JPEG encoder
+ * ------------
+ * The VPU JPEG encoder produces JPEG baseline sequential format.
+ * The quantization coefficients are 8-bit values, complying with
+ * the baseline specification. Therefore, it requires
+ * luma and chroma quantization tables. The hardware does entropy
+ * encoding using internal Huffman tables, as specified in the JPEG
+ * specification.
+ *
+ * In other words, only the luma and chroma quantization tables are
+ * required for the encoding operation.
+ *
+ * Quantization luma table values are written to registers
+ * VEPU_swreg_0-VEPU_swreg_15, and chroma table values to
+ * VEPU_swreg_16-VEPU_swreg_31. A special order is needed, neither
+ * zigzag, nor linear.
+ */
+
+#include <asm/unaligned.h>
+#include <media/v4l2-mem2mem.h>
+#include "hantro_jpeg.h"
+#include "hantro.h"
+#include "hantro_v4l2.h"
+#include "hantro_hw.h"
+#include "rockchip_vpu2_regs.h"
+
+#define VEPU_JPEG_QUANT_TABLE_COUNT 16
+
+static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx)
+{
+ u32 overfill_r, overfill_b;
+ u32 reg;
+
+ /*
+ * The format width and height are already macroblock aligned
+ * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination
+ * format width and height can be further modified by
+ * .vidioc_s_selection(), and the width is 4-aligned.
+ */
+ overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width;
+ overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height;
+
+ reg = VEPU_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width);
+ vepu_write_relaxed(vpu, reg, VEPU_REG_INPUT_LUMA_INFO);
+
+ reg = VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4) |
+ VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b);
+ /*
+ * This register controls the input crop, as the offset
+ * from the right/bottom within the last macroblock. The offset from the
+ * right must be divided by 4 and so the crop must be aligned to 4 pixels
+ * horizontally.
+ */
+ vepu_write_relaxed(vpu, reg, VEPU_REG_ENC_OVER_FILL_STRM_OFFSET);
+
+ reg = VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
+ vepu_write_relaxed(vpu, reg, VEPU_REG_ENC_CTRL1);
+}
+
+static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx,
+ struct vb2_buffer *src_buf,
+ struct vb2_buffer *dst_buf)
+{
+ struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
+ dma_addr_t src[3];
+ u32 size_left;
+
+ size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size;
+ if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size))
+ size_left = 0;
+
+ WARN_ON(pix_fmt->num_planes > 3);
+
+ vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
+ ctx->vpu_dst_fmt->header_size,
+ VEPU_REG_ADDR_OUTPUT_STREAM);
+ vepu_write_relaxed(vpu, size_left, VEPU_REG_STR_BUF_LIMIT);
+
+ if (pix_fmt->num_planes == 1) {
+ src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0);
+ } else if (pix_fmt->num_planes == 2) {
+ src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
+ vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0);
+ vepu_write_relaxed(vpu, src[1], VEPU_REG_ADDR_IN_PLANE_1);
+ } else {
+ src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
+ src[2] = vb2_dma_contig_plane_dma_addr(src_buf, 2);
+ vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0);
+ vepu_write_relaxed(vpu, src[1], VEPU_REG_ADDR_IN_PLANE_1);
+ vepu_write_relaxed(vpu, src[2], VEPU_REG_ADDR_IN_PLANE_2);
+ }
+}
+
+static void
+rockchip_vpu2_jpeg_enc_set_qtable(struct hantro_dev *vpu,
+ unsigned char *luma_qtable,
+ unsigned char *chroma_qtable)
+{
+ u32 reg, i;
+ __be32 *luma_qtable_p;
+ __be32 *chroma_qtable_p;
+
+ luma_qtable_p = (__be32 *)luma_qtable;
+ chroma_qtable_p = (__be32 *)chroma_qtable;
+
+ /*
+ * Quantization table registers must be written in contiguous blocks.
+ * DO NOT collapse the below two "for" loops into one.
+ */
+ for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) {
+ reg = get_unaligned_be32(&luma_qtable_p[i]);
+ vepu_write_relaxed(vpu, reg, VEPU_REG_JPEG_LUMA_QUAT(i));
+ }
+
+ for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) {
+ reg = get_unaligned_be32(&chroma_qtable_p[i]);
+ vepu_write_relaxed(vpu, reg, VEPU_REG_JPEG_CHROMA_QUAT(i));
+ }
+}
+
+int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *src_buf, *dst_buf;
+ struct hantro_jpeg_ctx jpeg_ctx;
+ u32 reg;
+
+ src_buf = hantro_get_src_buf(ctx);
+ dst_buf = hantro_get_dst_buf(ctx);
+
+ hantro_start_prepare_run(ctx);
+
+ memset(&jpeg_ctx, 0, sizeof(jpeg_ctx));
+ jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
+ if (!jpeg_ctx.buffer)
+ return -ENOMEM;
+
+ jpeg_ctx.width = ctx->dst_fmt.width;
+ jpeg_ctx.height = ctx->dst_fmt.height;
+ jpeg_ctx.quality = ctx->jpeg_quality;
+ hantro_jpeg_header_assemble(&jpeg_ctx);
+
+ /* Switch to JPEG encoder mode before writing registers */
+ vepu_write_relaxed(vpu, VEPU_REG_ENCODE_FORMAT_JPEG,
+ VEPU_REG_ENCODE_START);
+
+ rockchip_vpu2_set_src_img_ctrl(vpu, ctx);
+ rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf,
+ &dst_buf->vb2_buf);
+ rockchip_vpu2_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable,
+ jpeg_ctx.hw_chroma_qtable);
+
+ reg = VEPU_REG_OUTPUT_SWAP32
+ | VEPU_REG_OUTPUT_SWAP16
+ | VEPU_REG_OUTPUT_SWAP8
+ | VEPU_REG_INPUT_SWAP8
+ | VEPU_REG_INPUT_SWAP16
+ | VEPU_REG_INPUT_SWAP32;
+ /* Make sure that all registers are written at this point. */
+ vepu_write(vpu, reg, VEPU_REG_DATA_ENDIAN);
+
+ reg = VEPU_REG_AXI_CTRL_BURST_LEN(16);
+ vepu_write_relaxed(vpu, reg, VEPU_REG_AXI_CTRL);
+
+ reg = VEPU_REG_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width))
+ | VEPU_REG_MB_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
+ | VEPU_REG_FRAME_TYPE_INTRA
+ | VEPU_REG_ENCODE_FORMAT_JPEG
+ | VEPU_REG_ENCODE_ENABLE;
+
+ /* Kick the watchdog and start encoding */
+ hantro_end_prepare_run(ctx);
+ vepu_write(vpu, reg, VEPU_REG_ENCODE_START);
+
+ return 0;
+}
+
+void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ u32 bytesused = vepu_read(vpu, VEPU_REG_STR_BUF_LIMIT) / 8;
+ struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
+
+ vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
+ ctx->vpu_dst_fmt->header_size + bytesused);
+}
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c
new file mode 100644
index 000000000..b66737fab
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_mpeg2_dec.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ */
+
+#include <asm/unaligned.h>
+#include <linux/bitfield.h>
+#include <media/v4l2-mem2mem.h>
+#include "hantro.h"
+#include "hantro_hw.h"
+
+#define VDPU_SWREG(nr) ((nr) * 4)
+
+#define VDPU_REG_DEC_OUT_BASE VDPU_SWREG(63)
+#define VDPU_REG_RLC_VLC_BASE VDPU_SWREG(64)
+#define VDPU_REG_QTABLE_BASE VDPU_SWREG(61)
+#define VDPU_REG_REFER0_BASE VDPU_SWREG(131)
+#define VDPU_REG_REFER2_BASE VDPU_SWREG(134)
+#define VDPU_REG_REFER3_BASE VDPU_SWREG(135)
+#define VDPU_REG_REFER1_BASE VDPU_SWREG(148)
+#define VDPU_REG_DEC_E(v) ((v) ? BIT(0) : 0)
+
+#define VDPU_REG_DEC_ADV_PRE_DIS(v) ((v) ? BIT(11) : 0)
+#define VDPU_REG_DEC_SCMD_DIS(v) ((v) ? BIT(10) : 0)
+#define VDPU_REG_FILTERING_DIS(v) ((v) ? BIT(8) : 0)
+#define VDPU_REG_DEC_LATENCY(v) (((v) << 1) & GENMASK(6, 1))
+
+#define VDPU_REG_INIT_QP(v) (((v) << 25) & GENMASK(30, 25))
+#define VDPU_REG_STREAM_LEN(v) (((v) << 0) & GENMASK(23, 0))
+
+#define VDPU_REG_APF_THRESHOLD(v) (((v) << 17) & GENMASK(30, 17))
+#define VDPU_REG_STARTMB_X(v) (((v) << 8) & GENMASK(16, 8))
+#define VDPU_REG_STARTMB_Y(v) (((v) << 0) & GENMASK(7, 0))
+
+#define VDPU_REG_DEC_MODE(v) (((v) << 0) & GENMASK(3, 0))
+
+#define VDPU_REG_DEC_STRENDIAN_E(v) ((v) ? BIT(5) : 0)
+#define VDPU_REG_DEC_STRSWAP32_E(v) ((v) ? BIT(4) : 0)
+#define VDPU_REG_DEC_OUTSWAP32_E(v) ((v) ? BIT(3) : 0)
+#define VDPU_REG_DEC_INSWAP32_E(v) ((v) ? BIT(2) : 0)
+#define VDPU_REG_DEC_OUT_ENDIAN(v) ((v) ? BIT(1) : 0)
+#define VDPU_REG_DEC_IN_ENDIAN(v) ((v) ? BIT(0) : 0)
+
+#define VDPU_REG_DEC_DATA_DISC_E(v) ((v) ? BIT(22) : 0)
+#define VDPU_REG_DEC_MAX_BURST(v) (((v) << 16) & GENMASK(20, 16))
+#define VDPU_REG_DEC_AXI_WR_ID(v) (((v) << 8) & GENMASK(15, 8))
+#define VDPU_REG_DEC_AXI_RD_ID(v) (((v) << 0) & GENMASK(7, 0))
+
+#define VDPU_REG_RLC_MODE_E(v) ((v) ? BIT(20) : 0)
+#define VDPU_REG_PIC_INTERLACE_E(v) ((v) ? BIT(17) : 0)
+#define VDPU_REG_PIC_FIELDMODE_E(v) ((v) ? BIT(16) : 0)
+#define VDPU_REG_PIC_B_E(v) ((v) ? BIT(15) : 0)
+#define VDPU_REG_PIC_INTER_E(v) ((v) ? BIT(14) : 0)
+#define VDPU_REG_PIC_TOPFIELD_E(v) ((v) ? BIT(13) : 0)
+#define VDPU_REG_FWD_INTERLACE_E(v) ((v) ? BIT(12) : 0)
+#define VDPU_REG_WRITE_MVS_E(v) ((v) ? BIT(10) : 0)
+#define VDPU_REG_DEC_TIMEOUT_E(v) ((v) ? BIT(5) : 0)
+#define VDPU_REG_DEC_CLK_GATE_E(v) ((v) ? BIT(4) : 0)
+
+#define VDPU_REG_PIC_MB_WIDTH(v) (((v) << 23) & GENMASK(31, 23))
+#define VDPU_REG_PIC_MB_HEIGHT_P(v) (((v) << 11) & GENMASK(18, 11))
+#define VDPU_REG_ALT_SCAN_E(v) ((v) ? BIT(6) : 0)
+#define VDPU_REG_TOPFIELDFIRST_E(v) ((v) ? BIT(5) : 0)
+
+#define VDPU_REG_STRM_START_BIT(v) (((v) << 26) & GENMASK(31, 26))
+#define VDPU_REG_QSCALE_TYPE(v) ((v) ? BIT(24) : 0)
+#define VDPU_REG_CON_MV_E(v) ((v) ? BIT(4) : 0)
+#define VDPU_REG_INTRA_DC_PREC(v) (((v) << 2) & GENMASK(3, 2))
+#define VDPU_REG_INTRA_VLC_TAB(v) ((v) ? BIT(1) : 0)
+#define VDPU_REG_FRAME_PRED_DCT(v) ((v) ? BIT(0) : 0)
+
+#define VDPU_REG_ALT_SCAN_FLAG_E(v) ((v) ? BIT(19) : 0)
+#define VDPU_REG_FCODE_FWD_HOR(v) (((v) << 15) & GENMASK(18, 15))
+#define VDPU_REG_FCODE_FWD_VER(v) (((v) << 11) & GENMASK(14, 11))
+#define VDPU_REG_FCODE_BWD_HOR(v) (((v) << 7) & GENMASK(10, 7))
+#define VDPU_REG_FCODE_BWD_VER(v) (((v) << 3) & GENMASK(6, 3))
+#define VDPU_REG_MV_ACCURACY_FWD(v) ((v) ? BIT(2) : 0)
+#define VDPU_REG_MV_ACCURACY_BWD(v) ((v) ? BIT(1) : 0)
+
+static void
+rockchip_vpu2_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx)
+{
+ struct v4l2_ctrl_mpeg2_quantisation *q;
+
+ q = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_MPEG2_QUANTISATION);
+ hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q);
+ vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, VDPU_REG_QTABLE_BASE);
+}
+
+static void
+rockchip_vpu2_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
+ struct hantro_ctx *ctx,
+ struct vb2_buffer *src_buf,
+ struct vb2_buffer *dst_buf,
+ const struct v4l2_ctrl_mpeg2_sequence *seq,
+ const struct v4l2_ctrl_mpeg2_picture *pic)
+{
+ dma_addr_t forward_addr = 0, backward_addr = 0;
+ dma_addr_t current_addr, addr;
+
+ switch (pic->picture_coding_type) {
+ case V4L2_MPEG2_PIC_CODING_TYPE_B:
+ backward_addr = hantro_get_ref(ctx, pic->backward_ref_ts);
+ fallthrough;
+ case V4L2_MPEG2_PIC_CODING_TYPE_P:
+ forward_addr = hantro_get_ref(ctx, pic->forward_ref_ts);
+ }
+
+ /* Source bitstream buffer */
+ addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
+ vdpu_write_relaxed(vpu, addr, VDPU_REG_RLC_VLC_BASE);
+
+ /* Destination frame buffer */
+ addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0);
+ current_addr = addr;
+
+ if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD)
+ addr += ALIGN(ctx->dst_fmt.width, 16);
+ vdpu_write_relaxed(vpu, addr, VDPU_REG_DEC_OUT_BASE);
+
+ if (!forward_addr)
+ forward_addr = current_addr;
+ if (!backward_addr)
+ backward_addr = current_addr;
+
+ /* Set forward ref frame (top/bottom field) */
+ if (pic->picture_structure == V4L2_MPEG2_PIC_FRAME ||
+ pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B ||
+ (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD &&
+ pic->flags & V4L2_MPEG2_PIC_TOP_FIELD) ||
+ (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD &&
+ !(pic->flags & V4L2_MPEG2_PIC_TOP_FIELD))) {
+ vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER0_BASE);
+ vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER1_BASE);
+ } else if (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) {
+ vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER0_BASE);
+ vdpu_write_relaxed(vpu, current_addr, VDPU_REG_REFER1_BASE);
+ } else if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) {
+ vdpu_write_relaxed(vpu, current_addr, VDPU_REG_REFER0_BASE);
+ vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER1_BASE);
+ }
+
+ /* Set backward ref frame (top/bottom field) */
+ vdpu_write_relaxed(vpu, backward_addr, VDPU_REG_REFER2_BASE);
+ vdpu_write_relaxed(vpu, backward_addr, VDPU_REG_REFER3_BASE);
+}
+
+int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *src_buf, *dst_buf;
+ const struct v4l2_ctrl_mpeg2_sequence *seq;
+ const struct v4l2_ctrl_mpeg2_picture *pic;
+ u32 reg;
+
+ src_buf = hantro_get_src_buf(ctx);
+ dst_buf = hantro_get_dst_buf(ctx);
+
+ hantro_start_prepare_run(ctx);
+
+ seq = hantro_get_ctrl(ctx,
+ V4L2_CID_STATELESS_MPEG2_SEQUENCE);
+ pic = hantro_get_ctrl(ctx,
+ V4L2_CID_STATELESS_MPEG2_PICTURE);
+
+ reg = VDPU_REG_DEC_ADV_PRE_DIS(0) |
+ VDPU_REG_DEC_SCMD_DIS(0) |
+ VDPU_REG_FILTERING_DIS(1) |
+ VDPU_REG_DEC_LATENCY(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(50));
+
+ reg = VDPU_REG_INIT_QP(1) |
+ VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(51));
+
+ reg = VDPU_REG_APF_THRESHOLD(8) |
+ VDPU_REG_STARTMB_X(0) |
+ VDPU_REG_STARTMB_Y(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(52));
+
+ reg = VDPU_REG_DEC_MODE(5);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(53));
+
+ reg = VDPU_REG_DEC_STRENDIAN_E(1) |
+ VDPU_REG_DEC_STRSWAP32_E(1) |
+ VDPU_REG_DEC_OUTSWAP32_E(1) |
+ VDPU_REG_DEC_INSWAP32_E(1) |
+ VDPU_REG_DEC_OUT_ENDIAN(1) |
+ VDPU_REG_DEC_IN_ENDIAN(1);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(54));
+
+ reg = VDPU_REG_DEC_DATA_DISC_E(0) |
+ VDPU_REG_DEC_MAX_BURST(16) |
+ VDPU_REG_DEC_AXI_WR_ID(0) |
+ VDPU_REG_DEC_AXI_RD_ID(0);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(56));
+
+ reg = VDPU_REG_RLC_MODE_E(0) |
+ VDPU_REG_PIC_INTERLACE_E(!(seq->flags & V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE)) |
+ VDPU_REG_PIC_FIELDMODE_E(pic->picture_structure != V4L2_MPEG2_PIC_FRAME) |
+ VDPU_REG_PIC_B_E(pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B) |
+ VDPU_REG_PIC_INTER_E(pic->picture_coding_type != V4L2_MPEG2_PIC_CODING_TYPE_I) |
+ VDPU_REG_PIC_TOPFIELD_E(pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) |
+ VDPU_REG_FWD_INTERLACE_E(0) |
+ VDPU_REG_WRITE_MVS_E(0) |
+ VDPU_REG_DEC_TIMEOUT_E(1) |
+ VDPU_REG_DEC_CLK_GATE_E(1);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(57));
+
+ reg = VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) |
+ VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) |
+ VDPU_REG_ALT_SCAN_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+ VDPU_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(120));
+
+ reg = VDPU_REG_STRM_START_BIT(0) |
+ VDPU_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) |
+ VDPU_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) |
+ VDPU_REG_INTRA_DC_PREC(pic->intra_dc_precision) |
+ VDPU_REG_INTRA_VLC_TAB(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC) |
+ VDPU_REG_FRAME_PRED_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(122));
+
+ reg = VDPU_REG_ALT_SCAN_FLAG_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+ VDPU_REG_FCODE_FWD_HOR(pic->f_code[0][0]) |
+ VDPU_REG_FCODE_FWD_VER(pic->f_code[0][1]) |
+ VDPU_REG_FCODE_BWD_HOR(pic->f_code[1][0]) |
+ VDPU_REG_FCODE_BWD_VER(pic->f_code[1][1]) |
+ VDPU_REG_MV_ACCURACY_FWD(1) |
+ VDPU_REG_MV_ACCURACY_BWD(1);
+ vdpu_write_relaxed(vpu, reg, VDPU_SWREG(136));
+
+ rockchip_vpu2_mpeg2_dec_set_quantisation(vpu, ctx);
+
+ rockchip_vpu2_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
+ &dst_buf->vb2_buf, seq, pic);
+
+ /* Kick the watchdog and start decoding */
+ hantro_end_prepare_run(ctx);
+
+ reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1);
+ vdpu_write(vpu, reg, VDPU_SWREG(57));
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_hw_vp8_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_vp8_dec.c
new file mode 100644
index 000000000..d07907544
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu2_hw_vp8_dec.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Rockchip VPU codec vp8 decode driver
+ *
+ * Copyright (C) 2014 Rockchip Electronics Co., Ltd.
+ * ZhiChao Yu <zhichao.yu@rock-chips.com>
+ *
+ * Copyright (C) 2014 Google LLC.
+ * Tomasz Figa <tfiga@chromium.org>
+ *
+ * Copyright (C) 2015 Rockchip Electronics Co., Ltd.
+ * Alpha Lin <alpha.lin@rock-chips.com>
+ */
+
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro_hw.h"
+#include "hantro.h"
+#include "hantro_g1_regs.h"
+
+#define VDPU_REG_DEC_CTRL0 0x0c8
+#define VDPU_REG_STREAM_LEN 0x0cc
+#define VDPU_REG_DEC_FORMAT 0x0d4
+#define VDPU_REG_DEC_CTRL0_DEC_MODE(x) (((x) & 0xf) << 0)
+#define VDPU_REG_DATA_ENDIAN 0x0d8
+#define VDPU_REG_CONFIG_DEC_STRENDIAN_E BIT(5)
+#define VDPU_REG_CONFIG_DEC_STRSWAP32_E BIT(4)
+#define VDPU_REG_CONFIG_DEC_OUTSWAP32_E BIT(3)
+#define VDPU_REG_CONFIG_DEC_INSWAP32_E BIT(2)
+#define VDPU_REG_CONFIG_DEC_OUT_ENDIAN BIT(1)
+#define VDPU_REG_CONFIG_DEC_IN_ENDIAN BIT(0)
+#define VDPU_REG_AXI_CTRL 0x0e0
+#define VDPU_REG_CONFIG_DEC_MAX_BURST(x) (((x) & 0x1f) << 16)
+#define VDPU_REG_EN_FLAGS 0x0e4
+#define VDPU_REG_DEC_CTRL0_PIC_INTER_E BIT(14)
+#define VDPU_REG_CONFIG_DEC_TIMEOUT_E BIT(5)
+#define VDPU_REG_CONFIG_DEC_CLK_GATE_E BIT(4)
+#define VDPU_REG_PRED_FLT 0x0ec
+#define VDPU_REG_ADDR_QTABLE 0x0f4
+#define VDPU_REG_ADDR_DST 0x0fc
+#define VDPU_REG_ADDR_STR 0x100
+#define VDPU_REG_VP8_PIC_MB_SIZE 0x1e0
+#define VDPU_REG_VP8_DCT_START_BIT 0x1e4
+#define VDPU_REG_DEC_CTRL4_VC1_HEIGHT_EXT BIT(13)
+#define VDPU_REG_DEC_CTRL4_BILIN_MC_E BIT(12)
+#define VDPU_REG_VP8_CTRL0 0x1e8
+#define VDPU_REG_VP8_DATA_VAL 0x1f0
+#define VDPU_REG_PRED_FLT7 0x1f4
+#define VDPU_REG_PRED_FLT8 0x1f8
+#define VDPU_REG_PRED_FLT9 0x1fc
+#define VDPU_REG_PRED_FLT10 0x200
+#define VDPU_REG_FILTER_LEVEL 0x204
+#define VDPU_REG_VP8_QUANTER0 0x208
+#define VDPU_REG_VP8_ADDR_REF0 0x20c
+#define VDPU_REG_FILTER_MB_ADJ 0x210
+#define VDPU_REG_REF_PIC_FILT_TYPE_E BIT(31)
+#define VDPU_REG_REF_PIC_FILT_SHARPNESS(x) (((x) & 0x7) << 28)
+#define VDPU_REG_FILTER_REF_ADJ 0x214
+#define VDPU_REG_VP8_ADDR_REF2_5(i) (0x218 + ((i) * 0x4))
+#define VDPU_REG_VP8_GREF_SIGN_BIAS BIT(0)
+#define VDPU_REG_VP8_AREF_SIGN_BIAS BIT(0)
+#define VDPU_REG_VP8_DCT_BASE(i) \
+ (0x230 + ((((i) < 5) ? (i) : ((i) + 1)) * 0x4))
+#define VDPU_REG_VP8_ADDR_CTRL_PART 0x244
+#define VDPU_REG_VP8_SEGMENT_VAL 0x254
+#define VDPU_REG_FWD_PIC1_SEGMENT_BASE(x) ((x) << 0)
+#define VDPU_REG_FWD_PIC1_SEGMENT_UPD_E BIT(1)
+#define VDPU_REG_FWD_PIC1_SEGMENT_E BIT(0)
+#define VDPU_REG_VP8_DCT_START_BIT2 0x258
+#define VDPU_REG_VP8_QUANTER1 0x25c
+#define VDPU_REG_VP8_QUANTER2 0x260
+#define VDPU_REG_PRED_FLT1 0x264
+#define VDPU_REG_PRED_FLT2 0x268
+#define VDPU_REG_PRED_FLT3 0x26c
+#define VDPU_REG_PRED_FLT4 0x270
+#define VDPU_REG_PRED_FLT5 0x274
+#define VDPU_REG_PRED_FLT6 0x278
+
+static const struct hantro_reg vp8_dec_dct_base[8] = {
+ { VDPU_REG_ADDR_STR, 0, 0xffffffff },
+ { VDPU_REG_VP8_DCT_BASE(0), 0, 0xffffffff },
+ { VDPU_REG_VP8_DCT_BASE(1), 0, 0xffffffff },
+ { VDPU_REG_VP8_DCT_BASE(2), 0, 0xffffffff },
+ { VDPU_REG_VP8_DCT_BASE(3), 0, 0xffffffff },
+ { VDPU_REG_VP8_DCT_BASE(4), 0, 0xffffffff },
+ { VDPU_REG_VP8_DCT_BASE(5), 0, 0xffffffff },
+ { VDPU_REG_VP8_DCT_BASE(6), 0, 0xffffffff },
+};
+
+static const struct hantro_reg vp8_dec_lf_level[4] = {
+ { VDPU_REG_FILTER_LEVEL, 18, 0x3f },
+ { VDPU_REG_FILTER_LEVEL, 12, 0x3f },
+ { VDPU_REG_FILTER_LEVEL, 6, 0x3f },
+ { VDPU_REG_FILTER_LEVEL, 0, 0x3f },
+};
+
+static const struct hantro_reg vp8_dec_mb_adj[4] = {
+ { VDPU_REG_FILTER_MB_ADJ, 21, 0x7f },
+ { VDPU_REG_FILTER_MB_ADJ, 14, 0x7f },
+ { VDPU_REG_FILTER_MB_ADJ, 7, 0x7f },
+ { VDPU_REG_FILTER_MB_ADJ, 0, 0x7f },
+};
+
+static const struct hantro_reg vp8_dec_ref_adj[4] = {
+ { VDPU_REG_FILTER_REF_ADJ, 21, 0x7f },
+ { VDPU_REG_FILTER_REF_ADJ, 14, 0x7f },
+ { VDPU_REG_FILTER_REF_ADJ, 7, 0x7f },
+ { VDPU_REG_FILTER_REF_ADJ, 0, 0x7f },
+};
+
+static const struct hantro_reg vp8_dec_quant[4] = {
+ { VDPU_REG_VP8_QUANTER0, 11, 0x7ff },
+ { VDPU_REG_VP8_QUANTER0, 0, 0x7ff },
+ { VDPU_REG_VP8_QUANTER1, 11, 0x7ff },
+ { VDPU_REG_VP8_QUANTER1, 0, 0x7ff },
+};
+
+static const struct hantro_reg vp8_dec_quant_delta[5] = {
+ { VDPU_REG_VP8_QUANTER0, 27, 0x1f },
+ { VDPU_REG_VP8_QUANTER0, 22, 0x1f },
+ { VDPU_REG_VP8_QUANTER1, 27, 0x1f },
+ { VDPU_REG_VP8_QUANTER1, 22, 0x1f },
+ { VDPU_REG_VP8_QUANTER2, 27, 0x1f },
+};
+
+static const struct hantro_reg vp8_dec_dct_start_bits[8] = {
+ { VDPU_REG_VP8_CTRL0, 26, 0x3f },
+ { VDPU_REG_VP8_DCT_START_BIT, 26, 0x3f },
+ { VDPU_REG_VP8_DCT_START_BIT, 20, 0x3f },
+ { VDPU_REG_VP8_DCT_START_BIT2, 24, 0x3f },
+ { VDPU_REG_VP8_DCT_START_BIT2, 18, 0x3f },
+ { VDPU_REG_VP8_DCT_START_BIT2, 12, 0x3f },
+ { VDPU_REG_VP8_DCT_START_BIT2, 6, 0x3f },
+ { VDPU_REG_VP8_DCT_START_BIT2, 0, 0x3f },
+};
+
+static const struct hantro_reg vp8_dec_pred_bc_tap[8][6] = {
+ {
+ { 0, 0, 0},
+ { VDPU_REG_PRED_FLT, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT1, 22, 0x3ff },
+ { 0, 0, 0},
+ }, {
+ { 0, 0, 0},
+ { VDPU_REG_PRED_FLT1, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT1, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT2, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT2, 12, 0x3ff },
+ { 0, 0, 0},
+ }, {
+ { VDPU_REG_PRED_FLT10, 10, 0x3 },
+ { VDPU_REG_PRED_FLT2, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT3, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT3, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT3, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT10, 8, 0x3},
+ }, {
+ { 0, 0, 0},
+ { VDPU_REG_PRED_FLT4, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT4, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT4, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT5, 22, 0x3ff },
+ { 0, 0, 0},
+ }, {
+ { VDPU_REG_PRED_FLT10, 6, 0x3 },
+ { VDPU_REG_PRED_FLT5, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT5, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT6, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT6, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT10, 4, 0x3 },
+ }, {
+ { 0, 0, 0},
+ { VDPU_REG_PRED_FLT6, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT7, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT7, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT7, 2, 0x3ff },
+ { 0, 0, 0},
+ }, {
+ { VDPU_REG_PRED_FLT10, 2, 0x3 },
+ { VDPU_REG_PRED_FLT8, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT8, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT8, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT9, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT10, 0, 0x3 },
+ }, {
+ { 0, 0, 0},
+ { VDPU_REG_PRED_FLT9, 12, 0x3ff },
+ { VDPU_REG_PRED_FLT9, 2, 0x3ff },
+ { VDPU_REG_PRED_FLT10, 22, 0x3ff },
+ { VDPU_REG_PRED_FLT10, 12, 0x3ff },
+ { 0, 0, 0},
+ },
+};
+
+static const struct hantro_reg vp8_dec_mb_start_bit = {
+ .base = VDPU_REG_VP8_CTRL0,
+ .shift = 18,
+ .mask = 0x3f
+};
+
+static const struct hantro_reg vp8_dec_mb_aligned_data_len = {
+ .base = VDPU_REG_VP8_DATA_VAL,
+ .shift = 0,
+ .mask = 0x3fffff
+};
+
+static const struct hantro_reg vp8_dec_num_dct_partitions = {
+ .base = VDPU_REG_VP8_DATA_VAL,
+ .shift = 24,
+ .mask = 0xf
+};
+
+static const struct hantro_reg vp8_dec_stream_len = {
+ .base = VDPU_REG_STREAM_LEN,
+ .shift = 0,
+ .mask = 0xffffff
+};
+
+static const struct hantro_reg vp8_dec_mb_width = {
+ .base = VDPU_REG_VP8_PIC_MB_SIZE,
+ .shift = 23,
+ .mask = 0x1ff
+};
+
+static const struct hantro_reg vp8_dec_mb_height = {
+ .base = VDPU_REG_VP8_PIC_MB_SIZE,
+ .shift = 11,
+ .mask = 0xff
+};
+
+static const struct hantro_reg vp8_dec_mb_width_ext = {
+ .base = VDPU_REG_VP8_PIC_MB_SIZE,
+ .shift = 3,
+ .mask = 0x7
+};
+
+static const struct hantro_reg vp8_dec_mb_height_ext = {
+ .base = VDPU_REG_VP8_PIC_MB_SIZE,
+ .shift = 0,
+ .mask = 0x7
+};
+
+static const struct hantro_reg vp8_dec_bool_range = {
+ .base = VDPU_REG_VP8_CTRL0,
+ .shift = 0,
+ .mask = 0xff
+};
+
+static const struct hantro_reg vp8_dec_bool_value = {
+ .base = VDPU_REG_VP8_CTRL0,
+ .shift = 8,
+ .mask = 0xff
+};
+
+static const struct hantro_reg vp8_dec_filter_disable = {
+ .base = VDPU_REG_DEC_CTRL0,
+ .shift = 8,
+ .mask = 1
+};
+
+static const struct hantro_reg vp8_dec_skip_mode = {
+ .base = VDPU_REG_DEC_CTRL0,
+ .shift = 9,
+ .mask = 1
+};
+
+static const struct hantro_reg vp8_dec_start_dec = {
+ .base = VDPU_REG_EN_FLAGS,
+ .shift = 0,
+ .mask = 1
+};
+
+static void cfg_lf(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ const struct v4l2_vp8_segment *seg = &hdr->segment;
+ const struct v4l2_vp8_loop_filter *lf = &hdr->lf;
+ struct hantro_dev *vpu = ctx->dev;
+ unsigned int i;
+ u32 reg;
+
+ if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) {
+ hantro_reg_write(vpu, &vp8_dec_lf_level[0], lf->level);
+ } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) {
+ for (i = 0; i < 4; i++) {
+ u32 lf_level = clamp(lf->level + seg->lf_update[i],
+ 0, 63);
+
+ hantro_reg_write(vpu, &vp8_dec_lf_level[i], lf_level);
+ }
+ } else {
+ for (i = 0; i < 4; i++)
+ hantro_reg_write(vpu, &vp8_dec_lf_level[i],
+ seg->lf_update[i]);
+ }
+
+ reg = VDPU_REG_REF_PIC_FILT_SHARPNESS(lf->sharpness_level);
+ if (lf->flags & V4L2_VP8_LF_FILTER_TYPE_SIMPLE)
+ reg |= VDPU_REG_REF_PIC_FILT_TYPE_E;
+ vdpu_write_relaxed(vpu, reg, VDPU_REG_FILTER_MB_ADJ);
+
+ if (lf->flags & V4L2_VP8_LF_ADJ_ENABLE) {
+ for (i = 0; i < 4; i++) {
+ hantro_reg_write(vpu, &vp8_dec_mb_adj[i],
+ lf->mb_mode_delta[i]);
+ hantro_reg_write(vpu, &vp8_dec_ref_adj[i],
+ lf->ref_frm_delta[i]);
+ }
+ }
+}
+
+static void cfg_qp(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ const struct v4l2_vp8_quantization *q = &hdr->quant;
+ const struct v4l2_vp8_segment *seg = &hdr->segment;
+ struct hantro_dev *vpu = ctx->dev;
+ unsigned int i;
+
+ if (!(seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED)) {
+ hantro_reg_write(vpu, &vp8_dec_quant[0], q->y_ac_qi);
+ } else if (seg->flags & V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE) {
+ for (i = 0; i < 4; i++) {
+ u32 quant = clamp(q->y_ac_qi + seg->quant_update[i],
+ 0, 127);
+
+ hantro_reg_write(vpu, &vp8_dec_quant[i], quant);
+ }
+ } else {
+ for (i = 0; i < 4; i++)
+ hantro_reg_write(vpu, &vp8_dec_quant[i],
+ seg->quant_update[i]);
+ }
+
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[0], q->y_dc_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[1], q->y2_dc_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[2], q->y2_ac_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[3], q->uv_dc_delta);
+ hantro_reg_write(vpu, &vp8_dec_quant_delta[4], q->uv_ac_delta);
+}
+
+static void cfg_parts(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *vb2_src;
+ u32 first_part_offset = V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) ? 10 : 3;
+ u32 mb_size, mb_offset_bytes, mb_offset_bits, mb_start_bits;
+ u32 dct_size_part_size, dct_part_offset;
+ dma_addr_t src_dma;
+ u32 dct_part_total_len = 0;
+ u32 count = 0;
+ unsigned int i;
+
+ vb2_src = hantro_get_src_buf(ctx);
+ src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
+
+ /*
+ * Calculate control partition mb data info
+ * @first_part_header_bits: bits offset of mb data from first
+ * part start pos
+ * @mb_offset_bits: bits offset of mb data from src_dma
+ * base addr
+ * @mb_offset_byte: bytes offset of mb data from src_dma
+ * base addr
+ * @mb_start_bits: bits offset of mb data from mb data
+ * 64bits alignment addr
+ */
+ mb_offset_bits = first_part_offset * 8 +
+ hdr->first_part_header_bits + 8;
+ mb_offset_bytes = mb_offset_bits / 8;
+ mb_start_bits = mb_offset_bits -
+ (mb_offset_bytes & (~DEC_8190_ALIGN_MASK)) * 8;
+ mb_size = hdr->first_part_size -
+ (mb_offset_bytes - first_part_offset) +
+ (mb_offset_bytes & DEC_8190_ALIGN_MASK);
+
+ /* Macroblock data aligned base addr */
+ vdpu_write_relaxed(vpu, (mb_offset_bytes & (~DEC_8190_ALIGN_MASK)) +
+ src_dma, VDPU_REG_VP8_ADDR_CTRL_PART);
+ hantro_reg_write(vpu, &vp8_dec_mb_start_bit, mb_start_bits);
+ hantro_reg_write(vpu, &vp8_dec_mb_aligned_data_len, mb_size);
+
+ /*
+ * Calculate DCT partition info
+ * @dct_size_part_size: Containing sizes of DCT part, every DCT part
+ * has 3 bytes to store its size, except the last
+ * DCT part
+ * @dct_part_offset: bytes offset of DCT parts from src_dma base addr
+ * @dct_part_total_len: total size of all DCT parts
+ */
+ dct_size_part_size = (hdr->num_dct_parts - 1) * 3;
+ dct_part_offset = first_part_offset + hdr->first_part_size;
+ for (i = 0; i < hdr->num_dct_parts; i++)
+ dct_part_total_len += hdr->dct_part_sizes[i];
+ dct_part_total_len += dct_size_part_size;
+ dct_part_total_len += (dct_part_offset & DEC_8190_ALIGN_MASK);
+
+ /* Number of DCT partitions */
+ hantro_reg_write(vpu, &vp8_dec_num_dct_partitions,
+ hdr->num_dct_parts - 1);
+
+ /* DCT partition length */
+ hantro_reg_write(vpu, &vp8_dec_stream_len, dct_part_total_len);
+
+ /* DCT partitions base address */
+ for (i = 0; i < hdr->num_dct_parts; i++) {
+ u32 byte_offset = dct_part_offset + dct_size_part_size + count;
+ u32 base_addr = byte_offset + src_dma;
+
+ hantro_reg_write(vpu, &vp8_dec_dct_base[i],
+ base_addr & (~DEC_8190_ALIGN_MASK));
+
+ hantro_reg_write(vpu, &vp8_dec_dct_start_bits[i],
+ (byte_offset & DEC_8190_ALIGN_MASK) * 8);
+
+ count += hdr->dct_part_sizes[i];
+ }
+}
+
+/*
+ * prediction filter taps
+ * normal 6-tap filters
+ */
+static void cfg_tap(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ int i, j;
+
+ if ((hdr->version & 0x03) != 0)
+ return; /* Tap filter not used. */
+
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 6; j++) {
+ if (vp8_dec_pred_bc_tap[i][j].base != 0)
+ hantro_reg_write(vpu,
+ &vp8_dec_pred_bc_tap[i][j],
+ hantro_vp8_dec_mc_filter[i][j]);
+ }
+ }
+}
+
+static void cfg_ref(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr,
+ struct vb2_v4l2_buffer *vb2_dst)
+{
+ struct hantro_dev *vpu = ctx->dev;
+ dma_addr_t ref;
+
+ ref = hantro_get_ref(ctx, hdr->last_frame_ts);
+ if (!ref) {
+ vpu_debug(0, "failed to find last frame ts=%llu\n",
+ hdr->last_frame_ts);
+ ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+ }
+ vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF0);
+
+ ref = hantro_get_ref(ctx, hdr->golden_frame_ts);
+ if (!ref && hdr->golden_frame_ts)
+ vpu_debug(0, "failed to find golden frame ts=%llu\n",
+ hdr->golden_frame_ts);
+ if (!ref)
+ ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+ if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN)
+ ref |= VDPU_REG_VP8_GREF_SIGN_BIAS;
+ vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF2_5(2));
+
+ ref = hantro_get_ref(ctx, hdr->alt_frame_ts);
+ if (!ref && hdr->alt_frame_ts)
+ vpu_debug(0, "failed to find alt frame ts=%llu\n",
+ hdr->alt_frame_ts);
+ if (!ref)
+ ref = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+ if (hdr->flags & V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT)
+ ref |= VDPU_REG_VP8_AREF_SIGN_BIAS;
+ vdpu_write_relaxed(vpu, ref, VDPU_REG_VP8_ADDR_REF2_5(3));
+}
+
+static void cfg_buffers(struct hantro_ctx *ctx,
+ const struct v4l2_ctrl_vp8_frame *hdr,
+ struct vb2_v4l2_buffer *vb2_dst)
+{
+ const struct v4l2_vp8_segment *seg = &hdr->segment;
+ struct hantro_dev *vpu = ctx->dev;
+ dma_addr_t dst_dma;
+ u32 reg;
+
+ /* Set probability table buffer address */
+ vdpu_write_relaxed(vpu, ctx->vp8_dec.prob_tbl.dma,
+ VDPU_REG_ADDR_QTABLE);
+
+ /* Set segment map address */
+ reg = VDPU_REG_FWD_PIC1_SEGMENT_BASE(ctx->vp8_dec.segment_map.dma);
+ if (seg->flags & V4L2_VP8_SEGMENT_FLAG_ENABLED) {
+ reg |= VDPU_REG_FWD_PIC1_SEGMENT_E;
+ if (seg->flags & V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP)
+ reg |= VDPU_REG_FWD_PIC1_SEGMENT_UPD_E;
+ }
+ vdpu_write_relaxed(vpu, reg, VDPU_REG_VP8_SEGMENT_VAL);
+
+ /* set output frame buffer address */
+ dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
+ vdpu_write_relaxed(vpu, dst_dma, VDPU_REG_ADDR_DST);
+}
+
+int rockchip_vpu2_vp8_dec_run(struct hantro_ctx *ctx)
+{
+ const struct v4l2_ctrl_vp8_frame *hdr;
+ struct hantro_dev *vpu = ctx->dev;
+ struct vb2_v4l2_buffer *vb2_dst;
+ size_t height = ctx->dst_fmt.height;
+ size_t width = ctx->dst_fmt.width;
+ u32 mb_width, mb_height;
+ u32 reg;
+
+ hantro_start_prepare_run(ctx);
+
+ hdr = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_VP8_FRAME);
+ if (WARN_ON(!hdr))
+ return -EINVAL;
+
+ /* Reset segment_map buffer in keyframe */
+ if (V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) && ctx->vp8_dec.segment_map.cpu)
+ memset(ctx->vp8_dec.segment_map.cpu, 0,
+ ctx->vp8_dec.segment_map.size);
+
+ hantro_vp8_prob_update(ctx, hdr);
+
+ /*
+ * Extensive testing shows that the hardware does not properly
+ * clear the internal state from previous a decoding run. This
+ * causes corruption in decoded frames for multi-instance use cases.
+ * A soft reset before programming the registers has been found
+ * to resolve those problems.
+ */
+ ctx->codec_ops->reset(ctx);
+
+ reg = VDPU_REG_CONFIG_DEC_TIMEOUT_E
+ | VDPU_REG_CONFIG_DEC_CLK_GATE_E;
+ if (!V4L2_VP8_FRAME_IS_KEY_FRAME(hdr))
+ reg |= VDPU_REG_DEC_CTRL0_PIC_INTER_E;
+ vdpu_write_relaxed(vpu, reg, VDPU_REG_EN_FLAGS);
+
+ reg = VDPU_REG_CONFIG_DEC_STRENDIAN_E
+ | VDPU_REG_CONFIG_DEC_INSWAP32_E
+ | VDPU_REG_CONFIG_DEC_STRSWAP32_E
+ | VDPU_REG_CONFIG_DEC_OUTSWAP32_E
+ | VDPU_REG_CONFIG_DEC_IN_ENDIAN
+ | VDPU_REG_CONFIG_DEC_OUT_ENDIAN;
+ vdpu_write_relaxed(vpu, reg, VDPU_REG_DATA_ENDIAN);
+
+ reg = VDPU_REG_CONFIG_DEC_MAX_BURST(16);
+ vdpu_write_relaxed(vpu, reg, VDPU_REG_AXI_CTRL);
+
+ reg = VDPU_REG_DEC_CTRL0_DEC_MODE(10);
+ vdpu_write_relaxed(vpu, reg, VDPU_REG_DEC_FORMAT);
+
+ if (!(hdr->flags & V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF))
+ hantro_reg_write(vpu, &vp8_dec_skip_mode, 1);
+ if (hdr->lf.level == 0)
+ hantro_reg_write(vpu, &vp8_dec_filter_disable, 1);
+
+ /* Frame dimensions */
+ mb_width = MB_WIDTH(width);
+ mb_height = MB_HEIGHT(height);
+
+ hantro_reg_write(vpu, &vp8_dec_mb_width, mb_width);
+ hantro_reg_write(vpu, &vp8_dec_mb_height, mb_height);
+ hantro_reg_write(vpu, &vp8_dec_mb_width_ext, mb_width >> 9);
+ hantro_reg_write(vpu, &vp8_dec_mb_height_ext, mb_height >> 8);
+
+ /* Boolean decoder */
+ hantro_reg_write(vpu, &vp8_dec_bool_range, hdr->coder_state.range);
+ hantro_reg_write(vpu, &vp8_dec_bool_value, hdr->coder_state.value);
+
+ reg = vdpu_read(vpu, VDPU_REG_VP8_DCT_START_BIT);
+ if (hdr->version != 3)
+ reg |= VDPU_REG_DEC_CTRL4_VC1_HEIGHT_EXT;
+ if (hdr->version & 0x3)
+ reg |= VDPU_REG_DEC_CTRL4_BILIN_MC_E;
+ vdpu_write_relaxed(vpu, reg, VDPU_REG_VP8_DCT_START_BIT);
+
+ cfg_lf(ctx, hdr);
+ cfg_qp(ctx, hdr);
+ cfg_parts(ctx, hdr);
+ cfg_tap(ctx, hdr);
+
+ vb2_dst = hantro_get_dst_buf(ctx);
+ cfg_ref(ctx, hdr, vb2_dst);
+ cfg_buffers(ctx, hdr, vb2_dst);
+
+ hantro_end_prepare_run(ctx);
+
+ hantro_reg_write(vpu, &vp8_dec_start_dec, 1);
+
+ return 0;
+}
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu2_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu2_regs.h
new file mode 100644
index 000000000..49e408895
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu2_regs.h
@@ -0,0 +1,600 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ * Alpha Lin <alpha.lin@rock-chips.com>
+ */
+
+#ifndef ROCKCHIP_VPU2_REGS_H_
+#define ROCKCHIP_VPU2_REGS_H_
+
+/* Encoder registers. */
+#define VEPU_REG_VP8_QUT_1ST(i) (0x000 + ((i) * 0x24))
+#define VEPU_REG_VP8_QUT_DC_Y2(x) (((x) & 0x3fff) << 16)
+#define VEPU_REG_VP8_QUT_DC_Y1(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_QUT_2ND(i) (0x004 + ((i) * 0x24))
+#define VEPU_REG_VP8_QUT_AC_Y1(x) (((x) & 0x3fff) << 16)
+#define VEPU_REG_VP8_QUT_DC_CHR(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_QUT_3RD(i) (0x008 + ((i) * 0x24))
+#define VEPU_REG_VP8_QUT_AC_CHR(x) (((x) & 0x3fff) << 16)
+#define VEPU_REG_VP8_QUT_AC_Y2(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_QUT_4TH(i) (0x00c + ((i) * 0x24))
+#define VEPU_REG_VP8_QUT_ZB_DC_CHR(x) (((x) & 0x1ff) << 18)
+#define VEPU_REG_VP8_QUT_ZB_DC_Y2(x) (((x) & 0x1ff) << 9)
+#define VEPU_REG_VP8_QUT_ZB_DC_Y1(x) (((x) & 0x1ff) << 0)
+#define VEPU_REG_VP8_QUT_5TH(i) (0x010 + ((i) * 0x24))
+#define VEPU_REG_VP8_QUT_ZB_AC_CHR(x) (((x) & 0x1ff) << 18)
+#define VEPU_REG_VP8_QUT_ZB_AC_Y2(x) (((x) & 0x1ff) << 9)
+#define VEPU_REG_VP8_QUT_ZB_AC_Y1(x) (((x) & 0x1ff) << 0)
+#define VEPU_REG_VP8_QUT_6TH(i) (0x014 + ((i) * 0x24))
+#define VEPU_REG_VP8_QUT_RND_DC_CHR(x) (((x) & 0xff) << 16)
+#define VEPU_REG_VP8_QUT_RND_DC_Y2(x) (((x) & 0xff) << 8)
+#define VEPU_REG_VP8_QUT_RND_DC_Y1(x) (((x) & 0xff) << 0)
+#define VEPU_REG_VP8_QUT_7TH(i) (0x018 + ((i) * 0x24))
+#define VEPU_REG_VP8_QUT_RND_AC_CHR(x) (((x) & 0xff) << 16)
+#define VEPU_REG_VP8_QUT_RND_AC_Y2(x) (((x) & 0xff) << 8)
+#define VEPU_REG_VP8_QUT_RND_AC_Y1(x) (((x) & 0xff) << 0)
+#define VEPU_REG_VP8_QUT_8TH(i) (0x01c + ((i) * 0x24))
+#define VEPU_REG_VP8_SEG_FILTER_LEVEL(x) (((x) & 0x3f) << 25)
+#define VEPU_REG_VP8_DEQUT_DC_CHR(x) (((x) & 0xff) << 17)
+#define VEPU_REG_VP8_DEQUT_DC_Y2(x) (((x) & 0x1ff) << 8)
+#define VEPU_REG_VP8_DEQUT_DC_Y1(x) (((x) & 0xff) << 0)
+#define VEPU_REG_VP8_QUT_9TH(i) (0x020 + ((i) * 0x24))
+#define VEPU_REG_VP8_DEQUT_AC_CHR(x) (((x) & 0x1ff) << 18)
+#define VEPU_REG_VP8_DEQUT_AC_Y2(x) (((x) & 0x1ff) << 9)
+#define VEPU_REG_VP8_DEQUT_AC_Y1(x) (((x) & 0x1ff) << 0)
+#define VEPU_REG_ADDR_VP8_SEG_MAP 0x06c
+#define VEPU_REG_VP8_INTRA_4X4_PENALTY(i) (0x070 + ((i) * 0x4))
+#define VEPU_REG_VP8_INTRA_4X4_PENALTY_0(x) (((x) & 0xfff) << 0)
+#define VEPU_REG_VP8_INTRA_4x4_PENALTY_1(x) (((x) & 0xfff) << 16)
+#define VEPU_REG_VP8_INTRA_16X16_PENALTY(i) (0x084 + ((i) * 0x4))
+#define VEPU_REG_VP8_INTRA_16X16_PENALTY_0(x) (((x) & 0xfff) << 0)
+#define VEPU_REG_VP8_INTRA_16X16_PENALTY_1(x) (((x) & 0xfff) << 16)
+#define VEPU_REG_VP8_CONTROL 0x0a0
+#define VEPU_REG_VP8_LF_MODE_DELTA_BPRED(x) (((x) & 0x1f) << 24)
+#define VEPU_REG_VP8_LF_REF_DELTA_INTRA_MB(x) (((x) & 0x7f) << 16)
+#define VEPU_REG_VP8_INTER_TYPE_BIT_COST(x) (((x) & 0xfff) << 0)
+#define VEPU_REG_VP8_REF_FRAME_VAL 0x0a4
+#define VEPU_REG_VP8_COEF_DMV_PENALTY(x) (((x) & 0xfff) << 16)
+#define VEPU_REG_VP8_REF_FRAME(x) (((x) & 0xfff) << 0)
+#define VEPU_REG_VP8_LOOP_FILTER_REF_DELTA 0x0a8
+#define VEPU_REG_VP8_LF_REF_DELTA_ALT_REF(x) (((x) & 0x7f) << 16)
+#define VEPU_REG_VP8_LF_REF_DELTA_LAST_REF(x) (((x) & 0x7f) << 8)
+#define VEPU_REG_VP8_LF_REF_DELTA_GOLDEN(x) (((x) & 0x7f) << 0)
+#define VEPU_REG_VP8_LOOP_FILTER_MODE_DELTA 0x0ac
+#define VEPU_REG_VP8_LF_MODE_DELTA_SPLITMV(x) (((x) & 0x7f) << 16)
+#define VEPU_REG_VP8_LF_MODE_DELTA_ZEROMV(x) (((x) & 0x7f) << 8)
+#define VEPU_REG_VP8_LF_MODE_DELTA_NEWMV(x) (((x) & 0x7f) << 0)
+#define VEPU_REG_JPEG_LUMA_QUAT(i) (0x000 + ((i) * 0x4))
+#define VEPU_REG_JPEG_CHROMA_QUAT(i) (0x040 + ((i) * 0x4))
+#define VEPU_REG_INTRA_SLICE_BITMAP(i) (0x0b0 + ((i) * 0x4))
+#define VEPU_REG_ADDR_VP8_DCT_PART(i) (0x0b0 + ((i) * 0x4))
+#define VEPU_REG_INTRA_AREA_CTRL 0x0b8
+#define VEPU_REG_INTRA_AREA_TOP(x) (((x) & 0xff) << 24)
+#define VEPU_REG_INTRA_AREA_BOTTOM(x) (((x) & 0xff) << 16)
+#define VEPU_REG_INTRA_AREA_LEFT(x) (((x) & 0xff) << 8)
+#define VEPU_REG_INTRA_AREA_RIGHT(x) (((x) & 0xff) << 0)
+#define VEPU_REG_CIR_INTRA_CTRL 0x0bc
+#define VEPU_REG_CIR_INTRA_FIRST_MB(x) (((x) & 0xffff) << 16)
+#define VEPU_REG_CIR_INTRA_INTERVAL(x) (((x) & 0xffff) << 0)
+#define VEPU_REG_ADDR_IN_PLANE_0 0x0c0
+#define VEPU_REG_ADDR_IN_PLANE_1 0x0c4
+#define VEPU_REG_ADDR_IN_PLANE_2 0x0c8
+#define VEPU_REG_STR_HDR_REM_MSB 0x0cc
+#define VEPU_REG_STR_HDR_REM_LSB 0x0d0
+#define VEPU_REG_STR_BUF_LIMIT 0x0d4
+#define VEPU_REG_AXI_CTRL 0x0d8
+#define VEPU_REG_AXI_CTRL_READ_ID(x) (((x) & 0xff) << 24)
+#define VEPU_REG_AXI_CTRL_WRITE_ID(x) (((x) & 0xff) << 16)
+#define VEPU_REG_AXI_CTRL_BURST_LEN(x) (((x) & 0x3f) << 8)
+#define VEPU_REG_AXI_CTRL_INCREMENT_MODE(x) (((x) & 0x01) << 2)
+#define VEPU_REG_AXI_CTRL_BIRST_DISCARD(x) (((x) & 0x01) << 1)
+#define VEPU_REG_AXI_CTRL_BIRST_DISABLE BIT(0)
+#define VEPU_QP_ADJUST_MAD_DELTA_ROI 0x0dc
+#define VEPU_REG_ROI_QP_DELTA_1 (((x) & 0xf) << 12)
+#define VEPU_REG_ROI_QP_DELTA_2 (((x) & 0xf) << 8)
+#define VEPU_REG_MAD_QP_ADJUSTMENT (((x) & 0xf) << 0)
+#define VEPU_REG_ADDR_REF_LUMA 0x0e0
+#define VEPU_REG_ADDR_REF_CHROMA 0x0e4
+#define VEPU_REG_QP_SUM_DIV2 0x0e8
+#define VEPU_REG_QP_SUM(x) (((x) & 0x001fffff) * 2)
+#define VEPU_REG_ENC_CTRL0 0x0ec
+#define VEPU_REG_DISABLE_QUARTER_PIXEL_MV BIT(28)
+#define VEPU_REG_DEBLOCKING_FILTER_MODE(x) (((x) & 0x3) << 24)
+#define VEPU_REG_CABAC_INIT_IDC(x) (((x) & 0x3) << 21)
+#define VEPU_REG_ENTROPY_CODING_MODE BIT(20)
+#define VEPU_REG_H264_TRANS8X8_MODE BIT(17)
+#define VEPU_REG_H264_INTER4X4_MODE BIT(16)
+#define VEPU_REG_H264_STREAM_MODE BIT(15)
+#define VEPU_REG_H264_SLICE_SIZE(x) (((x) & 0x7f) << 8)
+#define VEPU_REG_ENC_OVER_FILL_STRM_OFFSET 0x0f0
+#define VEPU_REG_STREAM_START_OFFSET(x) (((x) & 0x3f) << 16)
+#define VEPU_REG_SKIP_MACROBLOCK_PENALTY(x) (((x) & 0xff) << 8)
+#define VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(x) (((x) & 0x3) << 4)
+#define VEPU_REG_IN_IMG_CTRL_OVRFLB(x) (((x) & 0xf) << 0)
+#define VEPU_REG_INPUT_LUMA_INFO 0x0f4
+#define VEPU_REG_IN_IMG_CHROMA_OFFSET(x) (((x) & 0x7) << 20)
+#define VEPU_REG_IN_IMG_LUMA_OFFSET(x) (((x) & 0x7) << 16)
+#define VEPU_REG_IN_IMG_CTRL_ROW_LEN(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_RLC_SUM 0x0f8
+#define VEPU_REG_RLC_SUM_OUT(x) (((x) & 0x007fffff) * 4)
+#define VEPU_REG_SPLIT_PENALTY_4X4 0x0f8
+#define VEPU_REG_VP8_SPLIT_PENALTY_4X4 (((x) & 0x1ff) << 19)
+#define VEPU_REG_ADDR_REC_LUMA 0x0fc
+#define VEPU_REG_ADDR_REC_CHROMA 0x100
+#define VEPU_REG_CHECKPOINT(i) (0x104 + ((i) * 0x4))
+#define VEPU_REG_CHECKPOINT_CHECK0(x) (((x) & 0xffff))
+#define VEPU_REG_CHECKPOINT_CHECK1(x) (((x) & 0xffff) << 16)
+#define VEPU_REG_CHECKPOINT_RESULT(x) \
+ ((((x) >> (16 - 16 * ((i) & 1))) & 0xffff) * 32)
+#define VEPU_REG_VP8_SEG0_QUANT_AC_Y1 0x104
+#define VEPU_REG_VP8_SEG0_RND_AC_Y1(x) (((x) & 0xff) << 23)
+#define VEPU_REG_VP8_SEG0_ZBIN_AC_Y1(x) (((x) & 0x1ff) << 14)
+#define VEPU_REG_VP8_SEG0_QUT_AC_Y1(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_SEG0_QUANT_DC_Y2 0x108
+#define VEPU_REG_VP8_SEG0_RND_DC_Y2(x) (((x) & 0xff) << 23)
+#define VEPU_REG_VP8_SEG0_ZBIN_DC_Y2(x) (((x) & 0x1ff) << 14)
+#define VEPU_REG_VP8_SEG0_QUT_DC_Y2(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_SEG0_QUANT_AC_Y2 0x10c
+#define VEPU_REG_VP8_SEG0_RND_AC_Y2(x) (((x) & 0xff) << 23)
+#define VEPU_REG_VP8_SEG0_ZBIN_AC_Y2(x) (((x) & 0x1ff) << 14)
+#define VEPU_REG_VP8_SEG0_QUT_AC_Y2(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_SEG0_QUANT_DC_CHR 0x110
+#define VEPU_REG_VP8_SEG0_RND_DC_CHR(x) (((x) & 0xff) << 23)
+#define VEPU_REG_VP8_SEG0_ZBIN_DC_CHR(x) (((x) & 0x1ff) << 14)
+#define VEPU_REG_VP8_SEG0_QUT_DC_CHR(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_SEG0_QUANT_AC_CHR 0x114
+#define VEPU_REG_VP8_SEG0_RND_AC_CHR(x) (((x) & 0xff) << 23)
+#define VEPU_REG_VP8_SEG0_ZBIN_AC_CHR(x) (((x) & 0x1ff) << 14)
+#define VEPU_REG_VP8_SEG0_QUT_AC_CHR(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_VP8_SEG0_QUANT_DQUT 0x118
+#define VEPU_REG_VP8_MV_REF_IDX1(x) (((x) & 0x03) << 26)
+#define VEPU_REG_VP8_SEG0_DQUT_DC_Y2(x) (((x) & 0x1ff) << 17)
+#define VEPU_REG_VP8_SEG0_DQUT_AC_Y1(x) (((x) & 0x1ff) << 8)
+#define VEPU_REG_VP8_SEG0_DQUT_DC_Y1(x) (((x) & 0xff) << 0)
+#define VEPU_REG_CHKPT_WORD_ERR(i) (0x118 + ((i) * 0x4))
+#define VEPU_REG_CHKPT_WORD_ERR_CHK0(x) (((x) & 0xffff))
+#define VEPU_REG_CHKPT_WORD_ERR_CHK1(x) (((x) & 0xffff) << 16)
+#define VEPU_REG_VP8_SEG0_QUANT_DQUT_1 0x11c
+#define VEPU_REG_VP8_SEGMENT_MAP_UPDATE BIT(30)
+#define VEPU_REG_VP8_SEGMENT_EN BIT(29)
+#define VEPU_REG_VP8_MV_REF_IDX2_EN BIT(28)
+#define VEPU_REG_VP8_MV_REF_IDX2(x) (((x) & 0x03) << 26)
+#define VEPU_REG_VP8_SEG0_DQUT_AC_CHR(x) (((x) & 0x1ff) << 17)
+#define VEPU_REG_VP8_SEG0_DQUT_DC_CHR(x) (((x) & 0xff) << 9)
+#define VEPU_REG_VP8_SEG0_DQUT_AC_Y2(x) (((x) & 0x1ff) << 0)
+#define VEPU_REG_VP8_BOOL_ENC_VALUE 0x120
+#define VEPU_REG_CHKPT_DELTA_QP 0x124
+#define VEPU_REG_CHKPT_DELTA_QP_CHK0(x) (((x) & 0x0f) << 0)
+#define VEPU_REG_CHKPT_DELTA_QP_CHK1(x) (((x) & 0x0f) << 4)
+#define VEPU_REG_CHKPT_DELTA_QP_CHK2(x) (((x) & 0x0f) << 8)
+#define VEPU_REG_CHKPT_DELTA_QP_CHK3(x) (((x) & 0x0f) << 12)
+#define VEPU_REG_CHKPT_DELTA_QP_CHK4(x) (((x) & 0x0f) << 16)
+#define VEPU_REG_CHKPT_DELTA_QP_CHK5(x) (((x) & 0x0f) << 20)
+#define VEPU_REG_CHKPT_DELTA_QP_CHK6(x) (((x) & 0x0f) << 24)
+#define VEPU_REG_VP8_ENC_CTRL2 0x124
+#define VEPU_REG_VP8_ZERO_MV_PENALTY_FOR_REF2(x) (((x) & 0xff) << 24)
+#define VEPU_REG_VP8_FILTER_SHARPNESS(x) (((x) & 0x07) << 21)
+#define VEPU_REG_VP8_FILTER_LEVEL(x) (((x) & 0x3f) << 15)
+#define VEPU_REG_VP8_DCT_PARTITION_CNT(x) (((x) & 0x03) << 13)
+#define VEPU_REG_VP8_BOOL_ENC_VALUE_BITS(x) (((x) & 0x1f) << 8)
+#define VEPU_REG_VP8_BOOL_ENC_RANGE(x) (((x) & 0xff) << 0)
+#define VEPU_REG_ENC_CTRL1 0x128
+#define VEPU_REG_MAD_THRESHOLD(x) (((x) & 0x3f) << 24)
+#define VEPU_REG_COMPLETED_SLICES(x) (((x) & 0xff) << 16)
+#define VEPU_REG_IN_IMG_CTRL_FMT(x) (((x) & 0xf) << 4)
+#define VEPU_REG_IN_IMG_ROTATE_MODE(x) (((x) & 0x3) << 2)
+#define VEPU_REG_SIZE_TABLE_PRESENT BIT(0)
+#define VEPU_REG_INTRA_INTER_MODE 0x12c
+#define VEPU_REG_INTRA16X16_MODE(x) (((x) & 0xffff) << 16)
+#define VEPU_REG_INTER_MODE(x) (((x) & 0xffff) << 0)
+#define VEPU_REG_ENC_CTRL2 0x130
+#define VEPU_REG_PPS_INIT_QP(x) (((x) & 0x3f) << 26)
+#define VEPU_REG_SLICE_FILTER_ALPHA(x) (((x) & 0xf) << 22)
+#define VEPU_REG_SLICE_FILTER_BETA(x) (((x) & 0xf) << 18)
+#define VEPU_REG_CHROMA_QP_OFFSET(x) (((x) & 0x1f) << 13)
+#define VEPU_REG_FILTER_DISABLE BIT(5)
+#define VEPU_REG_IDR_PIC_ID(x) (((x) & 0xf) << 1)
+#define VEPU_REG_CONSTRAINED_INTRA_PREDICTION BIT(0)
+#define VEPU_REG_ADDR_OUTPUT_STREAM 0x134
+#define VEPU_REG_ADDR_OUTPUT_CTRL 0x138
+#define VEPU_REG_ADDR_NEXT_PIC 0x13c
+#define VEPU_REG_ADDR_MV_OUT 0x140
+#define VEPU_REG_ADDR_CABAC_TBL 0x144
+#define VEPU_REG_ROI1 0x148
+#define VEPU_REG_ROI1_TOP_MB(x) (((x) & 0xff) << 24)
+#define VEPU_REG_ROI1_BOTTOM_MB(x) (((x) & 0xff) << 16)
+#define VEPU_REG_ROI1_LEFT_MB(x) (((x) & 0xff) << 8)
+#define VEPU_REG_ROI1_RIGHT_MB(x) (((x) & 0xff) << 0)
+#define VEPU_REG_ROI2 0x14c
+#define VEPU_REG_ROI2_TOP_MB(x) (((x) & 0xff) << 24)
+#define VEPU_REG_ROI2_BOTTOM_MB(x) (((x) & 0xff) << 16)
+#define VEPU_REG_ROI2_LEFT_MB(x) (((x) & 0xff) << 8)
+#define VEPU_REG_ROI2_RIGHT_MB(x) (((x) & 0xff) << 0)
+#define VEPU_REG_STABLE_MATRIX(i) (0x150 + ((i) * 0x4))
+#define VEPU_REG_STABLE_MOTION_SUM 0x174
+#define VEPU_REG_STABILIZATION_OUTPUT 0x178
+#define VEPU_REG_STABLE_MIN_VALUE(x) (((x) & 0xffffff) << 8)
+#define VEPU_REG_STABLE_MODE_SEL(x) (((x) & 0x3) << 6)
+#define VEPU_REG_STABLE_HOR_GMV(x) (((x) & 0x3f) << 0)
+#define VEPU_REG_RGB2YUV_CONVERSION_COEF1 0x17c
+#define VEPU_REG_RGB2YUV_CONVERSION_COEFB(x) (((x) & 0xffff) << 16)
+#define VEPU_REG_RGB2YUV_CONVERSION_COEFA(x) (((x) & 0xffff) << 0)
+#define VEPU_REG_RGB2YUV_CONVERSION_COEF2 0x180
+#define VEPU_REG_RGB2YUV_CONVERSION_COEFE(x) (((x) & 0xffff) << 16)
+#define VEPU_REG_RGB2YUV_CONVERSION_COEFC(x) (((x) & 0xffff) << 0)
+#define VEPU_REG_RGB2YUV_CONVERSION_COEF3 0x184
+#define VEPU_REG_RGB2YUV_CONVERSION_COEFF(x) (((x) & 0xffff) << 0)
+#define VEPU_REG_RGB_MASK_MSB 0x188
+#define VEPU_REG_RGB_MASK_B_MSB(x) (((x) & 0x1f) << 16)
+#define VEPU_REG_RGB_MASK_G_MSB(x) (((x) & 0x1f) << 8)
+#define VEPU_REG_RGB_MASK_R_MSB(x) (((x) & 0x1f) << 0)
+#define VEPU_REG_MV_PENALTY 0x18c
+#define VEPU_REG_1MV_PENALTY(x) (((x) & 0x3ff) << 21)
+#define VEPU_REG_QMV_PENALTY(x) (((x) & 0x3ff) << 11)
+#define VEPU_REG_4MV_PENALTY(x) (((x) & 0x3ff) << 1)
+#define VEPU_REG_SPLIT_MV_MODE_EN BIT(0)
+#define VEPU_REG_QP_VAL 0x190
+#define VEPU_REG_H264_LUMA_INIT_QP(x) (((x) & 0x3f) << 26)
+#define VEPU_REG_H264_QP_MAX(x) (((x) & 0x3f) << 20)
+#define VEPU_REG_H264_QP_MIN(x) (((x) & 0x3f) << 14)
+#define VEPU_REG_H264_CHKPT_DISTANCE(x) (((x) & 0xfff) << 0)
+#define VEPU_REG_VP8_SEG0_QUANT_DC_Y1 0x190
+#define VEPU_REG_VP8_SEG0_RND_DC_Y1(x) (((x) & 0xff) << 23)
+#define VEPU_REG_VP8_SEG0_ZBIN_DC_Y1(x) (((x) & 0x1ff) << 14)
+#define VEPU_REG_VP8_SEG0_QUT_DC_Y1(x) (((x) & 0x3fff) << 0)
+#define VEPU_REG_MVC_RELATE 0x198
+#define VEPU_REG_ZERO_MV_FAVOR_D2(x) (((x) & 0xf) << 20)
+#define VEPU_REG_PENALTY_4X4MV(x) (((x) & 0x1ff) << 11)
+#define VEPU_REG_MVC_VIEW_ID(x) (((x) & 0x7) << 8)
+#define VEPU_REG_MVC_ANCHOR_PIC_FLAG BIT(7)
+#define VEPU_REG_MVC_PRIORITY_ID(x) (((x) & 0x7) << 4)
+#define VEPU_REG_MVC_TEMPORAL_ID(x) (((x) & 0x7) << 1)
+#define VEPU_REG_MVC_INTER_VIEW_FLAG BIT(0)
+#define VEPU_REG_ENCODE_START 0x19c
+#define VEPU_REG_MB_HEIGHT(x) (((x) & 0x1ff) << 20)
+#define VEPU_REG_MB_WIDTH(x) (((x) & 0x1ff) << 8)
+#define VEPU_REG_FRAME_TYPE_INTER (0x0 << 6)
+#define VEPU_REG_FRAME_TYPE_INTRA (0x1 << 6)
+#define VEPU_REG_FRAME_TYPE_MVCINTER (0x2 << 6)
+#define VEPU_REG_ENCODE_FORMAT_JPEG (0x2 << 4)
+#define VEPU_REG_ENCODE_FORMAT_H264 (0x3 << 4)
+#define VEPU_REG_ENCODE_ENABLE BIT(0)
+#define VEPU_REG_MB_CTRL 0x1a0
+#define VEPU_REG_MB_CNT_OUT(x) (((x) & 0xffff) << 16)
+#define VEPU_REG_MB_CNT_SET(x) (((x) & 0xffff) << 0)
+#define VEPU_REG_DATA_ENDIAN 0x1a4
+#define VEPU_REG_INPUT_SWAP8 BIT(31)
+#define VEPU_REG_INPUT_SWAP16 BIT(30)
+#define VEPU_REG_INPUT_SWAP32 BIT(29)
+#define VEPU_REG_OUTPUT_SWAP8 BIT(28)
+#define VEPU_REG_OUTPUT_SWAP16 BIT(27)
+#define VEPU_REG_OUTPUT_SWAP32 BIT(26)
+#define VEPU_REG_TEST_IRQ BIT(24)
+#define VEPU_REG_TEST_COUNTER(x) (((x) & 0xf) << 20)
+#define VEPU_REG_TEST_REG BIT(19)
+#define VEPU_REG_TEST_MEMORY BIT(18)
+#define VEPU_REG_TEST_LEN(x) (((x) & 0x3ffff) << 0)
+#define VEPU_REG_ENC_CTRL3 0x1a8
+#define VEPU_REG_PPS_ID(x) (((x) & 0xff) << 24)
+#define VEPU_REG_INTRA_PRED_MODE(x) (((x) & 0xff) << 16)
+#define VEPU_REG_FRAME_NUM(x) (((x) & 0xffff) << 0)
+#define VEPU_REG_ENC_CTRL4 0x1ac
+#define VEPU_REG_MV_PENALTY_16X8_8X16(x) (((x) & 0x3ff) << 20)
+#define VEPU_REG_MV_PENALTY_8X8(x) (((x) & 0x3ff) << 10)
+#define VEPU_REG_MV_PENALTY_8X4_4X8(x) (((x) & 0x3ff) << 0)
+#define VEPU_REG_ADDR_VP8_PROB_CNT 0x1b0
+#define VEPU_REG_INTERRUPT 0x1b4
+#define VEPU_REG_INTERRUPT_NON BIT(28)
+#define VEPU_REG_MV_WRITE_EN BIT(24)
+#define VEPU_REG_RECON_WRITE_DIS BIT(20)
+#define VEPU_REG_INTERRUPT_SLICE_READY_EN BIT(16)
+#define VEPU_REG_CLK_GATING_EN BIT(12)
+#define VEPU_REG_INTERRUPT_TIMEOUT_EN BIT(10)
+#define VEPU_REG_INTERRUPT_RESET BIT(9)
+#define VEPU_REG_INTERRUPT_DIS_BIT BIT(8)
+#define VEPU_REG_INTERRUPT_TIMEOUT BIT(6)
+#define VEPU_REG_INTERRUPT_BUFFER_FULL BIT(5)
+#define VEPU_REG_INTERRUPT_BUS_ERROR BIT(4)
+#define VEPU_REG_INTERRUPT_FUSE BIT(3)
+#define VEPU_REG_INTERRUPT_SLICE_READY BIT(2)
+#define VEPU_REG_INTERRUPT_FRAME_READY BIT(1)
+#define VEPU_REG_INTERRUPT_BIT BIT(0)
+#define VEPU_REG_DMV_PENALTY_TBL(i) (0x1E0 + ((i) * 0x4))
+#define VEPU_REG_DMV_PENALTY_TABLE_BIT(x, i) ((x) << (i) * 8)
+#define VEPU_REG_DMV_Q_PIXEL_PENALTY_TBL(i) (0x260 + ((i) * 0x4))
+#define VEPU_REG_DMV_Q_PIXEL_PENALTY_TABLE_BIT(x, i) ((x) << (i) * 8)
+
+/* vpu decoder register */
+#define VDPU_REG_DEC_CTRL0 0x0c8 // 50
+#define VDPU_REG_REF_BUF_CTRL2_REFBU2_PICID(x) (((x) & 0x1f) << 25)
+#define VDPU_REG_REF_BUF_CTRL2_REFBU2_THR(x) (((x) & 0xfff) << 13)
+#define VDPU_REG_CONFIG_TILED_MODE_LSB BIT(12)
+#define VDPU_REG_CONFIG_DEC_ADV_PRE_DIS BIT(11)
+#define VDPU_REG_CONFIG_DEC_SCMD_DIS BIT(10)
+#define VDPU_REG_DEC_CTRL0_SKIP_MODE BIT(9)
+#define VDPU_REG_DEC_CTRL0_FILTERING_DIS BIT(8)
+#define VDPU_REG_DEC_CTRL0_PIC_FIXED_QUANT BIT(7)
+#define VDPU_REG_CONFIG_DEC_LATENCY(x) (((x) & 0x3f) << 1)
+#define VDPU_REG_CONFIG_TILED_MODE_MSB(x) BIT(0)
+#define VDPU_REG_CONFIG_DEC_OUT_TILED_E BIT(0)
+#define VDPU_REG_STREAM_LEN 0x0cc
+#define VDPU_REG_DEC_CTRL3_INIT_QP(x) (((x) & 0x3f) << 25)
+#define VDPU_REG_DEC_STREAM_LEN_HI BIT(24)
+#define VDPU_REG_DEC_CTRL3_STREAM_LEN(x) (((x) & 0xffffff) << 0)
+#define VDPU_REG_ERROR_CONCEALMENT 0x0d0
+#define VDPU_REG_REF_BUF_CTRL2_APF_THRESHOLD(x) (((x) & 0x3fff) << 17)
+#define VDPU_REG_ERR_CONC_STARTMB_X(x) (((x) & 0x1ff) << 8)
+#define VDPU_REG_ERR_CONC_STARTMB_Y(x) (((x) & 0xff) << 0)
+#define VDPU_REG_DEC_FORMAT 0x0d4
+#define VDPU_REG_DEC_CTRL0_DEC_MODE(x) (((x) & 0xf) << 0)
+#define VDPU_REG_DATA_ENDIAN 0x0d8
+#define VDPU_REG_CONFIG_DEC_STRENDIAN_E BIT(5)
+#define VDPU_REG_CONFIG_DEC_STRSWAP32_E BIT(4)
+#define VDPU_REG_CONFIG_DEC_OUTSWAP32_E BIT(3)
+#define VDPU_REG_CONFIG_DEC_INSWAP32_E BIT(2)
+#define VDPU_REG_CONFIG_DEC_OUT_ENDIAN BIT(1)
+#define VDPU_REG_CONFIG_DEC_IN_ENDIAN BIT(0)
+#define VDPU_REG_INTERRUPT 0x0dc
+#define VDPU_REG_INTERRUPT_DEC_TIMEOUT BIT(13)
+#define VDPU_REG_INTERRUPT_DEC_ERROR_INT BIT(12)
+#define VDPU_REG_INTERRUPT_DEC_PIC_INF BIT(10)
+#define VDPU_REG_INTERRUPT_DEC_SLICE_INT BIT(9)
+#define VDPU_REG_INTERRUPT_DEC_ASO_INT BIT(8)
+#define VDPU_REG_INTERRUPT_DEC_BUFFER_INT BIT(6)
+#define VDPU_REG_INTERRUPT_DEC_BUS_INT BIT(5)
+#define VDPU_REG_INTERRUPT_DEC_RDY_INT BIT(4)
+#define VDPU_REG_INTERRUPT_DEC_IRQ_DIS BIT(1)
+#define VDPU_REG_INTERRUPT_DEC_IRQ BIT(0)
+#define VDPU_REG_AXI_CTRL 0x0e0
+#define VDPU_REG_AXI_DEC_SEL BIT(23)
+#define VDPU_REG_CONFIG_DEC_DATA_DISC_E BIT(22)
+#define VDPU_REG_PARAL_BUS_E(x) BIT(21)
+#define VDPU_REG_CONFIG_DEC_MAX_BURST(x) (((x) & 0x1f) << 16)
+#define VDPU_REG_DEC_CTRL0_DEC_AXI_WR_ID(x) (((x) & 0xff) << 8)
+#define VDPU_REG_CONFIG_DEC_AXI_RD_ID(x) (((x) & 0xff) << 0)
+#define VDPU_REG_EN_FLAGS 0x0e4
+#define VDPU_REG_AHB_HLOCK_E BIT(31)
+#define VDPU_REG_CACHE_E BIT(29)
+#define VDPU_REG_PREFETCH_SINGLE_CHANNEL_E BIT(28)
+#define VDPU_REG_INTRA_3_CYCLE_ENHANCE BIT(27)
+#define VDPU_REG_INTRA_DOUBLE_SPEED BIT(26)
+#define VDPU_REG_INTER_DOUBLE_SPEED BIT(25)
+#define VDPU_REG_DEC_CTRL3_START_CODE_E BIT(22)
+#define VDPU_REG_DEC_CTRL3_CH_8PIX_ILEAV_E BIT(21)
+#define VDPU_REG_DEC_CTRL0_RLC_MODE_E BIT(20)
+#define VDPU_REG_DEC_CTRL0_DIVX3_E BIT(19)
+#define VDPU_REG_DEC_CTRL0_PJPEG_E BIT(18)
+#define VDPU_REG_DEC_CTRL0_PIC_INTERLACE_E BIT(17)
+#define VDPU_REG_DEC_CTRL0_PIC_FIELDMODE_E BIT(16)
+#define VDPU_REG_DEC_CTRL0_PIC_B_E BIT(15)
+#define VDPU_REG_DEC_CTRL0_PIC_INTER_E BIT(14)
+#define VDPU_REG_DEC_CTRL0_PIC_TOPFIELD_E BIT(13)
+#define VDPU_REG_DEC_CTRL0_FWD_INTERLACE_E BIT(12)
+#define VDPU_REG_DEC_CTRL0_SORENSON_E BIT(11)
+#define VDPU_REG_DEC_CTRL0_WRITE_MVS_E BIT(10)
+#define VDPU_REG_DEC_CTRL0_REF_TOPFIELD_E BIT(9)
+#define VDPU_REG_DEC_CTRL0_REFTOPFIRST_E BIT(8)
+#define VDPU_REG_DEC_CTRL0_SEQ_MBAFF_E BIT(7)
+#define VDPU_REG_DEC_CTRL0_PICORD_COUNT_E BIT(6)
+#define VDPU_REG_CONFIG_DEC_TIMEOUT_E BIT(5)
+#define VDPU_REG_CONFIG_DEC_CLK_GATE_E BIT(4)
+#define VDPU_REG_DEC_CTRL0_DEC_OUT_DIS BIT(2)
+#define VDPU_REG_REF_BUF_CTRL2_REFBU2_BUF_E BIT(1)
+#define VDPU_REG_INTERRUPT_DEC_E BIT(0)
+#define VDPU_REG_SOFT_RESET 0x0e8
+#define VDPU_REG_PRED_FLT 0x0ec
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_0(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_1(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_2(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_ADDITIONAL_CHROMA_ADDRESS 0x0f0
+#define VDPU_REG_ADDR_QTABLE 0x0f4
+#define VDPU_REG_DIRECT_MV_ADDR 0x0f8
+#define VDPU_REG_ADDR_DST 0x0fc
+#define VDPU_REG_ADDR_STR 0x100
+#define VDPU_REG_REFBUF_RELATED 0x104
+#define VDPU_REG_FWD_PIC(i) (0x128 + ((i) * 0x4))
+#define VDPU_REG_FWD_PIC_PINIT_RLIST_F5(x) (((x) & 0x1f) << 25)
+#define VDPU_REG_FWD_PIC_PINIT_RLIST_F4(x) (((x) & 0x1f) << 20)
+#define VDPU_REG_FWD_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_FWD_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_FWD_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_FWD_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_REF_PIC(i) (0x130 + ((i) * 0x4))
+#define VDPU_REG_REF_PIC_REFER1_NBR(x) (((x) & 0xffff) << 16)
+#define VDPU_REG_REF_PIC_REFER0_NBR(x) (((x) & 0xffff) << 0)
+#define VDPU_REG_H264_ADDR_REF(i) (0x150 + ((i) * 0x4))
+#define VDPU_REG_ADDR_REF_FIELD_E BIT(1)
+#define VDPU_REG_ADDR_REF_TOPC_E BIT(0)
+#define VDPU_REG_INITIAL_REF_PIC_LIST0 0x190
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F5(x) (((x) & 0x1f) << 25)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F4(x) (((x) & 0x1f) << 20)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F3(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F2(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F1(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F0(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_INITIAL_REF_PIC_LIST1 0x194
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F11(x) (((x) & 0x1f) << 25)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F10(x) (((x) & 0x1f) << 20)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F9(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F8(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F7(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F6(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_INITIAL_REF_PIC_LIST2 0x198
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F15(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F14(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F13(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_F12(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_INITIAL_REF_PIC_LIST3 0x19c
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B5(x) (((x) & 0x1f) << 25)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B4(x) (((x) & 0x1f) << 20)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B3(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B2(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B1(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B0(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_INITIAL_REF_PIC_LIST4 0x1a0
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B11(x) (((x) & 0x1f) << 25)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B10(x) (((x) & 0x1f) << 20)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B9(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B8(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B7(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B6(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_INITIAL_REF_PIC_LIST5 0x1a4
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B15(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B14(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B13(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_BD_REF_PIC_BINIT_RLIST_B12(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_INITIAL_REF_PIC_LIST6 0x1a8
+#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F3(x) (((x) & 0x1f) << 15)
+#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F2(x) (((x) & 0x1f) << 10)
+#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F1(x) (((x) & 0x1f) << 5)
+#define VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F0(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_LT_REF 0x1ac
+#define VDPU_REG_VALID_REF 0x1b0
+#define VDPU_REG_H264_PIC_MB_SIZE 0x1b8
+#define VDPU_REG_DEC_CTRL2_CH_QP_OFFSET2(x) (((x) & 0x1f) << 22)
+#define VDPU_REG_DEC_CTRL2_CH_QP_OFFSET(x) (((x) & 0x1f) << 17)
+#define VDPU_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(x) (((x) & 0xff) << 9)
+#define VDPU_REG_DEC_CTRL1_PIC_MB_WIDTH(x) (((x) & 0x1ff) << 0)
+#define VDPU_REG_H264_CTRL 0x1bc
+#define VDPU_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(x) (((x) & 0x3) << 16)
+#define VDPU_REG_DEC_CTRL1_REF_FRAMES(x) (((x) & 0x1f) << 0)
+#define VDPU_REG_CURRENT_FRAME 0x1c0
+#define VDPU_REG_DEC_CTRL5_FILT_CTRL_PRES BIT(31)
+#define VDPU_REG_DEC_CTRL5_RDPIC_CNT_PRES BIT(30)
+#define VDPU_REG_DEC_CTRL4_FRAMENUM_LEN(x) (((x) & 0x1f) << 16)
+#define VDPU_REG_DEC_CTRL4_FRAMENUM(x) (((x) & 0xffff) << 0)
+#define VDPU_REG_REF_FRAME 0x1c4
+#define VDPU_REG_DEC_CTRL5_REFPIC_MK_LEN(x) (((x) & 0x7ff) << 16)
+#define VDPU_REG_DEC_CTRL5_IDR_PIC_ID(x) (((x) & 0xffff) << 0)
+#define VDPU_REG_DEC_CTRL6 0x1c8
+#define VDPU_REG_DEC_CTRL6_PPS_ID(x) (((x) & 0xff) << 24)
+#define VDPU_REG_DEC_CTRL6_REFIDX1_ACTIVE(x) (((x) & 0x1f) << 19)
+#define VDPU_REG_DEC_CTRL6_REFIDX0_ACTIVE(x) (((x) & 0x1f) << 14)
+#define VDPU_REG_DEC_CTRL6_POC_LENGTH(x) (((x) & 0xff) << 0)
+#define VDPU_REG_ENABLE_FLAG 0x1cc
+#define VDPU_REG_DEC_CTRL5_IDR_PIC_E BIT(8)
+#define VDPU_REG_DEC_CTRL4_DIR_8X8_INFER_E BIT(7)
+#define VDPU_REG_DEC_CTRL4_BLACKWHITE_E BIT(6)
+#define VDPU_REG_DEC_CTRL4_CABAC_E BIT(5)
+#define VDPU_REG_DEC_CTRL4_WEIGHT_PRED_E BIT(4)
+#define VDPU_REG_DEC_CTRL5_CONST_INTRA_E BIT(3)
+#define VDPU_REG_DEC_CTRL5_8X8TRANS_FLAG_E BIT(2)
+#define VDPU_REG_DEC_CTRL2_TYPE1_QUANT_E BIT(1)
+#define VDPU_REG_DEC_CTRL2_FIELDPIC_FLAG_E BIT(0)
+#define VDPU_REG_VP8_PIC_MB_SIZE 0x1e0
+#define VDPU_REG_DEC_PIC_MB_WIDTH(x) (((x) & 0x1ff) << 23)
+#define VDPU_REG_DEC_MB_WIDTH_OFF(x) (((x) & 0xf) << 19)
+#define VDPU_REG_DEC_PIC_MB_HEIGHT_P(x) (((x) & 0xff) << 11)
+#define VDPU_REG_DEC_MB_HEIGHT_OFF(x) (((x) & 0xf) << 7)
+#define VDPU_REG_DEC_CTRL1_PIC_MB_W_EXT(x) (((x) & 0x7) << 3)
+#define VDPU_REG_DEC_CTRL1_PIC_MB_H_EXT(x) (((x) & 0x7) << 0)
+#define VDPU_REG_VP8_DCT_START_BIT 0x1e4
+#define VDPU_REG_DEC_CTRL4_DCT1_START_BIT(x) (((x) & 0x3f) << 26)
+#define VDPU_REG_DEC_CTRL4_DCT2_START_BIT(x) (((x) & 0x3f) << 20)
+#define VDPU_REG_DEC_CTRL4_VC1_HEIGHT_EXT BIT(13)
+#define VDPU_REG_DEC_CTRL4_BILIN_MC_E BIT(12)
+#define VDPU_REG_VP8_CTRL0 0x1e8
+#define VDPU_REG_DEC_CTRL2_STRM_START_BIT(x) (((x) & 0x3f) << 26)
+#define VDPU_REG_DEC_CTRL2_STRM1_START_BIT(x) (((x) & 0x3f) << 18)
+#define VDPU_REG_DEC_CTRL2_BOOLEAN_VALUE(x) (((x) & 0xff) << 8)
+#define VDPU_REG_DEC_CTRL2_BOOLEAN_RANGE(x) (((x) & 0xff) << 0)
+#define VDPU_REG_VP8_DATA_VAL 0x1f0
+#define VDPU_REG_DEC_CTRL6_COEFFS_PART_AM(x) (((x) & 0xf) << 24)
+#define VDPU_REG_DEC_CTRL6_STREAM1_LEN(x) (((x) & 0xffffff) << 0)
+#define VDPU_REG_PRED_FLT7 0x1f4
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_1(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_2(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_3(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT8 0x1f8
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_0(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_1(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_2(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT9 0x1fc
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_6_3(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_0(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_1(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT10 0x200
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_2(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_7_3(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_BD_REF_PIC_PRED_TAP_2_M1(x) (((x) & 0x3) << 10)
+#define VDPU_REG_BD_REF_PIC_PRED_TAP_2_4(x) (((x) & 0x3) << 8)
+#define VDPU_REG_BD_REF_PIC_PRED_TAP_4_M1(x) (((x) & 0x3) << 6)
+#define VDPU_REG_BD_REF_PIC_PRED_TAP_4_4(x) (((x) & 0x3) << 4)
+#define VDPU_REG_BD_REF_PIC_PRED_TAP_6_M1(x) (((x) & 0x3) << 2)
+#define VDPU_REG_BD_REF_PIC_PRED_TAP_6_4(x) (((x) & 0x3) << 0)
+#define VDPU_REG_FILTER_LEVEL 0x204
+#define VDPU_REG_REF_PIC_LF_LEVEL_0(x) (((x) & 0x3f) << 18)
+#define VDPU_REG_REF_PIC_LF_LEVEL_1(x) (((x) & 0x3f) << 12)
+#define VDPU_REG_REF_PIC_LF_LEVEL_2(x) (((x) & 0x3f) << 6)
+#define VDPU_REG_REF_PIC_LF_LEVEL_3(x) (((x) & 0x3f) << 0)
+#define VDPU_REG_VP8_QUANTER0 0x208
+#define VDPU_REG_REF_PIC_QUANT_DELTA_0(x) (((x) & 0x1f) << 27)
+#define VDPU_REG_REF_PIC_QUANT_DELTA_1(x) (((x) & 0x1f) << 22)
+#define VDPU_REG_REF_PIC_QUANT_0(x) (((x) & 0x7ff) << 11)
+#define VDPU_REG_REF_PIC_QUANT_1(x) (((x) & 0x7ff) << 0)
+#define VDPU_REG_VP8_ADDR_REF0 0x20c
+#define VDPU_REG_FILTER_MB_ADJ 0x210
+#define VDPU_REG_REF_PIC_FILT_TYPE_E BIT(31)
+#define VDPU_REG_REF_PIC_FILT_SHARPNESS(x) (((x) & 0x7) << 28)
+#define VDPU_REG_FILT_MB_ADJ_0(x) (((x) & 0x7f) << 21)
+#define VDPU_REG_FILT_MB_ADJ_1(x) (((x) & 0x7f) << 14)
+#define VDPU_REG_FILT_MB_ADJ_2(x) (((x) & 0x7f) << 7)
+#define VDPU_REG_FILT_MB_ADJ_3(x) (((x) & 0x7f) << 0)
+#define VDPU_REG_FILTER_REF_ADJ 0x214
+#define VDPU_REG_REF_PIC_ADJ_0(x) (((x) & 0x7f) << 21)
+#define VDPU_REG_REF_PIC_ADJ_1(x) (((x) & 0x7f) << 14)
+#define VDPU_REG_REF_PIC_ADJ_2(x) (((x) & 0x7f) << 7)
+#define VDPU_REG_REF_PIC_ADJ_3(x) (((x) & 0x7f) << 0)
+#define VDPU_REG_VP8_ADDR_REF2_5(i) (0x218 + ((i) * 0x4))
+#define VDPU_REG_VP8_GREF_SIGN_BIAS BIT(0)
+#define VDPU_REG_VP8_AREF_SIGN_BIAS BIT(0)
+#define VDPU_REG_VP8_DCT_BASE(i) (0x230 + ((i) * 0x4))
+#define VDPU_REG_VP8_ADDR_CTRL_PART 0x244
+#define VDPU_REG_VP8_ADDR_REF1 0x250
+#define VDPU_REG_VP8_SEGMENT_VAL 0x254
+#define VDPU_REG_FWD_PIC1_SEGMENT_BASE(x) ((x) << 0)
+#define VDPU_REG_FWD_PIC1_SEGMENT_UPD_E BIT(1)
+#define VDPU_REG_FWD_PIC1_SEGMENT_E BIT(0)
+#define VDPU_REG_VP8_DCT_START_BIT2 0x258
+#define VDPU_REG_DEC_CTRL7_DCT3_START_BIT(x) (((x) & 0x3f) << 24)
+#define VDPU_REG_DEC_CTRL7_DCT4_START_BIT(x) (((x) & 0x3f) << 18)
+#define VDPU_REG_DEC_CTRL7_DCT5_START_BIT(x) (((x) & 0x3f) << 12)
+#define VDPU_REG_DEC_CTRL7_DCT6_START_BIT(x) (((x) & 0x3f) << 6)
+#define VDPU_REG_DEC_CTRL7_DCT7_START_BIT(x) (((x) & 0x3f) << 0)
+#define VDPU_REG_VP8_QUANTER1 0x25c
+#define VDPU_REG_REF_PIC_QUANT_DELTA_2(x) (((x) & 0x1f) << 27)
+#define VDPU_REG_REF_PIC_QUANT_DELTA_3(x) (((x) & 0x1f) << 22)
+#define VDPU_REG_REF_PIC_QUANT_2(x) (((x) & 0x7ff) << 11)
+#define VDPU_REG_REF_PIC_QUANT_3(x) (((x) & 0x7ff) << 0)
+#define VDPU_REG_VP8_QUANTER2 0x260
+#define VDPU_REG_REF_PIC_QUANT_DELTA_4(x) (((x) & 0x1f) << 27)
+#define VDPU_REG_REF_PIC_QUANT_4(x) (((x) & 0x7ff) << 11)
+#define VDPU_REG_REF_PIC_QUANT_5(x) (((x) & 0x7ff) << 0)
+#define VDPU_REG_PRED_FLT1 0x264
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_0_3(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_0(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_1(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT2 0x268
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_2(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_1_3(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_0(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT3 0x26c
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_1(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_2(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_2_3(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT4 0x270
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_0(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_1(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_2(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT5 0x274
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_3_3(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_0(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_1(x) (((x) & 0x3ff) << 2)
+#define VDPU_REG_PRED_FLT6 0x278
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_2(x) (((x) & 0x3ff) << 22)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_4_3(x) (((x) & 0x3ff) << 12)
+#define VDPU_REG_PRED_FLT_PRED_BC_TAP_5_0(x) (((x) & 0x3ff) << 2)
+
+#endif /* ROCKCHIP_VPU2_REGS_H_ */
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
new file mode 100644
index 000000000..8de6fd2e8
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ * Jeffy Chen <jeffy.chen@rock-chips.com>
+ */
+
+#include <linux/clk.h>
+
+#include "hantro.h"
+#include "hantro_jpeg.h"
+#include "hantro_g1_regs.h"
+#include "hantro_h1_regs.h"
+#include "rockchip_vpu2_regs.h"
+
+#define RK3066_ACLK_MAX_FREQ (300 * 1000 * 1000)
+#define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000)
+
+/*
+ * Supported formats.
+ */
+
+static const struct hantro_fmt rockchip_vpu_enc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_YUV420M,
+ .codec_mode = HANTRO_MODE_NONE,
+ .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420P,
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_NV12M,
+ .codec_mode = HANTRO_MODE_NONE,
+ .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420SP,
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_YUYV,
+ .codec_mode = HANTRO_MODE_NONE,
+ .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUYV422,
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_UYVY,
+ .codec_mode = HANTRO_MODE_NONE,
+ .enc_fmt = ROCKCHIP_VPU_ENC_FMT_UYVY422,
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_JPEG,
+ .codec_mode = HANTRO_MODE_JPEG_ENC,
+ .max_depth = 2,
+ .header_size = JPEG_HEADER_SIZE,
+ .frmsize = {
+ .min_width = 96,
+ .max_width = 8192,
+ .step_width = MB_DIM,
+ .min_height = 32,
+ .max_height = 8192,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_YUYV,
+ .codec_mode = HANTRO_MODE_NONE,
+ .postprocessed = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt rk3066_vpu_dec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_H264_SLICE,
+ .codec_mode = HANTRO_MODE_H264_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+ .codec_mode = HANTRO_MODE_MPEG2_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP8_FRAME,
+ .codec_mode = HANTRO_MODE_VP8_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_4K_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_4K_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_H264_SLICE,
+ .codec_mode = HANTRO_MODE_H264_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_4K_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_4K_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+ .codec_mode = HANTRO_MODE_MPEG2_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP8_FRAME,
+ .codec_mode = HANTRO_MODE_VP8_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt rockchip_vdpu2_dec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_H264_SLICE,
+ .codec_mode = HANTRO_MODE_H264_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+ .codec_mode = HANTRO_MODE_MPEG2_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP8_FRAME,
+ .codec_mode = HANTRO_MODE_VP8_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+ .codec_mode = HANTRO_MODE_MPEG2_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_FHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_FHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP8_FRAME,
+ .codec_mode = HANTRO_MODE_VP8_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static irqreturn_t rockchip_vpu1_vepu_irq(int irq, void *dev_id)
+{
+ struct hantro_dev *vpu = dev_id;
+ enum vb2_buffer_state state;
+ u32 status;
+
+ status = vepu_read(vpu, H1_REG_INTERRUPT);
+ state = (status & H1_REG_INTERRUPT_FRAME_RDY) ?
+ VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+ vepu_write(vpu, 0, H1_REG_INTERRUPT);
+ vepu_write(vpu, 0, H1_REG_AXI_CTRL);
+
+ hantro_irq_done(vpu, state);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rockchip_vpu2_vdpu_irq(int irq, void *dev_id)
+{
+ struct hantro_dev *vpu = dev_id;
+ enum vb2_buffer_state state;
+ u32 status;
+
+ status = vdpu_read(vpu, VDPU_REG_INTERRUPT);
+ state = (status & VDPU_REG_INTERRUPT_DEC_IRQ) ?
+ VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+ vdpu_write(vpu, 0, VDPU_REG_INTERRUPT);
+ vdpu_write(vpu, 0, VDPU_REG_AXI_CTRL);
+
+ hantro_irq_done(vpu, state);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rockchip_vpu2_vepu_irq(int irq, void *dev_id)
+{
+ struct hantro_dev *vpu = dev_id;
+ enum vb2_buffer_state state;
+ u32 status;
+
+ status = vepu_read(vpu, VEPU_REG_INTERRUPT);
+ state = (status & VEPU_REG_INTERRUPT_FRAME_READY) ?
+ VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+ vepu_write(vpu, 0, VEPU_REG_INTERRUPT);
+ vepu_write(vpu, 0, VEPU_REG_AXI_CTRL);
+
+ hantro_irq_done(vpu, state);
+
+ return IRQ_HANDLED;
+}
+
+static int rk3036_vpu_hw_init(struct hantro_dev *vpu)
+{
+ /* Bump ACLK to max. possible freq. to improve performance. */
+ clk_set_rate(vpu->clocks[0].clk, RK3066_ACLK_MAX_FREQ);
+ return 0;
+}
+
+static int rk3066_vpu_hw_init(struct hantro_dev *vpu)
+{
+ /* Bump ACLKs to max. possible freq. to improve performance. */
+ clk_set_rate(vpu->clocks[0].clk, RK3066_ACLK_MAX_FREQ);
+ clk_set_rate(vpu->clocks[2].clk, RK3066_ACLK_MAX_FREQ);
+ return 0;
+}
+
+static int rockchip_vpu_hw_init(struct hantro_dev *vpu)
+{
+ /* Bump ACLK to max. possible freq. to improve performance. */
+ clk_set_rate(vpu->clocks[0].clk, RK3288_ACLK_MAX_FREQ);
+ return 0;
+}
+
+static void rk3066_vpu_dec_reset(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ vdpu_write(vpu, G1_REG_INTERRUPT_DEC_IRQ_DIS, G1_REG_INTERRUPT);
+ vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+}
+
+static void rockchip_vpu1_enc_reset(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ vepu_write(vpu, H1_REG_INTERRUPT_DIS_BIT, H1_REG_INTERRUPT);
+ vepu_write(vpu, 0, H1_REG_ENC_CTRL);
+ vepu_write(vpu, 0, H1_REG_AXI_CTRL);
+}
+
+static void rockchip_vpu2_dec_reset(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ vdpu_write(vpu, VDPU_REG_INTERRUPT_DEC_IRQ_DIS, VDPU_REG_INTERRUPT);
+ vdpu_write(vpu, 0, VDPU_REG_EN_FLAGS);
+ vdpu_write(vpu, 1, VDPU_REG_SOFT_RESET);
+}
+
+static void rockchip_vpu2_enc_reset(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ vepu_write(vpu, VEPU_REG_INTERRUPT_DIS_BIT, VEPU_REG_INTERRUPT);
+ vepu_write(vpu, 0, VEPU_REG_ENCODE_START);
+ vepu_write(vpu, 0, VEPU_REG_AXI_CTRL);
+}
+
+/*
+ * Supported codec ops.
+ */
+static const struct hantro_codec_ops rk3036_vpu_codec_ops[] = {
+ [HANTRO_MODE_H264_DEC] = {
+ .run = hantro_g1_h264_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_h264_dec_init,
+ .exit = hantro_h264_dec_exit,
+ },
+ [HANTRO_MODE_MPEG2_DEC] = {
+ .run = hantro_g1_mpeg2_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_mpeg2_dec_init,
+ .exit = hantro_mpeg2_dec_exit,
+ },
+ [HANTRO_MODE_VP8_DEC] = {
+ .run = hantro_g1_vp8_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_vp8_dec_init,
+ .exit = hantro_vp8_dec_exit,
+ },
+};
+
+static const struct hantro_codec_ops rk3066_vpu_codec_ops[] = {
+ [HANTRO_MODE_JPEG_ENC] = {
+ .run = hantro_h1_jpeg_enc_run,
+ .reset = rockchip_vpu1_enc_reset,
+ .done = hantro_h1_jpeg_enc_done,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+ .run = hantro_g1_h264_dec_run,
+ .reset = rk3066_vpu_dec_reset,
+ .init = hantro_h264_dec_init,
+ .exit = hantro_h264_dec_exit,
+ },
+ [HANTRO_MODE_MPEG2_DEC] = {
+ .run = hantro_g1_mpeg2_dec_run,
+ .reset = rk3066_vpu_dec_reset,
+ .init = hantro_mpeg2_dec_init,
+ .exit = hantro_mpeg2_dec_exit,
+ },
+ [HANTRO_MODE_VP8_DEC] = {
+ .run = hantro_g1_vp8_dec_run,
+ .reset = rk3066_vpu_dec_reset,
+ .init = hantro_vp8_dec_init,
+ .exit = hantro_vp8_dec_exit,
+ },
+};
+
+static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
+ [HANTRO_MODE_JPEG_ENC] = {
+ .run = hantro_h1_jpeg_enc_run,
+ .reset = rockchip_vpu1_enc_reset,
+ .done = hantro_h1_jpeg_enc_done,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+ .run = hantro_g1_h264_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_h264_dec_init,
+ .exit = hantro_h264_dec_exit,
+ },
+ [HANTRO_MODE_MPEG2_DEC] = {
+ .run = hantro_g1_mpeg2_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_mpeg2_dec_init,
+ .exit = hantro_mpeg2_dec_exit,
+ },
+ [HANTRO_MODE_VP8_DEC] = {
+ .run = hantro_g1_vp8_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_vp8_dec_init,
+ .exit = hantro_vp8_dec_exit,
+ },
+};
+
+static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = {
+ [HANTRO_MODE_JPEG_ENC] = {
+ .run = rockchip_vpu2_jpeg_enc_run,
+ .reset = rockchip_vpu2_enc_reset,
+ .done = rockchip_vpu2_jpeg_enc_done,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+ .run = rockchip_vpu2_h264_dec_run,
+ .reset = rockchip_vpu2_dec_reset,
+ .init = hantro_h264_dec_init,
+ .exit = hantro_h264_dec_exit,
+ },
+ [HANTRO_MODE_MPEG2_DEC] = {
+ .run = rockchip_vpu2_mpeg2_dec_run,
+ .reset = rockchip_vpu2_dec_reset,
+ .init = hantro_mpeg2_dec_init,
+ .exit = hantro_mpeg2_dec_exit,
+ },
+ [HANTRO_MODE_VP8_DEC] = {
+ .run = rockchip_vpu2_vp8_dec_run,
+ .reset = rockchip_vpu2_dec_reset,
+ .init = hantro_vp8_dec_init,
+ .exit = hantro_vp8_dec_exit,
+ },
+};
+
+static const struct hantro_codec_ops rk3568_vepu_codec_ops[] = {
+ [HANTRO_MODE_JPEG_ENC] = {
+ .run = rockchip_vpu2_jpeg_enc_run,
+ .reset = rockchip_vpu2_enc_reset,
+ .done = rockchip_vpu2_jpeg_enc_done,
+ },
+};
+
+/*
+ * VPU variant.
+ */
+
+static const struct hantro_irq rockchip_vdpu1_irqs[] = {
+ { "vdpu", hantro_g1_irq },
+};
+
+static const struct hantro_irq rockchip_vpu1_irqs[] = {
+ { "vepu", rockchip_vpu1_vepu_irq },
+ { "vdpu", hantro_g1_irq },
+};
+
+static const struct hantro_irq rockchip_vdpu2_irqs[] = {
+ { "vdpu", rockchip_vpu2_vdpu_irq },
+};
+
+static const struct hantro_irq rockchip_vpu2_irqs[] = {
+ { "vepu", rockchip_vpu2_vepu_irq },
+ { "vdpu", rockchip_vpu2_vdpu_irq },
+};
+
+static const struct hantro_irq rk3568_vepu_irqs[] = {
+ { "vepu", rockchip_vpu2_vepu_irq },
+};
+
+static const char * const rk3066_vpu_clk_names[] = {
+ "aclk_vdpu", "hclk_vdpu",
+ "aclk_vepu", "hclk_vepu"
+};
+
+static const char * const rockchip_vpu_clk_names[] = {
+ "aclk", "hclk"
+};
+
+/* VDPU1/VEPU1 */
+
+const struct hantro_variant rk3036_vpu_variant = {
+ .dec_offset = 0x400,
+ .dec_fmts = rk3066_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(rk3066_vpu_dec_fmts),
+ .postproc_fmts = rockchip_vpu1_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts),
+ .postproc_ops = &hantro_g1_postproc_ops,
+ .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+ HANTRO_H264_DECODER,
+ .codec_ops = rk3036_vpu_codec_ops,
+ .irqs = rockchip_vdpu1_irqs,
+ .num_irqs = ARRAY_SIZE(rockchip_vdpu1_irqs),
+ .init = rk3036_vpu_hw_init,
+ .clk_names = rockchip_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
+
+/*
+ * Despite this variant has separate clocks for decoder and encoder,
+ * it's still required to enable all four of them for either decoding
+ * or encoding and we can't split it in separate g1/h1 variants.
+ */
+const struct hantro_variant rk3066_vpu_variant = {
+ .enc_offset = 0x0,
+ .enc_fmts = rockchip_vpu_enc_fmts,
+ .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+ .dec_offset = 0x400,
+ .dec_fmts = rk3066_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(rk3066_vpu_dec_fmts),
+ .postproc_fmts = rockchip_vpu1_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts),
+ .postproc_ops = &hantro_g1_postproc_ops,
+ .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
+ HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
+ .codec_ops = rk3066_vpu_codec_ops,
+ .irqs = rockchip_vpu1_irqs,
+ .num_irqs = ARRAY_SIZE(rockchip_vpu1_irqs),
+ .init = rk3066_vpu_hw_init,
+ .clk_names = rk3066_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rk3066_vpu_clk_names)
+};
+
+const struct hantro_variant rk3288_vpu_variant = {
+ .enc_offset = 0x0,
+ .enc_fmts = rockchip_vpu_enc_fmts,
+ .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+ .dec_offset = 0x400,
+ .dec_fmts = rk3288_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(rk3288_vpu_dec_fmts),
+ .postproc_fmts = rockchip_vpu1_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts),
+ .postproc_ops = &hantro_g1_postproc_ops,
+ .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
+ HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
+ .codec_ops = rk3288_vpu_codec_ops,
+ .irqs = rockchip_vpu1_irqs,
+ .num_irqs = ARRAY_SIZE(rockchip_vpu1_irqs),
+ .init = rockchip_vpu_hw_init,
+ .clk_names = rockchip_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
+
+/* VDPU2/VEPU2 */
+
+const struct hantro_variant rk3328_vpu_variant = {
+ .dec_offset = 0x400,
+ .dec_fmts = rockchip_vdpu2_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts),
+ .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+ HANTRO_H264_DECODER,
+ .codec_ops = rk3399_vpu_codec_ops,
+ .irqs = rockchip_vdpu2_irqs,
+ .num_irqs = ARRAY_SIZE(rockchip_vdpu2_irqs),
+ .init = rockchip_vpu_hw_init,
+ .clk_names = rockchip_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names),
+};
+
+/*
+ * H.264 decoding explicitly disabled in RK3399.
+ * This ensures userspace applications use the Rockchip VDEC core,
+ * which has better performance.
+ */
+const struct hantro_variant rk3399_vpu_variant = {
+ .enc_offset = 0x0,
+ .enc_fmts = rockchip_vpu_enc_fmts,
+ .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+ .dec_offset = 0x400,
+ .dec_fmts = rk3399_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts),
+ .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
+ HANTRO_VP8_DECODER,
+ .codec_ops = rk3399_vpu_codec_ops,
+ .irqs = rockchip_vpu2_irqs,
+ .num_irqs = ARRAY_SIZE(rockchip_vpu2_irqs),
+ .init = rockchip_vpu_hw_init,
+ .clk_names = rockchip_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
+
+const struct hantro_variant rk3568_vepu_variant = {
+ .enc_offset = 0x0,
+ .enc_fmts = rockchip_vpu_enc_fmts,
+ .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+ .codec = HANTRO_JPEG_ENCODER,
+ .codec_ops = rk3568_vepu_codec_ops,
+ .irqs = rk3568_vepu_irqs,
+ .num_irqs = ARRAY_SIZE(rk3568_vepu_irqs),
+ .init = rockchip_vpu_hw_init,
+ .clk_names = rockchip_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
+
+const struct hantro_variant rk3568_vpu_variant = {
+ .dec_offset = 0x400,
+ .dec_fmts = rockchip_vdpu2_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts),
+ .codec = HANTRO_MPEG2_DECODER |
+ HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
+ .codec_ops = rk3399_vpu_codec_ops,
+ .irqs = rockchip_vdpu2_irqs,
+ .num_irqs = ARRAY_SIZE(rockchip_vdpu2_irqs),
+ .init = rockchip_vpu_hw_init,
+ .clk_names = rockchip_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
+
+const struct hantro_variant px30_vpu_variant = {
+ .enc_offset = 0x0,
+ .enc_fmts = rockchip_vpu_enc_fmts,
+ .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+ .dec_offset = 0x400,
+ .dec_fmts = rockchip_vdpu2_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts),
+ .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
+ HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
+ .codec_ops = rk3399_vpu_codec_ops,
+ .irqs = rockchip_vpu2_irqs,
+ .num_irqs = ARRAY_SIZE(rockchip_vpu2_irqs),
+ .init = rk3036_vpu_hw_init,
+ .clk_names = rockchip_vpu_clk_names,
+ .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
diff --git a/drivers/media/platform/verisilicon/sama5d4_vdec_hw.c b/drivers/media/platform/verisilicon/sama5d4_vdec_hw.c
new file mode 100644
index 000000000..b205e2db5
--- /dev/null
+++ b/drivers/media/platform/verisilicon/sama5d4_vdec_hw.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VDEC driver
+ *
+ * Copyright (C) 2021 Collabora Ltd, Emil Velikov <emil.velikov@collabora.com>
+ */
+
+#include "hantro.h"
+
+/*
+ * Supported formats.
+ */
+
+static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_YUYV,
+ .codec_mode = HANTRO_MODE_NONE,
+ .postprocessed = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_HD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_HD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+static const struct hantro_fmt sama5d4_vdec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_HD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_HD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+ .codec_mode = HANTRO_MODE_MPEG2_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_HD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_HD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP8_FRAME,
+ .codec_mode = HANTRO_MODE_VP8_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_HD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_HD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_H264_SLICE,
+ .codec_mode = HANTRO_MODE_H264_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_HD_WIDTH,
+ .step_width = MB_DIM,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_HD_HEIGHT,
+ .step_height = MB_DIM,
+ },
+ },
+};
+
+/*
+ * Supported codec ops.
+ */
+
+static const struct hantro_codec_ops sama5d4_vdec_codec_ops[] = {
+ [HANTRO_MODE_MPEG2_DEC] = {
+ .run = hantro_g1_mpeg2_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_mpeg2_dec_init,
+ .exit = hantro_mpeg2_dec_exit,
+ },
+ [HANTRO_MODE_VP8_DEC] = {
+ .run = hantro_g1_vp8_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_vp8_dec_init,
+ .exit = hantro_vp8_dec_exit,
+ },
+ [HANTRO_MODE_H264_DEC] = {
+ .run = hantro_g1_h264_dec_run,
+ .reset = hantro_g1_reset,
+ .init = hantro_h264_dec_init,
+ .exit = hantro_h264_dec_exit,
+ },
+};
+
+static const struct hantro_irq sama5d4_irqs[] = {
+ { "vdec", hantro_g1_irq },
+};
+
+static const char * const sama5d4_clk_names[] = { "vdec_clk" };
+
+const struct hantro_variant sama5d4_vdec_variant = {
+ .dec_fmts = sama5d4_vdec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(sama5d4_vdec_fmts),
+ .postproc_fmts = sama5d4_vdec_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(sama5d4_vdec_postproc_fmts),
+ .postproc_ops = &hantro_g1_postproc_ops,
+ .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+ HANTRO_H264_DECODER,
+ .codec_ops = sama5d4_vdec_codec_ops,
+ .irqs = sama5d4_irqs,
+ .num_irqs = ARRAY_SIZE(sama5d4_irqs),
+ .clk_names = sama5d4_clk_names,
+ .num_clocks = ARRAY_SIZE(sama5d4_clk_names),
+};
diff --git a/drivers/media/platform/verisilicon/sunxi_vpu_hw.c b/drivers/media/platform/verisilicon/sunxi_vpu_hw.c
new file mode 100644
index 000000000..02ce8b064
--- /dev/null
+++ b/drivers/media/platform/verisilicon/sunxi_vpu_hw.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Allwinner Hantro G2 VPU codec driver
+ *
+ * Copyright (C) 2021 Jernej Skrabec <jernej.skrabec@gmail.com>
+ */
+
+#include <linux/clk.h>
+
+#include "hantro.h"
+
+static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12,
+ .codec_mode = HANTRO_MODE_NONE,
+ .postprocessed = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = 32,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = 32,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_P010,
+ .codec_mode = HANTRO_MODE_NONE,
+ .postprocessed = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = 32,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = 32,
+ },
+ },
+};
+
+static const struct hantro_fmt sunxi_vpu_dec_fmts[] = {
+ {
+ .fourcc = V4L2_PIX_FMT_NV12_4L4,
+ .codec_mode = HANTRO_MODE_NONE,
+ .match_depth = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = 32,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = 32,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_P010_4L4,
+ .codec_mode = HANTRO_MODE_NONE,
+ .match_depth = true,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = 32,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = 32,
+ },
+ },
+ {
+ .fourcc = V4L2_PIX_FMT_VP9_FRAME,
+ .codec_mode = HANTRO_MODE_VP9_DEC,
+ .max_depth = 2,
+ .frmsize = {
+ .min_width = FMT_MIN_WIDTH,
+ .max_width = FMT_UHD_WIDTH,
+ .step_width = 32,
+ .min_height = FMT_MIN_HEIGHT,
+ .max_height = FMT_UHD_HEIGHT,
+ .step_height = 32,
+ },
+ },
+};
+
+static int sunxi_vpu_hw_init(struct hantro_dev *vpu)
+{
+ clk_set_rate(vpu->clocks[0].clk, 300000000);
+
+ return 0;
+}
+
+static void sunxi_vpu_reset(struct hantro_ctx *ctx)
+{
+ struct hantro_dev *vpu = ctx->dev;
+
+ reset_control_reset(vpu->resets);
+}
+
+static const struct hantro_codec_ops sunxi_vpu_codec_ops[] = {
+ [HANTRO_MODE_VP9_DEC] = {
+ .run = hantro_g2_vp9_dec_run,
+ .done = hantro_g2_vp9_dec_done,
+ .reset = sunxi_vpu_reset,
+ .init = hantro_vp9_dec_init,
+ .exit = hantro_vp9_dec_exit,
+ },
+};
+
+static const struct hantro_irq sunxi_irqs[] = {
+ { NULL, hantro_g2_irq },
+};
+
+static const char * const sunxi_clk_names[] = { "mod", "bus" };
+
+const struct hantro_variant sunxi_vpu_variant = {
+ .dec_fmts = sunxi_vpu_dec_fmts,
+ .num_dec_fmts = ARRAY_SIZE(sunxi_vpu_dec_fmts),
+ .postproc_fmts = sunxi_vpu_postproc_fmts,
+ .num_postproc_fmts = ARRAY_SIZE(sunxi_vpu_postproc_fmts),
+ .postproc_ops = &hantro_g2_postproc_ops,
+ .codec = HANTRO_VP9_DECODER,
+ .codec_ops = sunxi_vpu_codec_ops,
+ .init = sunxi_vpu_hw_init,
+ .irqs = sunxi_irqs,
+ .num_irqs = ARRAY_SIZE(sunxi_irqs),
+ .clk_names = sunxi_clk_names,
+ .num_clocks = ARRAY_SIZE(sunxi_clk_names),
+ .double_buffer = 1,
+ .legacy_regs = 1,
+ .late_postproc = 1,
+};