diff options
Diffstat (limited to 'drivers/staging/media/sunxi/cedrus/cedrus_h264.c')
-rw-r--r-- | drivers/staging/media/sunxi/cedrus/cedrus_h264.c | 706 |
1 files changed, 706 insertions, 0 deletions
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c new file mode 100644 index 0000000000..dfb401df13 --- /dev/null +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cedrus VPU driver + * + * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com> + * Copyright (c) 2018 Bootlin + */ + +#include <linux/delay.h> +#include <linux/types.h> + +#include <media/videobuf2-dma-contig.h> + +#include "cedrus.h" +#include "cedrus_hw.h" +#include "cedrus_regs.h" + +enum cedrus_h264_sram_off { + CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE = 0x000, + CEDRUS_SRAM_H264_FRAMEBUFFER_LIST = 0x100, + CEDRUS_SRAM_H264_REF_LIST_0 = 0x190, + CEDRUS_SRAM_H264_REF_LIST_1 = 0x199, + CEDRUS_SRAM_H264_SCALING_LIST_8x8_0 = 0x200, + CEDRUS_SRAM_H264_SCALING_LIST_8x8_1 = 0x210, + CEDRUS_SRAM_H264_SCALING_LIST_4x4 = 0x220, +}; + +struct cedrus_h264_sram_ref_pic { + __le32 top_field_order_cnt; + __le32 bottom_field_order_cnt; + __le32 frame_info; + __le32 luma_ptr; + __le32 chroma_ptr; + __le32 mv_col_top_ptr; + __le32 mv_col_bot_ptr; + __le32 reserved; +} __packed; + +#define CEDRUS_H264_FRAME_NUM 18 + +#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (32 * SZ_1K) +#define CEDRUS_MIN_PIC_INFO_BUF_SIZE (130 * SZ_1K) + +static void cedrus_h264_write_sram(struct cedrus_dev *dev, + enum cedrus_h264_sram_off off, + const void *data, size_t len) +{ + const u32 *buffer = data; + size_t count = DIV_ROUND_UP(len, 4); + + cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2); + + while (count--) + cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++); +} + +static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_buffer *buf, + unsigned int field) +{ + dma_addr_t addr = buf->codec.h264.mv_col_buf_dma; + + /* Adjust for the field */ + addr += field * buf->codec.h264.mv_col_buf_size / 2; + + return addr; +} + +static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx, + struct cedrus_buffer *buf, + unsigned int top_field_order_cnt, + unsigned int bottom_field_order_cnt, + struct cedrus_h264_sram_ref_pic *pic) +{ + struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf; + + pic->top_field_order_cnt = cpu_to_le32(top_field_order_cnt); + pic->bottom_field_order_cnt = cpu_to_le32(bottom_field_order_cnt); + pic->frame_info = cpu_to_le32(buf->codec.h264.pic_type << 8); + + pic->luma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0)); + pic->chroma_ptr = cpu_to_le32(cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1)); + pic->mv_col_top_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 0)); + pic->mv_col_bot_ptr = cpu_to_le32(cedrus_h264_mv_col_buf_addr(buf, 1)); +} + +static int cedrus_write_frame_list(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM]; + const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; + const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; + struct vb2_queue *cap_q; + struct cedrus_buffer *output_buf; + struct cedrus_dev *dev = ctx->dev; + unsigned long used_dpbs = 0; + unsigned int position; + int output = -1; + unsigned int i; + + cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + memset(pic_list, 0, sizeof(pic_list)); + + for (i = 0; i < ARRAY_SIZE(decode->dpb); i++) { + const struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i]; + struct cedrus_buffer *cedrus_buf; + struct vb2_buffer *buf; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) + continue; + + buf = vb2_find_buffer(cap_q, dpb->reference_ts); + if (!buf) + continue; + + cedrus_buf = vb2_to_cedrus_buffer(buf); + position = cedrus_buf->codec.h264.position; + used_dpbs |= BIT(position); + + if (run->dst->vb2_buf.timestamp == dpb->reference_ts) { + output = position; + continue; + } + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + continue; + + cedrus_fill_ref_pic(ctx, cedrus_buf, + dpb->top_field_order_cnt, + dpb->bottom_field_order_cnt, + &pic_list[position]); + } + + if (output >= 0) + position = output; + else + position = find_first_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM); + + output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf); + output_buf->codec.h264.position = position; + + if (!output_buf->codec.h264.mv_col_buf_size) { + const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; + unsigned int field_size; + + field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) * + DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16; + if (!(sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)) + field_size = field_size * 2; + if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)) + field_size = field_size * 2; + + output_buf->codec.h264.mv_col_buf_size = field_size * 2; + /* Buffer is never accessed by CPU, so we can skip kernel mapping. */ + output_buf->codec.h264.mv_col_buf = + dma_alloc_attrs(dev->dev, + output_buf->codec.h264.mv_col_buf_size, + &output_buf->codec.h264.mv_col_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + + if (!output_buf->codec.h264.mv_col_buf) { + output_buf->codec.h264.mv_col_buf_size = 0; + return -ENOMEM; + } + } + + if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) + output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FIELD; + else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) + output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_MBAFF; + else + output_buf->codec.h264.pic_type = CEDRUS_H264_PIC_TYPE_FRAME; + + cedrus_fill_ref_pic(ctx, output_buf, + decode->top_field_order_cnt, + decode->bottom_field_order_cnt, + &pic_list[position]); + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST, + pic_list, sizeof(pic_list)); + + cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position); + + return 0; +} + +#define CEDRUS_MAX_REF_IDX 32 + +static void _cedrus_write_ref_list(struct cedrus_ctx *ctx, + struct cedrus_run *run, + const struct v4l2_h264_reference *ref_list, + u8 num_ref, enum cedrus_h264_sram_off sram) +{ + const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; + struct vb2_queue *cap_q; + struct cedrus_dev *dev = ctx->dev; + u8 sram_array[CEDRUS_MAX_REF_IDX]; + unsigned int i; + size_t size; + + cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + memset(sram_array, 0, sizeof(sram_array)); + + for (i = 0; i < num_ref; i++) { + const struct v4l2_h264_dpb_entry *dpb; + const struct cedrus_buffer *cedrus_buf; + unsigned int position; + struct vb2_buffer *buf; + u8 dpb_idx; + + dpb_idx = ref_list[i].index; + dpb = &decode->dpb[dpb_idx]; + + if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) + continue; + + buf = vb2_find_buffer(cap_q, dpb->reference_ts); + if (!buf) + continue; + + cedrus_buf = vb2_to_cedrus_buffer(buf); + position = cedrus_buf->codec.h264.position; + + sram_array[i] |= position << 1; + if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF) + sram_array[i] |= BIT(0); + } + + size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array)); + cedrus_h264_write_sram(dev, sram, &sram_array, size); +} + +static void cedrus_write_ref_list0(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; + + _cedrus_write_ref_list(ctx, run, + slice->ref_pic_list0, + slice->num_ref_idx_l0_active_minus1 + 1, + CEDRUS_SRAM_H264_REF_LIST_0); +} + +static void cedrus_write_ref_list1(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; + + _cedrus_write_ref_list(ctx, run, + slice->ref_pic_list1, + slice->num_ref_idx_l1_active_minus1 + 1, + CEDRUS_SRAM_H264_REF_LIST_1); +} + +static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_scaling_matrix *scaling = + run->h264.scaling_matrix; + const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; + struct cedrus_dev *dev = ctx->dev; + + if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) + return; + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0, + scaling->scaling_list_8x8[0], + sizeof(scaling->scaling_list_8x8[0])); + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1, + scaling->scaling_list_8x8[1], + sizeof(scaling->scaling_list_8x8[1])); + + cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4, + scaling->scaling_list_4x4, + sizeof(scaling->scaling_list_4x4)); +} + +static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_pred_weights *pred_weight = + run->h264.pred_weights; + struct cedrus_dev *dev = ctx->dev; + int i, j, k; + + cedrus_write(dev, VE_H264_SHS_WP, + ((pred_weight->chroma_log2_weight_denom & 0x7) << 4) | + ((pred_weight->luma_log2_weight_denom & 0x7) << 0)); + + cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, + CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2); + + for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) { + const struct v4l2_h264_weight_factors *factors = + &pred_weight->weight_factors[i]; + + for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) { + u32 val; + + val = (((u32)factors->luma_offset[j] & 0x1ff) << 16) | + (factors->luma_weight[j] & 0x1ff); + cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); + } + + for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j++) { + for (k = 0; k < ARRAY_SIZE(factors->chroma_weight[0]); k++) { + u32 val; + + val = (((u32)factors->chroma_offset[j][k] & 0x1ff) << 16) | + (factors->chroma_weight[j][k] & 0x1ff); + cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, val); + } + } + } +} + +/* + * It turns out that using VE_H264_VLD_OFFSET to skip bits is not reliable. In + * rare cases frame is not decoded correctly. However, setting offset to 0 and + * skipping appropriate amount of bits with flush bits trigger always works. + */ +static void cedrus_skip_bits(struct cedrus_dev *dev, int num) +{ + int count = 0; + + while (count < num) { + int tmp = min(num - count, 32); + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_FLUSH_BITS | + VE_H264_TRIGGER_TYPE_N_BITS(tmp)); + while (cedrus_read(dev, VE_H264_STATUS) & VE_H264_STATUS_VLD_BUSY) + udelay(1); + + count += tmp; + } +} + +static void cedrus_set_params(struct cedrus_ctx *ctx, + struct cedrus_run *run) +{ + const struct v4l2_ctrl_h264_decode_params *decode = run->h264.decode_params; + const struct v4l2_ctrl_h264_slice_params *slice = run->h264.slice_params; + const struct v4l2_ctrl_h264_pps *pps = run->h264.pps; + const struct v4l2_ctrl_h264_sps *sps = run->h264.sps; + struct vb2_buffer *src_buf = &run->src->vb2_buf; + struct cedrus_dev *dev = ctx->dev; + dma_addr_t src_buf_addr; + size_t slice_bytes = vb2_get_plane_payload(src_buf, 0); + unsigned int pic_width_in_mbs; + bool mbaff_pic; + u32 reg; + + cedrus_write(dev, VE_H264_VLD_LEN, slice_bytes * 8); + cedrus_write(dev, VE_H264_VLD_OFFSET, 0); + + src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0); + cedrus_write(dev, VE_H264_VLD_END, src_buf_addr + slice_bytes); + cedrus_write(dev, VE_H264_VLD_ADDR, + VE_H264_VLD_ADDR_VAL(src_buf_addr) | + VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | + VE_H264_VLD_ADDR_LAST); + + if (ctx->src_fmt.width > 2048) { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_MIXED_RAM | + VE_BUF_CTRL_DBLK_MIXED_RAM); + cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, + ctx->codec.h264.deblk_buf_dma); + cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, + ctx->codec.h264.intra_pred_buf_dma); + } else { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_INT_SRAM | + VE_BUF_CTRL_DBLK_INT_SRAM); + } + + /* + * FIXME: Since the bitstream parsing is done in software, and + * in userspace, this shouldn't be needed anymore. But it + * turns out that removing it breaks the decoding process, + * without any clear indication why. + */ + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_INIT_SWDEC); + + cedrus_skip_bits(dev, slice->header_bit_size); + + if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(pps, slice)) + cedrus_write_pred_weight_table(ctx, run); + + if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) || + (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) || + (slice->slice_type == V4L2_H264_SLICE_TYPE_B)) + cedrus_write_ref_list0(ctx, run); + + if (slice->slice_type == V4L2_H264_SLICE_TYPE_B) + cedrus_write_ref_list1(ctx, run); + + // picture parameters + reg = 0; + /* + * FIXME: the kernel headers are allowing the default value to + * be passed, but the libva doesn't give us that. + */ + reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10; + reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5; + reg |= (pps->weighted_bipred_idc & 0x3) << 2; + if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) + reg |= VE_H264_PPS_ENTROPY_CODING_MODE; + if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) + reg |= VE_H264_PPS_WEIGHTED_PRED; + if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) + reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED; + if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) + reg |= VE_H264_PPS_TRANSFORM_8X8_MODE; + cedrus_write(dev, VE_H264_PPS, reg); + + // sequence parameters + reg = 0; + reg |= (sps->chroma_format_idc & 0x7) << 19; + reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8; + reg |= sps->pic_height_in_map_units_minus1 & 0xff; + if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) + reg |= VE_H264_SPS_MBS_ONLY; + if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) + reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD; + if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) + reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE; + cedrus_write(dev, VE_H264_SPS, reg); + + mbaff_pic = !(decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) && + (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD); + pic_width_in_mbs = sps->pic_width_in_mbs_minus1 + 1; + + // slice parameters + reg = 0; + reg |= ((slice->first_mb_in_slice % pic_width_in_mbs) & 0xff) << 24; + reg |= (((slice->first_mb_in_slice / pic_width_in_mbs) * + (mbaff_pic + 1)) & 0xff) << 16; + reg |= decode->nal_ref_idc ? BIT(12) : 0; + reg |= (slice->slice_type & 0xf) << 8; + reg |= slice->cabac_init_idc & 0x3; + if (ctx->fh.m2m_ctx->new_frame) + reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC; + if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) + reg |= VE_H264_SHS_FIELD_PIC; + if (decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) + reg |= VE_H264_SHS_BOTTOM_FIELD; + if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED) + reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED; + cedrus_write(dev, VE_H264_SHS, reg); + + reg = 0; + reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD; + reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24; + reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16; + reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8; + reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4; + reg |= slice->slice_beta_offset_div2 & 0xf; + cedrus_write(dev, VE_H264_SHS2, reg); + + reg = 0; + reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16; + reg |= (pps->chroma_qp_index_offset & 0x3f) << 8; + reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f; + if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) + reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT; + cedrus_write(dev, VE_H264_SHS_QP, reg); + + // clear status flags + cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev, VE_H264_STATUS)); + + // enable int + cedrus_write(dev, VE_H264_CTRL, + VE_H264_CTRL_SLICE_DECODE_INT | + VE_H264_CTRL_DECODE_ERR_INT | + VE_H264_CTRL_VLD_DATA_REQ_INT); +} + +static enum cedrus_irq_status +cedrus_h264_irq_status(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_H264_STATUS); + + if (reg & (VE_H264_STATUS_DECODE_ERR_INT | + VE_H264_STATUS_VLD_DATA_REQ_INT)) + return CEDRUS_IRQ_ERROR; + + if (reg & VE_H264_CTRL_SLICE_DECODE_INT) + return CEDRUS_IRQ_OK; + + return CEDRUS_IRQ_NONE; +} + +static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_H264_STATUS, + VE_H264_STATUS_INT_MASK); +} + +static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + u32 reg = cedrus_read(dev, VE_H264_CTRL); + + cedrus_write(dev, VE_H264_CTRL, + reg & ~VE_H264_CTRL_INT_MASK); +} + +static int cedrus_h264_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) +{ + struct cedrus_dev *dev = ctx->dev; + int ret; + + cedrus_engine_enable(ctx); + + cedrus_write(dev, VE_H264_SDROT_CTRL, 0); + cedrus_write(dev, VE_H264_EXTRA_BUFFER1, + ctx->codec.h264.pic_info_buf_dma); + cedrus_write(dev, VE_H264_EXTRA_BUFFER2, + ctx->codec.h264.neighbor_info_buf_dma); + + cedrus_write_scaling_lists(ctx, run); + ret = cedrus_write_frame_list(ctx, run); + if (ret) + return ret; + + cedrus_set_params(ctx, run); + + return 0; +} + +static int cedrus_h264_start(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + unsigned int pic_info_size; + int ret; + + /* + * NOTE: All buffers allocated here are only used by HW, so we + * can add DMA_ATTR_NO_KERNEL_MAPPING flag when allocating them. + */ + + /* Formula for picture buffer size is taken from CedarX source. */ + + if (ctx->src_fmt.width > 2048) + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; + else + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; + + /* + * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, + * there is no need to multiply by 2. + */ + pic_info_size += ctx->src_fmt.height * 2 * 64; + + if (pic_info_size < CEDRUS_MIN_PIC_INFO_BUF_SIZE) + pic_info_size = CEDRUS_MIN_PIC_INFO_BUF_SIZE; + + ctx->codec.h264.pic_info_buf_size = pic_info_size; + ctx->codec.h264.pic_info_buf = + dma_alloc_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, + &ctx->codec.h264.pic_info_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.pic_info_buf) + return -ENOMEM; + + /* + * That buffer is supposed to be 16kiB in size, and be aligned + * on 16kiB as well. However, dma_alloc_attrs provides the + * guarantee that we'll have a DMA address aligned on the + * smallest page order that is greater to the requested size, + * so we don't have to overallocate. + */ + ctx->codec.h264.neighbor_info_buf = + dma_alloc_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, + &ctx->codec.h264.neighbor_info_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.neighbor_info_buf) { + ret = -ENOMEM; + goto err_pic_buf; + } + + if (ctx->src_fmt.width > 2048) { + /* + * Formulas for deblock and intra prediction buffer sizes + * are taken from CedarX source. + */ + + ctx->codec.h264.deblk_buf_size = + ALIGN(ctx->src_fmt.width, 32) * 12; + ctx->codec.h264.deblk_buf = + dma_alloc_attrs(dev->dev, + ctx->codec.h264.deblk_buf_size, + &ctx->codec.h264.deblk_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.deblk_buf) { + ret = -ENOMEM; + goto err_neighbor_buf; + } + + /* + * NOTE: Multiplying by two deviates from CedarX logic, but it + * is for some unknown reason needed for H264 4K decoding on H6. + */ + ctx->codec.h264.intra_pred_buf_size = + ALIGN(ctx->src_fmt.width, 64) * 5 * 2; + ctx->codec.h264.intra_pred_buf = + dma_alloc_attrs(dev->dev, + ctx->codec.h264.intra_pred_buf_size, + &ctx->codec.h264.intra_pred_buf_dma, + GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING); + if (!ctx->codec.h264.intra_pred_buf) { + ret = -ENOMEM; + goto err_deblk_buf; + } + } + + return 0; + +err_deblk_buf: + dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, + ctx->codec.h264.deblk_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + +err_neighbor_buf: + dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, + ctx->codec.h264.neighbor_info_buf, + ctx->codec.h264.neighbor_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + +err_pic_buf: + dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, + ctx->codec.h264.pic_info_buf, + ctx->codec.h264.pic_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + return ret; +} + +static void cedrus_h264_stop(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + struct cedrus_buffer *buf; + struct vb2_queue *vq; + unsigned int i; + + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); + + for (i = 0; i < vq->num_buffers; i++) { + buf = vb2_to_cedrus_buffer(vb2_get_buffer(vq, i)); + + if (buf->codec.h264.mv_col_buf_size > 0) { + dma_free_attrs(dev->dev, + buf->codec.h264.mv_col_buf_size, + buf->codec.h264.mv_col_buf, + buf->codec.h264.mv_col_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + + buf->codec.h264.mv_col_buf_size = 0; + } + } + + dma_free_attrs(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, + ctx->codec.h264.neighbor_info_buf, + ctx->codec.h264.neighbor_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + dma_free_attrs(dev->dev, ctx->codec.h264.pic_info_buf_size, + ctx->codec.h264.pic_info_buf, + ctx->codec.h264.pic_info_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + if (ctx->codec.h264.deblk_buf_size) + dma_free_attrs(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, + ctx->codec.h264.deblk_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); + if (ctx->codec.h264.intra_pred_buf_size) + dma_free_attrs(dev->dev, ctx->codec.h264.intra_pred_buf_size, + ctx->codec.h264.intra_pred_buf, + ctx->codec.h264.intra_pred_buf_dma, + DMA_ATTR_NO_KERNEL_MAPPING); +} + +static void cedrus_h264_trigger(struct cedrus_ctx *ctx) +{ + struct cedrus_dev *dev = ctx->dev; + + cedrus_write(dev, VE_H264_TRIGGER_TYPE, + VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE); +} + +struct cedrus_dec_ops cedrus_dec_ops_h264 = { + .irq_clear = cedrus_h264_irq_clear, + .irq_disable = cedrus_h264_irq_disable, + .irq_status = cedrus_h264_irq_status, + .setup = cedrus_h264_setup, + .start = cedrus_h264_start, + .stop = cedrus_h264_stop, + .trigger = cedrus_h264_trigger, +}; |