linux/drivers/media/platform/amphion/vpu_v4l2.c

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2020-2021 NXP
*/
#include <linux/init.h>
#include <linux/interconnect.h>
#include <linux/ioctl.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>
#include <linux/videodev2.h>
#include <media/v4l2-device.h>
#include <media/v4l2-event.h>
#include <media/v4l2-mem2mem.h>
#include <media/v4l2-ioctl.h>
#include <media/videobuf2-v4l2.h>
#include <media/videobuf2-dma-contig.h>
#include <media/videobuf2-vmalloc.h>
#include "vpu.h"
#include "vpu_core.h"
#include "vpu_v4l2.h"
#include "vpu_msgs.h"
#include "vpu_helpers.h"
void vpu_inst_lock(struct vpu_inst *inst)
{
mutex_lock(&inst->lock);
}
void vpu_inst_unlock(struct vpu_inst *inst)
{
mutex_unlock(&inst->lock);
}
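/*
 * Return the DMA address of a plane, adjusted by its data_offset, or 0 if the
 * plane index is out of range.
 */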
dma_addr_t vpu_get_vb_phy_addr(struct vb2_buffer *vb, u32 plane_no)
{
if (plane_no >= vb->num_planes)
return 0;
return vb2_dma_contig_plane_dma_addr(vb, plane_no) +
vb->planes[plane_no].data_offset;
}
unsigned int vpu_get_vb_length(struct vb2_buffer *vb, u32 plane_no)
{
if (plane_no >= vb->num_planes)
return 0;
return vb2_plane_size(vb, plane_no) - vb->planes[plane_no].data_offset;
}
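/* Driver-private buffer state, stored in the vpu_vb2_buffer wrapper. */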
void vpu_set_buffer_state(struct vb2_v4l2_buffer *vbuf, unsigned int state)
{
struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
vpu_buf->state = state;
}
unsigned int vpu_get_buffer_state(struct vb2_v4l2_buffer *vbuf)
{
struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
return vpu_buf->state;
}
void vpu_set_buffer_average_qp(struct vb2_v4l2_buffer *vbuf, u32 qp)
{
struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
vpu_buf->average_qp = qp;
}
void vpu_v4l2_set_error(struct vpu_inst *inst)
{
vpu_inst_lock(inst);
dev_err(inst->dev, "some error occurs in codec\n");
if (inst->fh.m2m_ctx) {
vb2_queue_error(v4l2_m2m_get_src_vq(inst->fh.m2m_ctx));
vb2_queue_error(v4l2_m2m_get_dst_vq(inst->fh.m2m_ctx));
}
vpu_inst_unlock(inst);
}
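/* Notify user space that the end of stream has been reached. */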
int vpu_notify_eos(struct vpu_inst *inst)
{
static const struct v4l2_event ev = {
.id = 0,
.type = V4L2_EVENT_EOS
};
vpu_trace(inst->dev, "[%d]\n", inst->id);
v4l2_event_queue_fh(&inst->fh, &ev);
return 0;
}
int vpu_notify_source_change(struct vpu_inst *inst)
{
static const struct v4l2_event ev = {
.id = 0,
.type = V4L2_EVENT_SOURCE_CHANGE,
.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION
};
vpu_trace(inst->dev, "[%d]\n", inst->id);
v4l2_event_queue_fh(&inst->fh, &ev);
return 0;
}
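/*
 * Mark the CAPTURE queue's last buffer as dequeued and wake up poll waiters.
 * This is refused while completed buffers are still pending on the done list.
 */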
int vpu_set_last_buffer_dequeued(struct vpu_inst *inst, bool eos)
{
struct vb2_queue *q;
if (!inst || !inst->fh.m2m_ctx)
return -EINVAL;
q = v4l2_m2m_get_dst_vq(inst->fh.m2m_ctx);
if (!list_empty(&q->done_list))
return -EINVAL;
if (q->last_buffer_dequeued)
return 0;
vpu_trace(inst->dev, "last buffer dequeued\n");
q->last_buffer_dequeued = true;
wake_up(&q->done_wq);
if (eos)
vpu_notify_eos(inst);
return 0;
}
bool vpu_is_source_empty(struct vpu_inst *inst)
{
struct v4l2_m2m_buffer *buf = NULL;
if (!inst->fh.m2m_ctx)
return true;
v4l2_m2m_for_each_src_buf(inst->fh.m2m_ctx, buf) {
if (vpu_get_buffer_state(&buf->vb) == VPU_BUF_STATE_IDLE)
return false;
}
return true;
}
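/* Complete fmt from the driver's format table, falling back to the first supported format. */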
static int vpu_init_format(struct vpu_inst *inst, struct vpu_format *fmt)
{
const struct vpu_format *info;
info = vpu_helper_find_format(inst, fmt->type, fmt->pixfmt);
if (!info) {
info = vpu_helper_enum_format(inst, fmt->type, 0);
if (!info)
return -EINVAL;
}
memcpy(fmt, info, sizeof(*fmt));
return 0;
}
static int vpu_calc_fmt_bytesperline(struct v4l2_format *f, struct vpu_format *fmt)
{
struct v4l2_pix_format_mplane *pixmp = &f->fmt.pix_mp;
int i;
if (fmt->flags & V4L2_FMT_FLAG_COMPRESSED) {
for (i = 0; i < fmt->comp_planes; i++)
fmt->bytesperline[i] = 0;
return 0;
}
if (pixmp->num_planes == fmt->comp_planes) {
for (i = 0; i < fmt->comp_planes; i++)
fmt->bytesperline[i] = pixmp->plane_fmt[i].bytesperline;
return 0;
}
if (pixmp->num_planes > 1)
return -EINVAL;
/*
 * The Amphion VPU only supports NV12 and NV12 tiled formats,
 * so the bytesperline of luma and chroma should be the same.
 */
for (i = 0; i < fmt->comp_planes; i++)
fmt->bytesperline[i] = pixmp->plane_fmt[0].bytesperline;
return 0;
}
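/*
 * Compute per-plane sizeimage. Raw planes use the core's stride and are at
 * least PAGE_SIZE; compressed buffers are clamped to 128 KiB..8 MiB and carry
 * no bytesperline.
 */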
static int vpu_calc_fmt_sizeimage(struct vpu_inst *inst, struct vpu_format *fmt)
{
u32 stride = 1;
int i;
if (!(fmt->flags & V4L2_FMT_FLAG_COMPRESSED)) {
const struct vpu_core_resources *res = vpu_get_resource(inst);
if (res)
stride = res->stride;
}
for (i = 0; i < fmt->comp_planes; i++) {
fmt->sizeimage[i] = vpu_helper_get_plane_size(fmt->pixfmt,
fmt->width,
fmt->height,
i,
stride,
fmt->field != V4L2_FIELD_NONE ? 1 : 0,
&fmt->bytesperline[i]);
fmt->sizeimage[i] = max_t(u32, fmt->sizeimage[i], PAGE_SIZE);
if (fmt->flags & V4L2_FMT_FLAG_COMPRESSED) {
fmt->sizeimage[i] = clamp_val(fmt->sizeimage[i], SZ_128K, SZ_8M);
fmt->bytesperline[i] = 0;
}
}
return 0;
}
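/*
 * Size of one memory plane. When the format has fewer memory planes than
 * component planes, the trailing component planes are folded into the last
 * memory plane.
 */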
u32 vpu_get_fmt_plane_size(struct vpu_format *fmt, u32 plane_no)
{
u32 size;
int i;
if (plane_no >= fmt->mem_planes)
return 0;
if (fmt->comp_planes == fmt->mem_planes)
return fmt->sizeimage[plane_no];
if (plane_no < fmt->mem_planes - 1)
return fmt->sizeimage[plane_no];
size = fmt->sizeimage[plane_no];
for (i = fmt->mem_planes; i < fmt->comp_planes; i++)
size += fmt->sizeimage[i];
return size;
}
int vpu_try_fmt_common(struct vpu_inst *inst, struct v4l2_format *f, struct vpu_format *fmt)
{
struct v4l2_pix_format_mplane *pixmp = &f->fmt.pix_mp;
int i;
int ret;
fmt->pixfmt = pixmp->pixelformat;
fmt->type = f->type;
ret = vpu_init_format(inst, fmt);
if (ret < 0)
return ret;
fmt->width = pixmp->width;
fmt->height = pixmp->height;
if (fmt->width)
fmt->width = vpu_helper_valid_frame_width(inst, fmt->width);
if (fmt->height)
fmt->height = vpu_helper_valid_frame_height(inst, fmt->height);
fmt->field = pixmp->field == V4L2_FIELD_ANY ? V4L2_FIELD_NONE : pixmp->field;
vpu_calc_fmt_bytesperline(f, fmt);
vpu_calc_fmt_sizeimage(inst, fmt);
if ((fmt->flags & V4L2_FMT_FLAG_COMPRESSED) && pixmp->plane_fmt[0].sizeimage)
fmt->sizeimage[0] = clamp_val(pixmp->plane_fmt[0].sizeimage, SZ_128K, SZ_8M);
pixmp->pixelformat = fmt->pixfmt;
pixmp->width = fmt->width;
pixmp->height = fmt->height;
pixmp->flags = fmt->flags;
pixmp->num_planes = fmt->mem_planes;
pixmp->field = fmt->field;
memset(pixmp->reserved, 0, sizeof(pixmp->reserved));
for (i = 0; i < pixmp->num_planes; i++) {
pixmp->plane_fmt[i].bytesperline = fmt->bytesperline[i];
pixmp->plane_fmt[i].sizeimage = vpu_get_fmt_plane_size(fmt, i);
memset(pixmp->plane_fmt[i].reserved, 0, sizeof(pixmp->plane_fmt[i].reserved));
}
return 0;
}
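/*
 * An instance can process buffers only after it has been initialized and has a
 * valid instance id; the codec's check_ready op may add further conditions.
 */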
static bool vpu_check_ready(struct vpu_inst *inst, u32 type)
{
if (!inst)
return false;
if (inst->state == VPU_CODEC_STATE_DEINIT || inst->id < 0)
return false;
if (!inst->ops->check_ready)
return true;
return call_vop(inst, check_ready, type);
}
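/* Pass the first idle OUTPUT buffer to the codec's process_output op. */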
int vpu_process_output_buffer(struct vpu_inst *inst)
{
struct v4l2_m2m_buffer *buf = NULL;
struct vb2_v4l2_buffer *vbuf = NULL;
if (!inst || !inst->fh.m2m_ctx)
return -EINVAL;
if (!vpu_check_ready(inst, inst->out_format.type))
return -EINVAL;
v4l2_m2m_for_each_src_buf(inst->fh.m2m_ctx, buf) {
vbuf = &buf->vb;
if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_IDLE)
break;
vbuf = NULL;
}
if (!vbuf)
return -EINVAL;
dev_dbg(inst->dev, "[%d]frame id = %d / %d\n",
inst->id, vbuf->sequence, inst->sequence);
return call_vop(inst, process_output, &vbuf->vb2_buf);
}
int vpu_process_capture_buffer(struct vpu_inst *inst)
{
struct v4l2_m2m_buffer *buf = NULL;
struct vb2_v4l2_buffer *vbuf = NULL;
if (!inst || !inst->fh.m2m_ctx)
return -EINVAL;
if (!vpu_check_ready(inst, inst->cap_format.type))
return -EINVAL;
v4l2_m2m_for_each_dst_buf(inst->fh.m2m_ctx, buf) {
vbuf = &buf->vb;
if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_IDLE)
break;
vbuf = NULL;
}
if (!vbuf)
return -EINVAL;
return call_vop(inst, process_capture, &vbuf->vb2_buf);
}
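/*
 * Return the next pending source buffer, completing and skipping any
 * codec-config buffers on the way.
 */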
struct vb2_v4l2_buffer *vpu_next_src_buf(struct vpu_inst *inst)
{
struct vb2_v4l2_buffer *src_buf = NULL;
if (!inst->fh.m2m_ctx)
return NULL;
src_buf = v4l2_m2m_next_src_buf(inst->fh.m2m_ctx);
if (!src_buf || vpu_get_buffer_state(src_buf) == VPU_BUF_STATE_IDLE)
return NULL;
while (vpu_vb_is_codecconfig(src_buf)) {
v4l2_m2m_src_buf_remove(inst->fh.m2m_ctx);
vpu_set_buffer_state(src_buf, VPU_BUF_STATE_IDLE);
v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
src_buf = v4l2_m2m_next_src_buf(inst->fh.m2m_ctx);
if (!src_buf || vpu_get_buffer_state(src_buf) == VPU_BUF_STATE_IDLE)
return NULL;
}
return src_buf;
}
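/*
 * Drop up to @count queued source buffers: buffers already decoded complete
 * as DONE, the rest as ERROR.
 */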
void vpu_skip_frame(struct vpu_inst *inst, int count)
{
struct vb2_v4l2_buffer *src_buf;
enum vb2_buffer_state state;
int i = 0;
if (count <= 0 || !inst->fh.m2m_ctx)
return;
while (i < count) {
src_buf = v4l2_m2m_src_buf_remove(inst->fh.m2m_ctx);
if (!src_buf || vpu_get_buffer_state(src_buf) == VPU_BUF_STATE_IDLE)
return;
if (vpu_get_buffer_state(src_buf) == VPU_BUF_STATE_DECODED)
state = VB2_BUF_STATE_DONE;
else
state = VB2_BUF_STATE_ERROR;
i++;
vpu_set_buffer_state(src_buf, VPU_BUF_STATE_IDLE);
v4l2_m2m_buf_done(src_buf, state);
}
}
struct vb2_v4l2_buffer *vpu_find_buf_by_sequence(struct vpu_inst *inst, u32 type, u32 sequence)
{
struct v4l2_m2m_buffer *buf = NULL;
struct vb2_v4l2_buffer *vbuf = NULL;
if (!inst || !inst->fh.m2m_ctx)
return NULL;
if (V4L2_TYPE_IS_OUTPUT(type)) {
v4l2_m2m_for_each_src_buf(inst->fh.m2m_ctx, buf) {
vbuf = &buf->vb;
if (vbuf->sequence == sequence)
break;
vbuf = NULL;
}
} else {
v4l2_m2m_for_each_dst_buf(inst->fh.m2m_ctx, buf) {
vbuf = &buf->vb;
if (vbuf->sequence == sequence)
break;
vbuf = NULL;
}
}
return vbuf;
}
struct vb2_v4l2_buffer *vpu_find_buf_by_idx(struct vpu_inst *inst, u32 type, u32 idx)
{
struct v4l2_m2m_buffer *buf = NULL;
struct vb2_v4l2_buffer *vbuf = NULL;
if (!inst || !inst->fh.m2m_ctx)
return NULL;
if (V4L2_TYPE_IS_OUTPUT(type)) {
v4l2_m2m_for_each_src_buf(inst->fh.m2m_ctx, buf) {
vbuf = &buf->vb;
if (vbuf->vb2_buf.index == idx)
break;
vbuf = NULL;
}
} else {
v4l2_m2m_for_each_dst_buf(inst->fh.m2m_ctx, buf) {
vbuf = &buf->vb;
if (vbuf->vb2_buf.index == idx)
break;
vbuf = NULL;
}
}
return vbuf;
}
int vpu_get_num_buffers(struct vpu_inst *inst, u32 type)
{
struct vb2_queue *q;
if (!inst || !inst->fh.m2m_ctx)
return -EINVAL;
if (V4L2_TYPE_IS_OUTPUT(type))
q = v4l2_m2m_get_src_vq(inst->fh.m2m_ctx);
else
q = v4l2_m2m_get_dst_vq(inst->fh.m2m_ctx);
return vb2_get_num_buffers(q);
}
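/*
 * Buffer scheduling is driven from buf_queue and the codec ops, so the m2m
 * device_run callback has nothing to do.
 */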
static void vpu_m2m_device_run(void *priv)
{
}
static void vpu_m2m_job_abort(void *priv)
{
struct vpu_inst *inst = priv;
struct v4l2_m2m_ctx *m2m_ctx = inst->fh.m2m_ctx;
v4l2_m2m_job_finish(m2m_ctx->m2m_dev, m2m_ctx);
}
static const struct v4l2_m2m_ops vpu_m2m_ops = {
.device_run = vpu_m2m_device_run,
.job_abort = vpu_m2m_job_abort
};
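/*
 * vb2 queue_setup: either validate caller-provided plane counts and sizes, or
 * report the minimum buffer count and per-plane sizes for the current format.
 */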
static int vpu_vb2_queue_setup(struct vb2_queue *vq,
unsigned int *buf_count,
unsigned int *plane_count,
unsigned int psize[],
struct device *allocators[])
{
struct vpu_inst *inst = vb2_get_drv_priv(vq);
struct vpu_format *cur_fmt;
int i;
cur_fmt = vpu_get_format(inst, vq->type);
if (*plane_count) {
if (*plane_count != cur_fmt->mem_planes)
return -EINVAL;
for (i = 0; i < cur_fmt->mem_planes; i++) {
if (psize[i] < vpu_get_fmt_plane_size(cur_fmt, i))
return -EINVAL;
}
return 0;
}
if (V4L2_TYPE_IS_OUTPUT(vq->type))
*buf_count = max_t(unsigned int, *buf_count, inst->min_buffer_out);
else
*buf_count = max_t(unsigned int, *buf_count, inst->min_buffer_cap);
*plane_count = cur_fmt->mem_planes;
for (i = 0; i < cur_fmt->mem_planes; i++)
psize[i] = vpu_get_fmt_plane_size(cur_fmt, i);
if (V4L2_TYPE_IS_OUTPUT(vq->type) && inst->state == VPU_CODEC_STATE_SEEK) {
vpu_trace(inst->dev, "reinit when VIDIOC_REQBUFS(OUTPUT, 0)\n");
call_void_vop(inst, release);
}
if (V4L2_TYPE_IS_CAPTURE(vq->type))
call_void_vop(inst, reset_frame_store);
return 0;
}
static int vpu_vb2_buf_init(struct vb2_buffer *vb)
{
struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
vpu_buf->fs_id = -1;
vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE);
if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type))
return 0;
call_void_vop(inst, attach_frame_store, vb);
return 0;
}
static int vpu_vb2_buf_out_validate(struct vb2_buffer *vb)
{
struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
vbuf->field = V4L2_FIELD_NONE;
return 0;
}
static int vpu_vb2_buf_prepare(struct vb2_buffer *vb)
{
struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
struct vpu_format *cur_fmt;
u32 i;
cur_fmt = vpu_get_format(inst, vb->type);
for (i = 0; i < cur_fmt->mem_planes; i++) {
if (vpu_get_vb_length(vb, i) < vpu_get_fmt_plane_size(cur_fmt, i)) {
dev_dbg(inst->dev, "[%d] %s buf[%d] is invalid\n",
inst->id, vpu_type_name(vb->type), vb->index);
vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR);
}
}
return 0;
}
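/*
 * buf_finish: publish the frame's average QP through the
 * V4L2_CID_MPEG_VIDEO_AVERAGE_QP control for capture buffers, signal EOS when
 * the last buffer is returned, and let the codec know when the done list
 * drains.
 */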
static void vpu_vb2_buf_finish(struct vb2_buffer *vb)
{
struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
struct vb2_queue *q = vb->vb2_queue;
if (V4L2_TYPE_IS_CAPTURE(vb->type)) {
struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf);
struct v4l2_ctrl *ctrl = v4l2_ctrl_find(&inst->ctrl_handler,
V4L2_CID_MPEG_VIDEO_AVERAGE_QP);
if (ctrl)
v4l2_ctrl_s_ctrl(ctrl, vpu_buf->average_qp);
}
if (vbuf->flags & V4L2_BUF_FLAG_LAST)
vpu_notify_eos(inst);
if (list_empty(&q->done_list))
call_void_vop(inst, on_queue_empty, q->type);
}
void vpu_vb2_buffers_return(struct vpu_inst *inst, unsigned int type, enum vb2_buffer_state state)
{
struct vb2_v4l2_buffer *buf;
if (V4L2_TYPE_IS_OUTPUT(type)) {
while ((buf = v4l2_m2m_src_buf_remove(inst->fh.m2m_ctx))) {
vpu_set_buffer_state(buf, VPU_BUF_STATE_IDLE);
v4l2_m2m_buf_done(buf, state);
}
} else {
while ((buf = v4l2_m2m_dst_buf_remove(inst->fh.m2m_ctx))) {
vpu_set_buffer_state(buf, VPU_BUF_STATE_IDLE);
v4l2_m2m_buf_done(buf, state);
}
}
}
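/*
 * start_streaming: register the instance to a core (temporarily dropping
 * inst->lock, which vb2 holds as the queue lock) and start the codec; on
 * failure the buffers are given back to vb2 in the QUEUED state.
 */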
static int vpu_vb2_start_streaming(struct vb2_queue *q, unsigned int count)
{
struct vpu_inst *inst = vb2_get_drv_priv(q);
struct vpu_format *fmt = vpu_get_format(inst, q->type);
int ret;
vpu_inst_unlock(inst);
ret = vpu_inst_register(inst);
vpu_inst_lock(inst);
if (ret) {
vpu_vb2_buffers_return(inst, q->type, VB2_BUF_STATE_QUEUED);
return ret;
}
vpu_trace(inst->dev, "[%d] %s %c%c%c%c %dx%d %u(%u) %u(%u) %u(%u) %d\n",
inst->id, vpu_type_name(q->type),
fmt->pixfmt,
fmt->pixfmt >> 8,
fmt->pixfmt >> 16,
fmt->pixfmt >> 24,
fmt->width, fmt->height,
fmt->sizeimage[0], fmt->bytesperline[0],
fmt->sizeimage[1], fmt->bytesperline[1],
fmt->sizeimage[2], fmt->bytesperline[2],
vb2_get_num_buffers(q));
vb2_clear_last_buffer_dequeued(q);
ret = call_vop(inst, start, q->type);
if (ret)
vpu_vb2_buffers_return(inst, q->type, VB2_BUF_STATE_QUEUED);
return ret;
}
static void vpu_vb2_stop_streaming(struct vb2_queue *q)
{
struct vpu_inst *inst = vb2_get_drv_priv(q);
vpu_trace(inst->dev, "[%d] %s\n", inst->id, vpu_type_name(q->type));
call_void_vop(inst, stop, q->type);
vpu_vb2_buffers_return(inst, q->type, VB2_BUF_STATE_ERROR);
if (V4L2_TYPE_IS_OUTPUT(q->type))
inst->sequence = 0;
}
static void vpu_vb2_buf_queue(struct vb2_buffer *vb)
{
struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue);
if (V4L2_TYPE_IS_OUTPUT(vb->type))
vbuf->sequence = inst->sequence++;
v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf);
vpu_process_output_buffer(inst);
vpu_process_capture_buffer(inst);
}
static const struct vb2_ops vpu_vb2_ops = {
.queue_setup = vpu_vb2_queue_setup,
.buf_init = vpu_vb2_buf_init,
.buf_out_validate = vpu_vb2_buf_out_validate,
.buf_prepare = vpu_vb2_buf_prepare,
.buf_finish = vpu_vb2_buf_finish,
.start_streaming = vpu_vb2_start_streaming,
.stop_streaming = vpu_vb2_stop_streaming,
.buf_queue = vpu_vb2_buf_queue,
};
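/*
 * Set up the m2m source/destination queues. dma-contig is the default
 * allocator; the bitstream side (decoder OUTPUT, encoder CAPTURE) switches to
 * vmalloc buffers when use_stream_buffer is set.
 */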
static int vpu_m2m_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
{
struct vpu_inst *inst = priv;
int ret;
src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
inst->out_format.type = src_vq->type;
src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
src_vq->ops = &vpu_vb2_ops;
src_vq->mem_ops = &vb2_dma_contig_memops;
if (inst->type == VPU_CORE_TYPE_DEC && inst->use_stream_buffer)
src_vq->mem_ops = &vb2_vmalloc_memops;
src_vq->drv_priv = inst;
src_vq->buf_struct_size = sizeof(struct vpu_vb2_buffer);
src_vq->min_queued_buffers = 1;
src_vq->dev = inst->vpu->dev;
src_vq->lock = &inst->lock;
ret = vb2_queue_init(src_vq);
if (ret)
return ret;
dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
inst->cap_format.type = dst_vq->type;
dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
dst_vq->ops = &vpu_vb2_ops;
dst_vq->mem_ops = &vb2_dma_contig_memops;
if (inst->type == VPU_CORE_TYPE_ENC && inst->use_stream_buffer)
dst_vq->mem_ops = &vb2_vmalloc_memops;
dst_vq->drv_priv = inst;
dst_vq->buf_struct_size = sizeof(struct vpu_vb2_buffer);
dst_vq->min_queued_buffers = 1;
dst_vq->dev = inst->vpu->dev;
dst_vq->lock = &inst->lock;
ret = vb2_queue_init(dst_vq);
if (ret) {
vb2_queue_release(src_vq);
return ret;
}
return 0;
}
static int vpu_v4l2_release(struct vpu_inst *inst)
{
vpu_trace(inst->vpu->dev, "%p\n", inst);
vpu_release_core(inst->core);
put_device(inst->dev);
if (inst->workqueue) {
cancel_work_sync(&inst->msg_work);
destroy_workqueue(inst->workqueue);
inst->workqueue = NULL;
}
v4l2_ctrl_handler_free(&inst->ctrl_handler);
mutex_destroy(&inst->lock);
v4l2_fh_del(&inst->fh);
v4l2_fh_exit(&inst->fh);
call_void_vop(inst, cleanup);
return 0;
}
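/*
 * Common open() path for the encoder and decoder nodes: bind the instance to a
 * VPU core and set up controls, the m2m context and the per-instance message
 * workqueue.
 */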
int vpu_v4l2_open(struct file *file, struct vpu_inst *inst)
{
struct vpu_dev *vpu = video_drvdata(file);
struct vpu_func *func;
int ret = 0;
if (!inst || !inst->ops)
return -EINVAL;
if (inst->type == VPU_CORE_TYPE_ENC)
func = &vpu->encoder;
else
func = &vpu->decoder;
atomic_set(&inst->ref_count, 0);
atomic_long_set(&inst->last_response_cmd, 0);
vpu_inst_get(inst);
inst->vpu = vpu;
inst->core = vpu_request_core(vpu, inst->type);
if (inst->core)
inst->dev = get_device(inst->core->dev);
mutex_init(&inst->lock);
INIT_LIST_HEAD(&inst->cmd_q);
inst->id = VPU_INST_NULL_ID;
inst->release = vpu_v4l2_release;
inst->pid = current->pid;
inst->tgid = current->tgid;
inst->min_buffer_cap = 2;
inst->min_buffer_out = 2;
v4l2_fh_init(&inst->fh, func->vfd);
v4l2_fh_add(&inst->fh);
ret = call_vop(inst, ctrl_init);
if (ret)
goto error;
inst->fh.m2m_ctx = v4l2_m2m_ctx_init(func->m2m_dev, inst, vpu_m2m_queue_init);
if (IS_ERR(inst->fh.m2m_ctx)) {
dev_err(vpu->dev, "v4l2_m2m_ctx_init fail\n");
ret = PTR_ERR(inst->fh.m2m_ctx);
goto error;
}
inst->fh.ctrl_handler = &inst->ctrl_handler;
file->private_data = &inst->fh;
inst->state = VPU_CODEC_STATE_DEINIT;
inst->workqueue = alloc_ordered_workqueue("vpu_inst", WQ_MEM_RECLAIM);
if (inst->workqueue) {
INIT_WORK(&inst->msg_work, vpu_inst_run_work);
ret = kfifo_init(&inst->msg_fifo,
inst->msg_buffer,
rounddown_pow_of_two(sizeof(inst->msg_buffer)));
if (ret) {
destroy_workqueue(inst->workqueue);
inst->workqueue = NULL;
}
}
vpu_trace(vpu->dev, "tgid = %d, pid = %d, type = %s, inst = %p\n",
inst->tgid, inst->pid, vpu_core_type_desc(inst->type), inst);
return 0;
error:
vpu_inst_put(inst);
return ret;
}
int vpu_v4l2_close(struct file *file)
{
struct vpu_dev *vpu = video_drvdata(file);
struct vpu_inst *inst = to_inst(file);
vpu_trace(vpu->dev, "tgid = %d, pid = %d, inst = %p\n", inst->tgid, inst->pid, inst);
vpu_inst_lock(inst);
if (inst->fh.m2m_ctx) {
v4l2_m2m_ctx_release(inst->fh.m2m_ctx);
inst->fh.m2m_ctx = NULL;
}
call_void_vop(inst, release);
vpu_inst_unlock(inst);
vpu_inst_unregister(inst);
vpu_inst_put(inst);
return 0;
}
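/*
 * Register an encoder or decoder function: m2m device, video device and
 * media-controller entities.
 */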
int vpu_add_func(struct vpu_dev *vpu, struct vpu_func *func)
{
struct video_device *vfd;
int ret;
if (!vpu || !func)
return -EINVAL;
if (func->vfd)
return 0;
func->m2m_dev = v4l2_m2m_init(&vpu_m2m_ops);
if (IS_ERR(func->m2m_dev)) {
dev_err(vpu->dev, "v4l2_m2m_init fail\n");
func->vfd = NULL;
return PTR_ERR(func->m2m_dev);
}
vfd = video_device_alloc();
if (!vfd) {
v4l2_m2m_release(func->m2m_dev);
dev_err(vpu->dev, "alloc vpu decoder video device fail\n");
return -ENOMEM;
}
vfd->release = video_device_release;
vfd->vfl_dir = VFL_DIR_M2M;
vfd->v4l2_dev = &vpu->v4l2_dev;
vfd->device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
if (func->type == VPU_CORE_TYPE_ENC) {
strscpy(vfd->name, "amphion-vpu-encoder", sizeof(vfd->name));
vfd->fops = venc_get_fops();
vfd->ioctl_ops = venc_get_ioctl_ops();
} else {
strscpy(vfd->name, "amphion-vpu-decoder", sizeof(vfd->name));
vfd->fops = vdec_get_fops();
vfd->ioctl_ops = vdec_get_ioctl_ops();
}
video_set_drvdata(vfd, vpu);
ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1);
if (ret) {
video_device_release(vfd);
v4l2_m2m_release(func->m2m_dev);
return ret;
}
func->vfd = vfd;
ret = v4l2_m2m_register_media_controller(func->m2m_dev, func->vfd, func->function);
if (ret) {
v4l2_m2m_release(func->m2m_dev);
func->m2m_dev = NULL;
video_unregister_device(func->vfd);
func->vfd = NULL;
return ret;
}
return 0;
}
void vpu_remove_func(struct vpu_func *func)
{
if (!func)
return;
if (func->m2m_dev) {
v4l2_m2m_unregister_media_controller(func->m2m_dev);
v4l2_m2m_release(func->m2m_dev);
func->m2m_dev = NULL;
}
if (func->vfd) {
video_unregister_device(func->vfd);
func->vfd = NULL;
}
}