linux/drivers/media/usb/stk1160/stk1160-video.c

519 lines
12 KiB
C
Raw Permalink Normal View History

treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 157 Based on 3 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version [author] [kishon] [vijay] [abraham] [i] [kishon]@[ti] [com] this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version [author] [graeme] [gregory] [gg]@[slimlogic] [co] [uk] [author] [kishon] [vijay] [abraham] [i] [kishon]@[ti] [com] [based] [on] [twl6030]_[usb] [c] [author] [hema] [hk] [hemahk]@[ti] [com] this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details extracted by the scancode license scanner the SPDX license identifier GPL-2.0-or-later has been chosen to replace the boilerplate/reference in 1105 file(s). 
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Allison Randal <allison@lohutok.net> Reviewed-by: Richard Fontana <rfontana@redhat.com> Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190527070033.202006027@linutronix.de Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-05-27 08:55:06 +02:00
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* STK1160 driver
*
* Copyright (C) 2012 Ezequiel Garcia
* <elezegarcia--a.t--gmail.com>
*
* Based on Easycap driver by R.M. Thomas
* Copyright (C) 2010 R.M. Thomas
* <rmthomas--a.t--sciolus.org>
*/
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include "stk1160.h"
/*
* "debug" module parameter, registered with permission bits 0644.
* Nothing in the visible part of this file reads the variable directly.
* NOTE(review): the variable is unsigned int but is registered with the
* "int" parameter type - confirm whether "uint" was intended.
*/
static unsigned int debug;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "enable debug messages");
/*
 * Report a URB or iso-packet error through a rate-limited printk.
 *
 * @dev:    not used by this function
 * @packet: iso packet index; a negative value means @status belongs to the
 *          URB as a whole, which is logged at warning level instead of info
 * @status: error code to translate into a short description
 */
static inline void print_err_status(struct stk1160 *dev,
				    int packet, int status)
{
	const char *reason;

	switch (status) {
	case -ENOENT:
		reason = "unlinked synchronously";
		break;
	case -ECONNRESET:
		reason = "unlinked asynchronously";
		break;
	case -ENOSR:
		reason = "Buffer error (overrun)";
		break;
	case -EPIPE:
		reason = "Stalled (device not responding)";
		break;
	case -EOVERFLOW:
		reason = "Babble (bad cable?)";
		break;
	case -EPROTO:
		reason = "Bit-stuff error (bad cable?)";
		break;
	case -EILSEQ:
		reason = "CRC/Timeout (could be anything)";
		break;
	case -ETIME:
		reason = "Device does not respond";
		break;
	default:
		/* Any status without a dedicated description */
		reason = "Unknown";
		break;
	}

	/* Per-packet errors carry the packet index and are only informational */
	if (packet >= 0) {
		printk_ratelimited(KERN_INFO "URB packet %d, status %d [%s].\n",
				   packet, status, reason);
		return;
	}

	printk_ratelimited(KERN_WARNING "URB status %d [%s].\n",
			   status, reason);
}
/*
 * Take the first buffer off dev->avail_bufs.
 *
 * The list is inspected and modified under dev->buf_lock with the interrupt
 * state saved and restored. Returns the dequeued buffer, or NULL when the
 * list is empty.
 */
static inline
struct stk1160_buffer *stk1160_next_buffer(struct stk1160 *dev)
{
	struct stk1160_buffer *next;
	unsigned long irq_flags;

	/* The current buffer must be NULL when this function gets called */
	WARN_ON(dev->isoc_ctl.buf);

	spin_lock_irqsave(&dev->buf_lock, irq_flags);

	if (list_empty(&dev->avail_bufs)) {
		spin_unlock_irqrestore(&dev->buf_lock, irq_flags);
		return NULL;
	}

	next = list_first_entry(&dev->avail_bufs,
				struct stk1160_buffer, list);
	list_del(&next->list);

	spin_unlock_irqrestore(&dev->buf_lock, irq_flags);

	return next;
}
/*
 * Finish the buffer currently being filled (dev->isoc_ctl.buf): stamp it
 * with field type, sequence number and timestamp, record its payload size,
 * hand it back through vb2_buffer_done() in the DONE state, and clear the
 * current-buffer pointer.
 *
 * The current buffer is dereferenced unconditionally, so the caller must
 * make sure one is set.
 */
static inline
void stk1160_buffer_done(struct stk1160 *dev)
{
	struct stk1160_buffer *done = dev->isoc_ctl.buf;

	done->vb.field = V4L2_FIELD_INTERLACED;
	done->vb.sequence = dev->sequence++;
	done->vb.vb2_buf.timestamp = ktime_get_ns();

	vb2_set_plane_payload(&done->vb.vb2_buf, 0, done->bytesused);
	vb2_buffer_done(&done->vb.vb2_buf, VB2_BUF_STATE_DONE);

	/* No buffer is being filled any more */
	dev->isoc_ctl.buf = NULL;
}
/*
* Copy the payload of one isoc packet into the current capture buffer
* (dev->isoc_ctl.buf), writing every other line so that the data of the
* two fields ends up interleaved in the same buffer.
*
* @dev: device state; dev->isoc_ctl.buf is dereferenced without a NULL
*       check, so this must only be called while a current buffer is set
* @src: start of the packet data; the first 4 bytes are skipped
* @len: packet length in bytes, including the 4 skipped bytes
*
* buf->pos is used as the number of bytes already stored for the field
* being received; buf->pos and buf->bytesused both grow by the amount
* copied here. The function returns early, without copying further,
* whenever the destination would fall outside buf->mem .. buf->mem + length.
*/
static inline
void stk1160_copy_video(struct stk1160 *dev, u8 *src, int len)
{
int linesdone, lineoff, lencopy, offset;
/* Line stride in bytes: twice the width (presumably 2 bytes per pixel - confirm) */
int bytesperline = dev->width * 2;
struct stk1160_buffer *buf = dev->isoc_ctl.buf;
u8 *dst = buf->mem;
int remain;
/*
* TODO: These stk1160_dbg are very spammy!
* We should check why we are getting them.
*
* UPDATE: One of the reasons (the only one?) for getting these
* is incorrect standard (mismatch between expected and configured).
* So perhaps, we could add a counter for errors. When the counter
* reaches some value, we simply stop streaming.
*/
/* Skip the 4 leading bytes of the packet; only the rest is copied */
len -= 4;
src += 4;
remain = len;
/* Work out where in the current field the previous copy stopped */
linesdone = buf->pos / bytesperline;
lineoff = buf->pos % bytesperline; /* offset in current line */
/* When buf->odd is zero, this field starts one line further down */
if (!buf->odd)
dst += bytesperline;
/* Multiply linesdone by two, to take account of the other field */
dst += linesdone * bytesperline * 2 + lineoff;
/* Copy the remaining of current line */
lencopy = min(remain, bytesperline - lineoff);
/*
* Check that the destination is still inside the buffer.
* If fewer than lencopy bytes are left, clamp lencopy to what fits and
* set remain to the same value, so that remain drops to zero after the
* copy and the line-by-line loop below is not entered.
*/
offset = dst - (u8 *)buf->mem;
if (offset > buf->length) {
dev_warn_ratelimited(dev->dev, "out of bounds offset\n");
return;
}
if (lencopy > buf->length - offset) {
lencopy = buf->length - offset;
remain = lencopy;
}
/* Check if the copy is done */
if (lencopy == 0 || remain == 0)
return;
/*
* Let the bug hunt begin! sanity checks!
* Defensive checks: refuse a negative length or a copy that would run
* past the end of the buffer.
*/
if (lencopy < 0) {
printk_ratelimited(KERN_DEBUG "copy skipped: negative lencopy\n");
return;
}
if ((unsigned long)dst + lencopy >
(unsigned long)buf->mem + buf->length) {
printk_ratelimited(KERN_WARNING "stk1160: buffer overflow detected\n");
return;
}
memcpy(dst, src, lencopy);
buf->bytesused += lencopy;
buf->pos += lencopy;
remain -= lencopy;
/* Copy current field line by line, interlacing with the other field */
while (remain > 0) {
/* Step past the bytes just written plus one full line of the other field */
dst += lencopy + bytesperline;
src += lencopy;
/* Copy one line at a time */
lencopy = min(remain, bytesperline);
/*
* Same bounds handling as before the loop: bail out if the offset
* is outside the buffer, otherwise clamp the copy to the space left
* so that this iteration is the last one.
*/
offset = dst - (u8 *)buf->mem;
if (offset > buf->length) {
dev_warn_ratelimited(dev->dev, "offset out of bounds\n");
return;
}
if (lencopy > buf->length - offset) {
lencopy = buf->length - offset;
remain = lencopy;
}
/* Check if the copy is done */
if (lencopy == 0 || remain == 0)
return;
/* Defensive checks, as for the first copy above */
if (lencopy < 0) {
printk_ratelimited(KERN_WARNING "stk1160: negative lencopy detected\n");
return;
}
if ((unsigned long)dst + lencopy >
(unsigned long)buf->mem + buf->length) {
printk_ratelimited(KERN_WARNING "stk1160: buffer overflow detected\n");
return;
}
memcpy(dst, src, lencopy);
remain -= lencopy;
buf->bytesused += lencopy;
buf->pos += lencopy;
}
}
/*
 * Walk the iso packets of a completed URB and route each one: field
 * marker packets switch or reset the current capture buffer, all other
 * packets are copied into it.
 *
 * @dev: device state; a NULL pointer is reported and ignored
 * @urb: completed URB whose packets are examined
 */
static void stk1160_process_isoc(struct stk1160 *dev, struct urb *urb)
{
	int pkt, pkt_status, pkt_len;
	u8 *data;

	if (!dev) {
		stk1160_warn("%s called with null device\n", __func__);
		return;
	}

	/* A failed URB is reported once and none of its packets are used */
	if (urb->status < 0) {
		print_err_status(dev, -1, urb->status);
		return;
	}

	for (pkt = 0; pkt < urb->number_of_packets; pkt++) {
		pkt_status = urb->iso_frame_desc[pkt].status;
		if (pkt_status < 0) {
			print_err_status(dev, pkt, pkt_status);
			continue;
		}

		/* Locate this packet's data and the number of bytes received */
		data = urb->transfer_buffer + urb->iso_frame_desc[pkt].offset;
		pkt_len = urb->iso_frame_desc[pkt].actual_length;

		/* Packets of 4 bytes or fewer are treated as empty */
		if (pkt_len <= 4)
			continue;

		/*
		 * An 8-byte packet sequence means end of field.
		 * So if we don't have any packet, we start receiving one now
		 * and if we do have a packet, then we are done with it.
		 *
		 * These end of field packets are always 0xc0 or 0x80,
		 * but not always 8-byte long so we don't check packet length.
		 */
		if (data[0] == 0xc0) {
			/*
			 * A first byte of 0xc0 means the second field was
			 * received and the frame has ended: complete the
			 * current buffer, if any, and fetch a fresh one.
			 */
			if (dev->isoc_ctl.buf)
				stk1160_buffer_done(dev);

			dev->isoc_ctl.buf = stk1160_next_buffer(dev);
			/* No buffer available: drop the rest of this URB */
			if (!dev->isoc_ctl.buf)
				return;
		}

		/*
		 * Without a buffer at this point, the start mark sequence
		 * has not been seen yet.
		 */
		if (!dev->isoc_ctl.buf)
			continue;

		if (data[0] == 0xc0 || data[0] == 0x80) {
			/*
			 * Marker packet: record the parity for the packets
			 * that follow, restart the field position, and move
			 * on to the next packet.
			 */
			dev->isoc_ctl.buf->odd = *data & 0x40;
			dev->isoc_ctl.buf->pos = 0;
			continue;
		}

		stk1160_copy_video(dev, data, pkt_len);
	}
}
/*
 * URB completion callback: process the received packets, reset the
 * per-packet descriptors and resubmit the URB.
 *
 * A URB completing with -ECONNRESET, -ENOENT or -ESHUTDOWN is dropped
 * silently; any other non-zero status is logged. In both cases the URB is
 * not resubmitted.
 */
static void stk1160_isoc_irq(struct urb *urb)
{
	struct stk1160 *dev = urb->context;
	int pkt, err;

	if (urb->status != 0) {
		/* TODO: check uvc driver: he frees the queue here */
		if (urb->status != -ECONNRESET &&	/* kill */
		    urb->status != -ENOENT &&
		    urb->status != -ESHUTDOWN)
			stk1160_err("urb error! status %d\n", urb->status);
		return;
	}

	stk1160_process_isoc(dev, urb);

	/* Clear the per-packet results before handing the URB back */
	for (pkt = 0; pkt < urb->number_of_packets; pkt++) {
		urb->iso_frame_desc[pkt].status = 0;
		urb->iso_frame_desc[pkt].actual_length = 0;
	}

	err = usb_submit_urb(urb, GFP_ATOMIC);
	if (err)
		stk1160_err("urb re-submit failed (%d)\n", err);
}
/*
 * Kill every isoc URB recorded in dev->isoc_ctl.
 *
 * Must not be called from atomic context, because killing URBs is not
 * allowed there.
 */
void stk1160_cancel_isoc(struct stk1160 *dev)
{
	int idx;
	int count = dev->isoc_ctl.num_bufs;

	/*
	 * Not strictly needed; it only avoids printing a pointless
	 * debug message when there is nothing to kill.
	 */
	if (count == 0)
		return;

	stk1160_dbg("killing %d urbs...\n", count);

	/*
	 * NULL entries need no special care since usb_kill_urb
	 * accepts a NULL pointer.
	 */
	for (idx = 0; idx < count; idx++)
		usb_kill_urb(dev->isoc_ctl.urb_ctl[idx].urb);

	stk1160_dbg("all urbs killed\n");
}
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
static void stk_free_urb(struct stk1160 *dev, struct stk1160_urb *stk_urb)
{
usb_free_noncoherent(dev->udev, stk_urb->urb->transfer_buffer_length,
stk_urb->transfer_buffer, DMA_FROM_DEVICE,
stk_urb->sgt);
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
usb_free_urb(stk_urb->urb);
stk_urb->transfer_buffer = NULL;
stk_urb->sgt = NULL;
stk_urb->urb = NULL;
stk_urb->dev = NULL;
stk_urb->dma = 0;
}
/*
 * Release every URB and transfer buffer recorded in dev->isoc_ctl and set
 * the recorded buffer count back to zero.
 *
 * Obviously, the associated URBs must be killed before releasing them.
 */
void stk1160_free_isoc(struct stk1160 *dev)
{
	int i, num_bufs = dev->isoc_ctl.num_bufs;

	stk1160_dbg("freeing %d urb buffers...\n", num_bufs);

	for (i = 0; i < num_bufs; i++)
		stk_free_urb(dev, &dev->isoc_ctl.urb_ctl[i]);

	dev->isoc_ctl.num_bufs = 0;

	stk1160_dbg("all urb buffers freed\n");
}
/*
* Helper that first cancels all isoc URBs and then frees them together
* with their transfer buffers.
* This function can't be called in atomic context, because the cancel
* step is not allowed there.
*/
void stk1160_uninit_isoc(struct stk1160 *dev)
{
stk1160_cancel_isoc(dev);
stk1160_free_isoc(dev);
}
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
static int stk1160_fill_urb(struct stk1160 *dev, struct stk1160_urb *stk_urb,
int sb_size, int max_packets)
{
stk_urb->urb = usb_alloc_urb(max_packets, GFP_KERNEL);
if (!stk_urb->urb)
return -ENOMEM;
stk_urb->transfer_buffer = usb_alloc_noncoherent(dev->udev, sb_size,
GFP_KERNEL, &stk_urb->dma,
DMA_FROM_DEVICE, &stk_urb->sgt);
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
if (!stk_urb->transfer_buffer)
goto free_urb;
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
stk_urb->dev = dev;
return 0;
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
free_urb:
usb_free_urb(stk_urb->urb);
stk_urb->urb = NULL;
return 0;
}
/*
* Allocate URBs
*/
int stk1160_alloc_isoc(struct stk1160 *dev)
{
struct urb *urb;
int i, j, k, sb_size, max_packets, num_bufs;
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
int ret;
/*
* It may be necessary to release isoc here,
* since isoc are only released on disconnection.
* (see new_pkt_size flag)
*/
if (dev->isoc_ctl.num_bufs)
stk1160_uninit_isoc(dev);
stk1160_dbg("allocating urbs...\n");
num_bufs = STK1160_NUM_BUFS;
max_packets = STK1160_NUM_PACKETS;
sb_size = max_packets * dev->max_pkt_size;
dev->isoc_ctl.buf = NULL;
dev->isoc_ctl.max_pkt_size = dev->max_pkt_size;
/* allocate urbs and transfer buffers */
for (i = 0; i < num_bufs; i++) {
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
ret = stk1160_fill_urb(dev, &dev->isoc_ctl.urb_ctl[i],
sb_size, max_packets);
if (ret)
goto free_i_bufs;
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
urb = dev->isoc_ctl.urb_ctl[i].urb;
if (!urb) {
/* Not enough transfer buffers, so just give up */
if (i < STK1160_MIN_BUFS)
goto free_i_bufs;
goto nomore_tx_bufs;
}
memset(dev->isoc_ctl.urb_ctl[i].transfer_buffer, 0, sb_size);
/*
* FIXME: Where can I get the endpoint?
*/
urb->dev = dev->udev;
urb->pipe = usb_rcvisocpipe(dev->udev, STK1160_EP_VIDEO);
urb->transfer_buffer = dev->isoc_ctl.urb_ctl[i].transfer_buffer;
urb->transfer_buffer_length = sb_size;
urb->complete = stk1160_isoc_irq;
urb->context = dev;
urb->interval = 1;
urb->start_frame = 0;
urb->number_of_packets = max_packets;
urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
media: stk1160: use dma_alloc_noncontiguous API Replace the urb buffers allocation to use the noncontiguous API. This improves performance on ARM platforms where DMA coherent allocations produce uncached mappings. Note that the noncontiguous API requires the driver to handle synchronization. This commit is similar to this one for the uvc driver: https://lkml.org/lkml/2021/3/12/1506 Performance tests on rock-pi4 (Arm64) shows about 15x improvements: == DMA NONCONTIGUOUS == total durations: 20.63678480 sec urb processing durations: 0.286864889 sec uS/qty: 286864/2508 avg: 114.379 min: 0.583 max: 155.461 (uS) FPS: 24.92 lost: 0 done: 500 raw decode speed: 11.603 Gbits/s bytes 414831228.000 bytes/urb: 165403 == DMA COHERENT == total durations: 20.73551767 sec urb processing durations: 4.541559160 sec uS/qty: 4541559/2509 avg: 1810.107 min: 0.583 max: 2113.163 (uS) FPS: 24.90 lost: 0 done: 500 raw decode speed: 730.738 Mbits/s bytes 414785444.000 bytes/urb: 165319 Performance tests on x86 laptop show no significant difference: == DMA NONCONTIGUOUS == total durations: 20.220590102 sec urb processing durations: 0.63021818 sec uS/qty: 63021/2512 avg: 25.088 min: 0.138 max: 146.750 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 52.751 Gbits/s bytes 415421032.000 bytes/urb: 165374 == DMA COHERENT == total durations: 20.220475614 sec urb processing durations: 0.64751972 sec uS/qty: 64751/2512 avg: 25.777 min: 0.168 max: 132.250 (uS) FPS: 24.72 lost: 0 done: 500 raw decode speed: 51.927 Gbits/s bytes 415422794.000 bytes/urb: 165375 [hverkuil: incorporated Ezequiel's suggestions from his review] Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com> Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
2022-01-25 09:02:13 +01:00
urb->transfer_dma = dev->isoc_ctl.urb_ctl[i].dma;
urb->sgt = dev->isoc_ctl.urb_ctl[i].sgt;
k = 0;
for (j = 0; j < max_packets; j++) {
urb->iso_frame_desc[j].offset = k;
urb->iso_frame_desc[j].length =
dev->isoc_ctl.max_pkt_size;
k += dev->isoc_ctl.max_pkt_size;
}
}
stk1160_dbg("%d urbs allocated\n", num_bufs);
/* At last we can say we have some buffers */
dev->isoc_ctl.num_bufs = num_bufs;
return 0;
nomore_tx_bufs:
/*
* Failed to allocate desired buffer count. However, we may have
* enough to work fine, so we just free the extra urb,
* store the allocated count and keep going, fingers crossed!
*/
stk1160_warn("%d urbs allocated. Trying to continue...\n", i);
dev->isoc_ctl.num_bufs = i;
return 0;
free_i_bufs:
/* Save the allocated buffers so far, so we can properly free them */
dev->isoc_ctl.num_bufs = i;
stk1160_free_isoc(dev);
return -ENOMEM;
}