linux/tools/testing/selftests/ublk/stripe.c
Ming Lei 57ed58c132 selftests: ublk: enable zero copy for stripe target
Use io_uring vectored fixed kernel buffer for handling stripe IO.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250325135155.935398-5-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2025-04-02 07:07:00 -06:00

353 lines
8.9 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include "kublk.h"
#define NR_STRIPE MAX_BACK_FILES
/* Stripe geometry of one device; stored in the device's private_data. */
struct stripe_conf {
	unsigned int nr_files;	/* number of backing files data is striped over */
	unsigned int shift;	/* log2 of the chunk size in bytes */
};
/* The part of one request that goes to a single backing file. */
struct stripe {
	loff_t start;		/* first sector of this part inside the backing file */
	unsigned int nr_sects;	/* sectors accumulated so far; 0 means not used yet */
	int seq;		/* chunk index inside the stripe unit; selects the backing file */
	struct iovec *vec;	/* buffer segments making up this part */
	unsigned int nr_vec;	/* number of entries of vec filled in */
	unsigned int cap;	/* number of entries available in vec */
};
/* Every per-file part of one request, plus the iovec storage they share. */
struct stripe_array {
	struct stripe s[NR_STRIPE];	/* per-file parts; only the first nr are in use */
	unsigned int nr;		/* number of parts in use */
	struct iovec _vec[];		/* storage divided among the s[].vec pointers */
};
/* Return the stripe configuration hanging off the queue's device. */
static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q)
{
	const struct stripe_conf *conf =
		(const struct stripe_conf *)q->dev->private_data;

	return conf;
}
static inline unsigned calculate_nr_vec(const struct stripe_conf *conf,
const struct ublksrv_io_desc *iod)
{
const unsigned shift = conf->shift - 9;
const unsigned unit_sects = conf->nr_files << shift;
loff_t start = iod->start_sector;
loff_t end = start + iod->nr_sectors;
return (end / unit_sects) - (start / unit_sects) + 1;
}
/*
 * Allocate the bookkeeping used to split one request over the backing files.
 *
 * The header and all iovec storage come from a single allocation; each of
 * the conf->nr_files parts is given its own run of nr_vecs iovec entries.
 *
 * Returns the new array (release it with free_stripe_array()), or NULL if
 * the allocation fails.
 */
static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf,
		const struct ublksrv_io_desc *iod)
{
	unsigned nr_vecs = calculate_nr_vec(conf, iod);
	unsigned total = nr_vecs * conf->nr_files;
	struct stripe_array *s;
	unsigned i;

	s = malloc(sizeof(*s) + total * sizeof(struct iovec));
	if (!s)
		return NULL;

	s->nr = 0;
	for (i = 0; i < conf->nr_files; i++) {
		struct stripe *t = &s->s[i];

		t->nr_vec = 0;
		t->vec = &s->_vec[i * nr_vecs];
		/* nr_sects == 0 marks the part as not used yet */
		t->nr_sects = 0;
		t->cap = nr_vecs;
	}

	return s;
}
/*
 * Release a stripe_array. The iovec storage is part of the same allocation,
 * so a single free() is enough.
 */
static void free_stripe_array(struct stripe_array *s)
{
	free(s);
}
static void calculate_stripe_array(const struct stripe_conf *conf,
const struct ublksrv_io_desc *iod, struct stripe_array *s)
{
const unsigned shift = conf->shift - 9;
const unsigned chunk_sects = 1 << shift;
const unsigned unit_sects = conf->nr_files << shift;
off64_t start = iod->start_sector;
off64_t end = start + iod->nr_sectors;
unsigned long done = 0;
unsigned idx = 0;
while (start < end) {
unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1));
loff_t unit_off = (start / unit_sects) * unit_sects;
unsigned seq = (start - unit_off) >> shift;
struct stripe *this = &s->s[idx];
loff_t stripe_off = (unit_off / conf->nr_files) +
(start & (chunk_sects - 1));
if (nr_sects > end - start)
nr_sects = end - start;
if (this->nr_sects == 0) {
this->nr_sects = nr_sects;
this->start = stripe_off;
this->seq = seq;
s->nr += 1;
} else {
assert(seq == this->seq);
assert(this->start + this->nr_sects == stripe_off);
this->nr_sects += nr_sects;
}
assert(this->nr_vec < this->cap);
this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done);
this->vec[this->nr_vec++].iov_len = nr_sects << 9;
start += nr_sects;
done += nr_sects << 9;
idx = (idx + 1) % conf->nr_files;
}
}
/*
 * Map a ublk READ/WRITE request to the vectored io_uring opcode, choosing
 * the fixed-buffer variant when zc is non-zero.
 *
 * Any other ublk opcode is a programming error: the function never returns
 * in that case, even when assert() is compiled out with NDEBUG.
 */
static inline enum io_uring_op stripe_to_uring_op(
		const struct ublksrv_io_desc *iod, int zc)
{
	switch (ublksrv_get_op(iod)) {
	case UBLK_IO_OP_READ:
		return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;
	case UBLK_IO_OP_WRITE:
		return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;
	default:
		break;
	}

	assert(0);
	/* do not fall off the end of a non-void function under NDEBUG */
	abort();
}
static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
int zc = !!(ublk_queue_use_zc(q) != 0);
enum io_uring_op op = stripe_to_uring_op(iod, zc);
struct io_uring_sqe *sqe[NR_STRIPE];
struct stripe_array *s = alloc_stripe_array(conf, iod);
struct ublk_io *io = ublk_get_io(q, tag);
int i, extra = zc ? 2 : 0;
io->private_data = s;
calculate_stripe_array(conf, iod, s);
ublk_queue_alloc_sqes(q, sqe, s->nr + extra);
if (zc) {
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
}
for (i = zc; i < s->nr + extra - zc; i++) {
struct stripe *t = &s->s[i - zc];
io_uring_prep_rw(op, sqe[i],
t->seq + 1,
(void *)t->vec,
t->nr_vec,
t->start << 9);
if (zc) {
sqe[i]->buf_index = tag;
io_uring_sqe_set_flags(sqe[i],
IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK);
} else {
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
}
/* bit63 marks us as tgt io */
sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1);
}
if (zc) {
struct io_uring_sqe *unreg = sqe[s->nr + 1];
io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag);
unreg->user_data = build_user_data(tag, ublk_cmd_op_nr(unreg->cmd_op), 0, 1);
}
/* register buffer is skip_success */
return s->nr + zc;
}
static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
struct io_uring_sqe *sqe[NR_STRIPE];
int i;
ublk_queue_alloc_sqes(q, sqe, conf->nr_files);
for (i = 0; i < conf->nr_files; i++) {
io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, 1);
}
return conf->nr_files;
}
/*
 * Dispatch one ublk request by opcode.
 *
 * Returns the number of target I/Os queued, -ENOTSUP for WRITE_ZEROES and
 * DISCARD, or -EINVAL for any other unhandled opcode.
 */
static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	const unsigned ublk_op = ublksrv_get_op(iod);
	int ret;

	if (ublk_op == UBLK_IO_OP_READ || ublk_op == UBLK_IO_OP_WRITE)
		ret = stripe_queue_tgt_rw_io(q, iod, tag);
	else if (ublk_op == UBLK_IO_OP_FLUSH)
		ret = handle_flush(q, iod, tag);
	else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES ||
		 ublk_op == UBLK_IO_OP_DISCARD)
		ret = -ENOTSUP;
	else
		ret = -EINVAL;

	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag,
			iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret);

	return ret;
}
/*
 * queue_io callback: queue the target I/O for the request and hand the
 * outcome (count or negative error) to ublk_queued_tgt_io().  Always
 * returns 0.
 */
static int ublk_stripe_queue_io(struct ublk_queue *q, int tag)
{
	const int nr_queued = stripe_queue_tgt_io(q, tag);

	ublk_queued_tgt_io(q, tag, nr_queued);

	return 0;
}
/*
 * tgt_io_done callback: account for one completion of a target I/O.
 *
 * io->result only ever records a failure (the first negative completion, or
 * -EIO for a short read/write); it stays 0 while everything succeeds.  When
 * the last completion of the request arrives, the request is completed with
 * the recorded error, or with its full byte count on success, and the
 * stripe array is released.
 */
static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
		const struct io_uring_cqe *cqe)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	unsigned op = user_data_to_op(cqe->user_data);
	struct ublk_io *io = ublk_get_io(q, tag);
	int res = cqe->res;

	/*
	 * Keep the first error only.  Byte counts of successful parts must
	 * not be stored here: they would hide a later failure and make the
	 * request complete with a partial size.
	 */
	if (res < 0) {
		if (!io->result)
			io->result = res;
		ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
	}

	/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
	if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
		io->tgt_ios += 1;

	/* fail short READ/WRITE simply */
	if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
		unsigned seq = user_data_to_tgt_data(cqe->user_data);
		struct stripe_array *s = io->private_data;
		unsigned expected = s->s[seq].nr_sects << 9;

		/* negative results were already handled as failures above */
		if (res >= 0 && (unsigned)res < expected) {
			io->result = -EIO;
			ublk_err("%s: short rw op %u res %d exp %u tag %u\n",
					__func__, op, res, expected, tag);
		}
	}

	if (ublk_completed_tgt_io(q, tag)) {
		int result = io->result;

		/* no error recorded: the whole request succeeded */
		if (!result)
			result = iod->nr_sectors << 9;

		ublk_complete_io(q, tag, result);

		free_stripe_array(io->private_data);
		io->private_data = NULL;
	}
}
/*
 * init_tgt callback: validate the configuration and set up the stripe
 * target.
 *
 * The chunk size must be a power of two between 4 KiB and 512 KiB.  Every
 * backing file is rounded down to a multiple of the chunk size and all of
 * them must then have the same size; the device size is their sum.
 *
 * Returns 0 on success, -EINVAL for an invalid chunk size or set of backing
 * files, -ENOMEM if the configuration cannot be allocated, or the error
 * returned by backing_file_tgt_init().
 */
static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC,
		.basic = {
			.attrs = UBLK_ATTR_VOLATILE_CACHE,
			.logical_bs_shift = 9,
			.physical_bs_shift = 12,
			.io_opt_shift = 12,
			.io_min_shift = 9,
			.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
		},
	};
	unsigned chunk_size = ctx->chunk_size;
	struct stripe_conf *conf;
	unsigned chunk_shift;
	loff_t bytes = 0;
	int ret, i, mul = 1;

	/* must be a non-zero power of two */
	if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
		ublk_err("invalid chunk size %u\n", chunk_size);
		return -EINVAL;
	}

	if (chunk_size < 4096 || chunk_size > 512 * 1024) {
		ublk_err("invalid chunk size %u\n", chunk_size);
		return -EINVAL;
	}

	chunk_shift = ilog2(chunk_size);

	ret = backing_file_tgt_init(dev);
	if (ret)
		return ret;

	if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
		return -EINVAL;

	assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);

	/* only whole chunks of each backing file are usable */
	for (i = 0; i < dev->tgt.nr_backing_files; i++)
		dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);

	for (i = 0; i < dev->tgt.nr_backing_files; i++) {
		unsigned long size = dev->tgt.backing_file_size[i];

		if (size != dev->tgt.backing_file_size[0])
			return -EINVAL;
		bytes += size;
	}

	conf = malloc(sizeof(*conf));
	/*
	 * NOTE(review): like the -EINVAL returns above, this leaves the
	 * backing files set up; presumably the caller cleans up - confirm.
	 */
	if (!conf)
		return -ENOMEM;
	conf->shift = chunk_shift;
	conf->nr_files = dev->tgt.nr_backing_files;

	dev->private_data = conf;
	dev->tgt.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;
	dev->tgt.params = p;

	/* zero copy queues extra buffer register/unregister commands per io */
	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
		mul = 2;
	dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
	dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;

	printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);

	return 0;
}
/*
 * deinit_tgt callback: free the stripe configuration, then tear down the
 * backing files.
 */
static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
{
	void *priv = dev->private_data;

	free(priv);
	backing_file_tgt_deinit(dev);
}
/* Callback table that exposes the stripe target under the name "stripe". */
const struct ublk_tgt_ops stripe_tgt_ops = {
	.name		= "stripe",
	.init_tgt	= ublk_stripe_tgt_init,
	.deinit_tgt	= ublk_stripe_tgt_deinit,
	.queue_io	= ublk_stripe_queue_io,
	.tgt_io_done	= ublk_stripe_io_done,
};