linux/drivers/net/ethernet/microsoft/mana/hw_channel.c

910 lines
22 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */
#include <net/mana/gdma.h>
#include <net/mana/mana.h>
#include <net/mana/hw_channel.h>
fix missing vmalloc.h includes Patch series "Memory allocation profiling", v6. Overview: Low overhead [1] per-callsite memory allocation profiling. Not just for debug kernels, overhead low enough to be deployed in production. Example output: root@moria-kvm:~# sort -rn /proc/allocinfo 127664128 31168 mm/page_ext.c:270 func:alloc_page_ext 56373248 4737 mm/slub.c:2259 func:alloc_slab_page 14880768 3633 mm/readahead.c:247 func:page_cache_ra_unbounded 14417920 3520 mm/mm_init.c:2530 func:alloc_large_system_hash 13377536 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs 11718656 2861 mm/filemap.c:1919 func:__filemap_get_folio 9192960 2800 kernel/fork.c:307 func:alloc_thread_stack_node 4206592 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable 4136960 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start 3940352 962 mm/memory.c:4214 func:alloc_anon_folio 2894464 22613 fs/kernfs/dir.c:615 func:__kernfs_new_node ... Usage: kconfig options: - CONFIG_MEM_ALLOC_PROFILING - CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT - CONFIG_MEM_ALLOC_PROFILING_DEBUG adds warnings for allocations that weren't accounted because of a missing annotation sysctl: /proc/sys/vm/mem_profiling Runtime info: /proc/allocinfo Notes: [1]: Overhead To measure the overhead we are comparing the following configurations: (1) Baseline with CONFIG_MEMCG_KMEM=n (2) Disabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n) (3) Enabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=y) (4) Enabled at runtime (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n && /proc/sys/vm/mem_profiling=1) (5) Baseline with CONFIG_MEMCG_KMEM=y && allocating with __GFP_ACCOUNT (6) Disabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n) && CONFIG_MEMCG_KMEM=y (7) Enabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=y) && 
CONFIG_MEMCG_KMEM=y Performance overhead: To evaluate performance we implemented an in-kernel test executing multiple get_free_page/free_page and kmalloc/kfree calls with allocation sizes growing from 8 to 240 bytes with CPU frequency set to max and CPU affinity set to a specific CPU to minimize the noise. Below are results from running the test on Ubuntu 22.04.2 LTS with 6.8.0-rc1 kernel on 56 core Intel Xeon: kmalloc pgalloc (1 baseline) 6.764s 16.902s (2 default disabled) 6.793s (+0.43%) 17.007s (+0.62%) (3 default enabled) 7.197s (+6.40%) 23.666s (+40.02%) (4 runtime enabled) 7.405s (+9.48%) 23.901s (+41.41%) (5 memcg) 13.388s (+97.94%) 48.460s (+186.71%) (6 def disabled+memcg) 13.332s (+97.10%) 48.105s (+184.61%) (7 def enabled+memcg) 13.446s (+98.78%) 54.963s (+225.18%) Memory overhead: Kernel size: text data bss dec diff (1) 26515311 18890222 17018880 62424413 (2) 26524728 19423818 16740352 62688898 264485 (3) 26524724 19423818 16740352 62688894 264481 (4) 26524728 19423818 16740352 62688898 264485 (5) 26541782 18964374 16957440 62463596 39183 Memory consumption on a 56 core Intel CPU with 125GB of memory: Code tags: 192 kB PageExts: 262144 kB (256MB) SlabExts: 9876 kB (9.6MB) PcpuExts: 512 kB (0.5MB) Total overhead is 0.2% of total memory. 
Benchmarks: Hackbench tests run 100 times: hackbench -s 512 -l 200 -g 15 -f 25 -P baseline disabled profiling enabled profiling avg 0.3543 0.3559 (+0.0016) 0.3566 (+0.0023) stdev 0.0137 0.0188 0.0077 hackbench -l 10000 baseline disabled profiling enabled profiling avg 6.4218 6.4306 (+0.0088) 6.5077 (+0.0859) stdev 0.0933 0.0286 0.0489 stress-ng tests: stress-ng --class memory --seq 4 -t 60 stress-ng --class cpu --seq 4 -t 60 Results posted at: https://evilpiepirate.org/~kent/memalloc_prof_v4_stress-ng/ [2] https://lore.kernel.org/all/20240306182440.2003814-1-surenb@google.com/ This patch (of 37): The next patch drops vmalloc.h from a system header in order to fix a circular dependency; this adds it to all the files that were pulling it in implicitly. [kent.overstreet@linux.dev: fix arch/alpha/lib/memcpy.c] Link: https://lkml.kernel.org/r/20240327002152.3339937-1-kent.overstreet@linux.dev [surenb@google.com: fix arch/x86/mm/numa_32.c] Link: https://lkml.kernel.org/r/20240402180933.1663992-1-surenb@google.com [kent.overstreet@linux.dev: a few places were depending on sizes.h] Link: https://lkml.kernel.org/r/20240404034744.1664840-1-kent.overstreet@linux.dev [arnd@arndb.de: fix mm/kasan/hw_tags.c] Link: https://lkml.kernel.org/r/20240404124435.3121534-1-arnd@kernel.org [surenb@google.com: fix arc build] Link: https://lkml.kernel.org/r/20240405225115.431056-1-surenb@google.com Link: https://lkml.kernel.org/r/20240321163705.3067592-1-surenb@google.com Link: https://lkml.kernel.org/r/20240321163705.3067592-2-surenb@google.com Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> Signed-off-by: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com> Tested-by: Kees Cook <keescook@chromium.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alex Gaynor <alex.gaynor@gmail.com> Cc: Alice Ryhl <aliceryhl@google.com> Cc: Andreas Hindborg <a.hindborg@samsung.com> Cc: Benno Lossin 
<benno.lossin@proton.me> Cc: "Björn Roy Baron" <bjorn3_gh@protonmail.com> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Christoph Lameter <cl@linux.com> Cc: Dennis Zhou <dennis@kernel.org> Cc: Gary Guo <gary@garyguo.net> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tejun Heo <tj@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wedson Almeida Filho <wedsonaf@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-03-21 09:36:23 -07:00
#include <linux/vmalloc.h>
/* Reserve a slot in the in-flight message bitmap.
 *
 * Acquires hwc->sema with down(), then, while holding the resource
 * spinlock, marks the lowest clear bit of the bitmap as used and reports
 * its position through @msg_id. The semaphore and the bitmap are both
 * sized from the same count in mana_hwc_init_inflight_msg().
 *
 * Always returns 0.
 */
static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id)
{
	struct gdma_resource *res = &hwc->inflight_msg_res;
	unsigned long irq_flags;
	u32 slot;

	down(&hwc->sema);

	spin_lock_irqsave(&res->lock, irq_flags);
	slot = find_first_zero_bit(res->map, res->size);
	bitmap_set(res->map, slot, 1);
	spin_unlock_irqrestore(&res->lock, irq_flags);

	*msg_id = slot;

	return 0;
}
/* Give back a slot obtained from mana_hwc_get_msg_index().
 *
 * Clears bit @msg_id in the in-flight bitmap under the resource spinlock,
 * then releases hwc->sema with up().
 */
static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, u16 msg_id)
{
	struct gdma_resource *res = &hwc->inflight_msg_res;
	unsigned long irq_flags;

	spin_lock_irqsave(&res->lock, irq_flags);
	bitmap_clear(res->map, msg_id, 1);
	spin_unlock_irqrestore(&res->lock, irq_flags);

	up(&hwc->sema);
}
/* Check that the length of a received response is plausible.
 *
 * Returns -EPROTO if @resp_len is smaller than the response header or
 * larger than the caller's output buffer; returns 0 otherwise.
 */
static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx,
				    const struct gdma_resp_hdr *resp_msg,
				    u32 resp_len)
{
	if (resp_len < sizeof(*resp_msg) ||
	    resp_len > caller_ctx->output_buflen)
		return -EPROTO;

	return 0;
}
/* Post one receive work request on the HWC RQ.
 *
 * Fills the request's single SGE with the request's buffer address and
 * length, rebuilds the WQE request from scratch and hands it to
 * mana_gd_post_and_ring().
 *
 * Returns the result of mana_gd_post_and_ring(); a non-zero result is
 * also logged.
 */
static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
				struct hwc_work_request *req)
{
	struct gdma_sge *rx_sge = &req->sge;
	int rc;

	rx_sge->address = (u64)req->buf_sge_addr;
	rx_sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
	rx_sge->size = req->buf_len;

	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
	req->wqe_req.sgl = rx_sge;
	req->wqe_req.num_sge = 1;
	req->wqe_req.client_data_unit = 0;

	rc = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL);
	if (rc)
		dev_err(hwc_rxq->hwc->dev,
			"Failed to post WQE on HWC RQ: %d\n", rc);

	return rc;
}
/* Deliver a received response to the caller waiting on it.
 *
 * The response's hwc_msg_id selects the caller context. If that id is not
 * currently marked in-flight, the response is dropped: the error is logged
 * and the RX buffer is reposted. Otherwise the length is validated, the
 * status and payload are copied to the caller on success, the validation
 * result is stored in ctx->error, the RX buffer is reposted and the
 * caller's completion is signalled.
 */
static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
				 struct hwc_work_request *rx_req)
{
	const struct gdma_resp_hdr *resp_msg = rx_req->buf_va;
	struct hwc_caller_ctx *caller;
	int rc;

	if (!test_bit(resp_msg->response.hwc_msg_id,
		      hwc->inflight_msg_res.map)) {
		dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n",
			resp_msg->response.hwc_msg_id);
		mana_hwc_post_rx_wqe(hwc->rxq, rx_req);
		return;
	}

	caller = &hwc->caller_ctx[resp_msg->response.hwc_msg_id];

	rc = mana_hwc_verify_resp_msg(caller, resp_msg, resp_len);
	if (!rc) {
		caller->status_code = resp_msg->status;
		memcpy(caller->output_buf, resp_msg, resp_len);
	}

	caller->error = rc;

	/* The RX WQE has to be reposted before complete(); otherwise the
	 * next receive may hit a no_wqe error.
	 */
	mana_hwc_post_rx_wqe(hwc->rxq, rx_req);

	complete(&caller->comp_event);
}
static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
struct gdma_event *event)
{
union hwc_init_soc_service_type service_data;
struct hw_channel_context *hwc = ctx;
struct gdma_dev *gd = hwc->gdma_dev;
union hwc_init_type_data type_data;
union hwc_init_eq_id_db eq_db;
u32 type, val;
int ret;
switch (event->type) {
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
eq_db.as_uint32 = event->details[0];
hwc->cq->gdma_eq->id = eq_db.eq_id;
gd->doorbell = eq_db.doorbell;
break;
case GDMA_EQE_HWC_INIT_DATA:
type_data.as_uint32 = event->details[0];
type = type_data.type;
val = type_data.value;
switch (type) {
case HWC_INIT_DATA_CQID:
hwc->cq->gdma_cq->id = val;
break;
case HWC_INIT_DATA_RQID:
hwc->rxq->gdma_wq->id = val;
break;
case HWC_INIT_DATA_SQID:
hwc->txq->gdma_wq->id = val;
break;
case HWC_INIT_DATA_QUEUE_DEPTH:
hwc->hwc_init_q_depth_max = (u16)val;
break;
case HWC_INIT_DATA_MAX_REQUEST:
hwc->hwc_init_max_req_msg_size = val;
break;
case HWC_INIT_DATA_MAX_RESPONSE:
hwc->hwc_init_max_resp_msg_size = val;
break;
case HWC_INIT_DATA_MAX_NUM_CQS:
gd->gdma_context->max_num_cqs = val;
break;
case HWC_INIT_DATA_PDID:
hwc->gdma_dev->pdid = val;
break;
case HWC_INIT_DATA_GPA_MKEY:
hwc->rxq->msg_buf->gpa_mkey = val;
hwc->txq->msg_buf->gpa_mkey = val;
break;
case HWC_INIT_DATA_PF_DEST_RQ_ID:
hwc->pf_dest_vrq_id = val;
break;
case HWC_INIT_DATA_PF_DEST_CQ_ID:
hwc->pf_dest_vrcq_id = val;
break;
}
break;
case GDMA_EQE_HWC_INIT_DONE:
complete(&hwc->hwc_init_eqe_comp);
break;
case GDMA_EQE_HWC_SOC_RECONFIG_DATA:
type_data.as_uint32 = event->details[0];
type = type_data.type;
val = type_data.value;
switch (type) {
case HWC_DATA_CFG_HWC_TIMEOUT:
hwc->hwc_timeout = val;
break;
default:
dev_warn(hwc->dev, "Received unknown reconfig type %u\n", type);
break;
}
break;
case GDMA_EQE_HWC_SOC_SERVICE:
service_data.as_uint32 = event->details[0];
type = service_data.type;
switch (type) {
case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
case GDMA_SERVICE_TYPE_RDMA_RESUME:
ret = mana_rdma_service_event(gd->gdma_context, type);
if (ret)
dev_err(hwc->dev, "Failed to schedule adev service event: %d\n",
ret);
break;
default:
dev_warn(hwc->dev, "Received unknown SOC service type %u\n", type);
break;
}
break;
default:
dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type);
/* Ignore unknown events, which should never happen. */
break;
}
}
/* Handle one RX completion on the hardware channel.
 *
 * @ctx:         the hw_channel_context.
 * @gdma_rxq_id: id of the work queue the completion belongs to; must match
 *               the HWC RQ, otherwise the completion is ignored (with a
 *               one-time warning).
 * @rx_oob:      completion data; wqe_offset locates the completed WQE and
 *               tx_oob_data_size is passed on as the response length.
 *
 * The SGE address stored in the completed WQE is mapped back to the RX work
 * request that owns the buffer, and the response in that buffer is handed
 * to mana_hwc_handle_resp(), which also reposts the buffer.
 *
 * A response with an out-of-range msg_id is dropped, but its buffer is
 * still reposted so the RQ does not permanently lose a WQE (the same
 * policy mana_hwc_handle_resp() applies to a msg_id that is not in flight).
 */
static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id,
				      const struct hwc_rx_oob *rx_oob)
{
	struct hw_channel_context *hwc = ctx;
	struct hwc_wq *hwc_rxq = hwc->rxq;
	struct hwc_work_request *rx_req;
	struct gdma_resp_hdr *resp;
	struct gdma_wqe *dma_oob;
	struct gdma_queue *rq;
	struct gdma_sge *sge;
	u64 rq_base_addr;
	u64 rx_req_idx;
	u8 *wqe;

	if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id))
		return;

	rq = hwc_rxq->gdma_wq;
	wqe = mana_gd_get_wqe_ptr(rq, rx_oob->wqe_offset / GDMA_WQE_BU_SIZE);
	dma_oob = (struct gdma_wqe *)wqe;

	/* The SGE follows the first 8 bytes of the WQE plus the inline OOB,
	 * whose size is stored in units of 4 bytes.
	 * NOTE(review): the 8 presumably is the WQE header size - confirm
	 * against the WQE layout definition.
	 */
	sge = (struct gdma_sge *)(wqe + 8 + dma_oob->inline_oob_size_div4 * 4);

	/* Select the RX work request for virtual address and for reposting. */
	rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle;
	rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size;

	/* The index is derived from an address read back from the WQE; never
	 * let it run past the reqs[] array. An address below the buffer base
	 * wraps to a huge value and is caught here as well. There is no
	 * request to repost in this case.
	 */
	if (rx_req_idx >= hwc_rxq->msg_buf->num_reqs) {
		dev_err(hwc->dev, "HWC RX: wrong rx_req_idx=%llu\n",
			rx_req_idx);
		return;
	}

	rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx];
	resp = (struct gdma_resp_hdr *)rx_req->buf_va;

	if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) {
		dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n",
			resp->response.hwc_msg_id);
		/* Drop the response but give the buffer back to the RQ. */
		mana_hwc_post_rx_wqe(hwc_rxq, rx_req);
		return;
	}

	mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req);

	/* Can no longer use 'resp', because the buffer is posted to the HW
	 * in mana_hwc_handle_resp() above.
	 */
	resp = NULL;
}
/* Handle one TX completion on the hardware channel.
 *
 * Nothing needs to be done for a TX completion; the handler only warns
 * (once) if the channel has no TX queue or if @gdma_txq_id is not that
 * queue's id.
 */
static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id,
				      const struct hwc_rx_oob *rx_oob)
{
	struct hw_channel_context *channel = ctx;
	struct hwc_wq *txq = channel->txq;

	WARN_ON_ONCE(!txq || txq->gdma_wq->id != gdma_txq_id);
}
/* Create the GDMA work queue backing an HWC SQ or RQ.
 *
 * Returns -EINVAL if @type is neither GDMA_SQ nor GDMA_RQ, otherwise the
 * result of mana_gd_create_hwc_queue(), which stores the new queue in
 * @queue.
 */
static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc,
				   enum gdma_queue_type type, u64 queue_size,
				   struct gdma_queue **queue)
{
	struct gdma_queue_spec spec = {
		.type = type,
		.monitor_avl_buf = false,
		.queue_size = queue_size,
	};

	if (type != GDMA_SQ && type != GDMA_RQ)
		return -EINVAL;

	return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue);
}
/* Create the GDMA completion queue used by the hardware channel.
 *
 * @ctx and @cb are installed as the CQ's callback context and callback,
 * and @parent_eq as the EQ the CQ is attached to. Returns the result of
 * mana_gd_create_hwc_queue(), which stores the new queue in @queue.
 */
static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc,
				   u64 queue_size,
				   void *ctx, gdma_cq_callback *cb,
				   struct gdma_queue *parent_eq,
				   struct gdma_queue **queue)
{
	struct gdma_queue_spec spec = {
		.type = GDMA_CQ,
		.monitor_avl_buf = false,
		.queue_size = queue_size,
	};

	spec.cq.context = ctx;
	spec.cq.callback = cb;
	spec.cq.parent_eq = parent_eq;

	return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue);
}
/* Create the GDMA event queue used by the hardware channel.
 *
 * @ctx and @cb are installed as the EQ's callback context and callback.
 * The EQ uses MSI-X index 0 and the default error-EQ throttling limit.
 * Returns the result of mana_gd_create_hwc_queue(), which stores the new
 * queue in @queue.
 */
static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc,
				   u64 queue_size,
				   void *ctx, gdma_eq_callback *cb,
				   struct gdma_queue **queue)
{
	struct gdma_queue_spec spec = {
		.type = GDMA_EQ,
		.monitor_avl_buf = false,
		.queue_size = queue_size,
	};

	spec.eq.context = ctx;
	spec.eq.callback = cb;
	spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ;
	spec.eq.msix_index = 0;

	return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue);
}
static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)
{
struct hwc_rx_oob comp_data = {};
struct gdma_comp *completions;
struct hwc_cq *hwc_cq = ctx;
int comp_read, i;
WARN_ON_ONCE(hwc_cq->gdma_cq != q_self);
completions = hwc_cq->comp_buf;
comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth);
WARN_ON_ONCE(comp_read <= 0 || comp_read > hwc_cq->queue_depth);
for (i = 0; i < comp_read; ++i) {
comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data;
if (completions[i].is_sq)
hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx,
completions[i].wq_num,
&comp_data);
else
hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx,
completions[i].wq_num,
&comp_data);
}
mana_gd_ring_cq(q_self, SET_ARM_BIT);
}
/* Tear down an HWC completion queue object.
 *
 * Destroys the GDMA CQ and EQ if they were created, then frees the
 * completion buffer and the hwc_cq itself. Safe to call on a partially
 * constructed @hwc_cq (as mana_hwc_create_cq() does on its error path):
 * queue pointers that are still NULL are skipped and kfree(NULL) is a
 * no-op.
 */
static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq)
{
	if (hwc_cq->gdma_cq)
		mana_gd_destroy_queue(gc, hwc_cq->gdma_cq);

	if (hwc_cq->gdma_eq)
		mana_gd_destroy_queue(gc, hwc_cq->gdma_eq);

	/* Free the completion buffer only after the queues are gone:
	 * mana_hwc_comp_event() polls completions into comp_buf, so it must
	 * stay valid for as long as that callback can still be invoked.
	 */
	kfree(hwc_cq->comp_buf);
	kfree(hwc_cq);
}
/* Create the HWC completion queue object: an EQ, a CQ attached to it and a
 * buffer for @q_depth polled completions.
 *
 * @callback/@ctx:        EQ callback and its context.
 * @rx_ev_hdlr/@rx_ev_ctx: handler and context for RX completions.
 * @tx_ev_hdlr/@tx_ev_ctx: handler and context for TX completions.
 * @hwc_cq_ptr:           receives the new object on success.
 *
 * EQ and CQ sizes are the entry size times @q_depth, rounded up to a power
 * of two and raised to at least MANA_MIN_QSIZE.
 *
 * Returns 0 on success, -ENOMEM on allocation failure or the error from
 * queue creation. On failure everything created so far is destroyed.
 */
static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth,
			      gdma_eq_callback *callback, void *ctx,
			      hwc_rx_event_handler_t *rx_ev_hdlr,
			      void *rx_ev_ctx,
			      hwc_tx_event_handler_t *tx_ev_hdlr,
			      void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr)
{
	struct gdma_queue *eq, *cq;
	struct hwc_cq *new_cq;
	u32 eq_size, cq_size;
	int err;

	eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth);
	if (eq_size < MANA_MIN_QSIZE)
		eq_size = MANA_MIN_QSIZE;

	cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth);
	if (cq_size < MANA_MIN_QSIZE)
		cq_size = MANA_MIN_QSIZE;

	new_cq = kzalloc(sizeof(*new_cq), GFP_KERNEL);
	if (!new_cq)
		return -ENOMEM;

	err = mana_hwc_create_gdma_eq(hwc, eq_size, ctx, callback, &eq);
	if (err) {
		dev_err(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", err);
		goto err_destroy;
	}
	new_cq->gdma_eq = eq;

	err = mana_hwc_create_gdma_cq(hwc, cq_size, new_cq, mana_hwc_comp_event,
				      eq, &cq);
	if (err) {
		dev_err(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", err);
		goto err_destroy;
	}
	new_cq->gdma_cq = cq;

	new_cq->comp_buf = kcalloc(q_depth, sizeof(struct gdma_comp),
				   GFP_KERNEL);
	if (!new_cq->comp_buf) {
		err = -ENOMEM;
		goto err_destroy;
	}

	new_cq->hwc = hwc;
	new_cq->queue_depth = q_depth;
	new_cq->rx_event_handler = rx_ev_hdlr;
	new_cq->rx_event_ctx = rx_ev_ctx;
	new_cq->tx_event_handler = tx_ev_hdlr;
	new_cq->tx_event_ctx = tx_ev_ctx;

	*hwc_cq_ptr = new_cq;
	return 0;

err_destroy:
	/* Copes with a partially built object; see mana_hwc_destroy_cq(). */
	mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, new_cq);
	return err;
}
/* Allocate the message buffer of an HWC work queue.
 *
 * Allocates a hwc_dma_buf with @q_depth work requests and one DMA region of
 * q_depth * max_msg_size bytes (page aligned), then carves the region into
 * @q_depth consecutive slices of @max_msg_size bytes, one per work request.
 *
 * Returns 0 and stores the buffer in @dma_buf_ptr on success; returns
 * -ENOMEM or the error from mana_gd_alloc_memory() on failure.
 */
static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth,
				  u32 max_msg_size,
				  struct hwc_dma_buf **dma_buf_ptr)
{
	struct gdma_context *gc = hwc->gdma_dev->gdma_context;
	struct hwc_work_request *wr;
	struct hwc_dma_buf *buf;
	struct gdma_mem_info *gmi;
	void *va_base;
	u8 *pa_base;
	u32 region_size;
	u32 offset;
	int err;
	u16 i;

	buf = kzalloc(struct_size(buf, reqs, q_depth), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	buf->num_reqs = q_depth;

	region_size = MANA_PAGE_ALIGN(q_depth * max_msg_size);

	gmi = &buf->mem_info;
	err = mana_gd_alloc_memory(gc, region_size, gmi);
	if (err) {
		dev_err(hwc->dev, "Failed to allocate DMA buffer size: %u, err %d\n",
			region_size, err);
		kfree(buf);
		return err;
	}

	va_base = gmi->virt_addr;
	pa_base = (u8 *)gmi->dma_handle;

	for (i = 0; i < q_depth; i++) {
		offset = i * max_msg_size;
		wr = &buf->reqs[i];

		wr->buf_va = va_base + offset;
		wr->buf_sge_addr = pa_base + offset;
		wr->buf_len = max_msg_size;
	}

	*dma_buf_ptr = buf;
	return 0;
}
/* Release a message buffer created by mana_hwc_alloc_dma_buf().
 *
 * Frees the DMA region and then the descriptor. A NULL @dma_buf is
 * accepted and ignored.
 */
static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc,
				     struct hwc_dma_buf *dma_buf)
{
	if (dma_buf) {
		mana_gd_free_memory(&dma_buf->mem_info);
		kfree(dma_buf);
	}
}
/* Tear down an HWC work queue object.
 *
 * Releases the message buffer, destroys the GDMA queue if one was created
 * and frees @hwc_wq. Works on a partially constructed object: a NULL
 * msg_buf or gdma_wq is skipped.
 */
static void mana_hwc_destroy_wq(struct hw_channel_context *hwc,
				struct hwc_wq *hwc_wq)
{
	struct gdma_queue *wq;

	mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf);

	wq = hwc_wq->gdma_wq;
	if (wq)
		mana_gd_destroy_queue(hwc->gdma_dev->gdma_context, wq);

	kfree(hwc_wq);
}
/* Create an HWC send or receive work queue with its message buffer.
 *
 * @q_type:       GDMA_SQ or GDMA_RQ (anything else triggers a warning and
 *                is then rejected by mana_hwc_create_gdma_wq()).
 * @q_depth:      number of work requests.
 * @max_msg_size: size of each message buffer slice.
 * @hwc_cq:       completion queue object the work queue reports to.
 * @hwc_wq_ptr:   receives the new object on success.
 *
 * The queue size is the maximum entry size for the queue type times
 * @q_depth, rounded up to a power of two and raised to at least
 * MANA_MIN_QSIZE.
 *
 * Returns 0 on success. On failure the partially built object is
 * destroyed, the error is logged and returned.
 */
static int mana_hwc_create_wq(struct hw_channel_context *hwc,
			      enum gdma_queue_type q_type, u16 q_depth,
			      u32 max_msg_size, struct hwc_cq *hwc_cq,
			      struct hwc_wq **hwc_wq_ptr)
{
	struct gdma_queue *gdma_q;
	struct hwc_wq *wq;
	u32 queue_size;
	int err;

	WARN_ON(q_type != GDMA_SQ && q_type != GDMA_RQ);

	if (q_type == GDMA_RQ)
		queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth);
	else
		queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth);

	if (queue_size < MANA_MIN_QSIZE)
		queue_size = MANA_MIN_QSIZE;

	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		return -ENOMEM;

	err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &gdma_q);
	if (err)
		goto err_destroy;

	/* Record the queue before allocating the buffer so that the error
	 * path below destroys it as well.
	 */
	wq->hwc = hwc;
	wq->gdma_wq = gdma_q;
	wq->queue_depth = q_depth;
	wq->hwc_cq = hwc_cq;

	err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size,
				     &wq->msg_buf);
	if (err)
		goto err_destroy;

	*hwc_wq_ptr = wq;
	return 0;

err_destroy:
	mana_hwc_destroy_wq(hwc, wq);

	dev_err(hwc->dev, "Failed to create HWC queue size= %u type= %d err= %d\n",
		queue_size, q_type, err);
	return err;
}
/* Post one send work request on the HWC SQ.
 *
 * @req:              work request whose buffer already holds msg_size
 *                    bytes of message.
 * @dest_virt_rq_id:  destination RQ id placed in the TX OOB.
 * @dest_virt_rcq_id: destination CQ id placed in the TX OOB.
 * @dest_pf:          value of the OOB's dest_pf flag.
 *
 * Returns -EINVAL if msg_size is zero or larger than the request's buffer,
 * otherwise the result of mana_gd_post_and_ring() (logged when non-zero).
 */
static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq,
				struct hwc_work_request *req,
				u32 dest_virt_rq_id, u32 dest_virt_rcq_id,
				bool dest_pf)
{
	struct device *dev = hwc_txq->hwc->dev;
	struct hwc_tx_oob *oob = &req->tx_oob;
	struct gdma_sge *tx_sge = &req->sge;
	int rc;

	if (req->msg_size == 0 || req->msg_size > req->buf_len) {
		dev_err(dev, "wrong msg_size: %u, buf_len: %u\n",
			req->msg_size, req->buf_len);
		return -EINVAL;
	}

	/* Inline OOB: source and destination queue ids plus flags. */
	oob->vsq_id = hwc_txq->gdma_wq->id;
	oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id;
	oob->vrq_id = dest_virt_rq_id;
	oob->vrcq_id = dest_virt_rcq_id;
	oob->dest_vfid = 0;
	oob->dest_pf = dest_pf;
	oob->loopback = false;
	oob->lso_override = false;

	/* One SGE covering only the message, not the whole buffer. */
	tx_sge->address = (u64)req->buf_sge_addr;
	tx_sge->mem_key = hwc_txq->msg_buf->gpa_mkey;
	tx_sge->size = req->msg_size;

	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
	req->wqe_req.sgl = tx_sge;
	req->wqe_req.num_sge = 1;
	req->wqe_req.inline_oob_size = sizeof(struct hwc_tx_oob);
	req->wqe_req.inline_oob_data = oob;
	req->wqe_req.client_data_unit = 0;

	rc = mana_gd_post_and_ring(hwc_txq->gdma_wq, &req->wqe_req, NULL);
	if (rc)
		dev_err(dev, "Failed to post WQE on HWC SQ: %d\n", rc);

	return rc;
}
/* Set up the bookkeeping for in-flight messages.
 *
 * Initialises hwc->sema with a count of @num_msg and allocates the
 * in-flight resource map with @num_msg entries. Returns the result of
 * mana_gd_alloc_res_map(); a failure is also logged.
 */
static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc,
				      u16 num_msg)
{
	int rc;

	sema_init(&hwc->sema, num_msg);

	rc = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res);
	if (!rc)
		return 0;

	dev_err(hwc->dev, "Failed to init inflight_msg_res: %d\n", rc);
	return rc;
}
/* Arm the RQ, allocate the caller contexts and test the channel's EQ.
 *
 * Posts the first @q_depth RX work requests, allocates @q_depth caller
 * contexts with initialised completions, stores them in hwc->caller_ctx
 * and finally runs mana_gd_test_eq() on the HWC EQ.
 *
 * @max_req_msg_size and @max_resp_msg_size are not used by this function.
 *
 * Returns the first posting error, -ENOMEM if the contexts cannot be
 * allocated, or the result of mana_gd_test_eq().
 */
static int mana_hwc_test_channel(struct hw_channel_context *hwc, u16 q_depth,
				 u32 max_req_msg_size, u32 max_resp_msg_size)
{
	struct gdma_context *gc = hwc->gdma_dev->gdma_context;
	struct hwc_wq *rxq = hwc->rxq;
	struct hwc_caller_ctx *callers;
	int rc;
	int i;

	/* Post all WQEs on the RQ */
	for (i = 0; i < q_depth; i++) {
		rc = mana_hwc_post_rx_wqe(rxq, &rxq->msg_buf->reqs[i]);
		if (rc)
			return rc;
	}

	callers = kcalloc(q_depth, sizeof(*callers), GFP_KERNEL);
	if (!callers)
		return -ENOMEM;

	for (i = 0; i < q_depth; ++i)
		init_completion(&callers[i].comp_event);

	hwc->caller_ctx = callers;

	return mana_gd_test_eq(gc, hwc->cq->gdma_eq);
}
/* Hand the HWC queues to the device and wait for it to finish setup.
 *
 * Passes the DMA addresses of the EQ, CQ, RQ and SQ plus the EQ's MSI-X
 * index to mana_smc_setup_hwc(), then waits up to 60 seconds for
 * hwc_init_eqe_comp, which mana_hwc_init_event_handler() completes on
 * GDMA_EQE_HWC_INIT_DONE. The limits recorded by that handler are returned
 * through @q_depth, @max_req_msg_size and @max_resp_msg_size, and the CQ
 * table is allocated with the HWC CQ registered in it.
 *
 * Returns 0 on success, the error from mana_smc_setup_hwc(), -ETIMEDOUT if
 * the device did not signal completion, -EPROTO if the reported CQ id does
 * not fit in the reported number of CQs, or -ENOMEM.
 */
static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth,
				      u32 *max_req_msg_size,
				      u32 *max_resp_msg_size)
{
	struct hw_channel_context *hwc = gc->hwc.driver_data;
	struct gdma_queue *hwc_rq = hwc->rxq->gdma_wq;
	struct gdma_queue *hwc_sq = hwc->txq->gdma_wq;
	struct gdma_queue *hwc_eq = hwc->cq->gdma_eq;
	struct gdma_queue *hwc_cq = hwc->cq->gdma_cq;
	int rc;

	init_completion(&hwc->hwc_init_eqe_comp);

	rc = mana_smc_setup_hwc(&gc->shm_channel, false,
				hwc_eq->mem_info.dma_handle,
				hwc_cq->mem_info.dma_handle,
				hwc_rq->mem_info.dma_handle,
				hwc_sq->mem_info.dma_handle,
				hwc_eq->eq.msix_index);
	if (rc)
		return rc;

	if (!wait_for_completion_timeout(&hwc->hwc_init_eqe_comp, 60 * HZ))
		return -ETIMEDOUT;

	*q_depth = hwc->hwc_init_q_depth_max;
	*max_req_msg_size = hwc->hwc_init_max_req_msg_size;
	*max_resp_msg_size = hwc->hwc_init_max_resp_msg_size;

	/* Both the CQ id and max_num_cqs were set in
	 * mana_hwc_init_event_handler().
	 */
	if (WARN_ON(hwc_cq->id >= gc->max_num_cqs))
		return -EPROTO;

	gc->cq_table = vcalloc(gc->max_num_cqs, sizeof(struct gdma_queue *));
	if (!gc->cq_table)
		return -ENOMEM;

	gc->cq_table[hwc_cq->id] = hwc_cq;

	return 0;
}
/* Create the data structures of the hardware channel: the in-flight
 * message bookkeeping, the shared CQ, the RQ and the SQ.
 *
 * The RQ message buffers are sized with @max_req_msg_size and the SQ
 * message buffers with @max_resp_msg_size. On success @q_depth and
 * @max_req_msg_size are recorded in @hwc.
 *
 * Returns 0 on success or the first error encountered. Nothing is undone
 * here on failure; mana_hwc_create_channel() performs the cleanup.
 */
static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth,
				u32 max_req_msg_size, u32 max_resp_msg_size)
{
	int rc;

	rc = mana_hwc_init_inflight_msg(hwc, q_depth);
	if (rc)
		return rc;

	/* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ
	 * queue depth and RQ queue depth.
	 */
	rc = mana_hwc_create_cq(hwc, q_depth * 2,
				mana_hwc_init_event_handler, hwc,
				mana_hwc_rx_event_handler, hwc,
				mana_hwc_tx_event_handler, hwc, &hwc->cq);
	if (rc) {
		dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", rc);
		return rc;
	}

	rc = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size,
				hwc->cq, &hwc->rxq);
	if (rc) {
		dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", rc);
		return rc;
	}

	rc = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size,
				hwc->cq, &hwc->txq);
	if (rc) {
		dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", rc);
		return rc;
	}

	hwc->num_inflight_msg = q_depth;
	hwc->max_req_msg_size = max_req_msg_size;

	return 0;
}
/* Create and bring up the hardware channel of a GDMA context.
 *
 * Allocates the hw_channel_context, links it with gc->hwc, creates the
 * queues with the bootstrap depth and maximum message sizes, establishes
 * the channel with the device and tests it.
 *
 * Returns 0 on success. Returns -ENOMEM if the context cannot be
 * allocated; any later failure is logged, the channel is destroyed again
 * with mana_hwc_destroy_channel() and the error is returned.
 */
int mana_hwc_create_channel(struct gdma_context *gc)
{
	u32 max_req_msg_size, max_resp_msg_size;
	struct gdma_dev *hwc_dev = &gc->hwc;
	struct hw_channel_context *hwc;
	u16 q_depth_max;
	int rc;

	hwc = kzalloc(sizeof(*hwc), GFP_KERNEL);
	if (!hwc)
		return -ENOMEM;

	hwc_dev->gdma_context = gc;
	hwc_dev->driver_data = hwc;
	hwc->gdma_dev = hwc_dev;
	hwc->dev = gc->dev;
	hwc->hwc_timeout = HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS;

	/* HWC's instance number is always 0. */
	hwc_dev->dev_id.as_uint32 = 0;
	hwc_dev->dev_id.type = GDMA_DEVICE_HWC;

	hwc_dev->pdid = INVALID_PDID;
	hwc_dev->doorbell = INVALID_DOORBELL;

	/* mana_hwc_init_queues() only creates the required data structures,
	 * and doesn't touch the HWC device.
	 */
	rc = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
				  HW_CHANNEL_MAX_REQUEST_SIZE,
				  HW_CHANNEL_MAX_RESPONSE_SIZE);
	if (rc) {
		dev_err(hwc->dev, "Failed to initialize HWC: %d\n", rc);
		goto err_destroy;
	}

	rc = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size,
					&max_resp_msg_size);
	if (rc) {
		dev_err(hwc->dev, "Failed to establish HWC: %d\n", rc);
		goto err_destroy;
	}

	rc = mana_hwc_test_channel(gc->hwc.driver_data,
				   HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
				   max_req_msg_size, max_resp_msg_size);
	if (rc) {
		dev_err(hwc->dev, "Failed to test HWC: %d\n", rc);
		goto err_destroy;
	}

	return 0;

err_destroy:
	mana_hwc_destroy_channel(gc);
	return rc;
}
void mana_hwc_destroy_channel(struct gdma_context *gc)
{
struct hw_channel_context *hwc = gc->hwc.driver_data;
if (!hwc)
return;
/* gc->max_num_cqs is set in mana_hwc_init_event_handler(). If it's
* non-zero, the HWC worked and we should tear down the HWC here.
*/
if (gc->max_num_cqs > 0) {
mana_smc_teardown_hwc(&gc->shm_channel, false);
gc->max_num_cqs = 0;
}
kfree(hwc->caller_ctx);
hwc->caller_ctx = NULL;
if (hwc->txq)
mana_hwc_destroy_wq(hwc, hwc->txq);
if (hwc->rxq)
mana_hwc_destroy_wq(hwc, hwc->rxq);
if (hwc->cq)
mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq);
mana_gd_free_res_map(&hwc->inflight_msg_res);
hwc->num_inflight_msg = 0;
hwc->gdma_dev->doorbell = INVALID_DOORBELL;
hwc->gdma_dev->pdid = INVALID_PDID;
hwc->hwc_timeout = 0;
kfree(hwc);
gc->hwc.driver_data = NULL;
gc->hwc.gdma_context = NULL;
vfree(gc->cq_table);
gc->cq_table = NULL;
}
/* Send a request over the hardware channel and wait for its response.
 *
 * @hwc:      hardware channel context.
 * @req_len:  length of the request in bytes; must fit in a TX message
 *            buffer.
 * @req:      request message, copied into the TX buffer. If NULL, nothing
 *            is copied and the buffer is sent with its current contents.
 * @resp_len: size of @resp in bytes.
 * @resp:     buffer the response is copied into by the RX path.
 *
 * Reserves a message slot, stamps the slot number into the request header
 * as hwc_msg_id, posts the request on the SQ and waits up to
 * hwc->hwc_timeout milliseconds for the completion signalled by
 * mana_hwc_handle_resp(). The slot is released on every path.
 *
 * Returns 0 on success (including status GDMA_STATUS_MORE_ENTRIES), or:
 *  -EINVAL      @req_len exceeds the TX buffer (posting may also fail with
 *               its own error code);
 *  -ETIMEDOUT   no response arrived in time;
 *  -EOPNOTSUPP  the device answered GDMA_STATUS_CMD_UNSUPPORTED;
 *  -EPROTO      the response failed validation or carried another non-zero
 *               status.
 */
int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
			  const void *req, u32 resp_len, void *resp)
{
	struct gdma_context *gc = hwc->gdma_dev->gdma_context;
	struct hwc_work_request *tx_wr;
	struct hwc_wq *txq = hwc->txq;
	struct gdma_req_hdr *req_msg;
	struct hwc_caller_ctx *ctx;
	u32 dest_vrcq = 0;
	u32 dest_vrq = 0;
	u16 msg_id;
	int err;

	mana_hwc_get_msg_index(hwc, &msg_id);

	/* The slot number doubles as the index of the TX work request and of
	 * the caller context.
	 */
	tx_wr = &txq->msg_buf->reqs[msg_id];

	if (req_len > tx_wr->buf_len) {
		/* Both values are unsigned, so print them with %u. */
		dev_err(hwc->dev, "HWC: req msg size: %u > %u\n", req_len,
			tx_wr->buf_len);
		err = -EINVAL;
		goto out;
	}

	ctx = hwc->caller_ctx + msg_id;
	ctx->output_buf = resp;
	ctx->output_buflen = resp_len;

	req_msg = (struct gdma_req_hdr *)tx_wr->buf_va;
	if (req)
		memcpy(req_msg, req, req_len);

	req_msg->req.hwc_msg_id = msg_id;

	tx_wr->msg_size = req_len;

	if (gc->is_pf) {
		dest_vrq = hwc->pf_dest_vrq_id;
		dest_vrcq = hwc->pf_dest_vrcq_id;
	}

	err = mana_hwc_post_tx_wqe(txq, tx_wr, dest_vrq, dest_vrcq, false);
	if (err) {
		dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err);
		goto out;
	}

	if (!wait_for_completion_timeout(&ctx->comp_event,
					 (msecs_to_jiffies(hwc->hwc_timeout)))) {
		/* A zero timeout is not reported as an error in the log. */
		if (hwc->hwc_timeout != 0)
			dev_err(hwc->dev, "HWC: Request timed out!\n");

		err = -ETIMEDOUT;
		goto out;
	}

	if (ctx->error) {
		err = ctx->error;
		goto out;
	}

	if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) {
		if (ctx->status_code == GDMA_STATUS_CMD_UNSUPPORTED) {
			err = -EOPNOTSUPP;
			goto out;
		}
		/* Failures of MANA_QUERY_PHY_STAT are not logged. */
		if (req_msg->req.msg_type != MANA_QUERY_PHY_STAT)
			dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n",
				ctx->status_code);
		err = -EPROTO;
		goto out;
	}
out:
	mana_hwc_put_msg_index(hwc, msg_id);
	return err;
}