IB/cm: Do not hold reference on cm_id unless needed

Typically, when the CM sends a MAD it bumps a reference count
on the associated cm_id.  There are some exceptions, such
as when the MAD is a direct response to a receive MAD.  For
example, the CM may generate an MRA in response to a duplicate
REQ.  But, in general, if a MAD may be sent as a result of
the user invoking an API call (e.g. ib_send_cm_rep(),
ib_send_cm_rtu(), etc.), a reference is taken on the cm_id.

This reference is necessary if the MAD requires a response.
The reference allows routing a response MAD back to the
cm_id, or, if no response is received, allows updating the
cm_id state to reflect the failure.

For MADs which do not generate a response from the
target, however, there's no need to hold a reference on the cm_id.
Such MADs will not be retried by the MAD layer and their
completions do not change the state of the cm_id.

There are two internal calls used to allocate MADs which take
a reference on the cm_id: cm_alloc_msg() and cm_alloc_priv_msg().
The latter calls the former.  It turns out that all other places
where cm_alloc_msg() is called are for MADs that do not generate
a response from the target: sending an RTU, DREP, REJ, MRA, or
SIDR REP.  In all of these cases, there's no need to hold a
reference on the cm_id.

The benefit of dropping unneeded references is that it allows
destruction of the cm_id to proceed immediately.  Currently,
the cm_destroy_id() call blocks as long as there's a reference
held on the cm_id.  Worse, cm_destroy_id() will send
MADs, which it then needs to complete.  Sending the MADs is
beneficial, as they notify the peer that a connection is
being destroyed.  However, since the MADs hold a reference
on the cm_id, they block destruction and cannot be retried.

Move cm_id referencing from cm_alloc_msg() to cm_alloc_priv_msg().
The latter should hold a reference on the cm_id in all cases but
one, which will be handled in a separate patch.  cm_alloc_priv_msg()
is used when sending a REQ, REP, DREQ, and SIDR REQ, all of which
require a response.

Also, merge common code into cm_alloc_priv_msg() and combine the
freeing of all messages which do not need a response.

Signed-off-by: Sean Hefty <shefty@nvidia.com>
Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Link: https://patch.msgid.link/1f0f96acace72790ecf89087fc765dead960189e.1731495873.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
Sean Hefty 2024-11-13 13:12:55 +02:00 committed by Leon Romanovsky
parent 0492458750
commit 1e51592190

View file

@ -309,12 +309,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
goto out; goto out;
} }
/* Timeout set by caller if response is expected. */
m->ah = ah; m->ah = ah;
m->retries = cm_id_priv->max_cm_retries;
refcount_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
out: out:
spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
@ -323,16 +318,13 @@ out:
static void cm_free_msg(struct ib_mad_send_buf *msg) static void cm_free_msg(struct ib_mad_send_buf *msg)
{ {
struct cm_id_private *cm_id_priv = msg->context[0];
if (msg->ah) if (msg->ah)
rdma_destroy_ah(msg->ah, 0); rdma_destroy_ah(msg->ah, 0);
cm_deref_id(cm_id_priv);
ib_free_send_mad(msg); ib_free_send_mad(msg);
} }
static struct ib_mad_send_buf * static struct ib_mad_send_buf *
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
{ {
struct ib_mad_send_buf *msg; struct ib_mad_send_buf *msg;
@ -341,7 +333,15 @@ cm_alloc_priv_msg(struct cm_id_private *cm_id_priv)
msg = cm_alloc_msg(cm_id_priv); msg = cm_alloc_msg(cm_id_priv);
if (IS_ERR(msg)) if (IS_ERR(msg))
return msg; return msg;
cm_id_priv->msg = msg; cm_id_priv->msg = msg;
refcount_inc(&cm_id_priv->refcount);
msg->context[0] = cm_id_priv;
msg->context[1] = (void *) (unsigned long) state;
msg->retries = cm_id_priv->max_cm_retries;
msg->timeout_ms = cm_id_priv->timeout_ms;
return msg; return msg;
} }
@ -413,13 +413,6 @@ static int cm_alloc_response_msg(struct cm_port *port,
return 0; return 0;
} }
static void cm_free_response_msg(struct ib_mad_send_buf *msg)
{
if (msg->ah)
rdma_destroy_ah(msg->ah, 0);
ib_free_send_mad(msg);
}
static void *cm_copy_private_data(const void *private_data, u8 private_data_len) static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
{ {
void *data; void *data;
@ -1567,7 +1560,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
if (param->alternate_path) if (param->alternate_path)
cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
msg = cm_alloc_priv_msg(cm_id_priv); msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT);
if (IS_ERR(msg)) { if (IS_ERR(msg)) {
ret = PTR_ERR(msg); ret = PTR_ERR(msg);
goto out_unlock; goto out_unlock;
@ -1576,8 +1569,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
req_msg = (struct cm_req_msg *)msg->mad; req_msg = (struct cm_req_msg *)msg->mad;
cm_format_req(req_msg, cm_id_priv, param); cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->tid = req_msg->hdr.tid;
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT;
cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
@ -1634,7 +1625,7 @@ static int cm_issue_rej(struct cm_port *port,
IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL); ret = ib_post_send_mad(msg, NULL);
if (ret) if (ret)
cm_free_response_msg(msg); cm_free_msg(msg);
return ret; return ret;
} }
@ -1990,7 +1981,7 @@ static void cm_dup_req_handler(struct cm_work *work,
return; return;
unlock: spin_unlock_irq(&cm_id_priv->lock); unlock: spin_unlock_irq(&cm_id_priv->lock);
free: cm_free_response_msg(msg); free: cm_free_msg(msg);
} }
static struct cm_id_private *cm_match_req(struct cm_work *work, static struct cm_id_private *cm_match_req(struct cm_work *work,
@ -2304,7 +2295,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
goto out; goto out;
} }
msg = cm_alloc_priv_msg(cm_id_priv); msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT);
if (IS_ERR(msg)) { if (IS_ERR(msg)) {
ret = PTR_ERR(msg); ret = PTR_ERR(msg);
goto out; goto out;
@ -2312,8 +2303,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
rep_msg = (struct cm_rep_msg *) msg->mad; rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param); cm_format_rep(rep_msg, cm_id_priv, param);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
trace_icm_send_rep(cm_id); trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL); ret = ib_post_send_mad(msg, NULL);
@ -2479,7 +2468,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto deref; goto deref;
unlock: spin_unlock_irq(&cm_id_priv->lock); unlock: spin_unlock_irq(&cm_id_priv->lock);
free: cm_free_response_msg(msg); free: cm_free_msg(msg);
deref: cm_deref_id(cm_id_priv); deref: cm_deref_id(cm_id_priv);
} }
@ -2683,7 +2672,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
ib_cancel_mad(cm_id_priv->msg); ib_cancel_mad(cm_id_priv->msg);
msg = cm_alloc_priv_msg(cm_id_priv); msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_DREQ_SENT);
if (IS_ERR(msg)) { if (IS_ERR(msg)) {
cm_enter_timewait(cm_id_priv); cm_enter_timewait(cm_id_priv);
return PTR_ERR(msg); return PTR_ERR(msg);
@ -2691,8 +2680,6 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
private_data, private_data_len); private_data, private_data_len);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
trace_icm_send_dreq(&cm_id_priv->id); trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL); ret = ib_post_send_mad(msg, NULL);
@ -2819,7 +2806,7 @@ static int cm_issue_drep(struct cm_port *port,
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL); ret = ib_post_send_mad(msg, NULL);
if (ret) if (ret)
cm_free_response_msg(msg); cm_free_msg(msg);
return ret; return ret;
} }
@ -2878,7 +2865,7 @@ static int cm_dreq_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL)) ib_post_send_mad(msg, NULL))
cm_free_response_msg(msg); cm_free_msg(msg);
goto deref; goto deref;
case IB_CM_DREQ_RCVD: case IB_CM_DREQ_RCVD:
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
@ -3386,7 +3373,7 @@ static int cm_lap_handler(struct cm_work *work)
if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
ib_post_send_mad(msg, NULL)) ib_post_send_mad(msg, NULL))
cm_free_response_msg(msg); cm_free_msg(msg);
goto deref; goto deref;
case IB_CM_LAP_RCVD: case IB_CM_LAP_RCVD:
atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
@ -3525,7 +3512,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
goto out_unlock; goto out_unlock;
} }
msg = cm_alloc_priv_msg(cm_id_priv); msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_SIDR_REQ_SENT);
if (IS_ERR(msg)) { if (IS_ERR(msg)) {
ret = PTR_ERR(msg); ret = PTR_ERR(msg);
goto out_unlock; goto out_unlock;
@ -3533,8 +3520,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv, cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
param); param);
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT;
trace_icm_send_sidr_req(&cm_id_priv->id); trace_icm_send_sidr_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL); ret = ib_post_send_mad(msg, NULL);
@ -3780,17 +3765,17 @@ out:
static void cm_process_send_error(struct cm_id_private *cm_id_priv, static void cm_process_send_error(struct cm_id_private *cm_id_priv,
struct ib_mad_send_buf *msg, struct ib_mad_send_buf *msg,
enum ib_cm_state state,
enum ib_wc_status wc_status) enum ib_wc_status wc_status)
{ {
enum ib_cm_state state = (unsigned long) msg->context[1];
struct ib_cm_event cm_event = {}; struct ib_cm_event cm_event = {};
int ret; int ret;
/* Discard old sends or ones without a response. */ /* Discard old sends. */
spin_lock_irq(&cm_id_priv->lock); spin_lock_irq(&cm_id_priv->lock);
if (msg != cm_id_priv->msg) { if (msg != cm_id_priv->msg) {
spin_unlock_irq(&cm_id_priv->lock); spin_unlock_irq(&cm_id_priv->lock);
cm_free_msg(msg); cm_free_priv_msg(msg);
return; return;
} }
cm_free_priv_msg(msg); cm_free_priv_msg(msg);
@ -3839,8 +3824,6 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
{ {
struct ib_mad_send_buf *msg = mad_send_wc->send_buf; struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
struct cm_id_private *cm_id_priv; struct cm_id_private *cm_id_priv;
enum ib_cm_state state =
(enum ib_cm_state)(unsigned long)msg->context[1];
struct cm_port *port; struct cm_port *port;
u16 attr_index; u16 attr_index;
@ -3861,10 +3844,9 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
&port->counters[CM_XMIT_RETRIES][attr_index]); &port->counters[CM_XMIT_RETRIES][attr_index]);
if (cm_id_priv) if (cm_id_priv)
cm_process_send_error(cm_id_priv, msg, state, cm_process_send_error(cm_id_priv, msg, mad_send_wc->status);
mad_send_wc->status);
else else
cm_free_response_msg(msg); cm_free_msg(msg);
} }
static void cm_work_handler(struct work_struct *_work) static void cm_work_handler(struct work_struct *_work)