2018-10-11 08:57:42 -07:00
|
|
|
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
|
|
|
/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
|
2017-06-23 22:12:07 +02:00
|
|
|
|
|
|
|
#include <linux/bitfield.h>
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/skbuff.h>
|
2017-08-16 09:37:43 +02:00
|
|
|
#include <linux/workqueue.h>
|
2017-06-23 22:12:07 +02:00
|
|
|
#include <net/dst_metadata.h>
|
|
|
|
|
2017-06-29 22:08:19 +02:00
|
|
|
#include "main.h"
|
2017-08-16 09:37:43 +02:00
|
|
|
#include "../nfp_net.h"
|
2017-06-23 22:12:07 +02:00
|
|
|
#include "../nfp_net_repr.h"
|
|
|
|
#include "./cmsg.h"
|
|
|
|
|
|
|
|
static struct nfp_flower_cmsg_hdr *
|
|
|
|
nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
return (struct nfp_flower_cmsg_hdr *)skb->data;
|
|
|
|
}
|
|
|
|
|
2017-06-29 22:08:19 +02:00
|
|
|
struct sk_buff *
|
2017-06-23 22:12:07 +02:00
|
|
|
nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
|
2017-11-02 01:31:30 -07:00
|
|
|
enum nfp_flower_cmsg_type_port type, gfp_t flag)
|
2017-06-23 22:12:07 +02:00
|
|
|
{
|
|
|
|
struct nfp_flower_cmsg_hdr *ch;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
|
|
|
size += NFP_FLOWER_CMSG_HLEN;
|
|
|
|
|
2017-11-02 01:31:30 -07:00
|
|
|
skb = nfp_app_ctrl_msg_alloc(app, size, flag);
|
2017-06-23 22:12:07 +02:00
|
|
|
if (!skb)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
ch = nfp_flower_cmsg_get_hdr(skb);
|
|
|
|
ch->pad = 0;
|
|
|
|
ch->version = NFP_FLOWER_CMSG_VER1;
|
|
|
|
ch->type = type;
|
|
|
|
skb_put(skb, size);
|
|
|
|
|
|
|
|
return skb;
|
|
|
|
}
|
|
|
|
|
2017-08-11 10:25:44 +02:00
|
|
|
struct sk_buff *
|
|
|
|
nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports)
|
|
|
|
{
|
|
|
|
struct nfp_flower_cmsg_mac_repr *msg;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2019-02-07 21:47:25 -06:00
|
|
|
skb = nfp_flower_cmsg_alloc(app, struct_size(msg, ports, num_ports),
|
|
|
|
NFP_FLOWER_CMSG_TYPE_MAC_REPR, GFP_KERNEL);
|
2017-08-11 10:25:44 +02:00
|
|
|
if (!skb)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
msg = nfp_flower_cmsg_get_data(skb);
|
|
|
|
memset(msg->reserved, 0, sizeof(msg->reserved));
|
|
|
|
msg->num_ports = num_ports;
|
|
|
|
|
|
|
|
return skb;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx,
|
|
|
|
unsigned int nbi, unsigned int nbi_port,
|
|
|
|
unsigned int phys_port)
|
|
|
|
{
|
|
|
|
struct nfp_flower_cmsg_mac_repr *msg;
|
|
|
|
|
|
|
|
msg = nfp_flower_cmsg_get_data(skb);
|
|
|
|
msg->ports[idx].idx = idx;
|
|
|
|
msg->ports[idx].info = nbi & NFP_FLOWER_CMSG_MAC_REPR_NBI;
|
|
|
|
msg->ports[idx].nbi_port = nbi_port;
|
|
|
|
msg->ports[idx].phys_port = phys_port;
|
|
|
|
}
|
|
|
|
|
2018-03-28 18:50:07 -07:00
|
|
|
/* Send a PORT_MOD control message for @repr, reporting carrier state
 * and MTU. With @mtu_only set the firmware treats the message purely
 * as an MTU change request.
 * Returns 0 on success or -ENOMEM if the cmsg could not be allocated.
 */
int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok,
			    unsigned int mtu, bool mtu_only)
{
	struct nfp_flower_cmsg_portmod *msg;
	struct sk_buff *skb;

	skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg),
				    NFP_FLOWER_CMSG_TYPE_PORT_MOD, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	msg = nfp_flower_cmsg_get_data(skb);
	msg->reserved = 0;
	msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id);
	msg->mtu = cpu_to_be16(mtu);
	msg->info = carrier_ok;
	if (mtu_only)
		msg->info |= NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY;

	nfp_ctrl_tx(repr->app->ctrl, skb);

	return 0;
}
|
|
|
|
|
2018-01-02 19:19:01 -08:00
|
|
|
/* Send a PORT_REIFY control message informing the firmware that the
 * representor for @repr now @exists (or no longer does).
 * Returns 0 on success or -ENOMEM if the cmsg could not be allocated.
 */
int nfp_flower_cmsg_portreify(struct nfp_repr *repr, bool exists)
{
	struct nfp_flower_cmsg_portreify *msg;
	struct sk_buff *skb;

	skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg),
				    NFP_FLOWER_CMSG_TYPE_PORT_REIFY,
				    GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	msg = nfp_flower_cmsg_get_data(skb);
	msg->reserved = 0;
	msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id);
	msg->info = cpu_to_be16(exists);

	nfp_ctrl_tx(repr->app->ctrl, skb);

	return 0;
}
|
|
|
|
|
2018-03-28 18:50:07 -07:00
|
|
|
/* Check whether @skb acks an in-flight MTU-only PORT_MOD request and,
 * if so, complete the pending MTU change by waking the waiter.
 *
 * Returns true when the message was consumed as an MTU ack (caller
 * should free the skb), false when it is not an MTU ack and must be
 * processed as a regular PORT_MOD message.
 */
static bool
nfp_flower_process_mtu_ack(struct nfp_app *app, struct sk_buff *skb)
{
	struct nfp_flower_priv *app_priv = app->priv;
	struct nfp_flower_cmsg_portmod *msg;

	msg = nfp_flower_cmsg_get_data(skb);

	/* MTU-only change acks are flagged explicitly by the firmware. */
	if (!(msg->info & NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY))
		return false;

	/* mtu_conf state is shared with the requesting context; the ack
	 * only counts if it matches the currently outstanding request
	 * (port and MTU value).
	 */
	spin_lock_bh(&app_priv->mtu_conf.lock);
	if (!app_priv->mtu_conf.requested_val ||
	    app_priv->mtu_conf.portnum != be32_to_cpu(msg->portnum) ||
	    be16_to_cpu(msg->mtu) != app_priv->mtu_conf.requested_val) {
		/* Not an ack for requested MTU change. */
		spin_unlock_bh(&app_priv->mtu_conf.lock);
		return false;
	}

	/* Mark the request acked and clear it before waking the waiter. */
	app_priv->mtu_conf.ack = true;
	app_priv->mtu_conf.requested_val = 0;
	wake_up(&app_priv->mtu_conf.wait_q);
	spin_unlock_bh(&app_priv->mtu_conf.lock);

	return true;
}
|
|
|
|
|
2017-06-23 22:12:07 +02:00
|
|
|
/* Handle a received PORT_MOD message: propagate the firmware-reported
 * link state (and, on link up, a non-zero MTU) to the corresponding
 * netdev. Runs from the cmsg work queue; takes the rtnl lock for the
 * carrier/MTU updates.
 */
static void
nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_portmod *msg;
	struct net_device *netdev;
	bool link;

	msg = nfp_flower_cmsg_get_data(skb);
	link = msg->info & NFP_FLOWER_CMSG_PORTMOD_INFO_LINK;

	/* rtnl is taken before the RCU-protected netdev lookup so the
	 * device cannot go away between lookup and use below.
	 */
	rtnl_lock();
	rcu_read_lock();
	netdev = nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL);
	rcu_read_unlock();
	if (!netdev) {
		nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
				     be32_to_cpu(msg->portnum));
		rtnl_unlock();
		return;
	}

	if (link) {
		u16 mtu = be16_to_cpu(msg->mtu);

		netif_carrier_on(netdev);

		/* An MTU of 0 from the firmware should be ignored */
		if (mtu)
			dev_set_mtu(netdev, mtu);
	} else {
		netif_carrier_off(netdev);
	}
	rtnl_unlock();
}
|
|
|
|
|
2018-01-02 19:19:01 -08:00
|
|
|
static void
|
|
|
|
nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct nfp_flower_priv *priv = app->priv;
|
|
|
|
struct nfp_flower_cmsg_portreify *msg;
|
|
|
|
bool exists;
|
|
|
|
|
|
|
|
msg = nfp_flower_cmsg_get_data(skb);
|
|
|
|
|
|
|
|
rcu_read_lock();
|
2019-04-15 16:55:55 +02:00
|
|
|
exists = !!nfp_app_dev_get(app, be32_to_cpu(msg->portnum), NULL);
|
2018-01-02 19:19:01 -08:00
|
|
|
rcu_read_unlock();
|
|
|
|
if (!exists) {
|
|
|
|
nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
|
|
|
|
be32_to_cpu(msg->portnum));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
atomic_inc(&priv->reify_replies);
|
nfp: flower: increase cmesg reply timeout
QA tests report occasional timeouts on REIFY message replies. Profiling
of the two cmesg reply types under burst conditions, with a 12-core host
under heavy cpu and io load (stress --cpu 12 --io 12), show both PHY MTU
change and REIFY replies can exceed the 10ms timeout. The maximum MTU
reply wait under burst is 16ms, while the maximum REIFY wait under 40 VF
burst is 12ms. Using a 4 VF REIFY burst results in an 8ms maximum wait.
A larger VF burst does increase the delay, but not in a linear enough
way to justify a scaled REIFY delay. The worse case values between
MTU and REIFY appears close enough to justify a common timeout. Pick a
conservative 40ms to make a safer future proof common reply timeout. The
delay only effects the failure case.
Change the REIFY timeout mechanism to use wait_event_timeout() instead
of wait_event_interruptible_timeout(), to match the MTU code. In the
current implementation, theoretically, a signal could interrupt the
REIFY waiting period, with a return code of ERESTARTSYS. However, this is
caught under the general timeout error code EIO. I cannot see the benefit
of exposing the REIFY waiting period to signals with such a short delay
(40ms), while the MTU mechnism does not use the same logic. In the absence
of any reply (wakeup() call), both reply types will wake up the task after
the timeout period. The REIFY timeout applies to the entire representor
group being instantiated (e.g. VFs), while the MTU timeout apples to a
single PHY MTU change.
Signed-off-by: Fred Lotter <frederik.lotter@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-01-15 19:06:51 -08:00
|
|
|
wake_up(&priv->reify_wait_queue);
|
2018-01-02 19:19:01 -08:00
|
|
|
}
|
|
|
|
|
2019-04-15 16:55:59 +02:00
|
|
|
/* Handle a MERGE_HINT message from the firmware suggesting that two
 * offloaded flows can be merged. Validates the message length and
 * flow count, looks up both sub-flows and attempts the merge under
 * the flower lock. Failures other than -ENOMEM are silently ignored
 * since a vetoed hint does not affect correctness.
 */
static void
nfp_flower_cmsg_merge_hint_rx(struct nfp_app *app, struct sk_buff *skb)
{
	unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
	struct nfp_flower_cmsg_merge_hint *msg;
	struct nfp_fl_payload *sub_flows[2];
	struct nfp_flower_priv *priv;
	int err, i, flow_cnt;

	msg = nfp_flower_cmsg_get_data(skb);
	/* msg->count starts at 0 and always assumes at least 1 entry. */
	flow_cnt = msg->count + 1;

	/* Reject truncated messages before touching the flow array. */
	if (msg_len < struct_size(msg, flow, flow_cnt)) {
		nfp_flower_cmsg_warn(app, "Merge hint ctrl msg too short - %d bytes but expect %zd\n",
				     msg_len, struct_size(msg, flow, flow_cnt));
		return;
	}

	/* The merge implementation only supports pairwise merges. */
	if (flow_cnt != 2) {
		nfp_flower_cmsg_warn(app, "Merge hint contains %d flows - two are expected\n",
				     flow_cnt);
		return;
	}

	/* Hold the flower lock so the looked-up payloads stay valid for
	 * the duration of the merge attempt.
	 */
	priv = app->priv;
	mutex_lock(&priv->nfp_fl_lock);
	for (i = 0; i < flow_cnt; i++) {
		u32 ctx = be32_to_cpu(msg->flow[i].host_ctx);

		sub_flows[i] = nfp_flower_get_fl_payload_from_ctx(app, ctx);
		if (!sub_flows[i]) {
			nfp_flower_cmsg_warn(app, "Invalid flow in merge hint\n");
			goto err_mutex_unlock;
		}
	}

	err = nfp_flower_merge_offloaded_flows(app, sub_flows[0], sub_flows[1]);
	/* Only warn on memory fail. Hint veto will not break functionality. */
	if (err == -ENOMEM)
		nfp_flower_cmsg_warn(app, "Flow merge memory fail.\n");

err_mutex_unlock:
	mutex_unlock(&priv->nfp_fl_lock);
}
|
|
|
|
|
2017-08-16 09:37:43 +02:00
|
|
|
/* Dispatch a single queued control message to its type-specific
 * handler. Unknown types (or types whose feature is not enabled) are
 * warned about and dropped. The skb is consumed here unless a handler
 * (LAG) stored it for later reprocessing.
 */
static void
nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
{
	struct nfp_flower_priv *app_priv = app->priv;
	struct nfp_flower_cmsg_hdr *cmsg_hdr;
	enum nfp_flower_cmsg_type_port type;
	bool skb_stored = false;

	cmsg_hdr = nfp_flower_cmsg_get_hdr(skb);

	type = cmsg_hdr->type;
	switch (type) {
	case NFP_FLOWER_CMSG_TYPE_PORT_MOD:
		nfp_flower_cmsg_portmod_rx(app, skb);
		break;
	case NFP_FLOWER_CMSG_TYPE_MERGE_HINT:
		/* Merge hints are only valid when flow merging is enabled;
		 * otherwise fall through to the invalid-type warning.
		 */
		if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) {
			nfp_flower_cmsg_merge_hint_rx(app, skb);
			break;
		}
		goto err_default;
	case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
		nfp_tunnel_request_route_v4(app, skb);
		break;
	case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6:
		nfp_tunnel_request_route_v6(app, skb);
		break;
	case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
		nfp_tunnel_keep_alive(app, skb);
		break;
	case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6:
		nfp_tunnel_keep_alive_v6(app, skb);
		break;
	case NFP_FLOWER_CMSG_TYPE_QOS_STATS:
		nfp_flower_stats_rlim_reply(app, skb);
		break;
	case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG:
		/* The LAG handler may keep the skb for reprocessing, in
		 * which case it must not be freed below.
		 */
		if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) {
			skb_stored = nfp_flower_lag_unprocessed_msg(app, skb);
			break;
		}
		fallthrough;
	default:
err_default:
		nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
				     type);
		goto out;
	}

	if (!skb_stored)
		dev_consume_skb_any(skb);
	return;
out:
	dev_kfree_skb_any(skb);
}
|
2017-08-16 09:37:43 +02:00
|
|
|
|
|
|
|
void nfp_flower_cmsg_process_rx(struct work_struct *work)
|
|
|
|
{
|
2018-04-11 16:47:38 -07:00
|
|
|
struct sk_buff_head cmsg_joined;
|
2017-08-16 09:37:43 +02:00
|
|
|
struct nfp_flower_priv *priv;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
|
|
|
priv = container_of(work, struct nfp_flower_priv, cmsg_work);
|
2018-04-11 16:47:38 -07:00
|
|
|
skb_queue_head_init(&cmsg_joined);
|
2017-08-16 09:37:43 +02:00
|
|
|
|
2018-04-11 16:47:38 -07:00
|
|
|
spin_lock_bh(&priv->cmsg_skbs_high.lock);
|
|
|
|
skb_queue_splice_tail_init(&priv->cmsg_skbs_high, &cmsg_joined);
|
|
|
|
spin_unlock_bh(&priv->cmsg_skbs_high.lock);
|
|
|
|
|
|
|
|
spin_lock_bh(&priv->cmsg_skbs_low.lock);
|
|
|
|
skb_queue_splice_tail_init(&priv->cmsg_skbs_low, &cmsg_joined);
|
|
|
|
spin_unlock_bh(&priv->cmsg_skbs_low.lock);
|
|
|
|
|
|
|
|
while ((skb = __skb_dequeue(&cmsg_joined)))
|
2017-09-02 18:26:00 -07:00
|
|
|
nfp_flower_cmsg_process_one_rx(priv->app, skb);
|
2017-08-16 09:37:43 +02:00
|
|
|
}
|
|
|
|
|
2018-04-11 16:47:38 -07:00
|
|
|
static void
|
|
|
|
nfp_flower_queue_ctl_msg(struct nfp_app *app, struct sk_buff *skb, int type)
|
2017-08-16 09:37:43 +02:00
|
|
|
{
|
|
|
|
struct nfp_flower_priv *priv = app->priv;
|
2018-04-11 16:47:38 -07:00
|
|
|
struct sk_buff_head *skb_head;
|
|
|
|
|
nfp: flower: cmsg rtnl locks can timeout reify messages
Flower control message replies are handled in different locations. The truly
high priority replies are handled in the BH (tasklet) context, while the
remaining replies are handled in a predefined Linux work queue. The work
queue handler orders replies into high and low priority groups, and always
start servicing the high priority replies within the received batch first.
Reply Type: Rtnl Lock: Handler:
CMSG_TYPE_PORT_MOD no BH tasklet (mtu)
CMSG_TYPE_TUN_NEIGH no BH tasklet
CMSG_TYPE_FLOW_STATS no BH tasklet
CMSG_TYPE_PORT_REIFY no WQ high
CMSG_TYPE_PORT_MOD yes WQ high (link/mtu)
CMSG_TYPE_MERGE_HINT yes WQ low
CMSG_TYPE_NO_NEIGH no WQ low
CMSG_TYPE_ACTIVE_TUNS no WQ low
CMSG_TYPE_QOS_STATS no WQ low
CMSG_TYPE_LAG_CONFIG no WQ low
A subset of control messages can block waiting for an rtnl lock (from both
work queue priority groups). The rtnl lock is heavily contended for by
external processes such as systemd-udevd, systemd-network and libvirtd,
especially during netdev creation, such as when flower VFs and representors
are instantiated.
Kernel netlink instrumentation shows that external processes (such as
systemd-udevd) often use successive rtnl_trylock() sequences, which can result
in an rtnl_lock() blocked control message to starve for longer periods of time
during rtnl lock contention, i.e. netdev creation.
In the current design a single blocked control message will block the entire
work queue (both priorities), and introduce a latency which is
nondeterministic and dependent on system wide rtnl lock usage.
In some extreme cases, one blocked control message at exactly the wrong time,
just before the maximum number of VFs are instantiated, can block the work
queue for long enough to prevent VF representor REIFY replies from getting
handled in time for the 40ms timeout.
The firmware will deliver the total maximum number of REIFY message replies in
around 300us.
Only REIFY and MTU update messages require replies within a timeout period (of
40ms). The MTU-only updates are already done directly in the BH (tasklet)
handler.
Move the REIFY handler down into the BH (tasklet) in order to resolve timeouts
caused by a blocked work queue waiting on rtnl locks.
Signed-off-by: Fred Lotter <frederik.lotter@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-09-06 19:29:41 +02:00
|
|
|
if (type == NFP_FLOWER_CMSG_TYPE_PORT_MOD)
|
2018-04-11 16:47:38 -07:00
|
|
|
skb_head = &priv->cmsg_skbs_high;
|
|
|
|
else
|
|
|
|
skb_head = &priv->cmsg_skbs_low;
|
|
|
|
|
|
|
|
if (skb_queue_len(skb_head) >= NFP_FLOWER_WORKQ_MAX_SKBS) {
|
|
|
|
nfp_flower_cmsg_warn(app, "Dropping queued control messages\n");
|
|
|
|
dev_kfree_skb_any(skb);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
skb_queue_tail(skb_head, skb);
|
|
|
|
schedule_work(&priv->cmsg_work);
|
|
|
|
}
|
|
|
|
|
|
|
|
void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
|
|
|
|
{
|
2018-01-19 17:54:08 -08:00
|
|
|
struct nfp_flower_cmsg_hdr *cmsg_hdr;
|
|
|
|
|
|
|
|
cmsg_hdr = nfp_flower_cmsg_get_hdr(skb);
|
|
|
|
|
|
|
|
if (unlikely(cmsg_hdr->version != NFP_FLOWER_CMSG_VER1)) {
|
|
|
|
nfp_flower_cmsg_warn(app, "Cannot handle repr control version %u\n",
|
|
|
|
cmsg_hdr->version);
|
|
|
|
dev_kfree_skb_any(skb);
|
|
|
|
return;
|
|
|
|
}
|
2017-08-16 09:37:43 +02:00
|
|
|
|
2018-01-19 17:54:08 -08:00
|
|
|
if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_FLOW_STATS) {
|
|
|
|
/* We need to deal with stats updates from HW asap */
|
|
|
|
nfp_flower_rx_flow_stats(app, skb);
|
|
|
|
dev_consume_skb_any(skb);
|
2018-03-28 18:50:07 -07:00
|
|
|
} else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_MOD &&
|
|
|
|
nfp_flower_process_mtu_ack(app, skb)) {
|
|
|
|
/* Handle MTU acks outside wq to prevent RTNL conflict. */
|
|
|
|
dev_consume_skb_any(skb);
|
2019-12-17 21:57:21 +00:00
|
|
|
} else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH ||
|
|
|
|
cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) {
|
2018-04-11 16:47:37 -07:00
|
|
|
/* Acks from the NFP that the route is added - ignore. */
|
|
|
|
dev_consume_skb_any(skb);
|
nfp: flower: cmsg rtnl locks can timeout reify messages
Flower control message replies are handled in different locations. The truly
high priority replies are handled in the BH (tasklet) context, while the
remaining replies are handled in a predefined Linux work queue. The work
queue handler orders replies into high and low priority groups, and always
start servicing the high priority replies within the received batch first.
Reply Type: Rtnl Lock: Handler:
CMSG_TYPE_PORT_MOD no BH tasklet (mtu)
CMSG_TYPE_TUN_NEIGH no BH tasklet
CMSG_TYPE_FLOW_STATS no BH tasklet
CMSG_TYPE_PORT_REIFY no WQ high
CMSG_TYPE_PORT_MOD yes WQ high (link/mtu)
CMSG_TYPE_MERGE_HINT yes WQ low
CMSG_TYPE_NO_NEIGH no WQ low
CMSG_TYPE_ACTIVE_TUNS no WQ low
CMSG_TYPE_QOS_STATS no WQ low
CMSG_TYPE_LAG_CONFIG no WQ low
A subset of control messages can block waiting for an rtnl lock (from both
work queue priority groups). The rtnl lock is heavily contended for by
external processes such as systemd-udevd, systemd-network and libvirtd,
especially during netdev creation, such as when flower VFs and representors
are instantiated.
Kernel netlink instrumentation shows that external processes (such as
systemd-udevd) often use successive rtnl_trylock() sequences, which can result
in an rtnl_lock() blocked control message to starve for longer periods of time
during rtnl lock contention, i.e. netdev creation.
In the current design a single blocked control message will block the entire
work queue (both priorities), and introduce a latency which is
nondeterministic and dependent on system wide rtnl lock usage.
In some extreme cases, one blocked control message at exactly the wrong time,
just before the maximum number of VFs are instantiated, can block the work
queue for long enough to prevent VF representor REIFY replies from getting
handled in time for the 40ms timeout.
The firmware will deliver the total maximum number of REIFY message replies in
around 300us.
Only REIFY and MTU update messages require replies within a timeout period (of
40ms). The MTU-only updates are already done directly in the BH (tasklet)
handler.
Move the REIFY handler down into the BH (tasklet) in order to resolve timeouts
caused by a blocked work queue waiting on rtnl locks.
Signed-off-by: Fred Lotter <frederik.lotter@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-09-06 19:29:41 +02:00
|
|
|
} else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) {
|
|
|
|
/* Handle REIFY acks outside wq to prevent RTNL conflict. */
|
|
|
|
nfp_flower_cmsg_portreify_rx(app, skb);
|
|
|
|
dev_consume_skb_any(skb);
|
2018-01-19 17:54:08 -08:00
|
|
|
} else {
|
2018-04-11 16:47:38 -07:00
|
|
|
nfp_flower_queue_ctl_msg(app, skb, cmsg_hdr->type);
|
2018-01-19 17:54:08 -08:00
|
|
|
}
|
2017-08-16 09:37:43 +02:00
|
|
|
}
|