mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
devlink: Extend devlink rate API with traffic classes bandwidth management
Introduce support for specifying relative bandwidth shares between
traffic classes (TC) in the devlink-rate API. This new option allows
users to allocate bandwidth across multiple traffic classes in a
single command.

This feature provides more granular control over traffic management,
especially for scenarios requiring Enhanced Transmission Selection.

Users can now define a relative bandwidth share for each traffic class.
For example, assigning share values of 20 to TC0 (TCP/UDP) and 80 to TC5
(RoCE) will result in TC0 receiving 20% and TC5 receiving 80% of the
total bandwidth. The actual percentage each class receives depends on
the ratio of its share value to the sum of all shares.

Example:
DEV=pci/0000:08:00.0

$ devlink port function rate add $DEV/vfs_group tx_share 10Gbit \
  tx_max 50Gbit tc-bw 0:20 1:0 2:0 3:0 4:0 5:80 6:0 7:0

$ devlink port function rate set $DEV/vfs_group \
  tc-bw 0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0

Example usage with ynl:

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-set --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1,
  "rate-tc-bws": [
    {"rate-tc-index": 0, "rate-tc-bw": 50},
    {"rate-tc-index": 1, "rate-tc-bw": 50},
    {"rate-tc-index": 2, "rate-tc-bw": 0},
    {"rate-tc-index": 3, "rate-tc-bw": 0},
    {"rate-tc-index": 4, "rate-tc-bw": 0},
    {"rate-tc-index": 5, "rate-tc-bw": 0},
    {"rate-tc-index": 6, "rate-tc-bw": 0},
    {"rate-tc-index": 7, "rate-tc-bw": 0}
  ]
}'

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-get --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1
}'

output for rate-get:
{'bus-name': 'pci',
 'dev-name': '0000:08:00.0',
 'port-index': 1,
 'rate-tc-bws': [{'rate-tc-bw': 50, 'rate-tc-index': 0},
                 {'rate-tc-bw': 50, 'rate-tc-index': 1},
                 {'rate-tc-bw': 0, 'rate-tc-index': 2},
                 {'rate-tc-bw': 0, 'rate-tc-index': 3},
                 {'rate-tc-bw': 0, 'rate-tc-index': 4},
                 {'rate-tc-bw': 0, 'rate-tc-index': 5},
                 {'rate-tc-bw': 0, 'rate-tc-index': 6},
                 {'rate-tc-bw': 0, 'rate-tc-index': 7}],
 'rate-tx-max': 0,
 'rate-tx-priority': 0,
 'rate-tx-share': 0,
 'rate-tx-weight': 0,
 'rate-type': 'leaf'}

Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250629142138.361537-3-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
42401c4238
commit
566e8f108f
7 changed files with 195 additions and 5 deletions
|
@ -224,6 +224,10 @@ definitions:
|
|||
value: 10
|
||||
-
|
||||
name: binary
|
||||
-
|
||||
name: rate-tc-index-max
|
||||
type: const
|
||||
value: 7
|
||||
|
||||
attribute-sets:
|
||||
-
|
||||
|
@ -844,7 +848,23 @@ attribute-sets:
|
|||
-
|
||||
name: region-direct
|
||||
type: flag
|
||||
|
||||
-
|
||||
name: rate-tc-bws
|
||||
type: nest
|
||||
multi-attr: true
|
||||
nested-attributes: dl-rate-tc-bws
|
||||
-
|
||||
name: rate-tc-index
|
||||
type: u8
|
||||
checks:
|
||||
max: rate-tc-index-max
|
||||
-
|
||||
name: rate-tc-bw
|
||||
type: u32
|
||||
doc: |
|
||||
Specifies the bandwidth share assigned to the Traffic Class.
|
||||
The bandwidth for the traffic class is determined
|
||||
in proportion to the sum of the shares of all configured classes.
|
||||
-
|
||||
name: dl-dev-stats
|
||||
subset-of: devlink
|
||||
|
@ -1249,6 +1269,14 @@ attribute-sets:
|
|||
-
|
||||
name: flash
|
||||
type: flag
|
||||
-
|
||||
name: dl-rate-tc-bws
|
||||
subset-of: devlink
|
||||
attributes:
|
||||
-
|
||||
name: rate-tc-index
|
||||
-
|
||||
name: rate-tc-bw
|
||||
|
||||
operations:
|
||||
enum-model: directional
|
||||
|
@ -2176,6 +2204,7 @@ operations:
|
|||
- rate-tx-priority
|
||||
- rate-tx-weight
|
||||
- rate-parent-node-name
|
||||
- rate-tc-bws
|
||||
|
||||
-
|
||||
name: rate-new
|
||||
|
@ -2196,6 +2225,7 @@ operations:
|
|||
- rate-tx-priority
|
||||
- rate-tx-weight
|
||||
- rate-parent-node-name
|
||||
- rate-tc-bws
|
||||
|
||||
-
|
||||
name: rate-del
|
||||
|
|
|
@ -418,6 +418,14 @@ API allows to configure following rate object's parameters:
|
|||
to all node children limits. ``tx_max`` is an upper limit for children.
|
||||
``tx_share`` is a total bandwidth distributed among children.
|
||||
|
||||
``tc_bw``
|
||||
Allow users to set the bandwidth allocation per traffic class on rate
|
||||
objects. This enables fine-grained QoS configurations by assigning a relative
|
||||
share value to each traffic class. The bandwidth is distributed in proportion
|
||||
to the share value for each class, relative to the sum of all shares.
|
||||
When applied to a non-leaf node, tc_bw determines how bandwidth is shared
|
||||
among its child elements.
|
||||
|
||||
``tx_priority`` and ``tx_weight`` can be used simultaneously. In that case
|
||||
nodes with the same priority form a WFQ subgroup in the sibling group
|
||||
and arbitration among them is based on assigned weights.
|
||||
|
|
|
@ -118,6 +118,8 @@ struct devlink_rate {
|
|||
|
||||
u32 tx_priority;
|
||||
u32 tx_weight;
|
||||
|
||||
u32 tc_bw[DEVLINK_RATE_TCS_MAX];
|
||||
};
|
||||
|
||||
struct devlink_port {
|
||||
|
@ -1486,6 +1488,9 @@ struct devlink_ops {
|
|||
u32 tx_priority, struct netlink_ext_ack *extack);
|
||||
int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
|
||||
u32 tx_weight, struct netlink_ext_ack *extack);
|
||||
int (*rate_leaf_tc_bw_set)(struct devlink_rate *devlink_rate,
|
||||
void *priv, u32 *tc_bw,
|
||||
struct netlink_ext_ack *extack);
|
||||
int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
|
||||
u64 tx_share, struct netlink_ext_ack *extack);
|
||||
int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
|
||||
|
@ -1494,6 +1499,9 @@ struct devlink_ops {
|
|||
u32 tx_priority, struct netlink_ext_ack *extack);
|
||||
int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
|
||||
u32 tx_weight, struct netlink_ext_ack *extack);
|
||||
int (*rate_node_tc_bw_set)(struct devlink_rate *devlink_rate,
|
||||
void *priv, u32 *tc_bw,
|
||||
struct netlink_ext_ack *extack);
|
||||
int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
|
||||
struct netlink_ext_ack *extack);
|
||||
int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
|
||||
|
|
|
@ -221,6 +221,11 @@ enum devlink_port_flavour {
|
|||
*/
|
||||
};
|
||||
|
||||
/* IEEE 802.1Qaz standard supported values. */
|
||||
|
||||
#define DEVLINK_RATE_TCS_MAX 8
|
||||
#define DEVLINK_RATE_TC_INDEX_MAX (DEVLINK_RATE_TCS_MAX - 1)
|
||||
|
||||
enum devlink_rate_type {
|
||||
DEVLINK_RATE_TYPE_LEAF,
|
||||
DEVLINK_RATE_TYPE_NODE,
|
||||
|
@ -629,6 +634,10 @@ enum devlink_attr {
|
|||
|
||||
DEVLINK_ATTR_REGION_DIRECT, /* flag */
|
||||
|
||||
DEVLINK_ATTR_RATE_TC_BWS, /* nested */
|
||||
DEVLINK_ATTR_RATE_TC_INDEX, /* u8 */
|
||||
DEVLINK_ATTR_RATE_TC_BW, /* u32 */
|
||||
|
||||
/* Add new attributes above here, update the spec in
|
||||
* Documentation/netlink/specs/devlink.yaml and re-generate
|
||||
* net/devlink/netlink_gen.c.
|
||||
|
|
|
@ -45,6 +45,11 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_
|
|||
[DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15),
|
||||
};
|
||||
|
||||
const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1] = {
|
||||
[DEVLINK_ATTR_RATE_TC_INDEX] = NLA_POLICY_MAX(NLA_U8, DEVLINK_RATE_TC_INDEX_MAX),
|
||||
[DEVLINK_ATTR_RATE_TC_BW] = { .type = NLA_U32, },
|
||||
};
|
||||
|
||||
const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = {
|
||||
[DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, },
|
||||
};
|
||||
|
@ -523,7 +528,7 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_
|
|||
};
|
||||
|
||||
/* DEVLINK_CMD_RATE_SET - do */
|
||||
static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
|
||||
static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
|
||||
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
|
||||
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
|
||||
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
|
||||
|
@ -532,10 +537,11 @@ static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_W
|
|||
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
|
||||
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
|
||||
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
|
||||
[DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
|
||||
};
|
||||
|
||||
/* DEVLINK_CMD_RATE_NEW - do */
|
||||
static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
|
||||
static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
|
||||
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
|
||||
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
|
||||
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
|
||||
|
@ -544,6 +550,7 @@ static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_W
|
|||
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
|
||||
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
|
||||
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
|
||||
[DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
|
||||
};
|
||||
|
||||
/* DEVLINK_CMD_RATE_DEL - do */
|
||||
|
@ -1191,7 +1198,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
|
|||
.doit = devlink_nl_rate_set_doit,
|
||||
.post_doit = devlink_nl_post_doit,
|
||||
.policy = devlink_rate_set_nl_policy,
|
||||
.maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
|
||||
.maxattr = DEVLINK_ATTR_RATE_TC_BWS,
|
||||
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
|
||||
},
|
||||
{
|
||||
|
@ -1201,7 +1208,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
|
|||
.doit = devlink_nl_rate_new_doit,
|
||||
.post_doit = devlink_nl_post_doit,
|
||||
.policy = devlink_rate_new_nl_policy,
|
||||
.maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
|
||||
.maxattr = DEVLINK_ATTR_RATE_TC_BWS,
|
||||
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
|
||||
},
|
||||
{
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
/* Common nested types */
|
||||
extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1];
|
||||
extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1];
|
||||
extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1];
|
||||
|
||||
/* Ops table for devlink */
|
||||
|
|
|
@ -80,6 +80,29 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
|
|||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
/*
 * Append the per-traffic-class bandwidth shares to a netlink message.
 *
 * One DEVLINK_ATTR_RATE_TC_BWS nest is emitted for each of the
 * DEVLINK_RATE_TCS_MAX traffic classes; each nest carries the class index
 * (DEVLINK_ATTR_RATE_TC_INDEX, u8) and its share (DEVLINK_ATTR_RATE_TC_BW,
 * u32) taken from @tc_bw.
 *
 * Returns 0 on success. Returns -EMSGSIZE if a nest cannot be started or
 * one of its attributes cannot be added; in the latter case the nest that
 * was being built is cancelled before returning.
 */
static int devlink_rate_put_tc_bws(struct sk_buff *msg, u32 *tc_bw)
{
	int tc;

	for (tc = 0; tc < DEVLINK_RATE_TCS_MAX; tc++) {
		struct nlattr *nest;

		nest = nla_nest_start(msg, DEVLINK_ATTR_RATE_TC_BWS);
		if (!nest)
			return -EMSGSIZE;

		if (nla_put_u8(msg, DEVLINK_ATTR_RATE_TC_INDEX, tc) ||
		    nla_put_u32(msg, DEVLINK_ATTR_RATE_TC_BW, tc_bw[tc])) {
			/* Drop the half-built nest for this class. */
			nla_nest_cancel(msg, nest);
			return -EMSGSIZE;
		}

		nla_nest_end(msg, nest);
	}

	return 0;
}
|
||||
|
||||
static int devlink_nl_rate_fill(struct sk_buff *msg,
|
||||
struct devlink_rate *devlink_rate,
|
||||
enum devlink_command cmd, u32 portid, u32 seq,
|
||||
|
@ -129,6 +152,9 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
|
|||
devlink_rate->parent->name))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (devlink_rate_put_tc_bws(msg, devlink_rate->tc_bw))
|
||||
goto nla_put_failure;
|
||||
|
||||
genlmsg_end(msg, hdr);
|
||||
return 0;
|
||||
|
||||
|
@ -316,6 +342,87 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
 * devlink_nl_rate_tc_bw_parse - parse one DEVLINK_ATTR_RATE_TC_BWS nest
 * @parent_nest: nested attribute holding one traffic-class index/share pair
 * @tc_bw: array, indexed by traffic class, that receives the parsed share
 * @bitmap: traffic-class indices already seen; updated with the parsed index
 * @extack: extended ack used to report which attribute is missing or why
 *          the entry was rejected
 *
 * The nest is validated against devlink_dl_rate_tc_bws_nl_policy. Both
 * DEVLINK_ATTR_RATE_TC_INDEX and DEVLINK_ATTR_RATE_TC_BW must be present,
 * and an index may appear only once per request.
 *
 * Return: 0 on success, the error returned by nla_parse_nested() if
 * validation fails, or -EINVAL if an attribute is missing or the traffic
 * class index was already specified.
 */
static int devlink_nl_rate_tc_bw_parse(struct nlattr *parent_nest, u32 *tc_bw,
				       unsigned long *bitmap,
				       struct netlink_ext_ack *extack)
{
	/*
	 * The policy table is declared with DEVLINK_ATTR_RATE_TC_BW + 1
	 * entries, so the attribute table and the maxtype handed to the
	 * parser must use the same bound. Parsing with DEVLINK_ATTR_MAX would
	 * let validation index past the end of the policy as soon as an
	 * attribute is added after DEVLINK_ATTR_RATE_TC_BW.
	 */
	struct nlattr *tb[DEVLINK_ATTR_RATE_TC_BW + 1];
	u8 tc_index;
	int err;

	err = nla_parse_nested(tb, DEVLINK_ATTR_RATE_TC_BW, parent_nest,
			       devlink_dl_rate_tc_bws_nl_policy, extack);
	if (err)
		return err;

	if (!tb[DEVLINK_ATTR_RATE_TC_INDEX]) {
		NL_SET_ERR_ATTR_MISS(extack, parent_nest,
				     DEVLINK_ATTR_RATE_TC_INDEX);
		return -EINVAL;
	}

	tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]);

	if (!tb[DEVLINK_ATTR_RATE_TC_BW]) {
		NL_SET_ERR_ATTR_MISS(extack, parent_nest,
				     DEVLINK_ATTR_RATE_TC_BW);
		return -EINVAL;
	}

	/* Each traffic class may be configured at most once per request. */
	if (test_and_set_bit(tc_index, bitmap)) {
		NL_SET_ERR_MSG_FMT(extack,
				   "Duplicate traffic class index specified (%u)",
				   tc_index);
		return -EINVAL;
	}

	tc_bw[tc_index] = nla_get_u32(tb[DEVLINK_ATTR_RATE_TC_BW]);

	return 0;
}
|
||||
|
||||
static int devlink_nl_rate_tc_bw_set(struct devlink_rate *devlink_rate,
|
||||
struct genl_info *info)
|
||||
{
|
||||
DECLARE_BITMAP(bitmap, DEVLINK_RATE_TCS_MAX) = {};
|
||||
struct devlink *devlink = devlink_rate->devlink;
|
||||
const struct devlink_ops *ops = devlink->ops;
|
||||
u32 tc_bw[DEVLINK_RATE_TCS_MAX] = {};
|
||||
int rem, err = -EOPNOTSUPP, i;
|
||||
struct nlattr *attr;
|
||||
|
||||
nlmsg_for_each_attr_type(attr, DEVLINK_ATTR_RATE_TC_BWS, info->nlhdr,
|
||||
GENL_HDRLEN, rem) {
|
||||
err = devlink_nl_rate_tc_bw_parse(attr, tc_bw, bitmap,
|
||||
info->extack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
|
||||
if (!test_bit(i, bitmap)) {
|
||||
NL_SET_ERR_MSG_FMT(info->extack,
|
||||
"Bandwidth values must be specified for all %u traffic classes",
|
||||
DEVLINK_RATE_TCS_MAX);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (devlink_rate_is_leaf(devlink_rate))
|
||||
err = ops->rate_leaf_tc_bw_set(devlink_rate, devlink_rate->priv,
|
||||
tc_bw, info->extack);
|
||||
else if (devlink_rate_is_node(devlink_rate))
|
||||
err = ops->rate_node_tc_bw_set(devlink_rate, devlink_rate->priv,
|
||||
tc_bw, info->extack);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(devlink_rate->tc_bw, tc_bw, sizeof(tc_bw));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
|
||||
const struct devlink_ops *ops,
|
||||
struct genl_info *info)
|
||||
|
@ -388,6 +495,12 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
|
|||
return err;
|
||||
}
|
||||
|
||||
if (attrs[DEVLINK_ATTR_RATE_TC_BWS]) {
|
||||
err = devlink_nl_rate_tc_bw_set(devlink_rate, info);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -423,6 +536,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
|
|||
"TX weight set isn't supported for the leafs");
|
||||
return false;
|
||||
}
|
||||
if (attrs[DEVLINK_ATTR_RATE_TC_BWS] &&
|
||||
!ops->rate_leaf_tc_bw_set) {
|
||||
NL_SET_ERR_MSG_ATTR(info->extack,
|
||||
attrs[DEVLINK_ATTR_RATE_TC_BWS],
|
||||
"TC bandwidth set isn't supported for the leafs");
|
||||
return false;
|
||||
}
|
||||
} else if (type == DEVLINK_RATE_TYPE_NODE) {
|
||||
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
|
||||
NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
|
||||
|
@ -449,6 +569,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
|
|||
"TX weight set isn't supported for the nodes");
|
||||
return false;
|
||||
}
|
||||
if (attrs[DEVLINK_ATTR_RATE_TC_BWS] &&
|
||||
!ops->rate_node_tc_bw_set) {
|
||||
NL_SET_ERR_MSG_ATTR(info->extack,
|
||||
attrs[DEVLINK_ATTR_RATE_TC_BWS],
|
||||
"TC bandwidth set isn't supported for the nodes");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
WARN(1, "Unknown type of rate object");
|
||||
return false;
|
||||
|
|
Loading…
Add table
Reference in a new issue