ipv6: Factorise ip6_route_multipath_add().

We will get rid of RTNL from RTM_NEWROUTE and SIOCADDRT and rely
on RCU to guarantee dev and nexthop lifetime.

Then, the RCU section will start before ip6_route_info_create_nh()
in ip6_route_multipath_add(), but ip6_route_info_create() is called
in the same loop and may sleep because it allocates with GFP_KERNEL.

Let's split the loop into ip6_route_mpath_info_create() and
ip6_route_mpath_info_create_nh().

Note that ip6_route_info_append() is now integrated into
ip6_route_mpath_info_create_nh() because we need to call different
free functions for nexthops that passed ip6_route_info_create_nh().

In case of failure, the remaining nexthops for which
ip6_route_info_create_nh() has not been called yet will be freed by
ip6_route_mpath_info_cleanup().

On the other hand, if a nexthop passes ip6_route_info_create_nh(), it is
moved to a local temporary list, and that list is spliced back into
rt6_nh_list afterwards. In case of failure, these nexthops will be
released by fib6_info_release() in ip6_route_multipath_add().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250418000443.43734-12-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Kuniyuki Iwashima 2025-04-17 17:03:52 -07:00 committed by Paolo Abeni
parent 5a1ccff5c6
commit 71c0efb6d1

View file

@ -5316,29 +5316,131 @@ struct rt6_nh {
struct fib6_info *fib6_info;
struct fib6_config r_cfg;
struct list_head list;
int weight;
};
/*
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so
 * lines that appear to belong to the removed ip6_route_info_append() are
 * interleaved with the added ip6_route_mpath_info_cleanup() - confirm
 * against the real patch before treating it as compilable code.
 *
 * ip6_route_mpath_info_cleanup() walks rt6_nh_list, frees the fib6_info
 * attached to each entry (if any) piece by piece, then unlinks and frees
 * the entry itself.
 */
static int ip6_route_info_append(struct list_head *rt6_nh_list,
struct fib6_info *rt,
struct fib6_config *r_cfg)
static void ip6_route_mpath_info_cleanup(struct list_head *rt6_nh_list)
{
struct rt6_nh *nh;
int err = -EEXIST;
struct rt6_nh *nh, *nh_next;
list_for_each_entry(nh, rt6_nh_list, list) {
/* check if fib6_info already exists */
if (rt6_duplicate_nexthop(nh->fib6_info, rt))
return err;
/* _safe variant: the current entry is unlinked and freed in the body */
list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) {
struct fib6_info *rt = nh->fib6_info;
/* fib6_info is NULL for entries whose route was never created or
 * whose pointer was cleared by a caller before cleanup
 */
if (rt) {
/* released field by field, ending with kfree(), instead of going
 * through fib6_info_release(); the commit message reserves that
 * for nexthops that passed ip6_route_info_create_nh()
 */
free_percpu(rt->fib6_nh->nh_common.nhc_pcpu_rth_output);
free_percpu(rt->fib6_nh->rt6i_pcpu);
ip_fib_metrics_put(rt->fib6_metrics);
kfree(rt);
}
list_del(&nh->list);
kfree(nh);
}
}
/*
 * ip6_route_mpath_info_create - build one rt6_nh entry per multipath nexthop
 * @rt6_nh_list: list that receives the newly allocated rt6_nh entries
 * @cfg: route configuration; cfg->fc_mp / cfg->fc_mp_len hold the rtnexthop
 *       records that are walked here
 * @extack: passed through to ip6_route_info_create()
 *
 * For every rtnexthop record a private copy of @cfg is adjusted with the
 * per-nexthop ifindex, gateway, encap and ONLINK settings, a fib6_info is
 * created from it, and the result is stored in the entry together with the
 * weight and the adjusted config.
 *
 * Returns 0 on success. On failure returns -ENOMEM or the error encoded in
 * the pointer returned by ip6_route_info_create(), after handing
 * @rt6_nh_list to ip6_route_mpath_info_cleanup().
 */
static int ip6_route_mpath_info_create(struct list_head *rt6_nh_list,
struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct rtnexthop *rtnh;
int remaining;
int err;
remaining = cfg->fc_mp_len;
rtnh = (struct rtnexthop *)cfg->fc_mp;
/* Parse a Multipath Entry and build a list (rt6_nh_list) of
* fib6_info structs per nexthop
*/
while (rtnh_ok(rtnh, remaining)) {
struct fib6_config r_cfg;
struct fib6_info *rt;
struct rt6_nh *nh;
int attrlen;
nh = kzalloc(sizeof(*nh), GFP_KERNEL);
if (!nh) {
err = -ENOMEM;
goto err;
}
/* linked before the route is created, so the err path below also
 * frees this entry (its fib6_info is still NULL from kzalloc)
 */
list_add_tail(&nh->list, rt6_nh_list);
/* start from the route-wide config, then apply per-nexthop overrides */
memcpy(&r_cfg, cfg, sizeof(*cfg));
if (rtnh->rtnh_ifindex)
r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
/* only the ONLINK bit of the nexthop flags is carried over */
r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto err;
}
nh->fib6_info = rt;
/* weight is recorded in the entry here rather than written to the
 * fib6_nh directly
 */
nh->weight = rtnh->rtnh_hops + 1;
memcpy(&nh->r_cfg, &r_cfg, sizeof(r_cfg));
rtnh = rtnh_next(rtnh, &remaining);
}
/* NOTE(review): the six statements below look like leftover lines of the
 * removed ip6_route_info_append() from the marker-less diff (they sit after
 * the loop and use r_cfg as a pointer, while nh, rt and r_cfg are declared
 * inside the loop) - confirm against the real patch.
 */
nh = kzalloc(sizeof(*nh), GFP_KERNEL);
if (!nh)
return -ENOMEM;
nh->fib6_info = rt;
memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
list_add_tail(&nh->list, rt6_nh_list);
return 0;
err:
ip6_route_mpath_info_cleanup(rt6_nh_list);
return err;
}
/*
 * ip6_route_mpath_info_create_nh - finish nexthop setup for each list entry
 * @rt6_nh_list: list of rt6_nh entries whose fib6_info has been created
 * @extack: passed through to ip6_route_info_create_nh()
 *
 * Calls ip6_route_info_create_nh() for every entry, applies the stored
 * weight, and moves each entry that succeeded onto a local list. That local
 * list is spliced back into @rt6_nh_list before returning, on success and
 * on failure alike.
 *
 * On failure, the entries still on @rt6_nh_list at that point are handed to
 * ip6_route_mpath_info_cleanup(); the entries already moved to the local
 * list are returned to the caller through @rt6_nh_list.
 *
 * Returns 0 on success (including an empty list), the error from
 * ip6_route_info_create_nh(), or -EEXIST when a duplicate nexthop is found.
 */
static int ip6_route_mpath_info_create_nh(struct list_head *rt6_nh_list,
					  struct netlink_ext_ack *extack)
{
	struct rt6_nh *nh, *nh_next, *nh_tmp;
	LIST_HEAD(tmp);
	/* Initialised so that an empty list yields 0 instead of an
	 * indeterminate value: the loop body never runs in that case.
	 */
	int err = 0;

	list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) {
		struct fib6_info *rt = nh->fib6_info;

		err = ip6_route_info_create_nh(rt, &nh->r_cfg, extack);
		if (err) {
			/* Clear the pointer so the cleanup below only frees
			 * the list entry and does not touch rt again.
			 */
			nh->fib6_info = NULL;
			goto err;
		}

		rt->fib6_nh->fib_nh_weight = nh->weight;

		/* From here on the entry is no longer covered by
		 * ip6_route_mpath_info_cleanup() on the error path.
		 */
		list_move_tail(&nh->list, &tmp);

		/* NOTE(review): this scan walks the entries still on
		 * rt6_nh_list, i.e. those ip6_route_info_create_nh() has not
		 * processed yet - confirm rt6_duplicate_nexthop() gives a
		 * meaningful answer for them.
		 */
		list_for_each_entry(nh_tmp, rt6_nh_list, list) {
			/* check if fib6_info already exists */
			if (rt6_duplicate_nexthop(nh_tmp->fib6_info, rt)) {
				err = -EEXIST;
				goto err;
			}
		}
	}
out:
	list_splice(&tmp, rt6_nh_list);
	return err;
err:
	ip6_route_mpath_info_cleanup(rt6_nh_list);
	goto out;
}
static void ip6_route_mpath_notify(struct fib6_info *rt,
@ -5397,75 +5499,28 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
struct fib6_info *rt;
struct rt6_nh *err_nh;
struct rt6_nh *nh, *nh_safe;
__u16 nlflags;
int remaining;
int attrlen;
int err = 1;
int nhn = 0;
int replace = (cfg->fc_nlinfo.nlh &&
(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
LIST_HEAD(rt6_nh_list);
struct rt6_nh *err_nh;
__u16 nlflags;
int nhn = 0;
int replace;
int err;
replace = (cfg->fc_nlinfo.nlh &&
(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
nlflags |= NLM_F_APPEND;
remaining = cfg->fc_mp_len;
rtnh = (struct rtnexthop *)cfg->fc_mp;
err = ip6_route_mpath_info_create(&rt6_nh_list, cfg, extack);
if (err)
return err;
/* Parse a Multipath Entry and build a list (rt6_nh_list) of
* fib6_info structs per nexthop
*/
while (rtnh_ok(rtnh, remaining)) {
memcpy(&r_cfg, cfg, sizeof(*cfg));
if (rtnh->rtnh_ifindex)
r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
goto cleanup;
}
err = ip6_route_info_create_nh(rt, &r_cfg, extack);
if (err) {
rt = NULL;
goto cleanup;
}
rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
fib6_info_release(rt);
goto cleanup;
}
rtnh = rtnh_next(rtnh, &remaining);
}
err = ip6_route_mpath_info_create_nh(&rt6_nh_list, extack);
if (err)
goto cleanup;
/* for add and replace send one notification with all nexthops.
* Skip the notification in fib6_add_rt2node and send one with