mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00
ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups.
Thanks to excellent diagnosis by Eduard Guzovsky. The core problem is that on a network with lots of active multicast traffic, the neighbour cache can fill up. If we try to allocate a new route and thus neighbour cache entry, the bog-standard GC attempt the neighbour layer does in ineffective because route entries hold a reference to the existing neighbour entries and GC can only liberate entries with no references. IPV4 already has a way to handle this, by doing a route cache GC in such situations (when neigh attach returns -ENOBUFS). So simply mimick this on the ipv6 side. Tested-by: Eduard Guzovsky <eguzovsky@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
eb4dea5853
commit
14deae4156
2 changed files with 49 additions and 7 deletions
|
@ -155,9 +155,9 @@ static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, const s
|
|||
{
|
||||
|
||||
if (dev)
|
||||
return __neigh_lookup(&nd_tbl, addr, dev, 1);
|
||||
return __neigh_lookup_errno(&nd_tbl, addr, dev);
|
||||
|
||||
return NULL;
|
||||
return ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
|
|||
rt = ip6_rt_copy(ort);
|
||||
|
||||
if (rt) {
|
||||
struct neighbour *neigh;
|
||||
int attempts = !in_softirq();
|
||||
|
||||
if (!(rt->rt6i_flags&RTF_GATEWAY)) {
|
||||
if (rt->rt6i_dst.plen != 128 &&
|
||||
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
|
||||
|
@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
|
|||
}
|
||||
#endif
|
||||
|
||||
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
|
||||
retry:
|
||||
neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
|
||||
if (IS_ERR(neigh)) {
|
||||
struct net *net = dev_net(rt->rt6i_dev);
|
||||
int saved_rt_min_interval =
|
||||
net->ipv6.sysctl.ip6_rt_gc_min_interval;
|
||||
int saved_rt_elasticity =
|
||||
net->ipv6.sysctl.ip6_rt_gc_elasticity;
|
||||
|
||||
if (attempts-- > 0) {
|
||||
net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
|
||||
net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
|
||||
|
||||
ip6_dst_gc(net->ipv6.ip6_dst_ops);
|
||||
|
||||
net->ipv6.sysctl.ip6_rt_gc_elasticity =
|
||||
saved_rt_elasticity;
|
||||
net->ipv6.sysctl.ip6_rt_gc_min_interval =
|
||||
saved_rt_min_interval;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (net_ratelimit())
|
||||
printk(KERN_WARNING
|
||||
"Neighbour table overflow.\n");
|
||||
dst_free(&rt->u.dst);
|
||||
return NULL;
|
||||
}
|
||||
rt->rt6i_nexthop = neigh;
|
||||
|
||||
}
|
||||
|
||||
|
@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
|
|||
dev_hold(dev);
|
||||
if (neigh)
|
||||
neigh_hold(neigh);
|
||||
else
|
||||
else {
|
||||
neigh = ndisc_get_neigh(dev, addr);
|
||||
if (IS_ERR(neigh))
|
||||
neigh = NULL;
|
||||
}
|
||||
|
||||
rt->rt6i_dev = dev;
|
||||
rt->rt6i_idev = idev;
|
||||
|
@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
|
|||
{
|
||||
struct net *net = dev_net(idev->dev);
|
||||
struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
|
||||
struct neighbour *neigh;
|
||||
|
||||
if (rt == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
|
|||
rt->rt6i_flags |= RTF_ANYCAST;
|
||||
else
|
||||
rt->rt6i_flags |= RTF_LOCAL;
|
||||
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
|
||||
if (rt->rt6i_nexthop == NULL) {
|
||||
neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
|
||||
if (IS_ERR(neigh)) {
|
||||
dst_free(&rt->u.dst);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* We are casting this because that is the return
|
||||
* value type. But an errno encoded pointer is the
|
||||
* same regardless of the underlying pointer type,
|
||||
* and that's what we are returning. So this is OK.
|
||||
*/
|
||||
return (struct rt6_info *) neigh;
|
||||
}
|
||||
rt->rt6i_nexthop = neigh;
|
||||
|
||||
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
|
||||
rt->rt6i_dst.plen = 128;
|
||||
|
|
Loading…
Add table
Reference in a new issue