net: avoid potential race between netdev_get_by_index_lock() and netns switch

netdev_get_by_index_lock() performs following steps:

  rcu_lock();
  dev = lookup(netns, ifindex);
  dev_get(dev);
  rcu_unlock();
  [... lock & validate the dev ...]
  return dev

Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.

We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.

AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250408195956.412733-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-04-08 12:59:48 -07:00
parent 420aabef3a
commit a82dc19db1
3 changed files with 24 additions and 9 deletions

View file

@ -1952,6 +1952,7 @@ enum netdev_reg_state {
* @priv_destructor: Called from unregister * @priv_destructor: Called from unregister
* @npinfo: XXX: need comments on this one * @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside * @nd_net: Network namespace this network device is inside
* protected by @lock
* *
* @ml_priv: Mid-layer private * @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type * @ml_priv_type: Mid-layer private type
@ -2359,6 +2360,9 @@ struct net_device {
bool dismantle; bool dismantle;
/** @moving_ns: device is changing netns, protected by @lock */
bool moving_ns;
enum { enum {
RTNL_LINK_INITIALIZED, RTNL_LINK_INITIALIZED,
RTNL_LINK_INITIALIZING, RTNL_LINK_INITIALIZING,
@ -2521,7 +2525,7 @@ struct net_device {
* @net_shaper_hierarchy, @reg_state, @threaded * @net_shaper_hierarchy, @reg_state, @threaded
* *
* Double protects: * Double protects:
* @up * @up, @moving_ns, @nd_net
* *
* Double ops protects: * Double ops protects:
* @real_num_rx_queues, @real_num_tx_queues * @real_num_rx_queues, @real_num_tx_queues

View file

@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
dev_hold(dev); dev_hold(dev);
rcu_read_unlock(); rcu_read_unlock();
dev = __netdev_put_lock(dev); dev = __netdev_put_lock(dev, net);
if (!dev) if (!dev)
return NULL; return NULL;
@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
* This helper is intended for locking net_device after it has been looked up * This helper is intended for locking net_device after it has been looked up
* using a lockless lookup helper. Lock prevents the instance from going away. * using a lockless lookup helper. Lock prevents the instance from going away.
*/ */
struct net_device *__netdev_put_lock(struct net_device *dev) struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
{ {
netdev_lock(dev); netdev_lock(dev);
if (dev->reg_state > NETREG_REGISTERED) { if (dev->reg_state > NETREG_REGISTERED ||
dev->moving_ns || !net_eq(dev_net(dev), net)) {
netdev_unlock(dev); netdev_unlock(dev);
dev_put(dev); dev_put(dev);
return NULL; return NULL;
@ -1070,7 +1071,7 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
if (!dev) if (!dev)
return NULL; return NULL;
return __netdev_put_lock(dev); return __netdev_put_lock(dev, net);
} }
struct net_device * struct net_device *
@ -1090,7 +1091,7 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
dev_hold(dev); dev_hold(dev);
rcu_read_unlock(); rcu_read_unlock();
dev = __netdev_put_lock(dev); dev = __netdev_put_lock(dev, net);
if (dev) if (dev)
return dev; return dev;
@ -12157,7 +12158,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
netif_close(dev); netif_close(dev);
/* And unlink it from device chain */ /* And unlink it from device chain */
unlist_netdevice(dev); unlist_netdevice(dev);
netdev_unlock_ops(dev);
if (!netdev_need_ops_lock(dev))
netdev_lock(dev);
dev->moving_ns = true;
netdev_unlock(dev);
synchronize_net(); synchronize_net();
@ -12193,7 +12198,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
move_netdevice_notifiers_dev_net(dev, net); move_netdevice_notifiers_dev_net(dev, net);
/* Actually switch the network namespace */ /* Actually switch the network namespace */
netdev_lock(dev);
dev_net_set(dev, net); dev_net_set(dev, net);
netdev_unlock(dev);
dev->ifindex = new_ifindex; dev->ifindex = new_ifindex;
if (new_name[0]) { if (new_name[0]) {
@ -12219,7 +12226,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net); err = netdev_change_owner(dev, net_old, net);
WARN_ON(err); WARN_ON(err);
netdev_lock_ops(dev); netdev_lock(dev);
dev->moving_ns = false;
if (!netdev_need_ops_lock(dev))
netdev_unlock(dev);
/* Add the device back in the hashes */ /* Add the device back in the hashes */
list_netdevice(dev); list_netdevice(dev);
/* Notify protocols, that a new device appeared. */ /* Notify protocols, that a new device appeared. */

View file

@ -30,7 +30,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id);
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
struct net_device *__netdev_put_lock(struct net_device *dev); struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
struct net_device * struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev, netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index); unsigned long *index);