net: stop napi kthreads when THREADED napi is disabled

Once the THREADED napi is disabled, the napi kthread should also be
stopped. Keeping the kthread intact after disabling THREADED napi makes
the PID of this kthread show up in the output of netlink 'napi-get' and
ps -ef output.

The is discussed in the patch below:
https://lore.kernel.org/all/20250502191548.559cc416@kernel.org

NAPI kthread should stop only if,

- There are no pending napi poll scheduled for this thread.
- There are no new napi poll scheduled for this thread while it has
  stopped.
- The ____napi_schedule can correctly fallback to the softirq for napi
  polling.

Since napi_schedule_prep provides mutual exclusion over STATE_SCHED bit,
it is safe to unset the STATE_THREADED when SCHED_THREADED is set or the
SCHED bit is not set. SCHED_THREADED being set means that SCHED is
already set and the kthread owns this napi.

To disable threaded napi, unset STATE_THREADED bit safely if
SCHED_THREADED is set or SCHED is unset. Once STATE_THREADED is unset
safely then wait for the kthread to unset the SCHED_THREADED bit so it
safe to stop the kthread.

Add a new test in nl_netdev to verify this behaviour.

Tested:
 ./tools/testing/selftests/net/nl_netdev.py
 TAP version 13
 1..6
 ok 1 nl_netdev.empty_check
 ok 2 nl_netdev.lo_check
 ok 3 nl_netdev.page_pool_check
 ok 4 nl_netdev.napi_list_check
 ok 5 nl_netdev.dev_set_threaded
 ok 6 nl_netdev.nsim_rxq_reset_down
 # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0

Ran neper for 300 seconds and did enable/disable of thread napi in a
loop continuously.

Signed-off-by: Samiullah Khawaja <skhawaja@google.com>
Link: https://patch.msgid.link/20250609173015.3851695-1-skhawaja@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Samiullah Khawaja 2025-06-09 17:30:15 +00:00 committed by Jakub Kicinski
parent 7fc18f9476
commit 689883de94
2 changed files with 79 additions and 4 deletions

View file

@ -6926,6 +6926,43 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
static void napi_stop_kthread(struct napi_struct *napi)
{
unsigned long val, new;
/* Wait until the napi STATE_THREADED is unset. */
while (true) {
val = READ_ONCE(napi->state);
/* If napi kthread own this napi or the napi is idle,
* STATE_THREADED can be unset here.
*/
if ((val & NAPIF_STATE_SCHED_THREADED) ||
!(val & NAPIF_STATE_SCHED)) {
new = val & (~NAPIF_STATE_THREADED);
} else {
msleep(20);
continue;
}
if (try_cmpxchg(&napi->state, &val, new))
break;
}
/* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by
* the kthread.
*/
while (true) {
if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state))
break;
msleep(20);
}
kthread_stop(napi->thread);
napi->thread = NULL;
}
int dev_set_threaded(struct net_device *dev, bool threaded)
{
struct napi_struct *napi;
@ -6961,8 +6998,12 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
list_for_each_entry(napi, &dev->napi_list, dev_list)
assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (!threaded && napi->thread)
napi_stop_kthread(napi);
else
assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
}
return err;
}

View file

@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@ -34,6 +35,39 @@ def napi_list_check(nf) -> None:
ksft_eq(len(napis), 100,
comment=f"queue count after reset queue {q} mode {i}")
def dev_set_threaded(nf) -> None:
"""
Test that verifies various cases of napi threaded
set and unset at device level using sysfs.
"""
with NetdevSimDev(queue_count=2) as nsimdev:
nsim = nsimdev.nsims[0]
ip(f"link set dev {nsim.ifname} up")
napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
ksft_eq(len(napis), 2)
napi0_id = napis[0]['id']
napi1_id = napis[1]['id']
# set threaded
system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
# check napi threaded is set for both napis
napi0 = nf.napi_get({'id': napi0_id})
ksft_ne(napi0.get('pid'), None)
napi1 = nf.napi_get({'id': napi1_id})
ksft_ne(napi1.get('pid'), None)
# unset threaded
system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
# check napi threaded is unset for both napis
napi0 = nf.napi_get({'id': napi0_id})
ksft_eq(napi0.get('pid'), None)
napi1 = nf.napi_get({'id': napi1_id})
ksft_eq(napi1.get('pid'), None)
def nsim_rxq_reset_down(nf) -> None:
"""
@ -122,7 +156,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
nsim_rxq_reset_down],
dev_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()