linux/tools/testing/selftests/drivers/net/stats.py
Jakub Kicinski 2baa45432d selftests: drv-net: stats: use skip instead of xfail for unsupported features
XFAIL is considered a form of a pass by our CI. For HW devices, returning
XFAIL for unsupported features is counter-productive because our CI
knows not to expect any HW test to pass until it sees 10 passes in a row.
If we return XFAIL, the test shows up as a pass even if the device doesn't
support the feature. netdevsim supports all features necessary for
the stats test, so there is no concern about running the test in SW mode.

Make the test skip rather than xfail if the driver doesn't support FEC or pause.

Link: https://patch.msgid.link/20250620161109.2146242-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-06-23 17:06:25 -07:00

289 lines
9.4 KiB
Python
Executable file

#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""
Tests related to standard netdevice statistics.
"""
import errno
import subprocess
import time
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
from lib.py import cmd, ip, defer
# Module-level netlink family handles shared by the test cases in this file.
# ethnl is used for the pause, FEC and channels queries.
ethnl = EthtoolFamily()
# netfam is used for the qstats queries.
netfam = NetdevFamily()
# rtnl is used for the getlink (interface stats / device list) queries.
rtnl = RtnlFamily()
def check_pause(cfg) -> None:
    """
    Verify that a driver exposing Pause configuration also exposes the
    standard pause statistics.

    The device is first queried without the stats flag: EOPNOTSUPP on that
    query turns the test into a skip, any other netlink error is re-raised.
    The query is then repeated with the 'stats' flag and the reply is
    expected to carry a truthy 'stats' entry.
    """
    ifindex = cfg.ifindex

    try:
        ethnl.pause_get({"header": {"dev-index": ifindex}})
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("pause not supported by the device") from err

    stats_req = {"header": {"dev-index": ifindex,
                            "flags": {'stats'}}}
    reply = ethnl.pause_get(stats_req)
    ksft_true(reply['stats'], "driver does not report stats")
def check_fec(cfg) -> None:
    """
    Verify that a driver exposing FEC configuration also exposes the
    standard FEC statistics.

    A plain FEC query failing with EOPNOTSUPP results in a skip; other
    netlink errors propagate. The follow-up query sets the 'stats' flag and
    the reply must contain a truthy 'stats' entry.
    """
    # Probe without stats first, so missing FEC support is a skip.
    try:
        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as err:
        unsupported = err.error == errno.EOPNOTSUPP
        if not unsupported:
            raise
        raise KsftSkipEx("FEC not supported by the device") from err

    stats_hdr = {"dev-index": cfg.ifindex, "flags": {'stats'}}
    fec = ethnl.fec_get({"header": stats_hdr})
    ksft_true(fec['stats'], "driver does not report stats")
def pkt_byte_sum(cfg) -> None:
    """
    Verify that qstats and rtnl interface stats agree on the basic
    packet/byte counters.

    Skips when the qstats dump contains no entry for the device under test.
    Raises KsftFailEx when a sample fetched later reports lower counters
    than a sample fetched earlier.
    """
    basic_keys = ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']

    def get_qstat(env):
        # Pick the entry for this device out of a full dump, None if absent.
        dump = netfam.qstats_get({}, dump=True)
        return next((item for item in (dump or [])
                     if item["ifindex"] == env.ifindex), None)

    def stat_cmp(rstat, qstat):
        # Signed difference of the first basic counter that disagrees, else 0.
        for name in basic_keys:
            delta = rstat[name] - qstat[name]
            if delta:
                return delta
        return 0

    qstat = get_qstat(cfg)
    if qstat is None:
        raise KsftSkipEx("qstats not supported by the device")

    for name in basic_keys:
        ksft_in(name, qstat, "Drivers should always report basic keys")

    # The two sources must agree, but traffic may be flowing, so alternate
    # the reads: whichever sample was taken later must never be lower.
    for _ in range(10):
        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
        if stat_cmp(rtstat, qstat) < 0:
            raise KsftFailEx("RTNL stats are lower, fetched later")

        qstat = get_qstat(cfg)
        if stat_cmp(rtstat, qstat) > 0:
            raise KsftFailEx("Qstats are lower, fetched later")
def qstat_by_ifindex(cfg) -> None:
    """
    Qstats Netlink API tests - querying by ifindex.

    Checks that:
     - querying a single ifindex returns exactly one entry, whose counters
       are not lower than a full dump taken before it and not higher than
       a full dump taken after it;
     - the per-queue dump has no repeated and no missing queue ids for
       either queue type;
     - invalid ifindexes (0, 1 / loopback, the lowest unused index) are
       rejected with the expected errno and a bad-attr of '.ifindex'.

    The cfg argument is not used by this test; it is accepted because all
    test cases are invoked with the same arguments.

    Raises KsftSkipEx when no interface shows up in the qstats dump.
    """
    # Construct a map ifindex -> [dump, by-index, dump]
    ifindexes = {}
    for entry in netfam.qstats_get({}, dump=True):
        ifindexes[entry['ifindex']] = [entry, None, None]

    for ifindex in ifindexes:
        entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
        ksft_eq(len(entry), 1)
        ifindexes[entry[0]['ifindex']][1] = entry[0]

    for entry in netfam.qstats_get({}, dump=True):
        ifindexes[entry['ifindex']][2] = entry

    if not ifindexes:
        raise KsftSkipEx("No ifindex supports qstats")

    # Now make sure the stats match/make sense
    for before, by_index, after in ifindexes.values():
        all_keys = before.keys() | by_index.keys() | after.keys()

        for key in all_keys:
            ksft_ge(by_index[key], before[key], comment="bad key: " + key)
            ksft_ge(after[key], by_index[key], comment="bad key: " + key)

    # Sanity check the dumps
    # NOTE(review): the small recv_size presumably forces the dump to span
    # several messages - confirm.
    queue_dump = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"},
                                                         dump=True)
    # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
    parsed = {}
    for entry in queue_dump:
        per_dev = parsed.setdefault(entry["ifindex"], {"rx": [], "tx": []})
        per_dev[entry["queue-type"]].append(entry['queue-id'])
    # Now, validate
    for per_dev in parsed.values():
        for qtype in ['rx', 'tx']:
            ids = per_dev[qtype]
            ksft_eq(len(ids), len(set(ids)),
                    comment="repeated queue keys")
            # default=-1: a device reporting queues of only one type leaves
            # the other list empty, which must not crash max().
            ksft_eq(len(ids), max(ids, default=-1) + 1,
                    comment="missing queue keys")

    # Test invalid dumps
    # 0 is invalid
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 0}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ERANGE)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # loopback has no stats
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 1}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # Try to get stats for lowest unused ifindex but not 0
    used_ifindexes = {dev["ifi-index"] for dev in rtnl.getlink({}, dump=True)}
    lowest = 2
    while lowest in used_ifindexes:
        lowest += 1

    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": lowest}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
@ksft_disruptive
def check_down(cfg) -> None:
    """
    Test that statistics (qstats) do not go backwards when the interface
    is brought down.

    Skips when the qstats query fails with EOPNOTSUPP. The interface is
    set down for the comparison and a deferred command sets it up again.
    """
    try:
        before = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("qstats not supported by the device") from err

    ifname = cfg.dev['ifname']
    ip(f"link set dev {ifname} down")
    defer(ip, f"link set dev {ifname} up")

    after = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
    for name, old_val in before.items():
        ksft_ge(after[name], old_val,
                comment=f"{name} went backwards on device down")

    # Also poke the per-queue API while the device is down; the result is
    # ignored, the point is that the query is handled and doesn't crash.
    netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
def __run_inf_loop(body):
    """
    Start a shell running @body in an endless `while true` loop.

    @body is stripped and terminated with ';' if it isn't already.
    Returns the subprocess.Popen object; stdout and stderr of the shell
    are attached to pipes.
    """
    stmt = body.strip()
    stmt = stmt if stmt[-1] == ';' else stmt + ';'

    script = f"while true; do {stmt} done"
    return subprocess.Popen(script, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def __stats_increase_sanely(old, new) -> None:
    """
    Check every counter present in @old against @new: it must not have
    decreased, and must have grown by less than 2^31.
    """
    for name, before in old.items():
        ksft_ge(new[name], before)
        ksft_lt(new[name] - before, 1 << 31, comment="likely wrapping error")
def procfs_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, which is not an exclusive
    lock, make sure drivers can handle parallel reads of stats.

    Two shells read /proc/net/dev in endless loops while the rtnl stats of
    the device are sampled two seconds apart and checked for sane growth.
    The loops are killed by the deferred cleanups.
    """
    # __run_inf_loop() attaches stdout to a pipe which nobody drains here.
    # Discard the output in the shell, otherwise cat blocks as soon as the
    # pipe fills up and the readers stop hitting procfs.
    reader_cmd = "cat /proc/net/dev > /dev/null"

    one = __run_inf_loop(reader_cmd)
    defer(one.kill)
    two = __run_inf_loop(reader_cmd)
    defer(two.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(one.poll(), None)
    ksft_is(two.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    time.sleep(2)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)
    # defers will kill the loops
@ksft_disruptive
def procfs_downup_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, drivers often try
    to sleep when reading the stats, or don't protect against races.

    Runs an endless /proc/net/dev reader in parallel with an endless loop
    which takes the interface down and up and flips the queue count between
    1 and the maximum. The rtnl stats are sampled 9 seconds apart and
    checked for sane growth, then the number of completed down/up cycles
    is printed. Deferred cleanups restore the queue count, bring the
    interface up and kill the helper processes.
    """
    # Max out the queues, we'll flip between max and 1
    channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    # NOTE(review): a reply without 'combined-count' is treated like a count
    # of 0 (use the rx channels) rather than failing with KeyError.
    if channels.get('combined-count', 0) == 0:
        rx_type = 'rx'
    else:
        rx_type = 'combined'
    cur_queue_cnt = channels[f'{rx_type}-count']
    max_queue_cnt = channels[f'{rx_type}-max']

    cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
    defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")

    # Real test stats
    # The reader's stdout is a pipe which is never drained, discard the
    # output in the shell so cat doesn't block once the pipe is full.
    stats = __run_inf_loop("cat /proc/net/dev > /dev/null")
    defer(stats.kill)

    ipset = f"ip link set dev {cfg.ifname}"
    defer(ip, f"link set dev {cfg.ifname} up")
    # The "echo -n 1" lets us count iterations below
    cycle_cmd = (f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; "
                 f"ethtool -L {cfg.ifname} {rx_type} 1; "
                 f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; "
                 "echo -n 1")
    updown = __run_inf_loop(cycle_cmd)
    kill_updown = defer(updown.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(stats.poll(), None)
    ksft_is(updown.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    # We're looking for crashes, give it extra time
    time.sleep(9)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)

    # Stop the down/up loop now so its output can be collected; each
    # completed iteration printed a single "1".
    kill_updown.exec()
    stdout, _ = updown.communicate(timeout=5)
    ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
def main() -> None:
    """ Ksft boilerplate: run every test case against one NetDrvEnv. """
    cases = [
        check_pause,
        check_fec,
        pkt_byte_sum,
        qstat_by_ifindex,
        check_down,
        procfs_hammer,
        procfs_downup_hammer,
    ]

    with NetDrvEnv(__file__, queue_count=100) as cfg:
        ksft_run(cases, args=(cfg, ))
    ksft_exit()
# Run the tests only when this file is executed as a script.
if __name__ == "__main__":
    main()