linux/tools/testing/selftests/cgroup/test_cpu.c
Shashank Balaji 954bacce36 selftests/cgroup: fix cpu.max tests
Current cpu.max tests (both the normal one and the nested one) are broken.

They set up cpu.max with a 1000 us quota and the default period (100,000 us),
and run a cpu hog for 1 s of wall-clock time. This corresponds to 10 periods,
hence an expected usage of 10,000 us. We want the measured usage (as per
cpu.stat) to be close to 10,000 us.

Previously, this approximate-equality check was
`!values_close(usage_usec, expected_usage_usec, 95)`: the test passes if the
absolute difference between usage_usec and expected_usage_usec is greater than
95% of their sum, and expected_usage_usec was set to 1,000,000 us.
Mathematically, this translates to the following being true for pass:

	|usage - expected_usage| > (usage + expected_usage)*0.95

	If usage > expected_usage:
		usage - expected_usage > (usage + expected_usage)*0.95
		0.05*usage > 1.95*expected_usage
		usage > 39*expected_usage = 39s

	If usage < expected_usage:
		expected_usage - usage > (usage + expected_usage)*0.95
		0.05*expected_usage > 1.95*usage
		usage < 0.0256*expected_usage = 25,600 us

Combined,

	Pass if usage < 25,600 us or > 39 s,

which makes no sense given that all we need is for usage_usec to be close to
10,000 us.
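
For reference, values_close(a, b, err) (from cgroup_util.c) implements exactly
the check described above, roughly |a - b| <= (a + b) * err / 100. Plugging in
the ~10,000 us the hog is actually allowed to run:

	|10,000 - 1,000,000| = 990,000 > (10,000 + 1,000,000) * 0.95 = 959,500

so the broken tests still reported "ok", but only because the measured usage
fell below the meaningless 25,600 us bound, not because throttling was
verified.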

Fix this by explicitly calculating the expected usage duration from the
configured quota, the default period, and the run duration, and by comparing
usage_usec against expected_usage_usec using values_close() with a 10% error
margin.

Also, use snprintf to generate the quota string written to cpu.max instead of
hardcoding the quota, so there is a single source of truth.
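
With these two changes, the expected usage and the pass condition in
test_cpucg_max() (and its nested variant) become, as excerpted from the
updated file below:

	n_periods = duration_usec / default_period_usec;
	remainder_usec = duration_usec - n_periods * default_period_usec;
	expected_usage_usec
		= n_periods * quota_usec + MIN(remainder_usec, quota_usec);

	if (!values_close(usage_usec, expected_usage_usec, 10))
		goto cleanup;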

Remove the check comparing user_usec and expected_usage_usec: running this
test with printfs added shows that user_usec and usage_usec can regularly
exceed the theoretical expected_usage_usec:

	$ sudo ./test_cpu
	user: 10485, usage: 10485, expected: 10000
	ok 1 test_cpucg_max
	user: 11127, usage: 11127, expected: 10000
	ok 2 test_cpucg_max_nested
	$ sudo ./test_cpu
	user: 10286, usage: 10286, expected: 10000
	ok 1 test_cpucg_max
	user: 10404, usage: 11271, expected: 10000
	ok 2 test_cpucg_max_nested

Hence, a values_close() check of usage_usec and expected_usage_usec is
sufficient.

Fixes: a79906570f ("cgroup: Add test_cpucg_max_nested() testcase")
Fixes: 889ab8113e ("cgroup: Add test_cpucg_max() testcase")
Acked-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Shashank Balaji <shashank.mahadasyam@sony.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2025-07-17 08:12:19 -10:00

// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <linux/limits.h>
#include <sys/param.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include "../kselftest.h"
#include "cgroup_util.h"

enum hog_clock_type {
	// Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
	CPU_HOG_CLOCK_PROCESS,
	// Count elapsed time using system wallclock time.
	CPU_HOG_CLOCK_WALL,
};
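
/* Per-child bookkeeping: cgroup path, hogger pid, and measured usage_usec. */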
struct cpu_hogger {
	char *cgroup;
	pid_t pid;
	long usage;
};
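
/* Arguments for hog_cpus_timed(): thread count, run time, and clock source. */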
struct cpu_hog_func_param {
	int nprocs;
	struct timespec ts;
	enum hog_clock_type clock_type;
};

/*
 * This test creates two nested cgroups with and without enabling
 * the cpu controller.
 */
static int test_cpucg_subtree_control(const char *root)
{
	char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;

	// Create two nested cgroups with the cpu controller enabled.
	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	child = cg_name(parent, "cpucg_test_child");
	if (!child)
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
		goto cleanup;

	// Create two nested cgroups without enabling the cpu controller.
	parent2 = cg_name(root, "cpucg_test_1");
	if (!parent2)
		goto cleanup;

	if (cg_create(parent2))
		goto cleanup;

	child2 = cg_name(parent2, "cpucg_test_child");
	if (!child2)
		goto cleanup;

	if (cg_create(child2))
		goto cleanup;

	if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(child2);
	free(child2);
	cg_destroy(parent);
	free(parent);
	cg_destroy(parent2);
	free(parent2);

	return ret;
}
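
/* Thread body for the CPU hogs: spin forever until the process exits. */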
static void *hog_cpu_thread_func(void *arg)
{
	while (1)
		;

	return NULL;
}
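
/* Compute lhs - rhs, clamping the result to zero if it would be negative. */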
static struct timespec
timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
{
	struct timespec zero = {
		.tv_sec = 0,
		.tv_nsec = 0,
	};
	struct timespec ret;

	if (lhs->tv_sec < rhs->tv_sec)
		return zero;
	ret.tv_sec = lhs->tv_sec - rhs->tv_sec;
	if (lhs->tv_nsec < rhs->tv_nsec) {
		if (ret.tv_sec == 0)
			return zero;
		ret.tv_sec--;
		ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
	} else
		ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;

	return ret;
}
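
/*
 * Spawn param->nprocs spinning threads, then sleep until param->ts has
 * elapsed on either the process CPU-time clock or the wall clock, depending
 * on param->clock_type.
 */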
static int hog_cpus_timed(const char *cgroup, void *arg)
{
	const struct cpu_hog_func_param *param =
		(struct cpu_hog_func_param *)arg;
	struct timespec ts_run = param->ts;
	struct timespec ts_remaining = ts_run;
	struct timespec ts_start;
	int i, ret;

	ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
	if (ret != 0)
		return ret;

	for (i = 0; i < param->nprocs; i++) {
		pthread_t tid;

		ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
		if (ret != 0)
			return ret;
	}

	while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
		struct timespec ts_total;

		ret = nanosleep(&ts_remaining, NULL);
		if (ret && errno != EINTR)
			return ret;

		if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
			ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
			if (ret != 0)
				return ret;
		} else {
			struct timespec ts_current;

			ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
			if (ret != 0)
				return ret;

			ts_total = timespec_sub(&ts_current, &ts_start);
		}

		ts_remaining = timespec_sub(&ts_run, &ts_total);
	}

	return 0;
}

/*
 * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
 * cpu.stat shows the expected output.
 */
static int test_cpucg_stats(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec, system_usec;
	long usage_seconds = 2;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
	if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_PROCESS,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (!values_close(usage_usec, expected_usage_usec, 1))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

/*
 * Creates a nice process that consumes CPU and checks that the elapsed
 * usertime in the cgroup is close to the expected time.
 */
static int test_cpucg_nice(const char *root)
{
	int ret = KSFT_FAIL;
	int status;
	long user_usec, nice_usec;
	long usage_seconds = 2;
	long expected_nice_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;
	pid_t pid;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
	if (nice_usec == -1)
		ret = KSFT_SKIP;
	if (user_usec != 0 || nice_usec != 0)
		goto cleanup;

	/*
	 * We fork here to create a new process that can be niced without
	 * polluting the nice value of other selftests
	 */
	pid = fork();
	if (pid < 0) {
		goto cleanup;
	} else if (pid == 0) {
		struct cpu_hog_func_param param = {
			.nprocs = 1,
			.ts = {
				.tv_sec = usage_seconds,
				.tv_nsec = 0,
			},
			.clock_type = CPU_HOG_CLOCK_PROCESS,
		};
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cpucg, "cgroup.procs", buf))
			goto cleanup;

		/* Try to keep niced CPU usage as constrained to hog_cpu as possible */
		nice(1);
		hog_cpus_timed(cpucg, &param);
		exit(0);
	} else {
		waitpid(pid, &status, 0);
		if (!WIFEXITED(status))
			goto cleanup;

		user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
		nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
		if (!values_close(nice_usec, expected_nice_usec, 1))
			goto cleanup;

		ret = KSFT_PASS;
	}

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}
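
/*
 * Common harness for the cpu.weight tests: creates a parent cgroup with three
 * children at weights 50, 100 and 150, starts spawn_child() in each child,
 * waits for the hoggers to exit, reads each child's usage_usec and hands the
 * results to validate().
 */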
static int
run_cpucg_weight_test(
		const char *root,
		pid_t (*spawn_child)(const struct cpu_hogger *child),
		int (*validate)(const struct cpu_hogger *children, int num_children))
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL;
	struct cpu_hogger children[3] = {};

	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
		if (!children[i].cgroup)
			goto cleanup;

		if (cg_create(children[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(children[i].cgroup, "cpu.weight",
					50 * (i + 1)))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		pid_t pid = spawn_child(&children[i]);
		if (pid <= 0)
			goto cleanup;
		children[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int retcode;

		waitpid(children[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++)
		children[i].usage = cg_read_key_long(children[i].cgroup,
				"cpu.stat", "usage_usec");

	if (validate(children, ARRAY_SIZE(children)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		cg_destroy(children[i].cgroup);
		free(children[i].cgroup);
	}
	cg_destroy(parent);
	free(parent);

	return ret;
}
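
/*
 * Start a process in the child's cgroup that runs ncpus CPU-hog threads for
 * 10 s of wall-clock time; returns the pid from cg_run_nowait().
 */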
static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
{
	long usage_seconds = 10;
	struct cpu_hog_func_param param = {
		.nprocs = ncpus,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};

	return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
}

static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, get_nprocs());
}
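
/*
 * With every CPU contended, children should receive CPU time proportional to
 * their weight: each child must use strictly more than the previous one, and
 * each increment should be close to the lowest-weight child's usage.
 */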
static int
overprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		long delta;

		if (children[i + 1].usage <= children[i].usage)
			goto cleanup;

		delta = children[i + 1].usage - children[i].usage;
		if (!values_close(delta, children[0].usage, 35))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B cpu.weight = 50
 * A/C cpu.weight = 100
 * A/D cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns as
 * many threads as there are cores, and hogs each CPU as much as possible
 * for some time interval.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * was given proportional runtime as informed by their cpu.weight.
 */
static int test_cpucg_weight_overprovisioned(const char *root)
{
	return run_cpucg_weight_test(root, weight_hog_all_cpus,
			overprovision_validate);
}

static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, 1);
}
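
/*
 * With one hog thread per child and idle CPUs to spare, weight should not
 * matter: every child's usage must be close to the first child's.
 */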
static int
underprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		if (!values_close(children[i + 1].usage, children[0].usage, 15))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B cpu.weight = 50
 * A/C cpu.weight = 100
 * A/D cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns a
 * single thread that hogs a CPU. The testcase is only run on systems that
 * have at least one core per-thread in the child processes.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * had roughly the same runtime despite having different cpu.weight.
 */
static int test_cpucg_weight_underprovisioned(const char *root)
{
	// Only run the test if there are enough cores to avoid overprovisioning
	// the system.
	if (get_nprocs() < 4)
		return KSFT_SKIP;

	return run_cpucg_weight_test(root, weight_hog_one_cpu,
			underprovision_validate);
}
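
/*
 * Common harness for the nested cpu.weight tests: builds the A, A/B, A/C,
 * A/C/D, A/C/E hierarchy described below (B is a direct leaf of A; D and E
 * sit under C), runs a wall-clock CPU hog in each leaf, and compares B's
 * usage_usec against the combined usage of D and E for the overprovisioned
 * or underprovisioned case.
 */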
static int
run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL, *child = NULL;
	struct cpu_hogger leaf[3] = {};
	long nested_leaf_usage, child_usage;
	int nprocs = get_nprocs();

	if (!overprovisioned) {
		if (nprocs < 4)
			/*
			 * Only run the test if there are enough cores to avoid
			 * overprovisioning the system.
			 */
			return KSFT_SKIP;
		nprocs /= 4;
	}

	parent = cg_name(root, "cpucg_test");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;
	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;
	if (cg_write(child, "cgroup.subtree_control", "+cpu"))
		goto cleanup;
	if (cg_write(child, "cpu.weight", "1000"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		const char *ancestor;
		long weight;

		if (i == 0) {
			ancestor = parent;
			weight = 1000;
		} else {
			ancestor = child;
			weight = 5000;
		}
		leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
		if (!leaf[i].cgroup)
			goto cleanup;

		if (cg_create(leaf[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		pid_t pid;
		struct cpu_hog_func_param param = {
			.nprocs = nprocs,
			.ts = {
				.tv_sec = 10,
				.tv_nsec = 0,
			},
			.clock_type = CPU_HOG_CLOCK_WALL,
		};

		pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
				(void *)&param);
		if (pid <= 0)
			goto cleanup;
		leaf[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		int retcode;

		waitpid(leaf[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
				"cpu.stat", "usage_usec");
		if (leaf[i].usage <= 0)
			goto cleanup;
	}

	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
	if (overprovisioned) {
		if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
			goto cleanup;
	} else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
		goto cleanup;

	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
	if (child_usage <= 0)
		goto cleanup;
	if (!values_close(child_usage, nested_leaf_usage, 1))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		cg_destroy(leaf[i].cgroup);
		free(leaf[i].cgroup);
	}
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B cpu.weight = 1000
 * A/C cpu.weight = 1000
 * A/C/D cpu.weight = 5000
 * A/C/E cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns nproc
 * threads that burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that each of the leaf
 * cgroups has roughly the same usage from cpu.stat.
 */
static int
test_cpucg_nested_weight_overprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, true);
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B cpu.weight = 1000
 * A/C cpu.weight = 1000
 * A/C/D cpu.weight = 5000
 * A/C/E cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns nproc / 4
 * threads that burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that each of the leaf
 * cgroups has roughly the same usage from cpu.stat.
 */
static int
test_cpucg_nested_weight_underprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, false);
}

/*
 * This test creates a cgroup with some maximum value within a period, and
 * verifies that a process in the cgroup is not overscheduled.
 */
static int test_cpucg_max(const char *root)
{
	int ret = KSFT_FAIL;
	long quota_usec = 1000;
	long default_period_usec = 100000; /* cpu.max's default period */
	long duration_seconds = 1;
	long duration_usec = duration_seconds * USEC_PER_SEC;
	long usage_usec, n_periods, remainder_usec, expected_usage_usec;
	char *cpucg;
	char quota_buf[32];

	snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	if (cg_write(cpucg, "cpu.max", quota_buf))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = duration_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	if (usage_usec <= 0)
		goto cleanup;

	/*
	 * The following calculation applies only since
	 * the cpu hog is set to run as per wall-clock time
	 */
	n_periods = duration_usec / default_period_usec;
	remainder_usec = duration_usec - n_periods * default_period_usec;
	expected_usage_usec
		= n_periods * quota_usec + MIN(remainder_usec, quota_usec);

	if (!values_close(usage_usec, expected_usage_usec, 10))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

/*
 * This test verifies that a process inside of a nested cgroup whose parent
 * group has a cpu.max value set, is properly throttled.
 */
static int test_cpucg_max_nested(const char *root)
{
	int ret = KSFT_FAIL;
	long quota_usec = 1000;
	long default_period_usec = 100000; /* cpu.max's default period */
	long duration_seconds = 1;
	long duration_usec = duration_seconds * USEC_PER_SEC;
	long usage_usec, n_periods, remainder_usec, expected_usage_usec;
	char *parent, *child;
	char quota_buf[32];

	snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec);

	parent = cg_name(root, "cpucg_parent");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cpu.max", quota_buf))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = duration_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(child, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
	if (usage_usec <= 0)
		goto cleanup;

	/*
	 * The following calculation applies only since
	 * the cpu hog is set to run as per wall-clock time
	 */
	n_periods = duration_usec / default_period_usec;
	remainder_usec = duration_usec - n_periods * default_period_usec;
	expected_usage_usec
		= n_periods * quota_usec + MIN(remainder_usec, quota_usec);

	if (!values_close(usage_usec, expected_usage_usec, 10))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

#define T(x) { x, #x }
struct cpucg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_cpucg_subtree_control),
	T(test_cpucg_stats),
	T(test_cpucg_nice),
	T(test_cpucg_weight_overprovisioned),
	T(test_cpucg_weight_underprovisioned),
	T(test_cpucg_nested_weight_overprovisioned),
	T(test_cpucg_nested_weight_underprovisioned),
	T(test_cpucg_max),
	T(test_cpucg_max_nested),
};
#undef T

int main(int argc, char *argv[])
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root), NULL))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
		if (cg_write(root, "cgroup.subtree_control", "+cpu"))
			ksft_exit_skip("Failed to set cpu controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}