linux/tools/testing/selftests/cgroup/test_memcontrol.c
Linus Torvalds 7f9039c524

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
  "Generic:

   - Clean up locking of all vCPUs for a VM by using the *_nest_lock()
     family of functions, and move duplicated code to virt/kvm/. kernel/
     patches acked by Peter Zijlstra

   - Add MGLRU support to the access tracking perf test

  ARM fixes:

   - Make the irqbypass hooks resilient to changes in the GSI<->MSI
     routing, avoiding stale vLPI mappings being left behind. The
     fix is to resolve the VGIC IRQ using the host IRQ (which is stable)
     and nuking the vLPI mapping upon a routing change

   - Close another VGIC race where vCPU creation races with VGIC
     creation, leading to in-flight vCPUs entering the kernel w/o
     private IRQs allocated

   - Fix a build issue triggered by the recently added workaround for
     Ampere's AC04_CPU_23 erratum

   - Correctly sign-extend the VA when emulating a TLBI instruction
     potentially targeting a VNCR mapping

   - Avoid dereferencing a NULL pointer in the VGIC debug code, which
     can happen if the device doesn't have any mapping yet

  s390:

   - Fix interaction between some filesystems and Secure Execution

   - Some cleanups and refactorings, preparing for an upcoming big
     series

  x86:

   - Wait for target vCPU to ack KVM_REQ_UPDATE_PROTECTED_GUEST_STATE
     to fix a race between AP destroy and VMRUN

   - Decrypt and dump the VMSA in dump_vmcb() if debugging is enabled
     for the VM

   - Refine and harden handling of spurious faults

   - Add support for ALLOWED_SEV_FEATURES

   - Add #VMGEXIT to the set of handlers special cased for
     CONFIG_RETPOLINE=y

   - Treat DEBUGCTL[5:2] as reserved to pave the way for virtualizing
     features that utilize those bits

   - Don't account temporary allocations in sev_send_update_data()

   - Add support for KVM_CAP_X86_BUS_LOCK_EXIT on SVM, via Bus Lock
     Threshold

   - Unify virtualization of IBRS on nested VM-Exit, and cross-vCPU
     IBPB, between SVM and VMX

   - Advertise support to userspace for WRMSRNS and PREFETCHI

   - Rescan I/O APIC routes after handling EOI that needed to be
     intercepted due to the old/previous routing, but not the
     new/current routing

   - Add a module param to control and enumerate support for device
     posted interrupts

   - Fix a potential overflow with nested virt on Intel systems running
     32-bit kernels

   - Flush shadow VMCSes on emergency reboot

   - Add support for SNP to the various SEV selftests

   - Add a selftest to verify fastops instructions via forced emulation

   - Refine and optimize KVM's software processing of the posted
     interrupt bitmap, and share the harvesting code between KVM and the
     kernel's Posted MSI handler"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (93 commits)
  rtmutex_api: provide correct extern functions
  KVM: arm64: vgic-debug: Avoid dereferencing NULL ITE pointer
  KVM: arm64: vgic-init: Plug vCPU vs. VGIC creation race
  KVM: arm64: Unmap vLPIs affected by changes to GSI routing information
  KVM: arm64: Resolve vLPI by host IRQ in vgic_v4_unset_forwarding()
  KVM: arm64: Protect vLPI translation with vgic_irq::irq_lock
  KVM: arm64: Use lock guard in vgic_v4_set_forwarding()
  KVM: arm64: Mask out non-VA bits from TLBI VA* on VNCR invalidation
  arm64: sysreg: Drag linux/kconfig.h to work around vdso build issue
  KVM: s390: Simplify and move pv code
  KVM: s390: Refactor and split some gmap helpers
  KVM: s390: Remove unneeded srcu lock
  s390: Remove unneeded includes
  s390/uv: Improve splitting of large folios that cannot be split while dirty
  s390/uv: Always return 0 from s390_wiggle_split_folio() if successful
  s390/uv: Don't return 0 from make_hva_secure() if the operation was not successful
  rust: add helper for mutex_trylock
  RISC-V: KVM: use kvm_trylock_all_vcpus when locking all vCPUs
  KVM: arm64: use kvm_trylock_all_vcpus when locking all vCPUs
  x86: KVM: SVM: use kvm_lock_all_vcpus instead of a custom implementation
  ...
2025-06-02 12:24:58 -07:00


/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE
#include <linux/limits.h>
#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <sys/mman.h>
#include "../kselftest.h"
#include "cgroup_util.h"
static bool has_localevents;
static bool has_recursiveprot;
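/* Open an unlinked temporary file in the current directory, used as backing storage for page cache allocations. */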
int get_temp_fd(void)
{
return open(".", O_TMPFILE | O_RDWR | O_EXCL);
}
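/*
* Grow the file behind @fd by @size bytes and read it back, so the pages
* end up charged to the caller's cgroup as page cache.
*/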
int alloc_pagecache(int fd, size_t size)
{
char buf[PAGE_SIZE];
struct stat st;
int i;
if (fstat(fd, &st))
goto cleanup;
size += st.st_size;
if (ftruncate(fd, size))
goto cleanup;
for (i = 0; i < size; i += sizeof(buf))
read(fd, buf, sizeof(buf));
return 0;
cleanup:
return -1;
}
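/* Allocate @arg bytes of anonymous memory and touch one byte per page so every page is actually faulted in. */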
int alloc_anon(const char *cgroup, void *arg)
{
size_t size = (unsigned long)arg;
char *buf, *ptr;
buf = malloc(size);
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
free(buf);
return 0;
}
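/*
* Parse /proc/swaps: return 1 if at least one swap area is active (the first
* line is the header), 0 if none, -1 on read error.
*/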
int is_swap_enabled(void)
{
char buf[PAGE_SIZE];
const char delim[] = "\n";
int cnt = 0;
char *line;
if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
return -1;
for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
cnt++;
return cnt > 1;
}
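/* Write @score to /proc/<pid>/oom_score_adj; returns 0 on success, a negative value on failure. */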
int set_oom_adj_score(int pid, int score)
{
char path[PATH_MAX];
int fd, len;
sprintf(path, "/proc/%d/oom_score_adj", pid);
fd = open(path, O_WRONLY | O_APPEND);
if (fd < 0)
return fd;
len = dprintf(fd, "%d", score);
if (len < 0) {
close(fd);
return len;
}
close(fd);
return 0;
}
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
*/
static int test_memcg_subtree_control(const char *root)
{
char *parent, *child, *parent2 = NULL, *child2 = NULL;
int ret = KSFT_FAIL;
char buf[PAGE_SIZE];
/* Create two nested cgroups with the memory controller enabled */
parent = cg_name(root, "memcg_test_0");
child = cg_name(root, "memcg_test_0/memcg_test_1");
if (!parent || !child)
goto cleanup_free;
if (cg_create(parent))
goto cleanup_free;
if (cg_write(parent, "cgroup.subtree_control", "+memory"))
goto cleanup_parent;
if (cg_create(child))
goto cleanup_parent;
if (cg_read_strstr(child, "cgroup.controllers", "memory"))
goto cleanup_child;
/* Create two nested cgroups without enabling memory controller */
parent2 = cg_name(root, "memcg_test_1");
child2 = cg_name(root, "memcg_test_1/memcg_test_1");
if (!parent2 || !child2)
goto cleanup_free2;
if (cg_create(parent2))
goto cleanup_free2;
if (cg_create(child2))
goto cleanup_parent2;
if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
goto cleanup_all;
if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
goto cleanup_all;
ret = KSFT_PASS;
cleanup_all:
cg_destroy(child2);
cleanup_parent2:
cg_destroy(parent2);
cleanup_free2:
free(parent2);
free(child2);
cleanup_child:
cg_destroy(child);
cleanup_parent:
cg_destroy(parent);
cleanup_free:
free(parent);
free(child);
return ret;
}
static int alloc_anon_50M_check(const char *cgroup, void *arg)
{
size_t size = MB(50);
char *buf, *ptr;
long anon, current;
int ret = -1;
buf = malloc(size);
if (buf == NULL) {
fprintf(stderr, "malloc() failed\n");
return -1;
}
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
current = cg_read_long(cgroup, "memory.current");
if (current < size)
goto cleanup;
if (!values_close(size, current, 3))
goto cleanup;
anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
if (anon < 0)
goto cleanup;
if (!values_close(anon, current, 3))
goto cleanup;
ret = 0;
cleanup:
free(buf);
return ret;
}
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
size_t size = MB(50);
int ret = -1;
long current, file;
int fd;
fd = get_temp_fd();
if (fd < 0)
return -1;
if (alloc_pagecache(fd, size))
goto cleanup;
current = cg_read_long(cgroup, "memory.current");
if (current < size)
goto cleanup;
file = cg_read_key_long(cgroup, "memory.stat", "file ");
if (file < 0)
goto cleanup;
if (!values_close(file, current, 10))
goto cleanup;
ret = 0;
cleanup:
close(fd);
return ret;
}
/*
* This test creates a memory cgroup, allocates
* some anonymous memory and some pagecache
* and checks memory.current, memory.peak, and some memory.stat values.
*/
static int test_memcg_current_peak(const char *root)
{
int ret = KSFT_FAIL;
long current, peak, peak_reset;
char *memcg;
bool fd2_closed = false, fd3_closed = false, fd4_closed = false;
int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1;
struct stat ss;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
current = cg_read_long(memcg, "memory.current");
if (current != 0)
goto cleanup;
peak = cg_read_long(memcg, "memory.peak");
if (peak != 0)
goto cleanup;
if (cg_run(memcg, alloc_anon_50M_check, NULL))
goto cleanup;
peak = cg_read_long(memcg, "memory.peak");
if (peak < MB(50))
goto cleanup;
/*
* We'll open a few FDs for the same memory.peak file to exercise the free-path.
* We need at least three to be closed in a different order than writes occurred to test
* the linked-list handling.
*/
peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
if (peak_fd == -1) {
if (errno == ENOENT)
ret = KSFT_SKIP;
goto cleanup;
}
/*
* Before we try to use memory.peak's fd, try to figure out whether
* this kernel supports writing to that file in the first place. (by
* checking the writable bit on the file's st_mode)
*/
if (fstat(peak_fd, &ss))
goto cleanup;
if ((ss.st_mode & S_IWUSR) == 0) {
ret = KSFT_SKIP;
goto cleanup;
}
peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
if (peak_fd2 == -1)
goto cleanup;
peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
if (peak_fd3 == -1)
goto cleanup;
/* any non-empty string resets, but make it clear */
static const char reset_string[] = "reset\n";
peak_reset = write(peak_fd, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
peak_reset = write(peak_fd2, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
peak_reset = write(peak_fd3, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
/* Make sure a completely independent read isn't affected by our FD-local reset above */
peak = cg_read_long(memcg, "memory.peak");
if (peak < MB(50))
goto cleanup;
fd2_closed = true;
if (close(peak_fd2))
goto cleanup;
peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
if (peak_fd4 == -1)
goto cleanup;
peak_reset = write(peak_fd4, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
peak = cg_read_long_fd(peak_fd);
if (peak > MB(30) || peak < 0)
goto cleanup;
if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
goto cleanup;
peak = cg_read_long(memcg, "memory.peak");
if (peak < MB(50))
goto cleanup;
/* Make sure everything is back to normal */
peak = cg_read_long_fd(peak_fd);
if (peak < MB(50))
goto cleanup;
peak = cg_read_long_fd(peak_fd4);
if (peak < MB(50))
goto cleanup;
fd3_closed = true;
if (close(peak_fd3))
goto cleanup;
fd4_closed = true;
if (close(peak_fd4))
goto cleanup;
ret = KSFT_PASS;
cleanup:
close(peak_fd);
if (!fd2_closed)
close(peak_fd2);
if (!fd3_closed)
close(peak_fd3);
if (!fd4_closed)
close(peak_fd4);
cg_destroy(memcg);
free(memcg);
return ret;
}
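/*
* Allocate 50M of page cache in the fd passed via @arg and stay alive until
* the parent exits, so the charge remains in the cgroup.
*/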
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
int fd = (long)arg;
int ppid = getppid();
if (alloc_pagecache(fd, MB(50)))
return -1;
while (getppid() == ppid)
sleep(1);
return 0;
}
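/* Like alloc_anon(), but keep the memory and the process around until the parent exits. */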
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
size_t size = (unsigned long)arg;
char *buf, *ptr;
buf = malloc(size);
if (buf == NULL) {
fprintf(stderr, "malloc() failed\n");
return -1;
}
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
while (getppid() == ppid)
sleep(1);
free(buf);
return 0;
}
/*
* Wait until processes are killed asynchronously by the OOM killer.
* If we exceed a timeout, fail.
*/
static int cg_test_proc_killed(const char *cgroup)
{
int limit;
for (limit = 10; limit > 0; limit--) {
if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
return 0;
usleep(100000);
}
return -1;
}
static bool reclaim_until(const char *memcg, long goal);
/*
* First, this test creates the following hierarchy:
* A memory.min = 0, memory.max = 200M
* A/B memory.min = 50M
* A/B/C memory.min = 75M, memory.current = 50M
* A/B/D memory.min = 25M, memory.current = 50M
* A/B/E memory.min = 0, memory.current = 50M
* A/B/F memory.min = 500M, memory.current = 0
*
* (or memory.low if we test soft protection)
*
* Usages are pagecache and the test keeps a running
* process in every leaf cgroup.
* Then it creates A/G and creates a significant
* memory pressure in A.
*
* Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
* A/B/C memory.current ~= 29M [memory.events:low > 0]
* A/B/D memory.current ~= 21M [memory.events:low > 0]
* A/B/E memory.current ~= 0 [memory.events:low == 0 if !memory_recursiveprot,
* undefined otherwise]
* A/B/F memory.current = 0 [memory.events:low == 0]
* (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
* unprotected memory in A available, and checks that:
* a) memory.min protects pagecache even in this case,
* b) memory.low allows reclaiming page cache with low events.
*
* Then we try to reclaim from A/B/C using memory.reclaim until its
* usage reaches 10M.
* This makes sure that:
* (a) We ignore the protection of the reclaim target memcg.
* (b) The previously calculated emin value (~29M) should be dismissed.
*/
static int test_memcg_protection(const char *root, bool min)
{
int ret = KSFT_FAIL, rc;
char *parent[3] = {NULL};
char *children[4] = {NULL};
const char *attribute = min ? "memory.min" : "memory.low";
long c[4];
long current;
int i, attempts;
int fd;
fd = get_temp_fd();
if (fd < 0)
goto cleanup;
parent[0] = cg_name(root, "memcg_test_0");
if (!parent[0])
goto cleanup;
parent[1] = cg_name(parent[0], "memcg_test_1");
if (!parent[1])
goto cleanup;
parent[2] = cg_name(parent[0], "memcg_test_2");
if (!parent[2])
goto cleanup;
if (cg_create(parent[0]))
goto cleanup;
if (cg_read_long(parent[0], attribute)) {
/* No memory.min on older kernels is fine */
if (min)
ret = KSFT_SKIP;
goto cleanup;
}
if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
goto cleanup;
if (cg_write(parent[0], "memory.max", "200M"))
goto cleanup;
if (cg_write(parent[0], "memory.swap.max", "0"))
goto cleanup;
if (cg_create(parent[1]))
goto cleanup;
if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
goto cleanup;
if (cg_create(parent[2]))
goto cleanup;
for (i = 0; i < ARRAY_SIZE(children); i++) {
children[i] = cg_name_indexed(parent[1], "child_memcg", i);
if (!children[i])
goto cleanup;
if (cg_create(children[i]))
goto cleanup;
if (i > 2)
continue;
cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
(void *)(long)fd);
}
if (cg_write(parent[1], attribute, "50M"))
goto cleanup;
if (cg_write(children[0], attribute, "75M"))
goto cleanup;
if (cg_write(children[1], attribute, "25M"))
goto cleanup;
if (cg_write(children[2], attribute, "0"))
goto cleanup;
if (cg_write(children[3], attribute, "500M"))
goto cleanup;
attempts = 0;
while (!values_close(cg_read_long(parent[1], "memory.current"),
MB(150), 3)) {
if (attempts++ > 5)
break;
sleep(1);
}
if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
goto cleanup;
if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
goto cleanup;
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
if (!values_close(c[0], MB(29), 15))
goto cleanup;
if (!values_close(c[1], MB(21), 20))
goto cleanup;
if (c[3] != 0)
goto cleanup;
rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
if (min && !rc)
goto cleanup;
else if (!min && rc) {
fprintf(stderr,
"memory.low prevents from allocating anon memory\n");
goto cleanup;
}
current = min ? MB(50) : MB(30);
if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
goto cleanup;
if (!reclaim_until(children[0], MB(10)))
goto cleanup;
if (min) {
ret = KSFT_PASS;
goto cleanup;
}
/*
* Child 2 has memory.low=0, but some low protection may still be
* distributed down from its parent with memory.low=50M if cgroup2
* memory_recursiveprot mount option is enabled. Ignore the low
* event count in this case.
*/
for (i = 0; i < ARRAY_SIZE(children); i++) {
int ignore_low_events_index = has_recursiveprot ? 2 : -1;
int no_low_events_index = 1;
long low, oom;
oom = cg_read_key_long(children[i], "memory.events", "oom ");
low = cg_read_key_long(children[i], "memory.events", "low ");
if (oom)
goto cleanup;
if (i == ignore_low_events_index)
continue;
if (i <= no_low_events_index && low <= 0)
goto cleanup;
if (i > no_low_events_index && low)
goto cleanup;
}
ret = KSFT_PASS;
cleanup:
for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
if (!children[i])
continue;
cg_destroy(children[i]);
free(children[i]);
}
for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
if (!parent[i])
continue;
cg_destroy(parent[i]);
free(parent[i]);
}
close(fd);
return ret;
}
static int test_memcg_min(const char *root)
{
return test_memcg_protection(root, true);
}
static int test_memcg_low(const char *root)
{
return test_memcg_protection(root, false);
}
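/*
* Expect either memory.high or memory.max of the cgroup to be 30M, then
* allocate 50M of page cache and check that usage is capped close to 30M.
*/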
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
size_t size = MB(50);
int ret = -1;
long current, high, max;
int fd;
high = cg_read_long(cgroup, "memory.high");
max = cg_read_long(cgroup, "memory.max");
if (high != MB(30) && max != MB(30))
return -1;
fd = get_temp_fd();
if (fd < 0)
return -1;
if (alloc_pagecache(fd, size))
goto cleanup;
current = cg_read_long(cgroup, "memory.current");
if (!values_close(current, MB(30), 5))
goto cleanup;
ret = 0;
cleanup:
close(fd);
return ret;
}
/*
* This test checks that memory.high limits the amount of
* memory which can be consumed by either anonymous memory
* or pagecache.
*/
static int test_memcg_high(const char *root)
{
int ret = KSFT_FAIL;
char *memcg;
long high;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
if (cg_read_strcmp(memcg, "memory.high", "max\n"))
goto cleanup;
if (cg_write(memcg, "memory.swap.max", "0"))
goto cleanup;
if (cg_write(memcg, "memory.high", "30M"))
goto cleanup;
if (cg_run(memcg, alloc_anon, (void *)MB(31)))
goto cleanup;
if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
goto cleanup;
if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
goto cleanup;
high = cg_read_key_long(memcg, "memory.events", "high ");
if (high <= 0)
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_destroy(memcg);
free(memcg);
return ret;
}
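/* mmap() @arg bytes and mlock() them, charging the whole range within a single mlock() call. */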
static int alloc_anon_mlock(const char *cgroup, void *arg)
{
size_t size = (size_t)arg;
void *buf;
buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
0, 0);
if (buf == MAP_FAILED)
return -1;
mlock(buf, size);
munmap(buf, size);
return 0;
}
/*
* This test checks that memory.high is able to throttle a big single-shot
* allocation, i.e. a large allocation within one kernel entry.
*/
static int test_memcg_high_sync(const char *root)
{
int ret = KSFT_FAIL, pid, fd = -1;
char *memcg;
long pre_high, pre_max;
long post_high, post_max;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
pre_high = cg_read_key_long(memcg, "memory.events", "high ");
pre_max = cg_read_key_long(memcg, "memory.events", "max ");
if (pre_high < 0 || pre_max < 0)
goto cleanup;
if (cg_write(memcg, "memory.swap.max", "0"))
goto cleanup;
if (cg_write(memcg, "memory.high", "30M"))
goto cleanup;
if (cg_write(memcg, "memory.max", "140M"))
goto cleanup;
fd = memcg_prepare_for_wait(memcg);
if (fd < 0)
goto cleanup;
pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
if (pid < 0)
goto cleanup;
cg_wait_for(fd);
post_high = cg_read_key_long(memcg, "memory.events", "high ");
post_max = cg_read_key_long(memcg, "memory.events", "max ");
if (post_high < 0 || post_max < 0)
goto cleanup;
if (pre_high == post_high || pre_max != post_max)
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (fd >= 0)
close(fd);
cg_destroy(memcg);
free(memcg);
return ret;
}
/*
* This test checks that memory.max limits the amount of
* memory which can be consumed by either anonymous memory
* or pagecache.
*/
static int test_memcg_max(const char *root)
{
int ret = KSFT_FAIL;
char *memcg;
long current, max;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
if (cg_read_strcmp(memcg, "memory.max", "max\n"))
goto cleanup;
if (cg_write(memcg, "memory.swap.max", "0"))
goto cleanup;
if (cg_write(memcg, "memory.max", "30M"))
goto cleanup;
/* Should be killed by OOM killer */
if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
goto cleanup;
if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
goto cleanup;
current = cg_read_long(memcg, "memory.current");
if (current > MB(30) || !current)
goto cleanup;
max = cg_read_key_long(memcg, "memory.events", "max ");
if (max <= 0)
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_destroy(memcg);
free(memcg);
return ret;
}
/*
* Reclaim from @memcg until usage reaches @goal by writing to
* memory.reclaim.
*
* This function will return false if the usage is already below the
* goal.
*
* This function assumes that writing to memory.reclaim is the only
* source of change in memory.current (no concurrent allocations or
* reclaim).
*
* This function makes sure memory.reclaim is sane. It will return
* false if memory.reclaim's error codes do not make sense, even if
* the usage goal was satisfied.
*/
static bool reclaim_until(const char *memcg, long goal)
{
char buf[64];
int retries, err;
long current, to_reclaim;
bool reclaimed = false;
for (retries = 5; retries > 0; retries--) {
current = cg_read_long(memcg, "memory.current");
if (current < goal || values_close(current, goal, 3))
break;
/* Did memory.reclaim return 0 incorrectly? */
else if (reclaimed)
return false;
to_reclaim = current - goal;
snprintf(buf, sizeof(buf), "%ld", to_reclaim);
err = cg_write(memcg, "memory.reclaim", buf);
if (!err)
reclaimed = true;
else if (err != -EAGAIN)
return false;
}
return reclaimed;
}
/*
* This test checks that memory.reclaim reclaims the given
* amount of memory (from both anon and file, if possible).
*/
static int test_memcg_reclaim(const char *root)
{
int ret = KSFT_FAIL;
int fd = -1;
int retries;
char *memcg;
long current, expected_usage;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
current = cg_read_long(memcg, "memory.current");
if (current != 0)
goto cleanup;
fd = get_temp_fd();
if (fd < 0)
goto cleanup;
cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
/*
* If swap is enabled, try to reclaim from both anon and file, else try
* to reclaim from file only.
*/
if (is_swap_enabled()) {
cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
expected_usage = MB(100);
} else
expected_usage = MB(50);
/*
* Wait until current usage reaches the expected usage (or we run out of
* retries).
*/
retries = 5;
while (!values_close(cg_read_long(memcg, "memory.current"),
expected_usage, 10)) {
if (retries--) {
sleep(1);
continue;
} else {
fprintf(stderr,
"failed to allocate %ld for memcg reclaim test\n",
expected_usage);
goto cleanup;
}
}
/*
* Reclaim until current reaches 30M, this makes sure we hit both anon
* and file if swap is enabled.
*/
if (!reclaim_until(memcg, MB(30)))
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_destroy(memcg);
free(memcg);
close(fd);
return ret;
}
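/*
* Allocate 50M of anonymous memory and check that memory.current sits close
* to the passed-in memory.max while the remainder has been pushed to swap.
*/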
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
long mem_max = (long)arg;
size_t size = MB(50);
char *buf, *ptr;
long mem_current, swap_current;
int ret = -1;
buf = malloc(size);
if (buf == NULL) {
fprintf(stderr, "malloc() failed\n");
return -1;
}
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
mem_current = cg_read_long(cgroup, "memory.current");
if (!mem_current || !values_close(mem_current, mem_max, 3))
goto cleanup;
swap_current = cg_read_long(cgroup, "memory.swap.current");
if (!swap_current ||
!values_close(mem_current + swap_current, size, 3))
goto cleanup;
ret = 0;
cleanup:
free(buf);
return ret;
}
/*
* This test checks that memory.swap.max limits the amount of
* anonymous memory which can be swapped out. Additionally, it verifies that
* memory.swap.peak reflects the high watermark and can be reset.
*/
static int test_memcg_swap_max_peak(const char *root)
{
int ret = KSFT_FAIL;
char *memcg;
long max, peak;
struct stat ss;
int swap_peak_fd = -1, mem_peak_fd = -1;
/* any non-empty string resets */
static const char reset_string[] = "foobarbaz";
if (!is_swap_enabled())
return KSFT_SKIP;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
if (cg_read_long(memcg, "memory.swap.current")) {
ret = KSFT_SKIP;
goto cleanup;
}
swap_peak_fd = cg_open(memcg, "memory.swap.peak",
O_RDWR | O_APPEND | O_CLOEXEC);
if (swap_peak_fd == -1) {
if (errno == ENOENT)
ret = KSFT_SKIP;
goto cleanup;
}
/*
* Before we try to use memory.swap.peak's fd, try to figure out
* whether this kernel supports writing to that file in the first
* place. (by checking the writable bit on the file's st_mode)
*/
if (fstat(swap_peak_fd, &ss))
goto cleanup;
if ((ss.st_mode & S_IWUSR) == 0) {
ret = KSFT_SKIP;
goto cleanup;
}
mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC);
if (mem_peak_fd == -1)
goto cleanup;
if (cg_read_long(memcg, "memory.swap.peak"))
goto cleanup;
if (cg_read_long_fd(swap_peak_fd))
goto cleanup;
/* switch the swap and mem fds into local-peak tracking mode */
int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
if (cg_read_long_fd(swap_peak_fd))
goto cleanup;
if (cg_read_long(memcg, "memory.peak"))
goto cleanup;
if (cg_read_long_fd(mem_peak_fd))
goto cleanup;
peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
if (cg_read_long_fd(mem_peak_fd))
goto cleanup;
if (cg_read_strcmp(memcg, "memory.max", "max\n"))
goto cleanup;
if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
goto cleanup;
if (cg_write(memcg, "memory.swap.max", "30M"))
goto cleanup;
if (cg_write(memcg, "memory.max", "30M"))
goto cleanup;
/* Should be killed by OOM killer */
if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
goto cleanup;
if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
goto cleanup;
if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
goto cleanup;
peak = cg_read_long(memcg, "memory.peak");
if (peak < MB(29))
goto cleanup;
peak = cg_read_long(memcg, "memory.swap.peak");
if (peak < MB(29))
goto cleanup;
peak = cg_read_long_fd(mem_peak_fd);
if (peak < MB(29))
goto cleanup;
peak = cg_read_long_fd(swap_peak_fd);
if (peak < MB(29))
goto cleanup;
/*
* open, reset and close the peak swap on another FD to make sure
* multiple extant fds don't corrupt the linked-list
*/
peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string);
if (peak_reset)
goto cleanup;
peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string);
if (peak_reset)
goto cleanup;
/* actually reset on the fds */
peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string));
if (peak_reset != sizeof(reset_string))
goto cleanup;
peak = cg_read_long_fd(swap_peak_fd);
if (peak > MB(10))
goto cleanup;
/*
* The cgroup is now empty, but there may be a page or two associated
* with the open FD accounted to it.
*/
peak = cg_read_long_fd(mem_peak_fd);
if (peak > MB(1))
goto cleanup;
if (cg_read_long(memcg, "memory.peak") < MB(29))
goto cleanup;
if (cg_read_long(memcg, "memory.swap.peak") < MB(29))
goto cleanup;
if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
goto cleanup;
max = cg_read_key_long(memcg, "memory.events", "max ");
if (max <= 0)
goto cleanup;
peak = cg_read_long(memcg, "memory.peak");
if (peak < MB(29))
goto cleanup;
peak = cg_read_long(memcg, "memory.swap.peak");
if (peak < MB(29))
goto cleanup;
peak = cg_read_long_fd(mem_peak_fd);
if (peak < MB(29))
goto cleanup;
peak = cg_read_long_fd(swap_peak_fd);
if (peak < MB(19))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (mem_peak_fd != -1 && close(mem_peak_fd))
ret = KSFT_FAIL;
if (swap_peak_fd != -1 && close(swap_peak_fd))
ret = KSFT_FAIL;
cg_destroy(memcg);
free(memcg);
return ret;
}
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM. Then it checks for oom and oom_kill events in
* memory.events.
*/
static int test_memcg_oom_events(const char *root)
{
int ret = KSFT_FAIL;
char *memcg;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
if (cg_write(memcg, "memory.max", "30M"))
goto cleanup;
if (cg_write(memcg, "memory.swap.max", "0"))
goto cleanup;
if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
goto cleanup;
if (cg_read_strcmp(memcg, "cgroup.procs", ""))
goto cleanup;
if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
goto cleanup;
if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_destroy(memcg);
free(memcg);
return ret;
}
struct tcp_server_args {
unsigned short port;
int ctl[2];
};
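/*
* Server half of the socket accounting test: bind an IPv6 TCP socket, report
* the bind result over the control pipe, then stream data to the first
* accepted client until the connection is reset.
*/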
static int tcp_server(const char *cgroup, void *arg)
{
struct tcp_server_args *srv_args = arg;
struct sockaddr_in6 saddr = { 0 };
socklen_t slen = sizeof(saddr);
int sk, client_sk, ctl_fd, yes = 1, ret = -1;
close(srv_args->ctl[0]);
ctl_fd = srv_args->ctl[1];
saddr.sin6_family = AF_INET6;
saddr.sin6_addr = in6addr_any;
saddr.sin6_port = htons(srv_args->port);
sk = socket(AF_INET6, SOCK_STREAM, 0);
if (sk < 0)
return ret;
if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
goto cleanup;
if (bind(sk, (struct sockaddr *)&saddr, slen)) {
write(ctl_fd, &errno, sizeof(errno));
goto cleanup;
}
if (listen(sk, 1))
goto cleanup;
ret = 0;
if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
ret = -1;
goto cleanup;
}
client_sk = accept(sk, NULL, NULL);
if (client_sk < 0)
goto cleanup;
ret = -1;
for (;;) {
uint8_t buf[0x100000];
if (write(client_sk, buf, sizeof(buf)) <= 0) {
if (errno == ECONNRESET)
ret = 0;
break;
}
}
close(client_sk);
cleanup:
close(sk);
return ret;
}
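/*
* Client half: connect to localhost:@port, read from the socket and check
* that the growth of memory.current roughly matches the "sock " counter in
* memory.stat.
*/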
static int tcp_client(const char *cgroup, unsigned short port)
{
const char server[] = "localhost";
struct addrinfo *ai;
char servport[6];
int retries = 0x10; /* nice round number */
int sk, ret;
long allocated;
allocated = cg_read_long(cgroup, "memory.current");
snprintf(servport, sizeof(servport), "%hu", port);
ret = getaddrinfo(server, servport, NULL, &ai);
if (ret)
return ret;
sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
if (sk < 0)
goto free_ainfo;
ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
if (ret < 0)
goto close_sk;
ret = KSFT_FAIL;
while (retries--) {
uint8_t buf[0x100000];
long current, sock;
if (read(sk, buf, sizeof(buf)) <= 0)
goto close_sk;
current = cg_read_long(cgroup, "memory.current");
sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
if (current < 0 || sock < 0)
goto close_sk;
/* exclude the memory not related to socket connection */
if (values_close(current - allocated, sock, 10)) {
ret = KSFT_PASS;
break;
}
}
close_sk:
close(sk);
free_ainfo:
freeaddrinfo(ai);
return ret;
}
/*
* This test checks socket memory accounting.
* The test forks a TCP server that listens on a random port between 1000
* and 61000. Once it gets a client connection, it starts writing to
* its socket.
* The TCP client interleaves reads from the socket with checks whether
* memory.current and memory.stat.sock are similar.
*/
static int test_memcg_sock(const char *root)
{
int bind_retries = 5, ret = KSFT_FAIL, pid, err;
unsigned short port;
char *memcg;
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
while (bind_retries--) {
struct tcp_server_args args;
if (pipe(args.ctl))
goto cleanup;
port = args.port = 1000 + rand() % 60000;
pid = cg_run_nowait(memcg, tcp_server, &args);
if (pid < 0)
goto cleanup;
close(args.ctl[1]);
if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
goto cleanup;
close(args.ctl[0]);
if (!err)
break;
if (err != EADDRINUSE)
goto cleanup;
waitpid(pid, NULL, 0);
}
if (err == EADDRINUSE) {
ret = KSFT_SKIP;
goto cleanup;
}
if (tcp_client(memcg, port) != KSFT_PASS)
goto cleanup;
waitpid(pid, &err, 0);
if (WEXITSTATUS(err))
goto cleanup;
if (cg_read_long(memcg, "memory.current") < 0)
goto cleanup;
if (cg_read_key_long(memcg, "memory.stat", "sock "))
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_destroy(memcg);
free(memcg);
return ret;
}
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM with memory.oom.group set. Then it checks that all
* processes in the leaf were killed. It also checks that oom_events
* were propagated to the parent level.
*/
static int test_memcg_oom_group_leaf_events(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child;
long parent_oom_events;
parent = cg_name(root, "memcg_test_0");
child = cg_name(root, "memcg_test_0/memcg_test_1");
if (!parent || !child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_write(parent, "cgroup.subtree_control", "+memory"))
goto cleanup;
if (cg_write(child, "memory.max", "50M"))
goto cleanup;
if (cg_write(child, "memory.swap.max", "0"))
goto cleanup;
if (cg_write(child, "memory.oom.group", "1"))
goto cleanup;
cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
if (!cg_run(child, alloc_anon, (void *)MB(100)))
goto cleanup;
if (cg_test_proc_killed(child))
goto cleanup;
if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
goto cleanup;
parent_oom_events = cg_read_key_long(
parent, "memory.events", "oom_kill ");
/*
* If memory_localevents is not enabled (the default), the parent should
* count OOM events in its children groups. Otherwise, it should not
* have observed any events.
*/
if (has_localevents && parent_oom_events != 0)
goto cleanup;
else if (!has_localevents && parent_oom_events <= 0)
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
free(child);
free(parent);
return ret;
}
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM with memory.oom.group set. Then it checks that all
* processes in the parent and leaf were killed.
*/
static int test_memcg_oom_group_parent_events(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child;
parent = cg_name(root, "memcg_test_0");
child = cg_name(root, "memcg_test_0/memcg_test_1");
if (!parent || !child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_write(parent, "memory.max", "80M"))
goto cleanup;
if (cg_write(parent, "memory.swap.max", "0"))
goto cleanup;
if (cg_write(parent, "memory.oom.group", "1"))
goto cleanup;
cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
if (!cg_run(child, alloc_anon, (void *)MB(100)))
goto cleanup;
if (cg_test_proc_killed(child))
goto cleanup;
if (cg_test_proc_killed(parent))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
free(child);
free(parent);
return ret;
}
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM with memory.oom.group set. Then it checks that all
* processes were killed except those set with OOM_SCORE_ADJ_MIN.
*/
static int test_memcg_oom_group_score_events(const char *root)
{
int ret = KSFT_FAIL;
char *memcg;
int safe_pid;
memcg = cg_name(root, "memcg_test_0");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
if (cg_write(memcg, "memory.max", "50M"))
goto cleanup;
if (cg_write(memcg, "memory.swap.max", "0"))
goto cleanup;
if (cg_write(memcg, "memory.oom.group", "1"))
goto cleanup;
safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
goto cleanup;
cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
goto cleanup;
if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
goto cleanup;
if (kill(safe_pid, SIGKILL))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (memcg)
cg_destroy(memcg);
free(memcg);
return ret;
}
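/* Pair each test function with its name for kselftest reporting. */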
#define T(x) { x, #x }
struct memcg_test {
int (*fn)(const char *root);
const char *name;
} tests[] = {
T(test_memcg_subtree_control),
T(test_memcg_current_peak),
T(test_memcg_min),
T(test_memcg_low),
T(test_memcg_high),
T(test_memcg_high_sync),
T(test_memcg_max),
T(test_memcg_reclaim),
T(test_memcg_oom_events),
T(test_memcg_swap_max_peak),
T(test_memcg_sock),
T(test_memcg_oom_group_leaf_events),
T(test_memcg_oom_group_parent_events),
T(test_memcg_oom_group_score_events),
};
#undef T
int main(int argc, char **argv)
{
char root[PATH_MAX];
int i, proc_status, ret = EXIT_SUCCESS;
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
/*
* Check that memory controller is available:
* memory is listed in cgroup.controllers
*/
if (cg_read_strstr(root, "cgroup.controllers", "memory"))
ksft_exit_skip("memory controller isn't available\n");
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
proc_status = proc_mount_contains("memory_recursiveprot");
if (proc_status < 0)
ksft_exit_skip("Failed to query cgroup mount option\n");
has_recursiveprot = proc_status;
proc_status = proc_mount_contains("memory_localevents");
if (proc_status < 0)
ksft_exit_skip("Failed to query cgroup mount option\n");
has_localevents = proc_status;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
ksft_test_result_pass("%s\n", tests[i].name);
break;
case KSFT_SKIP:
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
return ret;
}