linux/tools/testing/selftests/bpf/progs/task_kfunc_success.c

317 lines
6.4 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include "task_kfunc_common.h"
/* License string placed in the "license" section of the BPF object. */
char _license[] SEC("license") = "GPL";
/* err: set to a non-zero, test-specific code by the programs below when a
 *      check fails; the programs never reset it themselves.
 * pid: compared against the upper 32 bits of bpf_get_current_pid_tgid() so
 *      most programs only act for one process. Presumably written by the
 *      user-space test runner before attaching -- confirm against the loader.
 */
int err, pid;
/* Prototype for all of the program trace events below:
*
* TRACE_EVENT(task_newtask,
* TP_PROTO(struct task_struct *p, u64 clone_flags)
*/
/* Weak kfunc declarations (__ksym __weak). The programs below probe each of
 * them with bpf_ksym_exists() to check which ones resolved.
 */
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
/* ___one flavor: same single task_struct parameter as bpf_task_acquire.
 * test_task_kfunc_flavor_relo expects this one to resolve and return a task.
 */
struct task_struct *bpf_task_acquire___one(struct task_struct *task) __ksym __weak;
/* The two-param bpf_task_acquire doesn't exist */
struct task_struct *bpf_task_acquire___two(struct task_struct *p, void *ctx) __ksym __weak;
/* Incorrect type for first param */
struct task_struct *bpf_task_acquire___three(void *ctx) __ksym __weak;
/* test_acquire_release() treats this symbol resolving as a failure (err = 5). */
void invalid_kfunc(void) __ksym __weak;
/* test_acquire_release() requires this symbol to resolve (err = 4 otherwise).
 * NOTE(review): presumably provided by the bpf_testmod test module -- confirm.
 */
void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;
/* Return true when the current task belongs to the process under test, i.e.
 * when the upper 32 bits of bpf_get_current_pid_tgid() equal the global pid.
 */
static bool is_test_kfunc_task(void)
{
	return (int)(bpf_get_current_pid_tgid() >> 32) == pid;
}
/* Acquire a reference on @task with bpf_task_acquire() and release it again.
 *
 * Weak ksym resolution is sanity-checked first. Failures are reported through
 * the global err:
 *   3 - bpf_task_acquire did not resolve
 *   4 - bpf_testmod_test_mod_kfunc did not resolve
 *   5 - invalid_kfunc unexpectedly resolved
 *   6 - bpf_task_acquire() returned NULL
 *
 * Always returns 0 so it can be used directly as a program return value.
 */
static int test_acquire_release(struct task_struct *task)
{
	struct task_struct *acquired = NULL;

	if (!bpf_ksym_exists(bpf_task_acquire)) {
		err = 3;
		return 0;
	}
	if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc)) {
		err = 4;
		return 0;
	}
	if (bpf_ksym_exists(invalid_kfunc)) {
		/* the verifier's dead code elimination should remove this */
		err = 5;
		asm volatile ("goto -1"); /* for (;;); */
	}

	acquired = bpf_task_acquire(task);
	if (acquired)
		bpf_task_release(acquired);
	else
		err = 6;

	return 0;
}
/* Check kfunc resolution for ___flavor-suffixed declarations.
 *
 * Only the ___one flavor is expected to resolve. Reaching the ___two or
 * ___three branch sets err to 3 or 4 and returns early. If no task reference
 * was obtained at all, err is set to 5. Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_kfunc_flavor_relo, struct task_struct *task, u64 clone_flags)
{
	int fake_ctx = 42;
	struct task_struct *ref = NULL;

	if (bpf_ksym_exists(bpf_task_acquire___one)) {
		ref = bpf_task_acquire___one(task);
	} else if (bpf_ksym_exists(bpf_task_acquire___two)) {
		/* bpf_object__resolve_ksym_func_btf_id's find_ksym_btf_id call
		 * locates vmlinux's bpf_task_acquire here, but the subsequent
		 * bpf_core_types_are_compat check is expected to fail.
		 */
		ref = bpf_task_acquire___two(task, &fake_ctx);
		err = 3;
		return 0;
	} else if (bpf_ksym_exists(bpf_task_acquire___three)) {
		/* bpf_core_types_are_compat fails in the same way as above */
		ref = bpf_task_acquire___three(&fake_ctx);
		err = 4;
		return 0;
	}

	if (!ref) {
		err = 5;
		return 0;
	}

	bpf_task_release(ref);
	return 0;
}
/* Neither mismatching flavor may resolve; the outcome for one ___flavor must
 * not influence the others. Sets err to 1 if ___two resolved, otherwise to 2
 * if ___three resolved. Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_kfunc_flavor_relo_not_found, struct task_struct *task, u64 clone_flags)
{
	if (bpf_ksym_exists(bpf_task_acquire___two)) {
		err = 1;
		return 0;
	}

	if (bpf_ksym_exists(bpf_task_acquire___three))
		err = 2;

	return 0;
}
/* Run the acquire/release check on the task passed as the tracepoint
 * argument. Does nothing unless the current task is the process under test.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags)
{
	if (is_test_kfunc_task())
		return test_acquire_release(task);

	return 0;
}
/* Run the acquire/release check on the task returned by
 * bpf_get_current_task_btf() rather than on the tracepoint argument. Does
 * nothing unless the current task is the process under test.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags)
{
	if (is_test_kfunc_task())
		return test_acquire_release(bpf_get_current_task_btf());

	return 0;
}
/* Insert @task through tasks_kfunc_map_insert() and deliberately leave it
 * there; nothing is removed by this program. A non-zero insert status is
 * reported as err = 1. Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags)
{
	if (!is_test_kfunc_task())
		return 0;

	if (tasks_kfunc_map_insert(task))
		err = 1;

	return 0;
}
/* Insert @task into the map, take the stored pointer back out with
 * bpf_kptr_xchg(..., NULL) and release it.
 *
 * err codes: 1 - insert failed, 2 - value lookup failed,
 *            3 - the exchange returned NULL.
 * Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
{
	struct __tasks_kfunc_map_value *val;
	struct task_struct *owned;

	if (!is_test_kfunc_task())
		return 0;

	if (tasks_kfunc_map_insert(task)) {
		err = 1;
		return 0;
	}

	val = tasks_kfunc_map_value_lookup(task);
	if (!val) {
		err = 2;
		return 0;
	}

	/* Swap NULL into the map slot; the previous pointer is handed to us. */
	owned = bpf_kptr_xchg(&val->task, NULL);
	if (owned)
		bpf_task_release(owned);
	else
		err = 3;

	return 0;
}
/* Insert @task into the map, then read the stored pointer inside a
 * bpf_rcu_read_lock()/bpf_rcu_read_unlock() section, acquire a reference on
 * it and release that reference again.
 *
 * err codes: 1 - insert failed, 2 - value lookup failed,
 *            3 - stored pointer was NULL, 4 - bpf_task_acquire() returned NULL.
 * Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_map_acquire_release, struct task_struct *task, u64 clone_flags)
{
	struct __tasks_kfunc_map_value *val;
	struct task_struct *stored;

	if (!is_test_kfunc_task())
		return 0;

	if (tasks_kfunc_map_insert(task)) {
		err = 1;
		return 0;
	}

	val = tasks_kfunc_map_value_lookup(task);
	if (!val) {
		err = 2;
		return 0;
	}

	bpf_rcu_read_lock();
	stored = val->task;
	if (stored) {
		stored = bpf_task_acquire(stored);
		if (stored)
			bpf_task_release(stored);
		else
			err = 4;
	} else {
		err = 3;
	}
	bpf_rcu_read_unlock();

	return 0;
}
/* Acquire a reference on the task returned by bpf_get_current_task_btf() and
 * release it again. Sets err = 1 if bpf_task_acquire() returns NULL. Does
 * nothing unless the current task is the process under test. Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags)
{
	struct task_struct *current, *acquired;

	if (!is_test_kfunc_task())
		return 0;

	current = bpf_get_current_task_btf();
	acquired = bpf_task_acquire(current);
	if (acquired)
		bpf_task_release(acquired);
	else
		err = 1;

	return 0;
}
/* Look up @p's pid with bpf_task_from_pid() and check that the task returned
 * carries the same pid.
 *
 * err codes: 1 - lookup returned NULL, 2 - pid of the looked-up task differs.
 * The looked-up reference is released before returning.
 */
static void lookup_compare_pid(const struct task_struct *p)
{
	struct task_struct *found = bpf_task_from_pid(p->pid);

	if (!found) {
		err = 1;
		return;
	}

	if (found->pid != p->pid)
		err = 2;

	bpf_task_release(found);
}
/* Run the pid lookup/compare check on the task passed as the tracepoint
 * argument. Does nothing unless the current task is the process under test.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags)
{
	if (is_test_kfunc_task())
		lookup_compare_pid(task);

	return 0;
}
/* Run the pid lookup/compare check on the task returned by
 * bpf_get_current_task_btf(). Does nothing unless the current task is the
 * process under test.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags)
{
	if (is_test_kfunc_task())
		lookup_compare_pid(bpf_get_current_task_btf());

	return 0;
}
/* Return 1 if bpf_task_from_pid(@pid) yields a task, 0 otherwise. Any task
 * that was found is released again before returning.
 */
static int is_pid_lookup_valid(s32 pid)
{
	struct task_struct *found = bpf_task_from_pid(pid);

	if (!found)
		return 0;

	bpf_task_release(found);
	return 1;
}
/* Check that bpf_task_from_pid() finds nothing for pids that should not
 * exist: -1 (err = 1 if found) and 0xcafef00d (err = 2 if found).
 * Does nothing unless the current task is the process under test.
 * Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags)
{
	if (!is_test_kfunc_task())
		return 0;

	/* Results are intentionally ignored.
	 * NOTE(review): presumably these only exercise verifier acceptance of
	 * bpf_strncmp() on task->comm with different sizes/offsets -- confirm.
	 */
	bpf_strncmp(task->comm, 12, "foo");
	bpf_strncmp(task->comm, 16, "foo");
	bpf_strncmp(&task->comm[8], 4, "foo");

	if (is_pid_lookup_valid(-1)) {
		err = 1;
		return 0;
	}

	if (is_pid_lookup_valid(0xcafef00d))
		err = 2;

	return 0;
}
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. 
For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
/* Acquire and release a reference on task->group_leader, a pointer reached by
 * walking from the tracepoint argument. Sets err = 1 if the acquire returns
 * NULL. Unlike most programs in this file, it does not filter on the test
 * pid. Always returns 0.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
{
	struct task_struct *leader_ref;

	/* task->group_leader is listed as a trusted, non-NULL field of task struct. */
	leader_ref = bpf_task_acquire(task->group_leader);
	if (!leader_ref) {
		err = 1;
		return 0;
	}

	bpf_task_release(leader_ref);
	return 0;
}