linux/tools/testing/selftests/bpf/progs/task_kfunc_failure.c

327 lines
7.8 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "task_kfunc_common.h"
char _license[] SEC("license") = "GPL";
/* Prototype for all of the program trace events below:
*
* TRACE_EVENT(task_newtask,
* TP_PROTO(struct task_struct *p, u64 clone_flags)
*/
static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task)
{
int status;
status = tasks_kfunc_map_insert(task);
if (status)
return NULL;
return tasks_kfunc_map_value_lookup(task);
}
SEC("tp_btf/task_newtask")
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
struct __tasks_kfunc_map_value *v;
v = insert_lookup_task(task);
if (!v)
return 0;
/* Can't invoke bpf_task_acquire() on an untrusted pointer. */
acquired = bpf_task_acquire(v->task);
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("arg#0 pointer type STRUCT task_struct must point")
int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags;
/* Can't invoke bpf_task_acquire() on a random frame pointer. */
acquired = bpf_task_acquire((struct task_struct *)&stack_task);
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
SEC("kretprobe/free_task")
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
__failure __msg("calling kernel function bpf_task_acquire is not allowed")
int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
acquired = bpf_task_acquire(task);
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
SEC("kretprobe/free_task")
__failure __msg("calling kernel function bpf_task_acquire is not allowed")
int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
bpf_rcu_read_lock();
if (!task) {
bpf_rcu_read_unlock();
return 0;
}
/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
acquired = bpf_task_acquire(task);
if (acquired)
bpf_task_release(acquired);
bpf_rcu_read_unlock();
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
/* Can't invoke bpf_task_acquire() on a NULL pointer. */
acquired = bpf_task_acquire(NULL);
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("Unreleased reference")
int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
acquired = bpf_task_acquire(task);
/* Acquired task is never released. */
__sink(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("Unreleased reference")
int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
{
struct task_struct *kptr;
struct __tasks_kfunc_map_value *v;
v = insert_lookup_task(task);
if (!v)
return 0;
kptr = bpf_kptr_xchg(&v->task, NULL);
if (!kptr)
return 0;
/* Kptr retrieved from map is never released. */
return 0;
}
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
SEC("tp_btf/task_newtask")
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
acquired = bpf_task_acquire(task);
/* Can't invoke bpf_task_release() on an acquired task without a NULL check. */
bpf_task_release(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS, even though they have a superset of the requirements of KF_TRUSTED_ARGS. Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require _either_ an argument with ref_obj_id > 0, _or_ (ref->type & BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE only allows for ref_obj_id > 0. Because KF_RELEASE today doesn't automatically imply KF_TRUSTED_ARGS, some of these requirements are enforced in different ways that can make the behavior of the verifier feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able argument will currently fail in the verifier with a message like, "arg#0 is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL pointer passed to trusted arg0". Our intention is the same, but the semantics are different due to implemenetation details that kfunc authors and BPF program writers should not need to care about. Let's make the behavior of the verifier more consistent and intuitive by having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default anyways, so this takes us a step in that direction. Note that it does not make sense to assume KF_TRUSTED_ARGS for all KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have KF_ACQUIRE imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that can be left to another patch set, and there are no such subtleties to address for KF_RELEASE. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-25 16:31:46 -05:00
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
{
struct __tasks_kfunc_map_value *v;
v = insert_lookup_task(task);
if (!v)
return 0;
/* Can't invoke bpf_task_release() on an untrusted pointer. */
bpf_task_release(v->task);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("arg#0 pointer type STRUCT task_struct must point")
int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired = (struct task_struct *)&clone_flags;
/* Cannot release random frame pointer. */
bpf_task_release(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS, even though they have a superset of the requirements of KF_TRUSTED_ARGS. Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require _either_ an argument with ref_obj_id > 0, _or_ (ref->type & BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE only allows for ref_obj_id > 0. Because KF_RELEASE today doesn't automatically imply KF_TRUSTED_ARGS, some of these requirements are enforced in different ways that can make the behavior of the verifier feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able argument will currently fail in the verifier with a message like, "arg#0 is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL pointer passed to trusted arg0". Our intention is the same, but the semantics are different due to implemenetation details that kfunc authors and BPF program writers should not need to care about. Let's make the behavior of the verifier more consistent and intuitive by having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default anyways, so this takes us a step in that direction. Note that it does not make sense to assume KF_TRUSTED_ARGS for all KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have KF_ACQUIRE imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that can be left to another patch set, and there are no such subtleties to address for KF_RELEASE. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-25 16:31:46 -05:00
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
{
struct __tasks_kfunc_map_value local, *v;
long status;
struct task_struct *acquired, *old;
s32 pid;
status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid);
if (status)
return 0;
local.task = NULL;
status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
if (status)
return status;
v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
if (!v)
return -ENOENT;
acquired = bpf_task_acquire(task);
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
if (!acquired)
return -EEXIST;
old = bpf_kptr_xchg(&v->task, acquired);
/* old cannot be passed to bpf_task_release() without a NULL check. */
bpf_task_release(old);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("release kernel function bpf_task_release expects")
int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags)
{
/* Cannot release trusted task pointer which was not acquired. */
bpf_task_release(task);
return 0;
}
SEC("tp_btf/task_newtask")
bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS, even though they have a superset of the requirements of KF_TRUSTED_ARGS. Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require _either_ an argument with ref_obj_id > 0, _or_ (ref->type & BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE only allows for ref_obj_id > 0. Because KF_RELEASE today doesn't automatically imply KF_TRUSTED_ARGS, some of these requirements are enforced in different ways that can make the behavior of the verifier feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able argument will currently fail in the verifier with a message like, "arg#0 is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL pointer passed to trusted arg0". Our intention is the same, but the semantics are different due to implemenetation details that kfunc authors and BPF program writers should not need to care about. Let's make the behavior of the verifier more consistent and intuitive by having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default anyways, so this takes us a step in that direction. Note that it does not make sense to assume KF_TRUSTED_ARGS for all KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have KF_ACQUIRE imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that can be left to another patch set, and there are no such subtleties to address for KF_RELEASE. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-25 16:31:46 -05:00
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
acquired = bpf_task_from_pid(task->pid);
/* Releasing bpf_task_from_pid() lookup without a NULL check. */
bpf_task_release(acquired);
return 0;
}
SEC("lsm/task_free")
__failure __msg("R1 must be a rcu pointer")
int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
{
struct task_struct *acquired;
/* the argument of lsm task_free hook is untrusted. */
acquired = bpf_task_acquire(task);
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("access beyond the end of member comm")
int BPF_PROG(task_access_comm1, struct task_struct *task, u64 clone_flags)
{
bpf_strncmp(task->comm, 17, "foo");
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("access beyond the end of member comm")
int BPF_PROG(task_access_comm2, struct task_struct *task, u64 clone_flags)
{
bpf_strncmp(task->comm + 1, 16, "foo");
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("write into memory")
int BPF_PROG(task_access_comm3, struct task_struct *task, u64 clone_flags)
{
bpf_probe_read_kernel(task->comm, 16, task->comm);
return 0;
}
SEC("fentry/__set_task_comm")
__failure __msg("R1 type=ptr_ expected")
int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool exec)
{
/*
* task->comm is a legacy ptr_to_btf_id. The verifier cannot guarantee
* its safety. Hence it cannot be accessed with normal load insns.
*/
bpf_strncmp(task->comm, 16, "foo");
return 0;
}
bpf: Make struct task_struct an RCU-safe type struct task_struct objects are a bit interesting in terms of how their lifetime is protected by refcounts. task structs have two refcount fields: 1. refcount_t usage: Protects the memory backing the task struct. When this refcount drops to 0, the task is immediately freed, without waiting for an RCU grace period to elapse. This is the field that most callers in the kernel currently use to ensure that a task remains valid while it's being referenced, and is what's currently tracked with bpf_task_acquire() and bpf_task_release(). 2. refcount_t rcu_users: A refcount field which, when it drops to 0, schedules an RCU callback that drops a reference held on the 'usage' field above (which is acquired when the task is first created). This field therefore provides a form of RCU protection on the task by ensuring that at least one 'usage' refcount will be held until an RCU grace period has elapsed. The qualifier "a form of" is important here, as a task can remain valid after task->rcu_users has dropped to 0 and the subsequent RCU gp has elapsed. In terms of BPF, we want to use task->rcu_users to protect tasks that function as referenced kptrs, and to allow tasks stored as referenced kptrs in maps to be accessed with RCU protection. Let's first determine whether we can safely use task->rcu_users to protect tasks stored in maps. All of the bpf_task* kfuncs can only be called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program types. For tracepoint and struct_ops programs, the struct task_struct passed to a program handler will always be trusted, so it will always be safe to call bpf_task_acquire() with any task passed to a program. Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL, as it is possible that the task has exited by the time the program is invoked, even if the pointer is still currently valid because the main kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks should never be passed as an argument to the any program handlers, so it should not be relevant. The second question is whether it's safe to use RCU to access a task that was acquired with bpf_task_acquire(), and stored in a map. Because bpf_task_acquire() now uses task->rcu_users, it follows that if the task is present in the map, that it must have had at least one task->rcu_users refcount by the time the current RCU cs was started. Therefore, it's safe to access that task until the end of the current RCU cs. With all that said, this patch makes struct task_struct is an RCU-protected object. In doing so, we also change bpf_task_acquire() to be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as necessary. A subsequent patch will remove bpf_task_kptr_get(), and bpf_task_acquire_not_zero() respectively. Signed-off-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
SEC("tp_btf/task_newtask")
__failure __msg("R1 must be referenced or trusted")
int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags)
{
struct task_struct *local;
struct __tasks_kfunc_map_value *v;
if (tasks_kfunc_map_insert(task))
return 0;
v = tasks_kfunc_map_value_lookup(task);
if (!v)
return 0;
bpf_rcu_read_lock();
local = v->task;
if (!local) {
bpf_rcu_read_unlock();
return 0;
}
/* Can't release a kptr that's still stored in a map. */
bpf_task_release(local);
bpf_rcu_read_unlock();
return 0;
}