2022-11-19 23:10:04 -06:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
|
|
|
|
|
|
|
|
#include <vmlinux.h>
|
|
|
|
#include <bpf/bpf_tracing.h>
|
|
|
|
#include <bpf/bpf_helpers.h>
|
|
|
|
|
2023-01-19 20:18:44 -06:00
|
|
|
#include "bpf_misc.h"
|
2022-11-19 23:10:04 -06:00
|
|
|
#include "task_kfunc_common.h"
|
|
|
|
|
|
|
|
char _license[] SEC("license") = "GPL";
|
|
|
|
|
|
|
|
/* Prototype for all of the program trace events below:
|
|
|
|
*
|
|
|
|
* TRACE_EVENT(task_newtask,
|
|
|
|
* TP_PROTO(struct task_struct *p, u64 clone_flags)
|
|
|
|
*/
|
|
|
|
|
|
|
|
static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task)
|
|
|
|
{
|
|
|
|
int status;
|
|
|
|
|
|
|
|
status = tasks_kfunc_map_insert(task);
|
|
|
|
if (status)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return tasks_kfunc_map_value_lookup(task);
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
bpf: Disallow NULLable pointers for trusted kfuncs
KF_TRUSTED_ARGS kfuncs currently have a subtle and insidious bug in
validating pointers to scalars. Say that you have a kfunc like the
following, which takes an array as the first argument:
bool bpf_cpumask_empty(const struct cpumask *cpumask)
{
return cpumask_empty(cpumask);
}
...
BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
...
If a BPF program were to invoke the kfunc with a NULL argument, it would
crash the kernel. The reason is that struct cpumask is defined as a
bitmap, which is itself defined as an array, and is accessed as a memory
address by bitmap operations. So when the verifier analyzes the
register, it interprets it as a pointer to a scalar struct, which is an
array of size 8. check_mem_reg() then sees that the register is NULL and
returns 0, and the kfunc crashes when it passes it down to the cpumask
wrappers.
To fix this, this patch adds a check for KF_ARG_PTR_TO_MEM which
verifies that the register doesn't contain a possibly-NULL pointer if
the kfunc is KF_TRUSTED_ARGS.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230125143816.721952-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-01-25 08:38:10 -06:00
|
|
|
__failure __msg("Possibly NULL pointer passed to trusted arg0")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
struct __tasks_kfunc_map_value *v;
|
|
|
|
|
|
|
|
v = insert_lookup_task(task);
|
|
|
|
if (!v)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Can't invoke bpf_task_acquire() on an untrusted pointer. */
|
|
|
|
acquired = bpf_task_acquire(v->task);
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
if (!acquired)
|
|
|
|
return 0;
|
|
|
|
|
2022-11-19 23:10:04 -06:00
|
|
|
bpf_task_release(acquired);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
2023-01-19 20:18:44 -06:00
|
|
|
__failure __msg("arg#0 pointer type STRUCT task_struct must point")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags;
|
|
|
|
|
|
|
|
/* Can't invoke bpf_task_acquire() on a random frame pointer. */
|
|
|
|
acquired = bpf_task_acquire((struct task_struct *)&stack_task);
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
if (!acquired)
|
|
|
|
return 0;
|
|
|
|
|
2022-11-19 23:10:04 -06:00
|
|
|
bpf_task_release(acquired);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("kretprobe/free_task")
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
__failure __msg("calling kernel function bpf_task_acquire is not allowed")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
|
2022-11-19 23:10:04 -06:00
|
|
|
acquired = bpf_task_acquire(task);
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
if (!acquired)
|
|
|
|
return 0;
|
2022-11-19 23:10:04 -06:00
|
|
|
bpf_task_release(acquired);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
SEC("kretprobe/free_task")
|
|
|
|
__failure __msg("calling kernel function bpf_task_acquire is not allowed")
|
|
|
|
int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags)
|
2022-11-19 23:10:04 -06:00
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
bpf_rcu_read_lock();
|
|
|
|
if (!task) {
|
|
|
|
bpf_rcu_read_unlock();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
|
|
|
|
acquired = bpf_task_acquire(task);
|
|
|
|
if (acquired)
|
|
|
|
bpf_task_release(acquired);
|
|
|
|
bpf_rcu_read_unlock();
|
2022-11-19 23:10:04 -06:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
bpf: Disallow NULLable pointers for trusted kfuncs
KF_TRUSTED_ARGS kfuncs currently have a subtle and insidious bug in
validating pointers to scalars. Say that you have a kfunc like the
following, which takes an array as the first argument:
bool bpf_cpumask_empty(const struct cpumask *cpumask)
{
return cpumask_empty(cpumask);
}
...
BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
...
If a BPF program were to invoke the kfunc with a NULL argument, it would
crash the kernel. The reason is that struct cpumask is defined as a
bitmap, which is itself defined as an array, and is accessed as a memory
address by bitmap operations. So when the verifier analyzes the
register, it interprets it as a pointer to a scalar struct, which is an
array of size 8. check_mem_reg() then sees that the register is NULL and
returns 0, and the kfunc crashes when it passes it down to the cpumask
wrappers.
To fix this, this patch adds a check for KF_ARG_PTR_TO_MEM which
verifies that the register doesn't contain a possibly-NULL pointer if
the kfunc is KF_TRUSTED_ARGS.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230125143816.721952-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-01-25 08:38:10 -06:00
|
|
|
__failure __msg("Possibly NULL pointer passed to trusted arg0")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
|
|
|
|
/* Can't invoke bpf_task_acquire() on a NULL pointer. */
|
|
|
|
acquired = bpf_task_acquire(NULL);
|
|
|
|
if (!acquired)
|
|
|
|
return 0;
|
|
|
|
bpf_task_release(acquired);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
2023-01-19 20:18:44 -06:00
|
|
|
__failure __msg("Unreleased reference")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
|
|
|
|
acquired = bpf_task_acquire(task);
|
|
|
|
|
|
|
|
/* Acquired task is never released. */
|
2023-03-08 21:40:14 -08:00
|
|
|
__sink(acquired);
|
2022-11-19 23:10:04 -06:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
2023-01-19 20:18:44 -06:00
|
|
|
__failure __msg("Unreleased reference")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *kptr;
|
|
|
|
struct __tasks_kfunc_map_value *v;
|
|
|
|
|
|
|
|
v = insert_lookup_task(task);
|
|
|
|
if (!v)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
kptr = bpf_kptr_xchg(&v->task, NULL);
|
|
|
|
if (!kptr)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Kptr retrieved from map is never released. */
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
SEC("tp_btf/task_newtask")
|
|
|
|
__failure __msg("Possibly NULL pointer passed to trusted arg0")
|
|
|
|
int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
|
|
|
|
acquired = bpf_task_acquire(task);
|
|
|
|
/* Can't invoke bpf_task_release() on an acquired task without a NULL check. */
|
|
|
|
bpf_task_release(acquired);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-11-19 23:10:04 -06:00
|
|
|
SEC("tp_btf/task_newtask")
|
bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS
KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS,
even though they have a superset of the requirements of KF_TRUSTED_ARGS.
Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and
don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require
_either_ an argument with ref_obj_id > 0, _or_ (ref->type &
BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE
only allows for ref_obj_id > 0. Because KF_RELEASE today doesn't
automatically imply KF_TRUSTED_ARGS, some of these requirements are
enforced in different ways that can make the behavior of the verifier
feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able
argument will currently fail in the verifier with a message like, "arg#0
is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL
pointer passed to trusted arg0". Our intention is the same, but the
semantics are different due to implemenetation details that kfunc authors
and BPF program writers should not need to care about.
Let's make the behavior of the verifier more consistent and intuitive by
having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our
eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default
anyways, so this takes us a step in that direction.
Note that it does not make sense to assume KF_TRUSTED_ARGS for all
KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than
KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have
KF_ACQUIRE imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that
can be left to another patch set, and there are no such subtleties to
address for KF_RELEASE.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-25 16:31:46 -05:00
|
|
|
__failure __msg("Possibly NULL pointer passed to trusted arg0")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct __tasks_kfunc_map_value *v;
|
|
|
|
|
|
|
|
v = insert_lookup_task(task);
|
|
|
|
if (!v)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Can't invoke bpf_task_release() on an untrusted pointer. */
|
|
|
|
bpf_task_release(v->task);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
2023-01-19 20:18:44 -06:00
|
|
|
__failure __msg("arg#0 pointer type STRUCT task_struct must point")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired = (struct task_struct *)&clone_flags;
|
|
|
|
|
|
|
|
/* Cannot release random frame pointer. */
|
|
|
|
bpf_task_release(acquired);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS
KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS,
even though they have a superset of the requirements of KF_TRUSTED_ARGS.
Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and
don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require
_either_ an argument with ref_obj_id > 0, _or_ (ref->type &
BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE
only allows for ref_obj_id > 0. Because KF_RELEASE today doesn't
automatically imply KF_TRUSTED_ARGS, some of these requirements are
enforced in different ways that can make the behavior of the verifier
feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able
argument will currently fail in the verifier with a message like, "arg#0
is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL
pointer passed to trusted arg0". Our intention is the same, but the
semantics are different due to implemenetation details that kfunc authors
and BPF program writers should not need to care about.
Let's make the behavior of the verifier more consistent and intuitive by
having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our
eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default
anyways, so this takes us a step in that direction.
Note that it does not make sense to assume KF_TRUSTED_ARGS for all
KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than
KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have
KF_ACQUIRE imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that
can be left to another patch set, and there are no such subtleties to
address for KF_RELEASE.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-25 16:31:46 -05:00
|
|
|
__failure __msg("Possibly NULL pointer passed to trusted arg0")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct __tasks_kfunc_map_value local, *v;
|
|
|
|
long status;
|
|
|
|
struct task_struct *acquired, *old;
|
|
|
|
s32 pid;
|
|
|
|
|
|
|
|
status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid);
|
|
|
|
if (status)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
local.task = NULL;
|
|
|
|
status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
|
|
|
|
if (status)
|
|
|
|
return status;
|
|
|
|
|
|
|
|
v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
|
|
|
|
if (!v)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
acquired = bpf_task_acquire(task);
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
if (!acquired)
|
|
|
|
return -EEXIST;
|
2022-11-19 23:10:04 -06:00
|
|
|
|
|
|
|
old = bpf_kptr_xchg(&v->task, acquired);
|
|
|
|
|
|
|
|
/* old cannot be passed to bpf_task_release() without a NULL check. */
|
|
|
|
bpf_task_release(old);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
2023-01-19 20:18:44 -06:00
|
|
|
__failure __msg("release kernel function bpf_task_release expects")
|
2022-11-19 23:10:04 -06:00
|
|
|
int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
/* Cannot release trusted task pointer which was not acquired. */
|
|
|
|
bpf_task_release(task);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2022-11-22 08:53:00 -06:00
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS
KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS,
even though they have a superset of the requirements of KF_TRUSTED_ARGS.
Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and
don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require
_either_ an argument with ref_obj_id > 0, _or_ (ref->type &
BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE
only allows for ref_obj_id > 0. Because KF_RELEASE today doesn't
automatically imply KF_TRUSTED_ARGS, some of these requirements are
enforced in different ways that can make the behavior of the verifier
feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able
argument will currently fail in the verifier with a message like, "arg#0
is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL
pointer passed to trusted arg0". Our intention is the same, but the
semantics are different due to implemenetation details that kfunc authors
and BPF program writers should not need to care about.
Let's make the behavior of the verifier more consistent and intuitive by
having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our
eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default
anyways, so this takes us a step in that direction.
Note that it does not make sense to assume KF_TRUSTED_ARGS for all
KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than
KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have
KF_ACQUIRE imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that
can be left to another patch set, and there are no such subtleties to
address for KF_RELEASE.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-25 16:31:46 -05:00
|
|
|
__failure __msg("Possibly NULL pointer passed to trusted arg0")
|
2022-11-22 08:53:00 -06:00
|
|
|
int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
|
|
|
|
acquired = bpf_task_from_pid(task->pid);
|
|
|
|
|
|
|
|
/* Releasing bpf_task_from_pid() lookup without a NULL check. */
|
|
|
|
bpf_task_release(acquired);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2022-12-03 12:49:54 -08:00
|
|
|
|
|
|
|
SEC("lsm/task_free")
|
2023-12-14 17:13:26 -08:00
|
|
|
__failure __msg("R1 must be a rcu pointer")
|
2022-12-03 12:49:54 -08:00
|
|
|
int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
|
|
|
|
{
|
|
|
|
struct task_struct *acquired;
|
|
|
|
|
|
|
|
/* the argument of lsm task_free hook is untrusted. */
|
|
|
|
acquired = bpf_task_acquire(task);
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
if (!acquired)
|
|
|
|
return 0;
|
|
|
|
|
2022-12-03 12:49:54 -08:00
|
|
|
bpf_task_release(acquired);
|
|
|
|
return 0;
|
|
|
|
}
|
2023-03-13 16:58:45 -07:00
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
|
|
|
__failure __msg("access beyond the end of member comm")
|
|
|
|
int BPF_PROG(task_access_comm1, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
bpf_strncmp(task->comm, 17, "foo");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
|
|
|
__failure __msg("access beyond the end of member comm")
|
|
|
|
int BPF_PROG(task_access_comm2, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
bpf_strncmp(task->comm + 1, 16, "foo");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
|
|
|
__failure __msg("write into memory")
|
|
|
|
int BPF_PROG(task_access_comm3, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
bpf_probe_read_kernel(task->comm, 16, task->comm);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
SEC("fentry/__set_task_comm")
|
|
|
|
__failure __msg("R1 type=ptr_ expected")
|
|
|
|
int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool exec)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* task->comm is a legacy ptr_to_btf_id. The verifier cannot guarantee
|
|
|
|
* its safety. Hence it cannot be accessed with normal load insns.
|
|
|
|
*/
|
|
|
|
bpf_strncmp(task->comm, 16, "foo");
|
|
|
|
return 0;
|
|
|
|
}
|
bpf: Make struct task_struct an RCU-safe type
struct task_struct objects are a bit interesting in terms of how their
lifetime is protected by refcounts. task structs have two refcount
fields:
1. refcount_t usage: Protects the memory backing the task struct. When
this refcount drops to 0, the task is immediately freed, without
waiting for an RCU grace period to elapse. This is the field that
most callers in the kernel currently use to ensure that a task
remains valid while it's being referenced, and is what's currently
tracked with bpf_task_acquire() and bpf_task_release().
2. refcount_t rcu_users: A refcount field which, when it drops to 0,
schedules an RCU callback that drops a reference held on the 'usage'
field above (which is acquired when the task is first created). This
field therefore provides a form of RCU protection on the task by
ensuring that at least one 'usage' refcount will be held until an RCU
grace period has elapsed. The qualifier "a form of" is important
here, as a task can remain valid after task->rcu_users has dropped to
0 and the subsequent RCU gp has elapsed.
In terms of BPF, we want to use task->rcu_users to protect tasks that
function as referenced kptrs, and to allow tasks stored as referenced
kptrs in maps to be accessed with RCU protection.
Let's first determine whether we can safely use task->rcu_users to
protect tasks stored in maps. All of the bpf_task* kfuncs can only be
called from tracepoint, struct_ops, or BPF_PROG_TYPE_SCHED_CLS, program
types. For tracepoint and struct_ops programs, the struct task_struct
passed to a program handler will always be trusted, so it will always be
safe to call bpf_task_acquire() with any task passed to a program.
Note, however, that we must update bpf_task_acquire() to be KF_RET_NULL,
as it is possible that the task has exited by the time the program is
invoked, even if the pointer is still currently valid because the main
kernel holds a task->usage refcount. For BPF_PROG_TYPE_SCHED_CLS, tasks
should never be passed as an argument to the any program handlers, so it
should not be relevant.
The second question is whether it's safe to use RCU to access a task
that was acquired with bpf_task_acquire(), and stored in a map. Because
bpf_task_acquire() now uses task->rcu_users, it follows that if the task
is present in the map, that it must have had at least one
task->rcu_users refcount by the time the current RCU cs was started.
Therefore, it's safe to access that task until the end of the current
RCU cs.
With all that said, this patch makes struct task_struct is an
RCU-protected object. In doing so, we also change bpf_task_acquire() to
be KF_ACQUIRE | KF_RCU | KF_RET_NULL, and adjust any selftests as
necessary. A subsequent patch will remove bpf_task_kptr_get(), and
bpf_task_acquire_not_zero() respectively.
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2023-03-31 14:57:31 -05:00
|
|
|
|
|
|
|
SEC("tp_btf/task_newtask")
|
|
|
|
__failure __msg("R1 must be referenced or trusted")
|
|
|
|
int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags)
|
|
|
|
{
|
|
|
|
struct task_struct *local;
|
|
|
|
struct __tasks_kfunc_map_value *v;
|
|
|
|
|
|
|
|
if (tasks_kfunc_map_insert(task))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
v = tasks_kfunc_map_value_lookup(task);
|
|
|
|
if (!v)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
bpf_rcu_read_lock();
|
|
|
|
local = v->task;
|
|
|
|
if (!local) {
|
|
|
|
bpf_rcu_read_unlock();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* Can't release a kptr that's still stored in a map. */
|
|
|
|
bpf_task_release(local);
|
|
|
|
bpf_rcu_read_unlock();
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|