linux/tools/testing/selftests/bpf/progs/iters.c

1438 lines
29 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_compiler.h"
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
static volatile int zero = 0;
int my_pid;
int arr[256];
int small_arr[16] SEC(".data.small_arr");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10);
__type(key, int);
__type(value, int);
} amap SEC(".maps");
#ifdef REAL_TEST
#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
#else
#define MY_PID_GUARD() ({ })
#endif
SEC("?raw_tp")
__failure __msg("math between map_value pointer and register with unbounded min value is not allowed")
int iter_err_unsafe_c_loop(const void *ctx)
{
struct bpf_iter_num it;
int *v, i = zero; /* obscure initial value of i */
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 1000);
while ((v = bpf_iter_num_next(&it))) {
i++;
}
bpf_iter_num_destroy(&it);
small_arr[i] = 123; /* invalid */
return 0;
}
SEC("?raw_tp")
__failure __msg("unbounded memory access")
int iter_err_unsafe_asm_loop(const void *ctx)
{
struct bpf_iter_num it;
MY_PID_GUARD();
asm volatile (
"r6 = %[zero];" /* iteration counter */
"r1 = %[it];" /* iterator state */
"r2 = 0;"
"r3 = 1000;"
"r4 = 1;"
"call %[bpf_iter_num_new];"
"loop:"
"r1 = %[it];"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto out;"
"r6 += 1;"
"goto loop;"
"out:"
"r1 = %[it];"
"call %[bpf_iter_num_destroy];"
"r1 = %[small_arr];"
"r2 = r6;"
"r2 <<= 2;"
"r1 += r2;"
"*(u32 *)(r1 + 0) = r6;" /* invalid */
:
: [it]"r"(&it),
bpf: Use r constraint instead of p constraint in selftests Some of the BPF selftests use the "p" constraint in inline assembly snippets, for input operands for MOV (rN = rM) instructions. This is mainly done via the __imm_ptr macro defined in tools/testing/selftests/bpf/progs/bpf_misc.h: #define __imm_ptr(name) [name]"p"(&name) Example: int consume_first_item_only(void *ctx) { struct bpf_iter_num iter; asm volatile ( /* create iterator */ "r1 = %[iter];" [...] : : __imm_ptr(iter) : CLOBBERS); [...] } The "p" constraint is a tricky one. It is documented in the GCC manual section "Simple Constraints": An operand that is a valid memory address is allowed. This is for ``load address'' and ``push address'' instructions. p in the constraint must be accompanied by address_operand as the predicate in the match_operand. This predicate interprets the mode specified in the match_operand as the mode of the memory reference for which the address would be valid. There are two problems: 1. It is questionable whether that constraint was ever intended to be used in inline assembly templates, because its behavior really depends on compiler internals. A "memory address" is not the same than a "memory operand" or a "memory reference" (constraint "m"), and in fact its usage in the template above results in an error in both x86_64-linux-gnu and bpf-unkonwn-none: foo.c: In function ‘bar’: foo.c:6:3: error: invalid 'asm': invalid expression as operand 6 | asm volatile ("r1 = %[jorl]" : : [jorl]"p"(&jorl)); | ^~~ I would assume the same happens with aarch64, riscv, and most/all other targets in GCC, that do not accept operands of the form A + B that are not wrapped either in a const or in a memory reference. To avoid that error, the usage of the "p" constraint in internal GCC instruction templates is supposed to be complemented by the 'a' modifier, like in: asm volatile ("r1 = %a[jorl]" : : [jorl]"p"(&jorl)); Internally documented (in GCC's final.cc) as: %aN means expect operand N to be a memory address (not a memory reference!) and print a reference to that address. That works because when the modifier 'a' is found, GCC prints an "operand address", which is not the same than an "operand". But... 2. Even if we used the internal 'a' modifier (we shouldn't) the 'rN = rM' instruction really requires a register argument. In cases involving automatics, like in the examples above, we easily end with: bar: #APP r1 = r10-4 #NO_APP In other cases we could conceibly also end with a 64-bit label that may overflow the 32-bit immediate operand of `rN = imm32' instructions: r1 = foo All of which is clearly wrong. clang happens to do "the right thing" in the current usage of __imm_ptr in the BPF tests, because even with -O2 it seems to "reload" the fp-relative address of the automatic to a register like in: bar: r1 = r10 r1 += -4 #APP r1 = r1 #NO_APP Which is what GCC would generate with -O0. Whether this is by chance or by design, the compiler shouln't be expected to do that reload driven by the "p" constraint. This patch changes the usage of the "p" constraint in the BPF selftests macros to use the "r" constraint instead. If a register is what is required, we should let the compiler know. Previous discussion in bpf@vger: https://lore.kernel.org/bpf/87h6p5ebpb.fsf@oracle.com/T/#ef0df83d6975c34dff20bf0dd52e078f5b8ca2767 Tested in bpf-next master. No regressions. Signed-off-by: Jose E. Marchesi <jose.marchesi@oracle.com> Cc: Yonghong Song <yonghong.song@linux.dev> Cc: Eduard Zingerman <eddyz87@gmail.com> Acked-by: Yonghong Song <yonghong.song@linux.dev> Link: https://lore.kernel.org/r/20240123181309.19853-1-jose.marchesi@oracle.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2024-01-23 19:13:09 +01:00
[small_arr]"r"(small_arr),
[zero]"r"(zero),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy)
: __clobber_common, "r6"
);
return 0;
}
SEC("raw_tp")
__success
int iter_while_loop(const void *ctx)
{
struct bpf_iter_num it;
int *v;
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 3);
while ((v = bpf_iter_num_next(&it))) {
bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
}
bpf_iter_num_destroy(&it);
return 0;
}
SEC("raw_tp")
__success
int iter_while_loop_auto_cleanup(const void *ctx)
{
__attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it;
int *v;
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 3);
while ((v = bpf_iter_num_next(&it))) {
bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
}
/* (!) no explicit bpf_iter_num_destroy() */
return 0;
}
SEC("raw_tp")
__success
int iter_for_loop(const void *ctx)
{
struct bpf_iter_num it;
int *v;
MY_PID_GUARD();
bpf_iter_num_new(&it, 5, 10);
for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
}
bpf_iter_num_destroy(&it);
return 0;
}
SEC("raw_tp")
__success
int iter_bpf_for_each_macro(const void *ctx)
{
int *v;
MY_PID_GUARD();
bpf_for_each(num, v, 5, 10) {
bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
}
return 0;
}
SEC("raw_tp")
__success
int iter_bpf_for_macro(const void *ctx)
{
int i;
MY_PID_GUARD();
bpf_for(i, 5, 10) {
bpf_printk("ITER_BASIC: E2 VAL: v=%d", i);
}
return 0;
}
SEC("raw_tp")
__success
int iter_pragma_unroll_loop(const void *ctx)
{
struct bpf_iter_num it;
int *v, i;
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 2);
__pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
}
bpf_iter_num_destroy(&it);
return 0;
}
SEC("raw_tp")
__success
int iter_manual_unroll_loop(const void *ctx)
{
struct bpf_iter_num it;
int *v;
MY_PID_GUARD();
bpf_iter_num_new(&it, 100, 200);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
bpf_iter_num_destroy(&it);
return 0;
}
SEC("raw_tp")
__success
int iter_multiple_sequential_loops(const void *ctx)
{
struct bpf_iter_num it;
int *v, i;
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 3);
while ((v = bpf_iter_num_next(&it))) {
bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
}
bpf_iter_num_destroy(&it);
bpf_iter_num_new(&it, 5, 10);
for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
}
bpf_iter_num_destroy(&it);
bpf_iter_num_new(&it, 0, 2);
__pragma_loop_no_unroll
for (i = 0; i < 3; i++) {
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
}
bpf_iter_num_destroy(&it);
bpf_iter_num_new(&it, 100, 200);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
v = bpf_iter_num_next(&it);
bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
bpf_iter_num_destroy(&it);
return 0;
}
SEC("raw_tp")
__success
int iter_limit_cond_break_loop(const void *ctx)
{
struct bpf_iter_num it;
int *v, i = 0, sum = 0;
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 10);
while ((v = bpf_iter_num_next(&it))) {
bpf_printk("ITER_SIMPLE: i=%d v=%d", i, *v);
sum += *v;
i++;
if (i > 3)
break;
}
bpf_iter_num_destroy(&it);
bpf_printk("ITER_SIMPLE: sum=%d\n", sum);
return 0;
}
SEC("raw_tp")
__success
int iter_obfuscate_counter(const void *ctx)
{
struct bpf_iter_num it;
int *v, sum = 0;
/* Make i's initial value unknowable for verifier to prevent it from
* pruning if/else branch inside the loop body and marking i as precise.
*/
int i = zero;
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 10);
while ((v = bpf_iter_num_next(&it))) {
int x;
i += 1;
/* If we initialized i as `int i = 0;` above, verifier would
* track that i becomes 1 on first iteration after increment
* above, and here verifier would eagerly prune else branch
* and mark i as precise, ruining open-coded iterator logic
* completely, as each next iteration would have a different
* *precise* value of i, and thus there would be no
* convergence of state. This would result in reaching maximum
* instruction limit, no matter what the limit is.
*/
if (i == 1)
x = 123;
else
x = i * 3 + 1;
bpf_printk("ITER_OBFUSCATE_COUNTER: i=%d v=%d x=%d", i, *v, x);
sum += x;
}
bpf_iter_num_destroy(&it);
bpf_printk("ITER_OBFUSCATE_COUNTER: sum=%d\n", sum);
return 0;
}
SEC("raw_tp")
__success
int iter_search_loop(const void *ctx)
{
struct bpf_iter_num it;
int *v, *elem = NULL;
bool found = false;
MY_PID_GUARD();
bpf_iter_num_new(&it, 0, 10);
while ((v = bpf_iter_num_next(&it))) {
bpf_printk("ITER_SEARCH_LOOP: v=%d", *v);
if (*v == 2) {
found = true;
elem = v;
barrier_var(elem);
}
}
/* should fail to verify if bpf_iter_num_destroy() is here */
if (found)
/* here found element will be wrong, we should have copied
* value to a variable, but here we want to make sure we can
* access memory after the loop anyways
*/
bpf_printk("ITER_SEARCH_LOOP: FOUND IT = %d!\n", *elem);
else
bpf_printk("ITER_SEARCH_LOOP: NOT FOUND IT!\n");
bpf_iter_num_destroy(&it);
return 0;
}
SEC("raw_tp")
__success
int iter_array_fill(const void *ctx)
{
int sum, i;
MY_PID_GUARD();
bpf_for(i, 0, ARRAY_SIZE(arr)) {
arr[i] = i * 2;
}
sum = 0;
bpf_for(i, 0, ARRAY_SIZE(arr)) {
sum += arr[i];
}
bpf_printk("ITER_ARRAY_FILL: sum=%d (should be %d)\n", sum, 255 * 256);
return 0;
}
static int arr2d[4][5];
static int arr2d_row_sums[4];
static int arr2d_col_sums[5];
SEC("raw_tp")
__success
int iter_nested_iters(const void *ctx)
{
int sum, row, col;
MY_PID_GUARD();
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
bpf_for( col, 0, ARRAY_SIZE(arr2d[0])) {
arr2d[row][col] = row * col;
}
}
/* zero-initialize sums */
sum = 0;
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
arr2d_row_sums[row] = 0;
}
bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
arr2d_col_sums[col] = 0;
}
/* calculate sums */
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
sum += arr2d[row][col];
arr2d_row_sums[row] += arr2d[row][col];
arr2d_col_sums[col] += arr2d[row][col];
}
}
bpf_printk("ITER_NESTED_ITERS: total sum=%d", sum);
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
bpf_printk("ITER_NESTED_ITERS: row #%d sum=%d", row, arr2d_row_sums[row]);
}
bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
bpf_printk("ITER_NESTED_ITERS: col #%d sum=%d%s",
col, arr2d_col_sums[col],
col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
}
return 0;
}
SEC("raw_tp")
__success
int iter_nested_deeply_iters(const void *ctx)
{
int sum = 0;
MY_PID_GUARD();
bpf_repeat(10) {
bpf_repeat(10) {
bpf_repeat(10) {
bpf_repeat(10) {
bpf_repeat(10) {
sum += 1;
}
}
}
}
/* validate that we can break from inside bpf_repeat() */
break;
}
return sum;
}
static __noinline void fill_inner_dimension(int row)
{
int col;
bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
arr2d[row][col] = row * col;
}
}
static __noinline int sum_inner_dimension(int row)
{
int sum = 0, col;
bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
sum += arr2d[row][col];
arr2d_row_sums[row] += arr2d[row][col];
arr2d_col_sums[col] += arr2d[row][col];
}
return sum;
}
SEC("raw_tp")
__success
int iter_subprog_iters(const void *ctx)
{
int sum, row, col;
MY_PID_GUARD();
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
fill_inner_dimension(row);
}
/* zero-initialize sums */
sum = 0;
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
arr2d_row_sums[row] = 0;
}
bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
arr2d_col_sums[col] = 0;
}
/* calculate sums */
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
sum += sum_inner_dimension(row);
}
bpf_printk("ITER_SUBPROG_ITERS: total sum=%d", sum);
bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
bpf_printk("ITER_SUBPROG_ITERS: row #%d sum=%d",
row, arr2d_row_sums[row]);
}
bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
bpf_printk("ITER_SUBPROG_ITERS: col #%d sum=%d%s",
col, arr2d_col_sums[col],
col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
}
return 0;
}
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, int);
__type(value, int);
__uint(max_entries, 1000);
} arr_map SEC(".maps");
SEC("?raw_tp")
__failure __msg("invalid mem access 'scalar'")
int iter_err_too_permissive1(const void *ctx)
{
int *map_val = NULL;
int key = 0;
MY_PID_GUARD();
map_val = bpf_map_lookup_elem(&arr_map, &key);
if (!map_val)
return 0;
bpf_repeat(1000000) {
map_val = NULL;
}
*map_val = 123;
return 0;
}
SEC("?raw_tp")
__failure __msg("invalid mem access 'map_value_or_null'")
int iter_err_too_permissive2(const void *ctx)
{
int *map_val = NULL;
int key = 0;
MY_PID_GUARD();
map_val = bpf_map_lookup_elem(&arr_map, &key);
if (!map_val)
return 0;
bpf_repeat(1000000) {
map_val = bpf_map_lookup_elem(&arr_map, &key);
}
*map_val = 123;
return 0;
}
SEC("?raw_tp")
__failure __msg("invalid mem access 'map_value_or_null'")
int iter_err_too_permissive3(const void *ctx)
{
int *map_val = NULL;
int key = 0;
bool found = false;
MY_PID_GUARD();
bpf_repeat(1000000) {
map_val = bpf_map_lookup_elem(&arr_map, &key);
found = true;
}
if (found)
*map_val = 123;
return 0;
}
SEC("raw_tp")
__success
int iter_tricky_but_fine(const void *ctx)
{
int *map_val = NULL;
int key = 0;
bool found = false;
MY_PID_GUARD();
bpf_repeat(1000000) {
map_val = bpf_map_lookup_elem(&arr_map, &key);
if (map_val) {
found = true;
break;
}
}
if (found)
*map_val = 123;
return 0;
}
#define __bpf_memzero(p, sz) bpf_probe_read_kernel((p), (sz), 0)
SEC("raw_tp")
__success
int iter_stack_array_loop(const void *ctx)
{
long arr1[16], arr2[16], sum = 0;
int i;
MY_PID_GUARD();
/* zero-init arr1 and arr2 in such a way that verifier doesn't know
* it's all zeros; if we don't do that, we'll make BPF verifier track
* all combination of zero/non-zero stack slots for arr1/arr2, which
* will lead to O(2^(ARRAY_SIZE(arr1)+ARRAY_SIZE(arr2))) different
* states
*/
__bpf_memzero(arr1, sizeof(arr1));
__bpf_memzero(arr2, sizeof(arr1));
/* validate that we can break and continue when using bpf_for() */
bpf_for(i, 0, ARRAY_SIZE(arr1)) {
if (i & 1) {
arr1[i] = i;
continue;
} else {
arr2[i] = i;
break;
}
}
bpf_for(i, 0, ARRAY_SIZE(arr1)) {
sum += arr1[i] + arr2[i];
}
return sum;
}
static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
{
int *t, i;
while ((t = bpf_iter_num_next(it))) {
i = *t;
if (i >= n)
break;
arr[i] = i * mul;
}
}
static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
{
int *t, i, sum = 0;
while ((t = bpf_iter_num_next(it))) {
i = *t;
if ((__u32)i >= n)
break;
sum += arr[i];
}
return sum;
}
SEC("raw_tp")
__success
int iter_pass_iter_ptr_to_subprog(const void *ctx)
{
int arr1[16], arr2[32];
struct bpf_iter_num it;
int n, sum1, sum2;
MY_PID_GUARD();
/* fill arr1 */
n = ARRAY_SIZE(arr1);
bpf_iter_num_new(&it, 0, n);
fill(&it, arr1, n, 2);
bpf_iter_num_destroy(&it);
/* fill arr2 */
n = ARRAY_SIZE(arr2);
bpf_iter_num_new(&it, 0, n);
fill(&it, arr2, n, 10);
bpf_iter_num_destroy(&it);
/* sum arr1 */
n = ARRAY_SIZE(arr1);
bpf_iter_num_new(&it, 0, n);
sum1 = sum(&it, arr1, n);
bpf_iter_num_destroy(&it);
/* sum arr2 */
n = ARRAY_SIZE(arr2);
bpf_iter_num_new(&it, 0, n);
sum2 = sum(&it, arr2, n);
bpf_iter_num_destroy(&it);
bpf_printk("sum1=%d, sum2=%d", sum1, sum2);
return 0;
}
SEC("?raw_tp")
__failure
__msg("R1 type=scalar expected=fp")
__naked int delayed_read_mark(void)
{
/* This is equivalent to C program below.
* The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead.
* State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch.
* At this point iterator next() call is reached with r7 that has no read mark.
* Loop body with r7=0xdead would only be visited if verifier would decide to continue
* with second loop iteration. Absence of read mark on r7 might affect state
* equivalent logic used for iterator convergence tracking.
*
* r7 = &fp[-16]
* fp[-16] = 0
* r6 = bpf_get_prandom_u32()
* bpf_iter_num_new(&fp[-8], 0, 10)
* while (bpf_iter_num_next(&fp[-8])) {
* r6++
* if (r6 != 42) {
* r7 = 0xdead
* continue;
* }
* bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe
* }
* bpf_iter_num_destroy(&fp[-8])
* return 0
*/
asm volatile (
"r7 = r10;"
"r7 += -16;"
"r0 = 0;"
"*(u64 *)(r7 + 0) = r0;"
"call %[bpf_get_prandom_u32];"
"r6 = r0;"
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"1:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto 2f;"
"r6 += 1;"
"if r6 != 42 goto 3f;"
"r7 = 0xdead;"
"goto 1b;"
"3:"
"r1 = r7;"
"r2 = 8;"
"r3 = 0xdeadbeef;"
"call %[bpf_probe_read_user];"
"goto 1b;"
"2:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
:
: __imm(bpf_get_prandom_u32),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy),
__imm(bpf_probe_read_user)
: __clobber_all
);
}
SEC("?raw_tp")
__failure
__msg("math between fp pointer and register with unbounded")
__naked int delayed_precision_mark(void)
{
/* This is equivalent to C program below.
* The test is similar to delayed_iter_mark but verifies that incomplete
* precision don't fool verifier.
* The call to bpf_iter_num_next() is reachable with r7 values -16 and -32.
* State with r7=-16 is visited first and follows r6 != 42 ... continue branch.
* At this point iterator next() call is reached with r7 that has no read
* and precision marks.
* Loop body with r7=-32 would only be visited if verifier would decide to continue
* with second loop iteration. Absence of precision mark on r7 might affect state
* equivalent logic used for iterator convergence tracking.
*
* r8 = 0
* fp[-16] = 0
* r7 = -16
* r6 = bpf_get_prandom_u32()
* bpf_iter_num_new(&fp[-8], 0, 10)
* while (bpf_iter_num_next(&fp[-8])) {
* if (r6 != 42) {
* r7 = -32
* r6 = bpf_get_prandom_u32()
* continue;
* }
* r0 = r10
* r0 += r7
* r8 = *(u64 *)(r0 + 0) // this is not safe
* r6 = bpf_get_prandom_u32()
* }
* bpf_iter_num_destroy(&fp[-8])
* return r8
*/
asm volatile (
"r8 = 0;"
"*(u64 *)(r10 - 16) = r8;"
"r7 = -16;"
"call %[bpf_get_prandom_u32];"
"r6 = r0;"
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"1:"
"r1 = r10;"
"r1 += -8;\n"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto 2f;"
"if r6 != 42 goto 3f;"
bpf: Fix accesses to uninit stack slots Privileged programs are supposed to be able to read uninitialized stack memory (ever since 6715df8d5) but, before this patch, these accesses were permitted inconsistently. In particular, accesses were permitted above state->allocated_stack, but not below it. In other words, if the stack was already "large enough", the access was permitted, but otherwise the access was rejected instead of being allowed to "grow the stack". This undesired rejection was happening in two places: - in check_stack_slot_within_bounds() - in check_stack_range_initialized() This patch arranges for these accesses to be permitted. A bunch of tests that were relying on the old rejection had to change; all of them were changed to add also run unprivileged, in which case the old behavior persists. One tests couldn't be updated - global_func16 - because it can't run unprivileged for other reasons. This patch also fixes the tracking of the stack size for variable-offset reads. This second fix is bundled in the same commit as the first one because they're inter-related. Before this patch, writes to the stack using registers containing a variable offset (as opposed to registers with fixed, known values) were not properly contributing to the function's needed stack size. As a result, it was possible for a program to verify, but then to attempt to read out-of-bounds data at runtime because a too small stack had been allocated for it. Each function tracks the size of the stack it needs in bpf_subprog_info.stack_depth, which is maintained by update_stack_depth(). For regular memory accesses, check_mem_access() was calling update_state_depth() but it was passing in only the fixed part of the offset register, ignoring the variable offset. This was incorrect; the minimum possible value of that register should be used instead. This tracking is now fixed by centralizing the tracking of stack size in grow_stack_state(), and by lifting the calls to grow_stack_state() to check_stack_access_within_bounds() as suggested by Andrii. The code is now simpler and more convincingly tracks the correct maximum stack size. check_stack_range_initialized() can now rely on enough stack having been allocated for the access; this helps with the fix for the first issue. A few tests were changed to also check the stack depth computation. The one that fails without this patch is verifier_var_off:stack_write_priv_vs_unpriv. Fixes: 01f810ace9ed3 ("bpf: Allow variable-offset stack access") Reported-by: Hao Sun <sunhao.th@gmail.com> Signed-off-by: Andrei Matei <andreimatei1@gmail.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20231208032519.260451-3-andreimatei1@gmail.com Closes: https://lore.kernel.org/bpf/CABWLsev9g8UP_c3a=1qbuZUi20tGoUXoU07FPf-5FLvhOKOY+Q@mail.gmail.com/
2023-12-07 22:25:18 -05:00
"r7 = -33;"
"call %[bpf_get_prandom_u32];"
"r6 = r0;"
"goto 1b;\n"
"3:"
"r0 = r10;"
"r0 += r7;"
"r8 = *(u64 *)(r0 + 0);"
"call %[bpf_get_prandom_u32];"
"r6 = r0;"
"goto 1b;\n"
"2:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r0 = r8;"
"exit;"
:
: __imm(bpf_get_prandom_u32),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy),
__imm(bpf_probe_read_user)
: __clobber_all
);
}
SEC("?raw_tp")
__failure
__msg("math between fp pointer and register with unbounded")
__flag(BPF_F_TEST_STATE_FREQ)
__naked int loop_state_deps1(void)
{
/* This is equivalent to C program below.
*
* The case turns out to be tricky in a sense that:
* - states with c=-25 are explored only on a second iteration
* of the outer loop;
* - states with read+precise mark on c are explored only on
* second iteration of the inner loop and in a state which
* is pushed to states stack first.
*
* Depending on the details of iterator convergence logic
* verifier might stop states traversal too early and miss
* unsafe c=-25 memory access.
*
* j = iter_new(); // fp[-16]
* a = 0; // r6
* b = 0; // r7
* c = -24; // r8
* while (iter_next(j)) {
* i = iter_new(); // fp[-8]
* a = 0; // r6
* b = 0; // r7
* while (iter_next(i)) {
* if (a == 1) {
* a = 0;
* b = 1;
* } else if (a == 0) {
* a = 1;
* if (random() == 42)
* continue;
* if (b == 1) {
* *(r10 + c) = 7; // this is not safe
* iter_destroy(i);
* iter_destroy(j);
* return;
* }
* }
* }
* iter_destroy(i);
* a = 0;
* b = 0;
* c = -25;
* }
* iter_destroy(j);
* return;
*/
asm volatile (
"r1 = r10;"
"r1 += -16;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"r6 = 0;"
"r7 = 0;"
"r8 = -24;"
"j_loop_%=:"
"r1 = r10;"
"r1 += -16;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto j_loop_end_%=;"
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"r6 = 0;"
"r7 = 0;"
"i_loop_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto i_loop_end_%=;"
"check_one_r6_%=:"
"if r6 != 1 goto check_zero_r6_%=;"
"r6 = 0;"
"r7 = 1;"
"goto i_loop_%=;"
"check_zero_r6_%=:"
"if r6 != 0 goto i_loop_%=;"
"r6 = 1;"
"call %[bpf_get_prandom_u32];"
"if r0 != 42 goto check_one_r7_%=;"
"goto i_loop_%=;"
"check_one_r7_%=:"
"if r7 != 1 goto i_loop_%=;"
"r0 = r10;"
"r0 += r8;"
"r1 = 7;"
"*(u64 *)(r0 + 0) = r1;"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r1 = r10;"
"r1 += -16;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
"i_loop_end_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r6 = 0;"
"r7 = 0;"
"r8 = -25;"
"goto j_loop_%=;"
"j_loop_end_%=:"
"r1 = r10;"
"r1 += -16;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
:
: __imm(bpf_get_prandom_u32),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy)
: __clobber_all
);
}
SEC("?raw_tp")
__failure
__msg("math between fp pointer and register with unbounded")
__flag(BPF_F_TEST_STATE_FREQ)
__naked int loop_state_deps2(void)
{
/* This is equivalent to C program below.
*
* The case turns out to be tricky in a sense that:
* - states with read+precise mark on c are explored only on a second
* iteration of the first inner loop and in a state which is pushed to
* states stack first.
* - states with c=-25 are explored only on a second iteration of the
* second inner loop and in a state which is pushed to states stack
* first.
*
* Depending on the details of iterator convergence logic
* verifier might stop states traversal too early and miss
* unsafe c=-25 memory access.
*
* j = iter_new(); // fp[-16]
* a = 0; // r6
* b = 0; // r7
* c = -24; // r8
* while (iter_next(j)) {
* i = iter_new(); // fp[-8]
* a = 0; // r6
* b = 0; // r7
* while (iter_next(i)) {
* if (a == 1) {
* a = 0;
* b = 1;
* } else if (a == 0) {
* a = 1;
* if (random() == 42)
* continue;
* if (b == 1) {
* *(r10 + c) = 7; // this is not safe
* iter_destroy(i);
* iter_destroy(j);
* return;
* }
* }
* }
* iter_destroy(i);
* i = iter_new(); // fp[-8]
* a = 0; // r6
* b = 0; // r7
* while (iter_next(i)) {
* if (a == 1) {
* a = 0;
* b = 1;
* } else if (a == 0) {
* a = 1;
* if (random() == 42)
* continue;
* if (b == 1) {
* a = 0;
* c = -25;
* }
* }
* }
* iter_destroy(i);
* }
* iter_destroy(j);
* return;
*/
asm volatile (
"r1 = r10;"
"r1 += -16;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"r6 = 0;"
"r7 = 0;"
"r8 = -24;"
"j_loop_%=:"
"r1 = r10;"
"r1 += -16;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto j_loop_end_%=;"
/* first inner loop */
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"r6 = 0;"
"r7 = 0;"
"i_loop_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto i_loop_end_%=;"
"check_one_r6_%=:"
"if r6 != 1 goto check_zero_r6_%=;"
"r6 = 0;"
"r7 = 1;"
"goto i_loop_%=;"
"check_zero_r6_%=:"
"if r6 != 0 goto i_loop_%=;"
"r6 = 1;"
"call %[bpf_get_prandom_u32];"
"if r0 != 42 goto check_one_r7_%=;"
"goto i_loop_%=;"
"check_one_r7_%=:"
"if r7 != 1 goto i_loop_%=;"
"r0 = r10;"
"r0 += r8;"
"r1 = 7;"
"*(u64 *)(r0 + 0) = r1;"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r1 = r10;"
"r1 += -16;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
"i_loop_end_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
/* second inner loop */
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"r6 = 0;"
"r7 = 0;"
"i2_loop_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto i2_loop_end_%=;"
"check2_one_r6_%=:"
"if r6 != 1 goto check2_zero_r6_%=;"
"r6 = 0;"
"r7 = 1;"
"goto i2_loop_%=;"
"check2_zero_r6_%=:"
"if r6 != 0 goto i2_loop_%=;"
"r6 = 1;"
"call %[bpf_get_prandom_u32];"
"if r0 != 42 goto check2_one_r7_%=;"
"goto i2_loop_%=;"
"check2_one_r7_%=:"
"if r7 != 1 goto i2_loop_%=;"
"r6 = 0;"
"r8 = -25;"
"goto i2_loop_%=;"
"i2_loop_end_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r6 = 0;"
"r7 = 0;"
"goto j_loop_%=;"
"j_loop_end_%=:"
"r1 = r10;"
"r1 += -16;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
:
: __imm(bpf_get_prandom_u32),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy)
: __clobber_all
);
}
SEC("?raw_tp")
__success
__naked int triple_continue(void)
{
/* This is equivalent to C program below.
* High branching factor of the loop body turned out to be
* problematic for one of the iterator convergence tracking
* algorithms explored.
*
* r6 = bpf_get_prandom_u32()
* bpf_iter_num_new(&fp[-8], 0, 10)
* while (bpf_iter_num_next(&fp[-8])) {
* if (bpf_get_prandom_u32() != 42)
* continue;
* if (bpf_get_prandom_u32() != 42)
* continue;
* if (bpf_get_prandom_u32() != 42)
* continue;
* r0 += 0;
* }
* bpf_iter_num_destroy(&fp[-8])
* return 0
*/
asm volatile (
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"loop_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto loop_end_%=;"
"call %[bpf_get_prandom_u32];"
"if r0 != 42 goto loop_%=;"
"call %[bpf_get_prandom_u32];"
"if r0 != 42 goto loop_%=;"
"call %[bpf_get_prandom_u32];"
"if r0 != 42 goto loop_%=;"
"r0 += 0;"
"goto loop_%=;"
"loop_end_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
:
: __imm(bpf_get_prandom_u32),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy)
: __clobber_all
);
}
SEC("?raw_tp")
__success
__naked int widen_spill(void)
{
/* This is equivalent to C program below.
* The counter is stored in fp[-16], if this counter is not widened
* verifier states representing loop iterations would never converge.
*
* fp[-16] = 0
* bpf_iter_num_new(&fp[-8], 0, 10)
* while (bpf_iter_num_next(&fp[-8])) {
* r0 = fp[-16];
* r0 += 1;
* fp[-16] = r0;
* }
* bpf_iter_num_destroy(&fp[-8])
* return 0
*/
asm volatile (
"r0 = 0;"
"*(u64 *)(r10 - 16) = r0;"
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"loop_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto loop_end_%=;"
"r0 = *(u64 *)(r10 - 16);"
"r0 += 1;"
"*(u64 *)(r10 - 16) = r0;"
"goto loop_%=;"
"loop_end_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
:
: __imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy)
: __clobber_all
);
}
SEC("raw_tp")
__success
__naked int checkpoint_states_deletion(void)
{
/* This is equivalent to C program below.
*
* int *a, *b, *c, *d, *e, *f;
* int i, sum = 0;
* bpf_for(i, 0, 10) {
* a = bpf_map_lookup_elem(&amap, &i);
* b = bpf_map_lookup_elem(&amap, &i);
* c = bpf_map_lookup_elem(&amap, &i);
* d = bpf_map_lookup_elem(&amap, &i);
* e = bpf_map_lookup_elem(&amap, &i);
* f = bpf_map_lookup_elem(&amap, &i);
* if (a) sum += 1;
* if (b) sum += 1;
* if (c) sum += 1;
* if (d) sum += 1;
* if (e) sum += 1;
* if (f) sum += 1;
* }
* return 0;
*
* The body of the loop spawns multiple simulation paths
* with different combination of NULL/non-NULL information for a/b/c/d/e/f.
* Each combination is unique from states_equal() point of view.
* Explored states checkpoint is created after each iterator next call.
* Iterator convergence logic expects that eventually current state
* would get equal to one of the explored states and thus loop
* exploration would be finished (at-least for a specific path).
* Verifier evicts explored states with high miss to hit ratio
* to to avoid comparing current state with too many explored
* states per instruction.
* This test is designed to "stress test" eviction policy defined using formula:
*
* sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted
*
* Currently N is set to 64, which allows for 6 variables in this test.
*/
asm volatile (
"r6 = 0;" /* a */
"r7 = 0;" /* b */
"r8 = 0;" /* c */
"*(u64 *)(r10 - 24) = r6;" /* d */
"*(u64 *)(r10 - 32) = r6;" /* e */
"*(u64 *)(r10 - 40) = r6;" /* f */
"r9 = 0;" /* sum */
"r1 = r10;"
"r1 += -8;"
"r2 = 0;"
"r3 = 10;"
"call %[bpf_iter_num_new];"
"loop_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_next];"
"if r0 == 0 goto loop_end_%=;"
"*(u64 *)(r10 - 16) = r0;"
"r1 = %[amap] ll;"
"r2 = r10;"
"r2 += -16;"
"call %[bpf_map_lookup_elem];"
"r6 = r0;"
"r1 = %[amap] ll;"
"r2 = r10;"
"r2 += -16;"
"call %[bpf_map_lookup_elem];"
"r7 = r0;"
"r1 = %[amap] ll;"
"r2 = r10;"
"r2 += -16;"
"call %[bpf_map_lookup_elem];"
"r8 = r0;"
"r1 = %[amap] ll;"
"r2 = r10;"
"r2 += -16;"
"call %[bpf_map_lookup_elem];"
"*(u64 *)(r10 - 24) = r0;"
"r1 = %[amap] ll;"
"r2 = r10;"
"r2 += -16;"
"call %[bpf_map_lookup_elem];"
"*(u64 *)(r10 - 32) = r0;"
"r1 = %[amap] ll;"
"r2 = r10;"
"r2 += -16;"
"call %[bpf_map_lookup_elem];"
"*(u64 *)(r10 - 40) = r0;"
"if r6 == 0 goto +1;"
"r9 += 1;"
"if r7 == 0 goto +1;"
"r9 += 1;"
"if r8 == 0 goto +1;"
"r9 += 1;"
"r0 = *(u64 *)(r10 - 24);"
"if r0 == 0 goto +1;"
"r9 += 1;"
"r0 = *(u64 *)(r10 - 32);"
"if r0 == 0 goto +1;"
"r9 += 1;"
"r0 = *(u64 *)(r10 - 40);"
"if r0 == 0 goto +1;"
"r9 += 1;"
"goto loop_%=;"
"loop_end_%=:"
"r1 = r10;"
"r1 += -8;"
"call %[bpf_iter_num_destroy];"
"r0 = 0;"
"exit;"
:
: __imm(bpf_map_lookup_elem),
__imm(bpf_iter_num_new),
__imm(bpf_iter_num_next),
__imm(bpf_iter_num_destroy),
__imm_addr(amap)
: __clobber_all
);
}
struct {
int data[32];
int n;
} loop_data;
SEC("raw_tp")
__success
int iter_arr_with_actual_elem_count(const void *ctx)
{
int i, n = loop_data.n, sum = 0;
if (n > ARRAY_SIZE(loop_data.data))
return 0;
bpf_for(i, 0, n) {
/* no rechecking of i against ARRAY_SIZE(loop_data.n) */
sum += loop_data.data[i];
}
return sum;
}
char _license[] SEC("license") = "GPL";