linux/tools/testing/radix-tree/multiorder.c
Kemeng Shi 7e060df04f Xarray: do not return sibling entries from xas_find_marked()
Patch series "Fixes and cleanups to xarray", v5.

This series contains some random fixes and cleanups to xarray.  Patches 1-2
are fixes and patches 3-6 are cleanups.  More details can be found in the
respective patches.


This patch (of 5):

Similar to the issue fixed in commit cbc0285433 ("XArray: Do not return
sibling entries from xa_load()"), we may return sibling entries from
xas_find_marked() as follows:
    Thread A:                        Thread B:
                                     xa_store_range(xa, entry, 6, 7, gfp);
                                     xa_set_mark(xa, 6, mark);
    XA_STATE(xas, xa, 6);
    xas_find_marked(&xas, 7, mark);
    offset = xas_find_chunk(xas, advance, mark);
    [offset is 6, which points to a valid entry]
                                     xa_store_range(xa, entry, 4, 7, gfp);
    entry = xa_entry(xa, node, 6);
    [entry is a sibling of 4]
    if (!xa_is_node(entry))
        return entry;

Skip sibling entries, as xas_find() does, so that callers never see a
sibling entry returned from xas_find_marked(); otherwise a caller may use
the sibling entry as a valid entry and crash the kernel.  (In the race
above, the second xa_store_range() turns indices 4-7 into a single
multi-order entry, so slot 6 ends up holding a sibling entry that points
back at slot 4.)
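
A minimal sketch of the fixed entry check inside the xas_find_marked()
scan loop (context abbreviated; only the xa_is_sibling() test is new):

    entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
    if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK))
            continue;
    /* Skip sibling entries instead of returning them, as xas_find() does */
    if (xa_is_sibling(entry))
            continue;
    if (!xa_is_node(entry))
            return entry;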

In addition, the load_race() test is modified to catch this issue; the
modified load_race() only passes once this fix is applied.

Here is an example of how this bug could be triggered in tmpfs with large
folios enabled in the mapping.
Let's take a look at the racers involved:
1. How pages could be created and dirtied in a shmem file:
write
 ksys_write
  vfs_write
   new_sync_write
    shmem_file_write_iter
     generic_perform_write
      shmem_write_begin
       shmem_get_folio
        shmem_allowable_huge_orders
        shmem_alloc_and_add_folios
        shmem_alloc_folio
        __folio_set_locked
        shmem_add_to_page_cache
         XA_STATE_ORDER(..., index, order)
         xas_store()
      shmem_write_end
       folio_mark_dirty()

2. How dirty pages could be deleted in a shmem file:
ioctl
 do_vfs_ioctl
  file_ioctl
   ioctl_preallocate
    vfs_fallocate
     shmem_fallocate
      shmem_truncate_range
       shmem_undo_range
        truncate_inode_folio
         filemap_remove_folio
          page_cache_delete
           xas_store(&xas, NULL);

3. How dirty pages could be searched locklessly:
sync_file_range
 ksys_sync_file_range
  __filemap_fdatawrite_range
   filemap_fdatawrite_wbc
    do_writepages
     writeback_use_writepage
      writeback_iter
       writeback_get_folio
        filemap_get_folios_tag
         find_get_entry
          folio = xas_find_marked()
          folio_try_get(folio)

The kernel will crash as follows:
1.Create               2.Search             3.Delete
/* write page 2,3 */
write
 ...
  shmem_write_begin
   XA_STATE_ORDER(xas, i_pages, index = 2, order = 1)
   xas_store(&xas, folio)
  shmem_write_end
   folio_mark_dirty()

                       /* sync page 2 and page 3 */
                       sync_file_range
                        ...
                         find_get_entry
                          folio = xas_find_marked()
                          /* offset will be 2 */
                          offset = xas_find_chunk()

                                             /* delete page 2 and page 3 */
                                             ioctl
                                              ...
                                               xas_store(&xas, NULL);

/* write page 0-3 */
write
 ...
  shmem_write_begin
   XA_STATE_ORDER(xas, i_pages, index = 0, order = 2)
   xas_store(&xas, folio)
  shmem_write_end
   folio_mark_dirty(folio)

                          /* get sibling entry from offset 2 */
                          entry = xa_entry(.., 2)
                          /* use sibling entry as folio and crash kernel */
                          folio_try_get(folio)

Link: https://lkml.kernel.org/r/20241213122523.12764-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20241213122523.12764-2-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Matthew Wilcox <willy@infradead.org> [English fixes]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-01-24 22:47:27 -08:00


// SPDX-License-Identifier: GPL-2.0-only
/*
 * multiorder.c: Multi-order radix tree entry testing
 * Copyright (c) 2016 Intel Corporation
 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
 */
#include <linux/radix-tree.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <pthread.h>

#include "test.h"
static int item_insert_order(struct xarray *xa, unsigned long index,
			unsigned order)
{
	XA_STATE_ORDER(xas, xa, index, order);
	struct item *item = item_create(index, order);

	do {
		xas_lock(&xas);
		xas_store(&xas, item);
		xas_unlock(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));

	if (!xas_error(&xas))
		return 0;

	free(item);
	return xas_error(&xas);
}
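
/*
 * Insert entries of assorted orders, then check that an xas_for_each()
 * walk started from each index in [0, 255] reports the expected index,
 * order, and node shift for every entry it meets.
 */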
void multiorder_iteration(struct xarray *xa)
{
	XA_STATE(xas, xa, 0);
	struct item *item;
	int i, j, err;

#define NUM_ENTRIES 11
	int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
	int order[NUM_ENTRIES] = {1, 1, 2, 3, 4, 1, 0, 1, 3, 0, 7};

	printv(1, "Multiorder iteration test\n");

	for (i = 0; i < NUM_ENTRIES; i++) {
		err = item_insert_order(xa, index[i], order[i]);
		assert(!err);
	}

	for (j = 0; j < 256; j++) {
		for (i = 0; i < NUM_ENTRIES; i++)
			if (j <= (index[i] | ((1 << order[i]) - 1)))
				break;

		xas_set(&xas, j);
		xas_for_each(&xas, item, ULONG_MAX) {
			int height = order[i] / XA_CHUNK_SHIFT;
			int shift = height * XA_CHUNK_SHIFT;
			unsigned long mask = (1UL << order[i]) - 1;

			assert((xas.xa_index | mask) == (index[i] | mask));
			assert(xas.xa_node->shift == shift);
			assert(!radix_tree_is_internal_node(item));
			assert((item->index | mask) == (index[i] | mask));
			assert(item->order == order[i]);
			i++;
		}
	}

	item_kill_tree(xa);
}
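
/*
 * Like multiorder_iteration(), but only the entries at tag_index[] are
 * marked, so the marked walks must see exactly those entries.
 */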
void multiorder_tagged_iteration(struct xarray *xa)
{
	XA_STATE(xas, xa, 0);
	struct item *item;
	int i, j;

#define MT_NUM_ENTRIES 9
	int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
	int order[MT_NUM_ENTRIES] = {1, 0, 2, 4, 3, 1, 3, 0, 7};

#define TAG_ENTRIES 7
	int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128};

	printv(1, "Multiorder tagged iteration test\n");

	for (i = 0; i < MT_NUM_ENTRIES; i++)
		assert(!item_insert_order(xa, index[i], order[i]));

	assert(!xa_marked(xa, XA_MARK_1));

	for (i = 0; i < TAG_ENTRIES; i++)
		xa_set_mark(xa, tag_index[i], XA_MARK_1);

	for (j = 0; j < 256; j++) {
		int k;

		for (i = 0; i < TAG_ENTRIES; i++) {
			for (k = i; index[k] < tag_index[i]; k++)
				;
			if (j <= (index[k] | ((1 << order[k]) - 1)))
				break;
		}

		xas_set(&xas, j);
		xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_1) {
			unsigned long mask;
			for (k = i; index[k] < tag_index[i]; k++)
				;
			mask = (1UL << order[k]) - 1;

			assert((xas.xa_index | mask) == (tag_index[i] | mask));
			assert(!xa_is_internal(item));
			assert((item->index | mask) == (tag_index[i] | mask));
			assert(item->order == order[k]);
			i++;
		}
	}

	assert(tag_tagged_items(xa, 0, ULONG_MAX, TAG_ENTRIES, XA_MARK_1,
				XA_MARK_2) == TAG_ENTRIES);

	for (j = 0; j < 256; j++) {
		int mask, k;

		for (i = 0; i < TAG_ENTRIES; i++) {
			for (k = i; index[k] < tag_index[i]; k++)
				;
			if (j <= (index[k] | ((1 << order[k]) - 1)))
				break;
		}

		xas_set(&xas, j);
		xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_2) {
			for (k = i; index[k] < tag_index[i]; k++)
				;
			mask = (1 << order[k]) - 1;

			assert((xas.xa_index | mask) == (tag_index[i] | mask));
			assert(!xa_is_internal(item));
			assert((item->index | mask) == (tag_index[i] | mask));
			assert(item->order == order[k]);
			i++;
		}
	}

	assert(tag_tagged_items(xa, 1, ULONG_MAX, MT_NUM_ENTRIES * 2, XA_MARK_1,
				XA_MARK_0) == TAG_ENTRIES);

	i = 0;
	xas_set(&xas, 0);
	xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_0) {
		assert(xas.xa_index == tag_index[i]);
		i++;
	}
	assert(i == TAG_ENTRIES);

	item_kill_tree(xa);
}
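
/* Set by a creator thread to tell the iterator/worker threads to stop. */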
bool stop_iteration;

static void *creator_func(void *ptr)
{
	/* 'order' is set up to ensure we have sibling entries */
	unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
	struct radix_tree_root *tree = ptr;
	int i;

	for (i = 0; i < 10000; i++) {
		item_insert_order(tree, 0, order);
		item_delete_rcu(tree, 0);
	}

	stop_iteration = true;
	return NULL;
}

static void *iterator_func(void *ptr)
{
	XA_STATE(xas, ptr, 0);
	struct item *item;

	while (!stop_iteration) {
		rcu_read_lock();
		xas_for_each(&xas, item, ULONG_MAX) {
			if (xas_retry(&xas, item))
				continue;

			item_sanity(item, xas.xa_index);
		}
		rcu_read_unlock();
	}

	return NULL;
}

static void multiorder_iteration_race(struct xarray *xa)
{
	const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
	pthread_t worker_thread[num_threads];
	int i;

	stop_iteration = false;
	pthread_create(&worker_thread[0], NULL, &creator_func, xa);
	for (i = 1; i < num_threads; i++)
		pthread_create(&worker_thread[i], NULL, &iterator_func, xa);

	for (i = 0; i < num_threads; i++)
		pthread_join(worker_thread[i], NULL);

	item_kill_tree(xa);
}
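
/*
 * Regression test for the xas_find_marked() issue described in the commit
 * log above: load_creator() repeatedly inserts, marks, and deletes
 * multi-order entries ending just below 3 << RADIX_TREE_MAP_SHIFT, while
 * load_worker() threads do lockless xa_load() and marked xa_find() lookups
 * over those slots; without the fix, xa_find() can return a sibling entry
 * and trip the assertions.
 */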
static void *load_creator(void *ptr)
{
	/* 'order' is set up to ensure we have sibling entries */
	unsigned int order;
	struct radix_tree_root *tree = ptr;
	int i;

	rcu_register_thread();
	item_insert_order(tree, 3 << RADIX_TREE_MAP_SHIFT, 0);
	item_insert_order(tree, 2 << RADIX_TREE_MAP_SHIFT, 0);
	for (i = 0; i < 10000; i++) {
		for (order = 1; order < RADIX_TREE_MAP_SHIFT; order++) {
			unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
						(1 << order);
			item_insert_order(tree, index, order);
			xa_set_mark(tree, index, XA_MARK_1);
			item_delete_rcu(tree, index);
		}
	}
	rcu_unregister_thread();

	stop_iteration = true;
	return NULL;
}

static void *load_worker(void *ptr)
{
	unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 1;

	rcu_register_thread();
	while (!stop_iteration) {
		unsigned long find_index = (2 << RADIX_TREE_MAP_SHIFT) + 1;
		struct item *item = xa_load(ptr, index);
		assert(!xa_is_internal(item));

		item = xa_find(ptr, &find_index, index, XA_MARK_1);
		assert(!xa_is_internal(item));
	}
	rcu_unregister_thread();

	return NULL;
}

static void load_race(struct xarray *xa)
{
	const int num_threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
	pthread_t worker_thread[num_threads];
	int i;

	stop_iteration = false;
	pthread_create(&worker_thread[0], NULL, &load_creator, xa);
	for (i = 1; i < num_threads; i++)
		pthread_create(&worker_thread[i], NULL, &load_worker, xa);

	for (i = 0; i < num_threads; i++)
		pthread_join(worker_thread[i], NULL);

	item_kill_tree(xa);
}

static DEFINE_XARRAY(array);

void multiorder_checks(void)
{
	multiorder_iteration(&array);
	multiorder_tagged_iteration(&array);
	multiorder_iteration_race(&array);
	load_race(&array);

	radix_tree_cpu_dead(0);
}

int __weak main(int argc, char **argv)
{
	int opt;

	while ((opt = getopt(argc, argv, "ls:v")) != -1) {
		if (opt == 'v')
			test_verbose++;
	}

	rcu_register_thread();
	radix_tree_init();
	multiorder_checks();
	rcu_unregister_thread();
	return 0;
}