mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00
20 hotfixes. 7 are cc:stable and the remainder address post-6.15 issues
or aren't considered necessary for -stable kernels. Only 4 are for MM. - The 3 patch series `Revert "bcache: update min_heap_callbacks to use default builtin swap"' from Kuan-Wei Chiu backs out the author's recent min_heap changes due to a performance regression. A fix for this regression has been developed but we felt it best to go back to the known-good version to give the new code more bake time. - A lot of MAINTAINERS maintenance. I like to get these changes upstreamed promptly because they can't break things and more accurate/complete MAINTAINERS info hopefully improves the speed and accuracy of our responses to submitters and reporters. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaFizWwAKCRDdBJ7gKXxA jhivAQDGQXgzgzPCu/5/fTQjjq+D/8M2QjGxNy4o1itKoK+fYAEAzQGTL/8ay9FY yhcipreU4A3lrxf94iOidiBCYkZaOgk= =kFFb -----END PGP SIGNATURE----- Merge tag 'mm-hotfixes-stable-2025-06-22-18-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull misc fixes from Andrew Morton: "20 hotfixes. 7 are cc:stable and the remainder address post-6.15 issues or aren't considered necessary for -stable kernels. Only 4 are for MM. - The series `Revert "bcache: update min_heap_callbacks to use default builtin swap"' from Kuan-Wei Chiu backs out the author's recent min_heap changes due to a performance regression. A fix for this regression has been developed but we felt it best to go back to the known-good version to give the new code more bake time. - A lot of MAINTAINERS maintenance. 
I like to get these changes upstreamed promptly because they can't break things and more accurate/complete MAINTAINERS info hopefully improves the speed and accuracy of our responses to submitters and reporters" * tag 'mm-hotfixes-stable-2025-06-22-18-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: MAINTAINERS: add additional mmap-related files to mmap section MAINTAINERS: add memfd, shmem quota files to shmem section MAINTAINERS: add stray rmap file to mm rmap section MAINTAINERS: add hugetlb_cgroup.c to hugetlb section MAINTAINERS: add further init files to mm init block MAINTAINERS: update maintainers for HugeTLB maple_tree: fix MA_STATE_PREALLOC flag in mas_preallocate() MAINTAINERS: add missing test files to mm gup section MAINTAINERS: add missing mm/workingset.c file to mm reclaim section selftests/mm: skip uprobe vma merge test if uprobes are not enabled bcache: remove unnecessary select MIN_HEAP Revert "bcache: remove heap-related macros and switch to generic min_heap" Revert "bcache: update min_heap_callbacks to use default builtin swap" selftests/mm: add configs to fix testcase failure kho: initialize tail pages for higher order folios properly MAINTAINERS: add linux-mm@ list to Kexec Handover mm: userfaultfd: fix race of userfaultfd_move and swap cache mm/gup: revert "mm: gup: fix infinite loop within __get_longterm_locked" selftests/mm: increase timeout from 180 to 900 seconds mm/shmem, swap: fix softlockup with mTHP swapin
This commit is contained in:
commit
c06944560a
23 changed files with 332 additions and 276 deletions
21
MAINTAINERS
21
MAINTAINERS
|
@ -11155,7 +11155,8 @@ F: include/linux/platform_data/huawei-gaokun-ec.h
|
|||
|
||||
HUGETLB SUBSYSTEM
|
||||
M: Muchun Song <muchun.song@linux.dev>
|
||||
R: Oscar Salvador <osalvador@suse.de>
|
||||
M: Oscar Salvador <osalvador@suse.de>
|
||||
R: David Hildenbrand <david@redhat.com>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
|
||||
|
@ -11166,6 +11167,7 @@ F: fs/hugetlbfs/
|
|||
F: include/linux/hugetlb.h
|
||||
F: include/trace/events/hugetlbfs.h
|
||||
F: mm/hugetlb.c
|
||||
F: mm/hugetlb_cgroup.c
|
||||
F: mm/hugetlb_cma.c
|
||||
F: mm/hugetlb_cma.h
|
||||
F: mm/hugetlb_vmemmap.c
|
||||
|
@ -13345,6 +13347,7 @@ M: Alexander Graf <graf@amazon.com>
|
|||
M: Mike Rapoport <rppt@kernel.org>
|
||||
M: Changyuan Lyu <changyuanl@google.com>
|
||||
L: kexec@lists.infradead.org
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: Documentation/admin-guide/mm/kho.rst
|
||||
F: Documentation/core-api/kho/*
|
||||
|
@ -15676,8 +15679,11 @@ S: Maintained
|
|||
F: Documentation/core-api/boot-time-mm.rst
|
||||
F: Documentation/core-api/kho/bindings/memblock/*
|
||||
F: include/linux/memblock.h
|
||||
F: mm/bootmem_info.c
|
||||
F: mm/memblock.c
|
||||
F: mm/memtest.c
|
||||
F: mm/mm_init.c
|
||||
F: mm/rodata_test.c
|
||||
F: tools/testing/memblock/
|
||||
|
||||
MEMORY ALLOCATION PROFILING
|
||||
|
@ -15732,7 +15738,6 @@ F: Documentation/admin-guide/mm/
|
|||
F: Documentation/mm/
|
||||
F: include/linux/gfp.h
|
||||
F: include/linux/gfp_types.h
|
||||
F: include/linux/memfd.h
|
||||
F: include/linux/memory_hotplug.h
|
||||
F: include/linux/memory-tiers.h
|
||||
F: include/linux/mempolicy.h
|
||||
|
@ -15792,6 +15797,10 @@ S: Maintained
|
|||
W: http://www.linux-mm.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
|
||||
F: mm/gup.c
|
||||
F: mm/gup_test.c
|
||||
F: mm/gup_test.h
|
||||
F: tools/testing/selftests/mm/gup_longterm.c
|
||||
F: tools/testing/selftests/mm/gup_test.c
|
||||
|
||||
MEMORY MANAGEMENT - KSM (Kernel Samepage Merging)
|
||||
M: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
@ -15868,6 +15877,7 @@ L: linux-mm@kvack.org
|
|||
S: Maintained
|
||||
F: mm/pt_reclaim.c
|
||||
F: mm/vmscan.c
|
||||
F: mm/workingset.c
|
||||
|
||||
MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
|
||||
M: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
@ -15880,6 +15890,7 @@ R: Harry Yoo <harry.yoo@oracle.com>
|
|||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: include/linux/rmap.h
|
||||
F: mm/page_vma_mapped.c
|
||||
F: mm/rmap.c
|
||||
|
||||
MEMORY MANAGEMENT - SECRETMEM
|
||||
|
@ -15972,11 +15983,14 @@ S: Maintained
|
|||
W: http://www.linux-mm.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
|
||||
F: include/trace/events/mmap.h
|
||||
F: mm/mincore.c
|
||||
F: mm/mlock.c
|
||||
F: mm/mmap.c
|
||||
F: mm/mprotect.c
|
||||
F: mm/mremap.c
|
||||
F: mm/mseal.c
|
||||
F: mm/msync.c
|
||||
F: mm/nommu.c
|
||||
F: mm/vma.c
|
||||
F: mm/vma.h
|
||||
F: mm/vma_exec.c
|
||||
|
@ -25027,8 +25041,11 @@ M: Hugh Dickins <hughd@google.com>
|
|||
R: Baolin Wang <baolin.wang@linux.alibaba.com>
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: include/linux/memfd.h
|
||||
F: include/linux/shmem_fs.h
|
||||
F: mm/memfd.c
|
||||
F: mm/shmem.c
|
||||
F: mm/shmem_quota.c
|
||||
|
||||
TOMOYO SECURITY MODULE
|
||||
M: Kentaro Takeda <takedakn@nttdata.co.jp>
|
||||
|
|
|
@ -5,7 +5,6 @@ config BCACHE
|
|||
select BLOCK_HOLDER_DEPRECATED if SYSFS
|
||||
select CRC64
|
||||
select CLOSURES
|
||||
select MIN_HEAP
|
||||
help
|
||||
Allows a block device to be used as cache for other devices; uses
|
||||
a btree for indexing and the layout is optimized for SSDs.
|
||||
|
|
|
@ -164,61 +164,40 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
|
|||
* prio is worth 1/8th of what INITIAL_PRIO is worth.
|
||||
*/
|
||||
|
||||
static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b)
|
||||
{
|
||||
unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;
|
||||
#define bucket_prio(b) \
|
||||
({ \
|
||||
unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
|
||||
\
|
||||
(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
|
||||
})
|
||||
|
||||
return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);
|
||||
}
|
||||
|
||||
static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args)
|
||||
{
|
||||
struct bucket **lhs = (struct bucket **)l;
|
||||
struct bucket **rhs = (struct bucket **)r;
|
||||
struct cache *ca = args;
|
||||
|
||||
return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs);
|
||||
}
|
||||
|
||||
static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args)
|
||||
{
|
||||
struct bucket **lhs = (struct bucket **)l;
|
||||
struct bucket **rhs = (struct bucket **)r;
|
||||
struct cache *ca = args;
|
||||
|
||||
return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
|
||||
}
|
||||
#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r))
|
||||
#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r))
|
||||
|
||||
static void invalidate_buckets_lru(struct cache *ca)
|
||||
{
|
||||
struct bucket *b;
|
||||
const struct min_heap_callbacks bucket_max_cmp_callback = {
|
||||
.less = new_bucket_max_cmp,
|
||||
.swp = NULL,
|
||||
};
|
||||
const struct min_heap_callbacks bucket_min_cmp_callback = {
|
||||
.less = new_bucket_min_cmp,
|
||||
.swp = NULL,
|
||||
};
|
||||
ssize_t i;
|
||||
|
||||
ca->heap.nr = 0;
|
||||
ca->heap.used = 0;
|
||||
|
||||
for_each_bucket(b, ca) {
|
||||
if (!bch_can_invalidate_bucket(ca, b))
|
||||
continue;
|
||||
|
||||
if (!min_heap_full(&ca->heap))
|
||||
min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
|
||||
else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
|
||||
if (!heap_full(&ca->heap))
|
||||
heap_add(&ca->heap, b, bucket_max_cmp);
|
||||
else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
|
||||
ca->heap.data[0] = b;
|
||||
min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
|
||||
heap_sift(&ca->heap, 0, bucket_max_cmp);
|
||||
}
|
||||
}
|
||||
|
||||
min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
|
||||
for (i = ca->heap.used / 2 - 1; i >= 0; --i)
|
||||
heap_sift(&ca->heap, i, bucket_min_cmp);
|
||||
|
||||
while (!fifo_full(&ca->free_inc)) {
|
||||
if (!ca->heap.nr) {
|
||||
if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
|
||||
/*
|
||||
* We don't want to be calling invalidate_buckets()
|
||||
* multiple times when it can't do anything
|
||||
|
@ -227,8 +206,6 @@ static void invalidate_buckets_lru(struct cache *ca)
|
|||
wake_up_gc(ca->set);
|
||||
return;
|
||||
}
|
||||
b = min_heap_peek(&ca->heap)[0];
|
||||
min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
|
||||
|
||||
bch_invalidate_one_bucket(ca, b);
|
||||
}
|
||||
|
|
|
@ -458,7 +458,7 @@ struct cache {
|
|||
/* Allocation stuff: */
|
||||
struct bucket *buckets;
|
||||
|
||||
DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap;
|
||||
DECLARE_HEAP(struct bucket *, heap);
|
||||
|
||||
/*
|
||||
* If nonzero, we know we aren't going to find any buckets to invalidate
|
||||
|
|
|
@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys *b)
|
|||
int __bch_count_data(struct btree_keys *b)
|
||||
{
|
||||
unsigned int ret = 0;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey *k;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
if (b->ops->is_extents)
|
||||
for_each_key(b, k, &iter)
|
||||
ret += KEY_SIZE(k);
|
||||
|
@ -69,11 +67,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
|
|||
{
|
||||
va_list args;
|
||||
struct bkey *k, *p = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
const char *err;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
for_each_key(b, k, &iter) {
|
||||
if (b->ops->is_extents) {
|
||||
err = "Keys out of order";
|
||||
|
@ -114,9 +110,9 @@ bug:
|
|||
|
||||
static void bch_btree_iter_next_check(struct btree_iter *iter)
|
||||
{
|
||||
struct bkey *k = iter->heap.data->k, *next = bkey_next(k);
|
||||
struct bkey *k = iter->data->k, *next = bkey_next(k);
|
||||
|
||||
if (next < iter->heap.data->end &&
|
||||
if (next < iter->data->end &&
|
||||
bkey_cmp(k, iter->b->ops->is_extents ?
|
||||
&START_KEY(next) : next) > 0) {
|
||||
bch_dump_bucket(iter->b);
|
||||
|
@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
|
|||
unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
|
||||
struct bset *i = bset_tree_last(b)->data;
|
||||
struct bkey *m, *prev = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey preceding_key_on_stack = ZERO_KEY;
|
||||
struct bkey *preceding_key_p = &preceding_key_on_stack;
|
||||
|
||||
BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
/*
|
||||
* If k has preceding key, preceding_key_p will be set to address
|
||||
* of k's preceding key; otherwise preceding_key_p will be set
|
||||
|
@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
|
|||
else
|
||||
preceding_key(k, &preceding_key_p);
|
||||
|
||||
m = bch_btree_iter_init(b, &iter, preceding_key_p);
|
||||
m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
|
||||
|
||||
if (b->ops->insert_fixup(b, k, &iter, replace_key))
|
||||
if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
|
||||
return status;
|
||||
|
||||
status = BTREE_INSERT_STATUS_INSERT;
|
||||
|
@ -1083,94 +1077,79 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
|
|||
|
||||
/* Btree iterator */
|
||||
|
||||
typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *);
|
||||
typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
|
||||
struct btree_iter_set);
|
||||
|
||||
static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args)
|
||||
static inline bool btree_iter_cmp(struct btree_iter_set l,
|
||||
struct btree_iter_set r)
|
||||
{
|
||||
const struct btree_iter_set *_l = l;
|
||||
const struct btree_iter_set *_r = r;
|
||||
|
||||
return bkey_cmp(_l->k, _r->k) <= 0;
|
||||
return bkey_cmp(l.k, r.k) > 0;
|
||||
}
|
||||
|
||||
static inline bool btree_iter_end(struct btree_iter *iter)
|
||||
{
|
||||
return !iter->heap.nr;
|
||||
return !iter->used;
|
||||
}
|
||||
|
||||
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
|
||||
struct bkey *end)
|
||||
{
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = new_btree_iter_cmp,
|
||||
.swp = NULL,
|
||||
};
|
||||
|
||||
if (k != end)
|
||||
BUG_ON(!min_heap_push(&iter->heap,
|
||||
&((struct btree_iter_set) { k, end }),
|
||||
&callbacks,
|
||||
NULL));
|
||||
BUG_ON(!heap_add(iter,
|
||||
((struct btree_iter_set) { k, end }),
|
||||
btree_iter_cmp));
|
||||
}
|
||||
|
||||
static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
|
||||
struct btree_iter *iter,
|
||||
struct bkey *search,
|
||||
struct bset_tree *start)
|
||||
static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
|
||||
struct btree_iter_stack *iter,
|
||||
struct bkey *search,
|
||||
struct bset_tree *start)
|
||||
{
|
||||
struct bkey *ret = NULL;
|
||||
|
||||
iter->heap.size = ARRAY_SIZE(iter->heap.preallocated);
|
||||
iter->heap.nr = 0;
|
||||
iter->iter.size = ARRAY_SIZE(iter->stack_data);
|
||||
iter->iter.used = 0;
|
||||
|
||||
#ifdef CONFIG_BCACHE_DEBUG
|
||||
iter->b = b;
|
||||
iter->iter.b = b;
|
||||
#endif
|
||||
|
||||
for (; start <= bset_tree_last(b); start++) {
|
||||
ret = bch_bset_search(b, start, search);
|
||||
bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
|
||||
bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bkey *bch_btree_iter_init(struct btree_keys *b,
|
||||
struct btree_iter *iter,
|
||||
struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
|
||||
struct btree_iter_stack *iter,
|
||||
struct bkey *search)
|
||||
{
|
||||
return __bch_btree_iter_init(b, iter, search, b->set);
|
||||
return __bch_btree_iter_stack_init(b, iter, search, b->set);
|
||||
}
|
||||
|
||||
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
|
||||
new_btree_iter_cmp_fn *cmp)
|
||||
btree_iter_cmp_fn *cmp)
|
||||
{
|
||||
struct btree_iter_set b __maybe_unused;
|
||||
struct bkey *ret = NULL;
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = cmp,
|
||||
.swp = NULL,
|
||||
};
|
||||
|
||||
if (!btree_iter_end(iter)) {
|
||||
bch_btree_iter_next_check(iter);
|
||||
|
||||
ret = iter->heap.data->k;
|
||||
iter->heap.data->k = bkey_next(iter->heap.data->k);
|
||||
ret = iter->data->k;
|
||||
iter->data->k = bkey_next(iter->data->k);
|
||||
|
||||
if (iter->heap.data->k > iter->heap.data->end) {
|
||||
if (iter->data->k > iter->data->end) {
|
||||
WARN_ONCE(1, "bset was corrupt!\n");
|
||||
iter->heap.data->k = iter->heap.data->end;
|
||||
iter->data->k = iter->data->end;
|
||||
}
|
||||
|
||||
if (iter->heap.data->k == iter->heap.data->end) {
|
||||
if (iter->heap.nr) {
|
||||
b = min_heap_peek(&iter->heap)[0];
|
||||
min_heap_pop(&iter->heap, &callbacks, NULL);
|
||||
}
|
||||
}
|
||||
if (iter->data->k == iter->data->end)
|
||||
heap_pop(iter, b, cmp);
|
||||
else
|
||||
min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
|
||||
heap_sift(iter, 0, cmp);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -1178,7 +1157,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
|
|||
|
||||
struct bkey *bch_btree_iter_next(struct btree_iter *iter)
|
||||
{
|
||||
return __bch_btree_iter_next(iter, new_btree_iter_cmp);
|
||||
return __bch_btree_iter_next(iter, btree_iter_cmp);
|
||||
|
||||
}
|
||||
|
||||
|
@ -1216,18 +1195,16 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
|
|||
struct btree_iter *iter,
|
||||
bool fixup, bool remove_stale)
|
||||
{
|
||||
int i;
|
||||
struct bkey *k, *last = NULL;
|
||||
BKEY_PADDED(k) tmp;
|
||||
bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
|
||||
? bch_ptr_bad
|
||||
: bch_ptr_invalid;
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = b->ops->sort_cmp,
|
||||
.swp = NULL,
|
||||
};
|
||||
|
||||
/* Heapify the iterator, using our comparison function */
|
||||
min_heapify_all(&iter->heap, &callbacks, NULL);
|
||||
for (i = iter->used / 2 - 1; i >= 0; --i)
|
||||
heap_sift(iter, i, b->ops->sort_cmp);
|
||||
|
||||
while (!btree_iter_end(iter)) {
|
||||
if (b->ops->sort_fixup && fixup)
|
||||
|
@ -1316,11 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
|
|||
struct bset_sort_state *state)
|
||||
{
|
||||
size_t order = b->page_order, keys = 0;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
int oldsize = bch_count_data(b);
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
|
||||
__bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
|
||||
|
||||
if (start) {
|
||||
unsigned int i;
|
||||
|
@ -1331,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
|
|||
order = get_order(__set_bytes(b->set->data, keys));
|
||||
}
|
||||
|
||||
__btree_sort(b, &iter, start, order, false, state);
|
||||
__btree_sort(b, &iter.iter, start, order, false, state);
|
||||
|
||||
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
|
||||
}
|
||||
|
@ -1347,13 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
|
|||
struct bset_sort_state *state)
|
||||
{
|
||||
uint64_t start_time = local_clock();
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
bch_btree_iter_stack_init(b, &iter, NULL);
|
||||
|
||||
bch_btree_iter_init(b, &iter, NULL);
|
||||
|
||||
btree_mergesort(b, new->set->data, &iter, false, true);
|
||||
btree_mergesort(b, new->set->data, &iter.iter, false, true);
|
||||
|
||||
bch_time_stats_update(&state->time, start_time);
|
||||
|
||||
|
|
|
@ -187,9 +187,8 @@ struct bset_tree {
|
|||
};
|
||||
|
||||
struct btree_keys_ops {
|
||||
bool (*sort_cmp)(const void *l,
|
||||
const void *r,
|
||||
void *args);
|
||||
bool (*sort_cmp)(struct btree_iter_set l,
|
||||
struct btree_iter_set r);
|
||||
struct bkey *(*sort_fixup)(struct btree_iter *iter,
|
||||
struct bkey *tmp);
|
||||
bool (*insert_fixup)(struct btree_keys *b,
|
||||
|
@ -313,17 +312,23 @@ enum {
|
|||
BTREE_INSERT_STATUS_FRONT_MERGE,
|
||||
};
|
||||
|
||||
struct btree_iter_set {
|
||||
struct bkey *k, *end;
|
||||
};
|
||||
|
||||
/* Btree key iteration */
|
||||
|
||||
struct btree_iter {
|
||||
size_t size, used;
|
||||
#ifdef CONFIG_BCACHE_DEBUG
|
||||
struct btree_keys *b;
|
||||
#endif
|
||||
MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap;
|
||||
struct btree_iter_set {
|
||||
struct bkey *k, *end;
|
||||
} data[];
|
||||
};
|
||||
|
||||
/* Fixed-size btree_iter that can be allocated on the stack */
|
||||
|
||||
struct btree_iter_stack {
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_set stack_data[MAX_BSETS];
|
||||
};
|
||||
|
||||
typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
|
||||
|
@ -335,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
|
|||
|
||||
void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
|
||||
struct bkey *end);
|
||||
struct bkey *bch_btree_iter_init(struct btree_keys *b,
|
||||
struct btree_iter *iter,
|
||||
struct bkey *search);
|
||||
struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
|
||||
struct btree_iter_stack *iter,
|
||||
struct bkey *search);
|
||||
|
||||
struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
|
||||
const struct bkey *search);
|
||||
|
@ -352,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
|
|||
return search ? __bch_bset_search(b, t, search) : t->data->start;
|
||||
}
|
||||
|
||||
#define for_each_key_filter(b, k, iter, filter) \
|
||||
for (bch_btree_iter_init((b), (iter), NULL); \
|
||||
((k) = bch_btree_iter_next_filter((iter), (b), filter));)
|
||||
#define for_each_key_filter(b, k, stack_iter, filter) \
|
||||
for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
|
||||
((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
|
||||
filter));)
|
||||
|
||||
#define for_each_key(b, k, iter) \
|
||||
for (bch_btree_iter_init((b), (iter), NULL); \
|
||||
((k) = bch_btree_iter_next(iter));)
|
||||
#define for_each_key(b, k, stack_iter) \
|
||||
for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
|
||||
((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
|
||||
|
||||
/* Sorting */
|
||||
|
||||
|
|
|
@ -148,19 +148,19 @@ void bch_btree_node_read_done(struct btree *b)
|
|||
{
|
||||
const char *err = "bad btree header";
|
||||
struct bset *i = btree_bset_first(b);
|
||||
struct btree_iter iter;
|
||||
struct btree_iter *iter;
|
||||
|
||||
/*
|
||||
* c->fill_iter can allocate an iterator with more memory space
|
||||
* than static MAX_BSETS.
|
||||
* See the comment around cache_set->fill_iter.
|
||||
*/
|
||||
iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
|
||||
iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
|
||||
iter.heap.nr = 0;
|
||||
iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
|
||||
iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
|
||||
iter->used = 0;
|
||||
|
||||
#ifdef CONFIG_BCACHE_DEBUG
|
||||
iter.b = &b->keys;
|
||||
iter->b = &b->keys;
|
||||
#endif
|
||||
|
||||
if (!i->seq)
|
||||
|
@ -198,7 +198,7 @@ void bch_btree_node_read_done(struct btree *b)
|
|||
if (i != b->keys.set[0].data && !i->keys)
|
||||
goto err;
|
||||
|
||||
bch_btree_iter_push(&iter, i->start, bset_bkey_last(i));
|
||||
bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
|
||||
|
||||
b->written += set_blocks(i, block_bytes(b->c->cache));
|
||||
}
|
||||
|
@ -210,7 +210,7 @@ void bch_btree_node_read_done(struct btree *b)
|
|||
if (i->seq == b->keys.set[0].data->seq)
|
||||
goto err;
|
||||
|
||||
bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort);
|
||||
bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort);
|
||||
|
||||
i = b->keys.set[0].data;
|
||||
err = "short btree key";
|
||||
|
@ -222,7 +222,7 @@ void bch_btree_node_read_done(struct btree *b)
|
|||
bch_bset_init_next(&b->keys, write_block(b),
|
||||
bset_magic(&b->c->cache->sb));
|
||||
out:
|
||||
mempool_free(iter.heap.data, &b->c->fill_iter);
|
||||
mempool_free(iter, &b->c->fill_iter);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_io_error(b);
|
||||
|
@ -1306,11 +1306,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
|
|||
uint8_t stale = 0;
|
||||
unsigned int keys = 0, good_keys = 0;
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bset_tree *t;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
gc->nodes++;
|
||||
|
||||
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
|
||||
|
@ -1569,11 +1567,9 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
|
|||
static unsigned int btree_gc_count_keys(struct btree *b)
|
||||
{
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
unsigned int ret = 0;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
|
||||
ret += bkey_u64s(k);
|
||||
|
||||
|
@ -1612,18 +1608,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
|
|||
int ret = 0;
|
||||
bool should_rewrite;
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct gc_merge_info r[GC_MERGE_NODES];
|
||||
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
|
||||
|
||||
for (i = r; i < r + ARRAY_SIZE(r); i++)
|
||||
i->b = ERR_PTR(-EINTR);
|
||||
|
||||
while (1) {
|
||||
k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad);
|
||||
if (k) {
|
||||
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
|
||||
true, b);
|
||||
|
@ -1918,9 +1914,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
|
|||
{
|
||||
int ret = 0;
|
||||
struct bkey *k, *p = NULL;
|
||||
struct btree_iter iter;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
|
||||
bch_initial_mark_key(b->c, b->level, k);
|
||||
|
@ -1928,10 +1922,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
|
|||
bch_initial_mark_key(b->c, b->level + 1, &b->key);
|
||||
|
||||
if (b->level) {
|
||||
bch_btree_iter_init(&b->keys, &iter, NULL);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, NULL);
|
||||
|
||||
do {
|
||||
k = bch_btree_iter_next_filter(&iter, &b->keys,
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad);
|
||||
if (k) {
|
||||
btree_node_prefetch(b, k);
|
||||
|
@ -1959,7 +1953,7 @@ static int bch_btree_check_thread(void *arg)
|
|||
struct btree_check_info *info = arg;
|
||||
struct btree_check_state *check_state = info->state;
|
||||
struct cache_set *c = check_state->c;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey *k, *p;
|
||||
int cur_idx, prev_idx, skip_nr;
|
||||
|
||||
|
@ -1967,11 +1961,9 @@ static int bch_btree_check_thread(void *arg)
|
|||
cur_idx = prev_idx = 0;
|
||||
ret = 0;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
/* root node keys are checked before thread created */
|
||||
bch_btree_iter_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
|
||||
bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
|
||||
BUG_ON(!k);
|
||||
|
||||
p = k;
|
||||
|
@ -1989,7 +1981,7 @@ static int bch_btree_check_thread(void *arg)
|
|||
skip_nr = cur_idx - prev_idx;
|
||||
|
||||
while (skip_nr) {
|
||||
k = bch_btree_iter_next_filter(&iter,
|
||||
k = bch_btree_iter_next_filter(&iter.iter,
|
||||
&c->root->keys,
|
||||
bch_ptr_bad);
|
||||
if (k)
|
||||
|
@ -2062,11 +2054,9 @@ int bch_btree_check(struct cache_set *c)
|
|||
int ret = 0;
|
||||
int i;
|
||||
struct bkey *k = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct btree_check_state check_state;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
/* check and mark root node keys */
|
||||
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
|
||||
bch_initial_mark_key(c, c->root->level, k);
|
||||
|
@ -2560,12 +2550,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
|
|||
|
||||
if (b->level) {
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
bch_btree_iter_init(&b->keys, &iter, from);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, from);
|
||||
|
||||
while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
|
||||
while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad))) {
|
||||
ret = bcache_btree(map_nodes_recurse, k, b,
|
||||
op, from, fn, flags);
|
||||
|
@ -2594,12 +2583,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
|
|||
{
|
||||
int ret = MAP_CONTINUE;
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
bch_btree_iter_init(&b->keys, &iter, from);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, from);
|
||||
|
||||
while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
|
||||
while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad))) {
|
||||
ret = !b->level
|
||||
? fn(op, b, k)
|
||||
: bcache_btree(map_keys_recurse, k,
|
||||
|
|
|
@ -33,16 +33,15 @@ static void sort_key_next(struct btree_iter *iter,
|
|||
i->k = bkey_next(i->k);
|
||||
|
||||
if (i->k == i->end)
|
||||
*i = iter->heap.data[--iter->heap.nr];
|
||||
*i = iter->data[--iter->used];
|
||||
}
|
||||
|
||||
static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args)
|
||||
static bool bch_key_sort_cmp(struct btree_iter_set l,
|
||||
struct btree_iter_set r)
|
||||
{
|
||||
struct btree_iter_set *_l = (struct btree_iter_set *)l;
|
||||
struct btree_iter_set *_r = (struct btree_iter_set *)r;
|
||||
int64_t c = bkey_cmp(_l->k, _r->k);
|
||||
int64_t c = bkey_cmp(l.k, r.k);
|
||||
|
||||
return !(c ? c > 0 : _l->k < _r->k);
|
||||
return c ? c > 0 : l.k < r.k;
|
||||
}
|
||||
|
||||
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
|
||||
|
@ -239,7 +238,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
|
|||
}
|
||||
|
||||
const struct btree_keys_ops bch_btree_keys_ops = {
|
||||
.sort_cmp = new_bch_key_sort_cmp,
|
||||
.sort_cmp = bch_key_sort_cmp,
|
||||
.insert_fixup = bch_btree_ptr_insert_fixup,
|
||||
.key_invalid = bch_btree_ptr_invalid,
|
||||
.key_bad = bch_btree_ptr_bad,
|
||||
|
@ -256,28 +255,22 @@ const struct btree_keys_ops bch_btree_keys_ops = {
|
|||
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
|
||||
* equal in different sets, we have to process them newest to oldest.
|
||||
*/
|
||||
|
||||
static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args)
|
||||
static bool bch_extent_sort_cmp(struct btree_iter_set l,
|
||||
struct btree_iter_set r)
|
||||
{
|
||||
struct btree_iter_set *_l = (struct btree_iter_set *)l;
|
||||
struct btree_iter_set *_r = (struct btree_iter_set *)r;
|
||||
int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k));
|
||||
int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
|
||||
|
||||
return !(c ? c > 0 : _l->k < _r->k);
|
||||
return c ? c > 0 : l.k < r.k;
|
||||
}
|
||||
|
||||
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
|
||||
struct bkey *tmp)
|
||||
{
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = new_bch_extent_sort_cmp,
|
||||
.swp = NULL,
|
||||
};
|
||||
while (iter->heap.nr > 1) {
|
||||
struct btree_iter_set *top = iter->heap.data, *i = top + 1;
|
||||
while (iter->used > 1) {
|
||||
struct btree_iter_set *top = iter->data, *i = top + 1;
|
||||
|
||||
if (iter->heap.nr > 2 &&
|
||||
!new_bch_extent_sort_cmp(&i[0], &i[1], NULL))
|
||||
if (iter->used > 2 &&
|
||||
bch_extent_sort_cmp(i[0], i[1]))
|
||||
i++;
|
||||
|
||||
if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
|
||||
|
@ -285,7 +278,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
|
|||
|
||||
if (!KEY_SIZE(i->k)) {
|
||||
sort_key_next(iter, i);
|
||||
min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
|
||||
heap_sift(iter, i - top, bch_extent_sort_cmp);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -295,7 +288,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
|
|||
else
|
||||
bch_cut_front(top->k, i->k);
|
||||
|
||||
min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
|
||||
heap_sift(iter, i - top, bch_extent_sort_cmp);
|
||||
} else {
|
||||
/* can't happen because of comparison func */
|
||||
BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
|
||||
|
@ -305,7 +298,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
|
|||
|
||||
bch_cut_back(&START_KEY(i->k), tmp);
|
||||
bch_cut_front(i->k, top->k);
|
||||
min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
|
||||
heap_sift(iter, 0, bch_extent_sort_cmp);
|
||||
|
||||
return tmp;
|
||||
} else {
|
||||
|
@ -625,7 +618,7 @@ static bool bch_extent_merge(struct btree_keys *bk,
|
|||
}
|
||||
|
||||
const struct btree_keys_ops bch_extent_keys_ops = {
|
||||
.sort_cmp = new_bch_extent_sort_cmp,
|
||||
.sort_cmp = bch_extent_sort_cmp,
|
||||
.sort_fixup = bch_extent_sort_fixup,
|
||||
.insert_fixup = bch_extent_insert_fixup,
|
||||
.key_invalid = bch_extent_invalid,
|
||||
|
|
|
@ -182,19 +182,16 @@ err: if (!IS_ERR_OR_NULL(w->private))
|
|||
closure_sync(&cl);
|
||||
}
|
||||
|
||||
static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args)
|
||||
static bool bucket_cmp(struct bucket *l, struct bucket *r)
|
||||
{
|
||||
struct bucket **_l = (struct bucket **)l;
|
||||
struct bucket **_r = (struct bucket **)r;
|
||||
|
||||
return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r);
|
||||
return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
|
||||
}
|
||||
|
||||
static unsigned int bucket_heap_top(struct cache *ca)
|
||||
{
|
||||
struct bucket *b;
|
||||
|
||||
return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0;
|
||||
return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
|
||||
}
|
||||
|
||||
void bch_moving_gc(struct cache_set *c)
|
||||
|
@ -202,10 +199,6 @@ void bch_moving_gc(struct cache_set *c)
|
|||
struct cache *ca = c->cache;
|
||||
struct bucket *b;
|
||||
unsigned long sectors_to_move, reserve_sectors;
|
||||
const struct min_heap_callbacks callbacks = {
|
||||
.less = new_bucket_cmp,
|
||||
.swp = NULL,
|
||||
};
|
||||
|
||||
if (!c->copy_gc_enabled)
|
||||
return;
|
||||
|
@ -216,7 +209,7 @@ void bch_moving_gc(struct cache_set *c)
|
|||
reserve_sectors = ca->sb.bucket_size *
|
||||
fifo_used(&ca->free[RESERVE_MOVINGGC]);
|
||||
|
||||
ca->heap.nr = 0;
|
||||
ca->heap.used = 0;
|
||||
|
||||
for_each_bucket(b, ca) {
|
||||
if (GC_MARK(b) == GC_MARK_METADATA ||
|
||||
|
@ -225,31 +218,25 @@ void bch_moving_gc(struct cache_set *c)
|
|||
atomic_read(&b->pin))
|
||||
continue;
|
||||
|
||||
if (!min_heap_full(&ca->heap)) {
|
||||
if (!heap_full(&ca->heap)) {
|
||||
sectors_to_move += GC_SECTORS_USED(b);
|
||||
min_heap_push(&ca->heap, &b, &callbacks, NULL);
|
||||
} else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) {
|
||||
heap_add(&ca->heap, b, bucket_cmp);
|
||||
} else if (bucket_cmp(b, heap_peek(&ca->heap))) {
|
||||
sectors_to_move -= bucket_heap_top(ca);
|
||||
sectors_to_move += GC_SECTORS_USED(b);
|
||||
|
||||
ca->heap.data[0] = b;
|
||||
min_heap_sift_down(&ca->heap, 0, &callbacks, NULL);
|
||||
heap_sift(&ca->heap, 0, bucket_cmp);
|
||||
}
|
||||
}
|
||||
|
||||
while (sectors_to_move > reserve_sectors) {
|
||||
if (ca->heap.nr) {
|
||||
b = min_heap_peek(&ca->heap)[0];
|
||||
min_heap_pop(&ca->heap, &callbacks, NULL);
|
||||
}
|
||||
heap_pop(&ca->heap, b, bucket_cmp);
|
||||
sectors_to_move -= GC_SECTORS_USED(b);
|
||||
}
|
||||
|
||||
while (ca->heap.nr) {
|
||||
b = min_heap_peek(&ca->heap)[0];
|
||||
min_heap_pop(&ca->heap, &callbacks, NULL);
|
||||
while (heap_pop(&ca->heap, b, bucket_cmp))
|
||||
SET_GC_MOVE(b, 1);
|
||||
}
|
||||
|
||||
mutex_unlock(&c->bucket_lock);
|
||||
|
||||
|
|
|
@ -1912,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
|
|||
INIT_LIST_HEAD(&c->btree_cache_freed);
|
||||
INIT_LIST_HEAD(&c->data_buckets);
|
||||
|
||||
iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
|
||||
iter_size = sizeof(struct btree_iter) +
|
||||
((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
|
||||
sizeof(struct btree_iter_set);
|
||||
|
||||
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
|
||||
|
|
|
@ -660,9 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c)
|
|||
unsigned int bytes = 0;
|
||||
struct bkey *k;
|
||||
struct btree *b;
|
||||
struct btree_iter iter;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
goto lock_root;
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/llist.h>
|
||||
#include <linux/min_heap.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
@ -31,10 +30,16 @@ struct closure;
|
|||
|
||||
#endif
|
||||
|
||||
#define DECLARE_HEAP(type, name) \
|
||||
struct { \
|
||||
size_t size, used; \
|
||||
type *data; \
|
||||
} name
|
||||
|
||||
#define init_heap(heap, _size, gfp) \
|
||||
({ \
|
||||
size_t _bytes; \
|
||||
(heap)->nr = 0; \
|
||||
(heap)->used = 0; \
|
||||
(heap)->size = (_size); \
|
||||
_bytes = (heap)->size * sizeof(*(heap)->data); \
|
||||
(heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \
|
||||
|
@ -47,6 +52,64 @@ do { \
|
|||
(heap)->data = NULL; \
|
||||
} while (0)
|
||||
|
||||
#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])
|
||||
|
||||
#define heap_sift(h, i, cmp) \
|
||||
do { \
|
||||
size_t _r, _j = i; \
|
||||
\
|
||||
for (; _j * 2 + 1 < (h)->used; _j = _r) { \
|
||||
_r = _j * 2 + 1; \
|
||||
if (_r + 1 < (h)->used && \
|
||||
cmp((h)->data[_r], (h)->data[_r + 1])) \
|
||||
_r++; \
|
||||
\
|
||||
if (cmp((h)->data[_r], (h)->data[_j])) \
|
||||
break; \
|
||||
heap_swap(h, _r, _j); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define heap_sift_down(h, i, cmp) \
|
||||
do { \
|
||||
while (i) { \
|
||||
size_t p = (i - 1) / 2; \
|
||||
if (cmp((h)->data[i], (h)->data[p])) \
|
||||
break; \
|
||||
heap_swap(h, i, p); \
|
||||
i = p; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define heap_add(h, d, cmp) \
|
||||
({ \
|
||||
bool _r = !heap_full(h); \
|
||||
if (_r) { \
|
||||
size_t _i = (h)->used++; \
|
||||
(h)->data[_i] = d; \
|
||||
\
|
||||
heap_sift_down(h, _i, cmp); \
|
||||
heap_sift(h, _i, cmp); \
|
||||
} \
|
||||
_r; \
|
||||
})
|
||||
|
||||
#define heap_pop(h, d, cmp) \
|
||||
({ \
|
||||
bool _r = (h)->used; \
|
||||
if (_r) { \
|
||||
(d) = (h)->data[0]; \
|
||||
(h)->used--; \
|
||||
heap_swap(h, 0, (h)->used); \
|
||||
heap_sift(h, 0, cmp); \
|
||||
} \
|
||||
_r; \
|
||||
})
|
||||
|
||||
#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL)
|
||||
|
||||
#define heap_full(h) ((h)->used == (h)->size)
|
||||
|
||||
#define DECLARE_FIFO(type, name) \
|
||||
struct { \
|
||||
size_t front, back, size, mask; \
|
||||
|
|
|
@ -908,16 +908,15 @@ static int bch_dirty_init_thread(void *arg)
|
|||
struct dirty_init_thrd_info *info = arg;
|
||||
struct bch_dirty_init_state *state = info->state;
|
||||
struct cache_set *c = state->c;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey *k, *p;
|
||||
int cur_idx, prev_idx, skip_nr;
|
||||
|
||||
k = p = NULL;
|
||||
prev_idx = 0;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
bch_btree_iter_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
|
||||
bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
|
||||
BUG_ON(!k);
|
||||
|
||||
p = k;
|
||||
|
@ -931,7 +930,7 @@ static int bch_dirty_init_thread(void *arg)
|
|||
skip_nr = cur_idx - prev_idx;
|
||||
|
||||
while (skip_nr) {
|
||||
k = bch_btree_iter_next_filter(&iter,
|
||||
k = bch_btree_iter_next_filter(&iter.iter,
|
||||
&c->root->keys,
|
||||
bch_ptr_bad);
|
||||
if (k)
|
||||
|
@ -980,13 +979,11 @@ void bch_sectors_dirty_init(struct bcache_device *d)
|
|||
int i;
|
||||
struct btree *b = NULL;
|
||||
struct bkey *k = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct sectors_dirty_init op;
|
||||
struct cache_set *c = d->c;
|
||||
struct bch_dirty_init_state state;
|
||||
|
||||
min_heap_init(&iter.heap, NULL, MAX_BSETS);
|
||||
|
||||
retry_lock:
|
||||
b = c->root;
|
||||
rw_lock(0, b, b->level);
|
||||
|
|
|
@ -164,11 +164,21 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
|
|||
}
|
||||
|
||||
/* almost as free_reserved_page(), just don't free the page */
|
||||
static void kho_restore_page(struct page *page)
|
||||
static void kho_restore_page(struct page *page, unsigned int order)
|
||||
{
|
||||
ClearPageReserved(page);
|
||||
init_page_count(page);
|
||||
adjust_managed_page_count(page, 1);
|
||||
unsigned int nr_pages = (1 << order);
|
||||
|
||||
/* Head page gets refcount of 1. */
|
||||
set_page_count(page, 1);
|
||||
|
||||
/* For higher order folios, tail pages get a page count of zero. */
|
||||
for (unsigned int i = 1; i < nr_pages; i++)
|
||||
set_page_count(page + i, 0);
|
||||
|
||||
if (order > 0)
|
||||
prep_compound_page(page, order);
|
||||
|
||||
adjust_managed_page_count(page, nr_pages);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -186,15 +196,10 @@ struct folio *kho_restore_folio(phys_addr_t phys)
|
|||
return NULL;
|
||||
|
||||
order = page->private;
|
||||
if (order) {
|
||||
if (order > MAX_PAGE_ORDER)
|
||||
return NULL;
|
||||
|
||||
prep_compound_page(page, order);
|
||||
} else {
|
||||
kho_restore_page(page);
|
||||
}
|
||||
if (order > MAX_PAGE_ORDER)
|
||||
return NULL;
|
||||
|
||||
kho_restore_page(page, order);
|
||||
return page_folio(page);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kho_restore_folio);
|
||||
|
|
|
@ -5527,8 +5527,9 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
|
|||
mas->store_type = mas_wr_store_type(&wr_mas);
|
||||
request = mas_prealloc_calc(&wr_mas, entry);
|
||||
if (!request)
|
||||
return ret;
|
||||
goto set_flag;
|
||||
|
||||
mas->mas_flags &= ~MA_STATE_PREALLOC;
|
||||
mas_node_count_gfp(mas, request, gfp);
|
||||
if (mas_is_err(mas)) {
|
||||
mas_set_alloc_req(mas, 0);
|
||||
|
@ -5538,6 +5539,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
|
|||
return ret;
|
||||
}
|
||||
|
||||
set_flag:
|
||||
mas->mas_flags |= MA_STATE_PREALLOC;
|
||||
return ret;
|
||||
}
|
||||
|
|
14
mm/gup.c
14
mm/gup.c
|
@ -2303,13 +2303,13 @@ static void pofs_unpin(struct pages_or_folios *pofs)
|
|||
/*
|
||||
* Returns the number of collected folios. Return value is always >= 0.
|
||||
*/
|
||||
static void collect_longterm_unpinnable_folios(
|
||||
static unsigned long collect_longterm_unpinnable_folios(
|
||||
struct list_head *movable_folio_list,
|
||||
struct pages_or_folios *pofs)
|
||||
{
|
||||
unsigned long i, collected = 0;
|
||||
struct folio *prev_folio = NULL;
|
||||
bool drain_allow = true;
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < pofs->nr_entries; i++) {
|
||||
struct folio *folio = pofs_get_folio(pofs, i);
|
||||
|
@ -2321,6 +2321,8 @@ static void collect_longterm_unpinnable_folios(
|
|||
if (folio_is_longterm_pinnable(folio))
|
||||
continue;
|
||||
|
||||
collected++;
|
||||
|
||||
if (folio_is_device_coherent(folio))
|
||||
continue;
|
||||
|
||||
|
@ -2342,6 +2344,8 @@ static void collect_longterm_unpinnable_folios(
|
|||
NR_ISOLATED_ANON + folio_is_file_lru(folio),
|
||||
folio_nr_pages(folio));
|
||||
}
|
||||
|
||||
return collected;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2418,9 +2422,11 @@ static long
|
|||
check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
|
||||
{
|
||||
LIST_HEAD(movable_folio_list);
|
||||
unsigned long collected;
|
||||
|
||||
collect_longterm_unpinnable_folios(&movable_folio_list, pofs);
|
||||
if (list_empty(&movable_folio_list))
|
||||
collected = collect_longterm_unpinnable_folios(&movable_folio_list,
|
||||
pofs);
|
||||
if (!collected)
|
||||
return 0;
|
||||
|
||||
return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
|
||||
|
|
20
mm/memory.c
20
mm/memory.c
|
@ -4315,26 +4315,6 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
|
||||
{
|
||||
struct swap_info_struct *si = swp_swap_info(entry);
|
||||
pgoff_t offset = swp_offset(entry);
|
||||
int i;
|
||||
|
||||
/*
|
||||
* While allocating a large folio and doing swap_read_folio, which is
|
||||
* the case the being faulted pte doesn't have swapcache. We need to
|
||||
* ensure all PTEs have no cache as well, otherwise, we might go to
|
||||
* swap devices while the content is in swapcache.
|
||||
*/
|
||||
for (i = 0; i < max_nr; i++) {
|
||||
if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
|
||||
return i;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the PTEs within a range are contiguous swap entries
|
||||
* and have consistent swapcache, zeromap.
|
||||
|
|
|
@ -2259,6 +2259,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
|
|||
folio = swap_cache_get_folio(swap, NULL, 0);
|
||||
order = xa_get_order(&mapping->i_pages, index);
|
||||
if (!folio) {
|
||||
int nr_pages = 1 << order;
|
||||
bool fallback_order0 = false;
|
||||
|
||||
/* Or update major stats only when swapin succeeds?? */
|
||||
|
@ -2272,9 +2273,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
|
|||
* If uffd is active for the vma, we need per-page fault
|
||||
* fidelity to maintain the uffd semantics, then fallback
|
||||
* to swapin order-0 folio, as well as for zswap case.
|
||||
* Any existing sub folio in the swap cache also blocks
|
||||
* mTHP swapin.
|
||||
*/
|
||||
if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) ||
|
||||
!zswap_never_enabled()))
|
||||
!zswap_never_enabled() ||
|
||||
non_swapcache_batch(swap, nr_pages) != nr_pages))
|
||||
fallback_order0 = true;
|
||||
|
||||
/* Skip swapcache for synchronous device. */
|
||||
|
|
23
mm/swap.h
23
mm/swap.h
|
@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
|
|||
return find_next_bit(sis->zeromap, end, start) - start;
|
||||
}
|
||||
|
||||
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
|
||||
{
|
||||
struct swap_info_struct *si = swp_swap_info(entry);
|
||||
pgoff_t offset = swp_offset(entry);
|
||||
int i;
|
||||
|
||||
/*
|
||||
* While allocating a large folio and doing mTHP swapin, we need to
|
||||
* ensure all entries are not cached, otherwise, the mTHP folio will
|
||||
* be in conflict with the folio in swap cache.
|
||||
*/
|
||||
for (i = 0; i < max_nr; i++) {
|
||||
if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
|
||||
return i;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
#else /* CONFIG_SWAP */
|
||||
struct swap_iocb;
|
||||
static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
|
||||
|
@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_SWAP */
|
||||
|
||||
/**
|
||||
|
|
|
@ -1084,8 +1084,18 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
|
|||
pte_t orig_dst_pte, pte_t orig_src_pte,
|
||||
pmd_t *dst_pmd, pmd_t dst_pmdval,
|
||||
spinlock_t *dst_ptl, spinlock_t *src_ptl,
|
||||
struct folio *src_folio)
|
||||
struct folio *src_folio,
|
||||
struct swap_info_struct *si, swp_entry_t entry)
|
||||
{
|
||||
/*
|
||||
* Check if the folio still belongs to the target swap entry after
|
||||
* acquiring the lock. Folio can be freed in the swap cache while
|
||||
* not locked.
|
||||
*/
|
||||
if (src_folio && unlikely(!folio_test_swapcache(src_folio) ||
|
||||
entry.val != src_folio->swap.val))
|
||||
return -EAGAIN;
|
||||
|
||||
double_pt_lock(dst_ptl, src_ptl);
|
||||
|
||||
if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte,
|
||||
|
@ -1102,6 +1112,25 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
|
|||
if (src_folio) {
|
||||
folio_move_anon_rmap(src_folio, dst_vma);
|
||||
src_folio->index = linear_page_index(dst_vma, dst_addr);
|
||||
} else {
|
||||
/*
|
||||
* Check if the swap entry is cached after acquiring the src_pte
|
||||
* lock. Otherwise, we might miss a newly loaded swap cache folio.
|
||||
*
|
||||
* Check swap_map directly to minimize overhead, READ_ONCE is sufficient.
|
||||
* We are trying to catch newly added swap cache, the only possible case is
|
||||
* when a folio is swapped in and out again staying in swap cache, using the
|
||||
* same entry before the PTE check above. The PTL is acquired and released
|
||||
* twice, each time after updating the swap_map's flag. So holding
|
||||
* the PTL here ensures we see the updated value. False positive is possible,
|
||||
* e.g. SWP_SYNCHRONOUS_IO swapin may set the flag without touching the
|
||||
* cache, or during the tiny synchronization window between swap cache and
|
||||
* swap_map, but it will be gone very quickly, worst result is retry jitters.
|
||||
*/
|
||||
if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) {
|
||||
double_pt_unlock(dst_ptl, src_ptl);
|
||||
return -EAGAIN;
|
||||
}
|
||||
}
|
||||
|
||||
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
|
||||
|
@ -1412,7 +1441,7 @@ retry:
|
|||
}
|
||||
err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte,
|
||||
orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval,
|
||||
dst_ptl, src_ptl, src_folio);
|
||||
dst_ptl, src_ptl, src_folio, si, entry);
|
||||
}
|
||||
|
||||
out:
|
||||
|
|
|
@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y
|
|||
CONFIG_TRANSPARENT_HUGEPAGE=y
|
||||
CONFIG_MEM_SOFT_DIRTY=y
|
||||
CONFIG_ANON_VMA_NAME=y
|
||||
CONFIG_FTRACE=y
|
||||
CONFIG_PROFILING=y
|
||||
CONFIG_UPROBES=y
|
||||
|
|
|
@ -470,7 +470,9 @@ TEST_F(merge, handle_uprobe_upon_merged_vma)
|
|||
ASSERT_GE(fd, 0);
|
||||
|
||||
ASSERT_EQ(ftruncate(fd, page_size), 0);
|
||||
ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0);
|
||||
if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) {
|
||||
SKIP(goto out, "Failed to read uprobe sysfs file, skipping");
|
||||
}
|
||||
|
||||
memset(&attr, 0, attr_sz);
|
||||
attr.size = attr_sz;
|
||||
|
@ -491,6 +493,7 @@ TEST_F(merge, handle_uprobe_upon_merged_vma)
|
|||
ASSERT_NE(mremap(ptr2, page_size, page_size,
|
||||
MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
|
||||
|
||||
out:
|
||||
close(fd);
|
||||
remove(probe_file);
|
||||
}
|
||||
|
|
|
@ -1 +1 @@
|
|||
timeout=180
|
||||
timeout=900
|
||||
|
|
Loading…
Add table
Reference in a new issue