mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-04-13 09:59:31 +00:00
slab updates for 6.13
-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEe7vIQRWZI0iWSE3xu+CwddJFiJoFAmdERvEACgkQu+CwddJF
iJre6Af9EBMVQiWJrmoMOjbGLqLgmZzSXRNxR862WGn4D/wesA1HmSlWgEn54hgc
GIYIeD++v4JaIRNH0yZqb2UBSKjF/rYPDkKstnqgFaVakLoDrwkkwV2n3Gk5BEgR
m/SzLGgoDWKR65I/oMpL6e2KrMOfMfjpB31qiVvdlaQd2Nv/5rw+gUVylxhNIZEH
W11N3IC+e9hmgT3ZBpTmHeqNrlXE1+USWPrp/HV05Ndz6yf97JnP4Wr9f9pcyN3R
aflLHR38+Q9cCfO7y8wNqtYvIV/kbqgdaqD76frSgalC4Lmz9+L+TZ2NuENCPoGj
Xdbip2z+iffWhvqM+qooOLVxR0XqTA==
=Sepb
-----END PGP SIGNATURE-----

Merge tag 'slab-for-6.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - Add new slab_strict_numa boot parameter to enforce per-object memory
   policies on top of slab folio policies, for systems where saving cost
   of remote accesses is more important than minimizing slab allocation
   overhead (Christoph Lameter)

 - Fix for freeptr_offset alignment check being too strict for m68k
   (Geert Uytterhoeven)

 - krealloc() fixes for not violating __GFP_ZERO guarantees on
   krealloc() when slub_debug (redzone and object tracking) is enabled
   (Feng Tang)

 - Fix a memory leak in case sysfs registration fails for a slab cache,
   and also no longer fail to create the cache in that case
   (Hyeonggon Yoo)

 - Fix handling of detected consistency problems (due to buggy slab
   user) with slub_debug enabled, so that it does not cause further
   list corruption bugs (yuan.gao)

 - Code cleanup and kerneldocs polishing (Zhen Lei, Vlastimil Babka)

* tag 'slab-for-6.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  slab: Fix too strict alignment check in create_cache()
  mm/slab: Allow cache creation to proceed even if sysfs registration fails
  mm/slub: Avoid list corruption when removing a slab from the full list
  mm/slub, kunit: Add testcase for krealloc redzone and zeroing
  mm/slub: Improve redzone check and zeroing for krealloc()
  mm/slub: Consider kfence case for get_orig_size()
  SLUB: Add support for per object memory policies
  mm, slab: add kerneldocs for common SLAB_ flags
  mm/slab: remove duplicate check in create_cache()
  mm/slub: Move krealloc() and related code to slub.c
  mm/kasan: Don't store metadata inside kmalloc object when slub_debug_orig_size is on
commit e06635e26c
8 changed files with 325 additions and 137 deletions
Documentation/admin-guide/kernel-parameters.txt

@@ -6158,6 +6158,16 @@
 			For more information see Documentation/mm/slub.rst.
 			(slub_nomerge legacy name also accepted for now)
 
+	slab_strict_numa	[MM]
+			Support memory policies on a per object level
+			in the slab allocator. The default is for memory
+			policies to be applied at the folio level when
+			a new folio is needed or a partial folio is
+			retrieved from the lists. Increases overhead
+			in the slab fastpaths but gains more accurate
+			NUMA kernel object placement which helps with slow
+			interconnects in NUMA systems.
+
 	slram=		[HW,MTD]
 
 	smart2=		[HW]
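Usage note (editorial, not part of the diff): on a multi-node machine the behaviour is enabled simply by appending `slab_strict_numa` to the kernel command line. As the setup handler added in mm/slub.c further below shows, the static key is only enabled when nr_node_ids > 1; on a single-node system the parameter is accepted but merely prints a warning.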
Documentation/mm/slub.rst

@@ -175,6 +175,15 @@ can be influenced by kernel parameters:
 	``slab_max_order`` to 0, what cause minimum possible order of
 	slabs allocation.
 
+``slab_strict_numa``
+	Enables the application of memory policies on each
+	allocation. This results in more accurate placement of
+	objects which may result in the reduction of accesses
+	to remote nodes. The default is to only apply memory
+	policies at the folio level when a new folio is acquired
+	or a folio is retrieved from the lists. Enabling this
+	option reduces the fastpath performance of the slab allocator.
+
 SLUB Debug output
 =================
 
include/linux/slab.h

@@ -77,7 +77,17 @@ enum _slab_flag_bits {
 #define SLAB_POISON		__SLAB_FLAG_BIT(_SLAB_POISON)
 /* Indicate a kmalloc slab */
 #define SLAB_KMALLOC		__SLAB_FLAG_BIT(_SLAB_KMALLOC)
-/* Align objs on cache lines */
+/**
+ * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * Sufficiently large objects are aligned on cache line boundary. For object
+ * size smaller than a half of cache line size, the alignment is on the half of
+ * cache line size. In general, if object size is smaller than 1/2^n of cache
+ * line size, the alignment is adjusted to 1/2^n.
+ *
+ * If explicit alignment is also requested by the respective
+ * &struct kmem_cache_args field, the greater of both is alignments is applied.
+ */
 #define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
 /* Use GFP_DMA memory */
 #define SLAB_CACHE_DMA		__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
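A worked illustration of the alignment rule documented above (editorial; assuming a 64-byte cache line): a 40-byte object is larger than half a line, so it gets the full 64-byte alignment; a 24-byte object is smaller than half a line but not a quarter, so it is aligned to 32 bytes; a 12-byte object falls under a quarter line and is aligned to 16 bytes.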
@@ -87,8 +97,8 @@ enum _slab_flag_bits {
 #define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
 /* Panic if kmem_cache_create() fails */
 #define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
-/*
- * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
+/**
+ * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -99,20 +109,22 @@ enum _slab_flag_bits {
  * stays valid, the trick to using this is relying on an independent
  * object validation pass. Something like:
  *
- * begin:
- *  rcu_read_lock();
- *  obj = lockless_lookup(key);
- *  if (obj) {
- *    if (!try_get_ref(obj)) // might fail for free objects
- *      rcu_read_unlock();
- *      goto begin;
+ * ::
  *
- *    if (obj->key != key) { // not the object we expected
- *      put_ref(obj);
- *      rcu_read_unlock();
- *      goto begin;
- *    }
- *  }
+ *  begin:
+ *   rcu_read_lock();
+ *   obj = lockless_lookup(key);
+ *   if (obj) {
+ *     if (!try_get_ref(obj)) // might fail for free objects
+ *       rcu_read_unlock();
+ *       goto begin;
+ *
+ *     if (obj->key != key) { // not the object we expected
+ *       put_ref(obj);
+ *       rcu_read_unlock();
+ *       goto begin;
+ *     }
+ *   }
  *  rcu_read_unlock();
  *
  * This is useful if we need to approach a kernel structure obliquely,
@@ -137,7 +149,6 @@ enum _slab_flag_bits {
  *
  * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-/* Defer freeing slabs to RCU */
 #define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
 /* Trace allocations and frees */
 #define SLAB_TRACE		__SLAB_FLAG_BIT(_SLAB_TRACE)
@@ -170,7 +181,12 @@ enum _slab_flag_bits {
 #else
 # define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
 #endif
-/* Account to memcg */
+/**
+ * define SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * All object allocations from this cache will be memcg accounted, regardless of
+ * __GFP_ACCOUNT being or not being passed to individual allocations.
+ */
 #ifdef CONFIG_MEMCG
 # define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
 #else
@@ -197,7 +213,13 @@ enum _slab_flag_bits {
 #endif
 
 /* The following flags affect the page allocator grouping pages by mobility */
-/* Objects are reclaimable */
+/**
+ * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * Use this flag for caches that have an associated shrinker. As a result, slab
+ * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
+ * mobility, and are accounted in SReclaimable counter in /proc/meminfo
+ */
 #ifndef CONFIG_SLUB_TINY
 #define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
 #else
lib/slub_kunit.c

@@ -192,6 +192,47 @@ static void test_leak_destroy(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 2, slab_errors);
 }
 
+static void test_krealloc_redzone_zeroing(struct kunit *test)
+{
+	u8 *p;
+	int i;
+	struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64,
+				SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
+
+	p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48));
+	memset(p, 0xff, 48);
+
+	kasan_disable_current();
+	OPTIMIZER_HIDE_VAR(p);
+
+	/* Test shrink */
+	p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	/* Test grow within the same 64B kmalloc object */
+	p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+	for (i = 56; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+
+	memset(p, 0xff, 56);
+	/* Test grow with allocating a bigger 128B object */
+	p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO);
+	for (i = 0; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0xff);
+	for (i = 56; i < 112; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+
+	kfree(p);
+	kasan_enable_current();
+	kmem_cache_destroy(s);
+}
+
 static int test_init(struct kunit *test)
 {
 	slab_errors = 0;
@@ -214,6 +255,7 @@ static struct kunit_case test_cases[] = {
 	KUNIT_CASE(test_kmalloc_redzone_access),
 	KUNIT_CASE(test_kfree_rcu),
 	KUNIT_CASE(test_leak_destroy),
+	KUNIT_CASE(test_krealloc_redzone_zeroing),
 	{}
 };
mm/kasan/generic.c

@@ -392,9 +392,12 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
 	 * 1. Object is SLAB_TYPESAFE_BY_RCU, which means that it can
 	 *    be touched after it was freed, or
 	 * 2. Object has a constructor, which means it's expected to
-	 *    retain its content until the next allocation.
+	 *    retain its content until the next allocation, or
+	 * 3. It is from a kmalloc cache which enables the debug option
+	 *    to store original size.
 	 */
-	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor) {
+	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor ||
+	     slub_debug_orig_size(cache)) {
 		cache->kasan_info.free_meta_offset = *size;
 		*size += sizeof(struct kasan_free_meta);
 		goto free_meta_added;
mm/slab.h (11 changed lines)
@@ -73,6 +73,11 @@ struct slab {
 			struct {
 				unsigned inuse:16;
 				unsigned objects:15;
+				/*
+				 * If slab debugging is enabled then the
+				 * frozen bit can be reused to indicate
+				 * that the slab was corrupted
+				 */
 				unsigned frozen:1;
 			};
 		};
@@ -695,6 +700,12 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 void __check_heap_object(const void *ptr, unsigned long n,
 			 const struct slab *slab, bool to_user);
 
+static inline bool slub_debug_orig_size(struct kmem_cache *s)
+{
+	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
+			(s->flags & SLAB_KMALLOC));
+}
+
 #ifdef CONFIG_SLUB_DEBUG
 void skip_orig_size_check(struct kmem_cache *s, const void *object);
 #endif
mm/slab_common.c (103 changed lines)
@@ -222,15 +222,12 @@ static struct kmem_cache *create_cache(const char *name,
 	struct kmem_cache *s;
 	int err;
 
-	if (WARN_ON(args->useroffset + args->usersize > object_size))
-		args->useroffset = args->usersize = 0;
-
 	/* If a custom freelist pointer is requested make sure it's sane. */
 	err = -EINVAL;
 	if (args->use_freeptr_offset &&
 	    (args->freeptr_offset >= object_size ||
 	     !(flags & SLAB_TYPESAFE_BY_RCU) ||
-	     !IS_ALIGNED(args->freeptr_offset, sizeof(freeptr_t))))
+	     !IS_ALIGNED(args->freeptr_offset, __alignof__(freeptr_t))))
 		goto out;
 
 	err = -ENOMEM;
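For context, a minimal sketch of the kind of caller the relaxed IS_ALIGNED() check is aimed at: a SLAB_TYPESAFE_BY_RCU cache that keeps its freelist pointer in an explicit field, which on m68k may only be 2-byte aligned. The struct, field and cache names below are hypothetical, not taken from the commit.

#include <linux/slab.h>

/* Hypothetical object layout; only the freeptr_offset plumbing matters. */
struct rcu_obj {
	int key;
	freeptr_t free;		/* reused for the freelist while the object is free */
};

static struct kmem_cache *rcu_obj_cache;

static int __init rcu_obj_cache_init(void)
{
	struct kmem_cache_args args = {
		.use_freeptr_offset = true,
		/* must now be aligned to __alignof__(freeptr_t), not sizeof() */
		.freeptr_offset = offsetof(struct rcu_obj, free),
	};

	rcu_obj_cache = kmem_cache_create("rcu_obj", sizeof(struct rcu_obj),
					  &args, SLAB_TYPESAFE_BY_RCU);
	return rcu_obj_cache ? 0 : -ENOMEM;
}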
@@ -257,11 +254,23 @@ out:
  * @object_size: The size of objects to be created in this cache.
  * @args: Additional arguments for the cache creation (see
  *        &struct kmem_cache_args).
- * @flags: See %SLAB_* flags for an explanation of individual @flags.
+ * @flags: See the desriptions of individual flags. The common ones are listed
+ *         in the description below.
  *
  * Not to be called directly, use the kmem_cache_create() wrapper with the same
  * parameters.
  *
+ * Commonly used @flags:
+ *
+ * &SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * &SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * &SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * &SLAB_TYPESAFE_BY_RCU - Slab page (not individual objects) freeing delayed
+ * by a grace period - see the full description before using.
+ *
  * Context: Cannot be called within a interrupt, but can be interrupted.
  *
  * Return: a pointer to the cache on success, NULL on failure.
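To make the newly documented flag list concrete, here is a short sketch of a cache created through the kmem_cache_create() wrapper with two of the flags named above. The cache name and struct are made up for illustration only.

#include <linux/slab.h>

struct connection {
	u64 id;
	struct list_head node;
};

static struct kmem_cache *conn_cachep;

static int __init conn_cache_init(void)
{
	/* Cache-line aligned objects, charged to the allocating task's memcg. */
	conn_cachep = kmem_cache_create("conn_cache", sizeof(struct connection), 0,
					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
	return conn_cachep ? 0 : -ENOMEM;
}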
@@ -1199,90 +1208,6 @@ module_init(slab_proc_init);
 
 #endif /* CONFIG_SLUB_DEBUG */
 
-static __always_inline __realloc_size(2) void *
-__do_krealloc(const void *p, size_t new_size, gfp_t flags)
-{
-	void *ret;
-	size_t ks;
-
-	/* Check for double-free before calling ksize. */
-	if (likely(!ZERO_OR_NULL_PTR(p))) {
-		if (!kasan_check_byte(p))
-			return NULL;
-		ks = ksize(p);
-	} else
-		ks = 0;
-
-	/* If the object still fits, repoison it precisely. */
-	if (ks >= new_size) {
-		/* Zero out spare memory. */
-		if (want_init_on_alloc(flags)) {
-			kasan_disable_current();
-			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
-			kasan_enable_current();
-		}
-
-		p = kasan_krealloc((void *)p, new_size, flags);
-		return (void *)p;
-	}
-
-	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
-	if (ret && p) {
-		/* Disable KASAN checks as the object's redzone is accessed. */
-		kasan_disable_current();
-		memcpy(ret, kasan_reset_tag(p), ks);
-		kasan_enable_current();
-	}
-
-	return ret;
-}
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
- * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
- *
- * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
- * initial memory allocation, every subsequent call to this API for the same
- * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
- * __GFP_ZERO is not fully honored by this API.
- *
- * This is the case, since krealloc() only knows about the bucket size of an
- * allocation (but not the exact size it was allocated with) and hence
- * implements the following semantics for shrinking and growing buffers with
- * __GFP_ZERO.
- *
- *         new             bucket
- * 0       size             size
- * |--------|----------------|
- * |  keep  |      zero      |
- *
- * In any case, the contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.
- *
- * Return: pointer to the allocated memory or %NULL in case of error
- */
-void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
-{
-	void *ret;
-
-	if (unlikely(!new_size)) {
-		kfree(p);
-		return ZERO_SIZE_PTR;
-	}
-
-	ret = __do_krealloc(p, new_size, flags);
-	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
-		kfree(p);
-
-	return ret;
-}
-EXPORT_SYMBOL(krealloc_noprof);
-
 /**
  * kfree_sensitive - Clear sensitive information in memory before freeing
  * @p: object to free memory of
mm/slub.c (220 changed lines)
@@ -218,6 +218,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 #endif		/* CONFIG_SLUB_DEBUG */
 
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
 /* Structure holding parameters for get_partial() call chain */
 struct partial_context {
 	gfp_t flags;
@@ -230,12 +234,6 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
 	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
 }
 
-static inline bool slub_debug_orig_size(struct kmem_cache *s)
-{
-	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
-			(s->flags & SLAB_KMALLOC));
-}
-
 void *fixup_red_left(struct kmem_cache *s, void *p)
 {
 	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -760,21 +758,10 @@ static inline void set_orig_size(struct kmem_cache *s,
 				 void *object, unsigned int orig_size)
 {
 	void *p = kasan_reset_tag(object);
-	unsigned int kasan_meta_size;
 
 	if (!slub_debug_orig_size(s))
 		return;
 
-	/*
-	 * KASAN can save its free meta data inside of the object at offset 0.
-	 * If this meta data size is larger than 'orig_size', it will overlap
-	 * the data redzone in [orig_size+1, object_size]. Thus, we adjust
-	 * 'orig_size' to be as at least as big as KASAN's meta data.
-	 */
-	kasan_meta_size = kasan_metadata_size(s, true);
-	if (kasan_meta_size > orig_size)
-		orig_size = kasan_meta_size;
-
 	p += get_info_end(s);
 	p += sizeof(struct track) * 2;
 
@@ -785,6 +772,9 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
 {
 	void *p = kasan_reset_tag(object);
 
+	if (is_kfence_address(object))
+		return kfence_ksize(object);
+
 	if (!slub_debug_orig_size(s))
 		return s->object_size;
 
@@ -1423,6 +1413,11 @@ static int check_slab(struct kmem_cache *s, struct slab *slab)
 			slab->inuse, slab->objects);
 		return 0;
 	}
+	if (slab->frozen) {
+		slab_err(s, slab, "Slab disabled since SLUB metadata consistency check failed");
+		return 0;
+	}
+
 	/* Slab_pad_check fixes things up after itself */
 	slab_pad_check(s, slab);
 	return 1;
@@ -1603,6 +1598,7 @@ bad:
 		slab_fix(s, "Marking all objects used");
 		slab->inuse = slab->objects;
 		slab->freelist = NULL;
+		slab->frozen = 1; /* mark consistency-failed slab as frozen */
 	}
 	return false;
 }
@@ -2744,7 +2740,8 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
 	slab->inuse++;
 
 	if (!alloc_debug_processing(s, slab, object, orig_size)) {
-		remove_partial(n, slab);
+		if (folio_test_slab(slab_folio(slab)))
+			remove_partial(n, slab);
 		return NULL;
 	}
 
@@ -3956,6 +3953,28 @@ redo:
 	object = c->freelist;
 	slab = c->slab;
 
+#ifdef CONFIG_NUMA
+	if (static_branch_unlikely(&strict_numa) &&
+			node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If existing slab
+			 * is in permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND || !slab ||
+					!node_isset(slab_nid(slab), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
+	}
+#endif
+
 	if (!USE_LOCKLESS_FAST_PATH() ||
 	    unlikely(!object || !slab || !node_match(slab, node))) {
 		object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
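The mempolicy consulted in the hunk above is the calling task's policy, i.e. the one a process configures with set_mempolicy(2). A rough userspace sketch (node number and error handling are illustrative only; the wrapper comes from libnuma's numaif.h) of a task whose kernel-object placement would follow a BIND policy once slab_strict_numa is active:

#include <numaif.h>
#include <stdio.h>

int main(void)
{
	unsigned long nodemask = 1UL << 0;	/* allow only node 0 */

	/*
	 * Bind this task's memory policy; with slab_strict_numa set, slab
	 * objects allocated in this task's context are steered per object
	 * according to the policy, not only when a new slab folio is needed.
	 */
	if (set_mempolicy(MPOL_BIND, &nodemask, 8 * sizeof(nodemask)))
		perror("set_mempolicy");

	return 0;
}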
@@ -4728,6 +4747,126 @@ void kfree(const void *object)
 }
 EXPORT_SYMBOL(kfree);
 
+static __always_inline __realloc_size(2) void *
+__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+	size_t ks = 0;
+	int orig_size = 0;
+	struct kmem_cache *s = NULL;
+
+	if (unlikely(ZERO_OR_NULL_PTR(p)))
+		goto alloc_new;
+
+	/* Check for double-free. */
+	if (!kasan_check_byte(p))
+		return NULL;
+
+	if (is_kfence_address(p)) {
+		ks = orig_size = kfence_ksize(p);
+	} else {
+		struct folio *folio;
+
+		folio = virt_to_folio(p);
+		if (unlikely(!folio_test_slab(folio))) {
+			/* Big kmalloc object */
+			WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
+			WARN_ON(p != folio_address(folio));
+			ks = folio_size(folio);
+		} else {
+			s = folio_slab(folio)->slab_cache;
+			orig_size = get_orig_size(s, (void *)p);
+			ks = s->object_size;
+		}
+	}
+
+	/* If the old object doesn't fit, allocate a bigger one */
+	if (new_size > ks)
+		goto alloc_new;
+
+	/* Zero out spare memory. */
+	if (want_init_on_alloc(flags)) {
+		kasan_disable_current();
+		if (orig_size && orig_size < new_size)
+			memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size);
+		else
+			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
+		kasan_enable_current();
+	}
+
+	/* Setup kmalloc redzone when needed */
+	if (s && slub_debug_orig_size(s)) {
+		set_orig_size(s, (void *)p, new_size);
+		if (s->flags & SLAB_RED_ZONE && new_size < ks)
+			memset_no_sanitize_memory(kasan_reset_tag(p) + new_size,
+						SLUB_RED_ACTIVE, ks - new_size);
+	}
+
+	p = kasan_krealloc(p, new_size, flags);
+	return (void *)p;
+
+alloc_new:
+	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+	if (ret && p) {
+		/* Disable KASAN checks as the object's redzone is accessed. */
+		kasan_disable_current();
+		memcpy(ret, kasan_reset_tag(p), orig_size ?: ks);
+		kasan_enable_current();
+	}
+
+	return ret;
+}
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
+ * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
+ * size of an allocation (but not the exact size it was allocated with) and
+ * hence implements the following semantics for shrinking and growing buffers
+ * with __GFP_ZERO.
+ *
+ *         new             bucket
+ * 0       size             size
+ * |--------|----------------|
+ * |  keep  |      zero      |
+ *
+ * Otherwise, the original allocation size 'orig_size' could be used to
+ * precisely clear the requested size, and the new size will also be stored
+ * as the new 'orig_size'.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+
+	if (unlikely(!new_size)) {
+		kfree(p);
+		return ZERO_SIZE_PTR;
+	}
+
+	ret = __do_krealloc(p, new_size, flags);
+	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
+		kfree(p);
+
+	return ret;
+}
+EXPORT_SYMBOL(krealloc_noprof);
+
 struct detached_freelist {
 	struct slab *slab;
 	void *tail;
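A caller-side sketch of the __GFP_ZERO contract described in the kerneldoc above (sizes and the function name are illustrative): the flag has to be passed on the initial kmalloc() and on every later krealloc() of the same buffer for the newly exposed bytes to read back as zero.

#include <linux/slab.h>

static int example_grow_zeroed(void)
{
	u8 *buf, *tmp;

	buf = kmalloc(48, GFP_KERNEL | __GFP_ZERO);
	if (!buf)
		return -ENOMEM;

	/* ... fill and use the first 48 bytes ... */

	/*
	 * Grow the buffer; because __GFP_ZERO was used consistently,
	 * bytes 48..111 are expected to read as zero afterwards.
	 */
	tmp = krealloc(buf, 112, GFP_KERNEL | __GFP_ZERO);
	if (!tmp) {
		kfree(buf);
		return -ENOMEM;
	}
	buf = tmp;

	kfree(buf);
	return 0;
}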
@@ -5602,6 +5741,23 @@ static int __init setup_slub_min_objects(char *str)
 __setup("slab_min_objects=", setup_slub_min_objects);
 __setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
 
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+	if (nr_node_ids > 1) {
+		static_branch_enable(&strict_numa);
+		pr_info("SLUB: Strict NUMA enabled.\n");
+	} else {
+		pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+	}
+
+	return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
 #ifdef CONFIG_HARDENED_USERCOPY
 /*
  * Rejects incorrectly sized objects and objects that are to be copied
@@ -5960,7 +6116,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
 		if (sysfs_slab_alias(s, name))
-			return NULL;
+			pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
+			       name);
 
 		s->refcount++;
 
@@ -6042,16 +6199,19 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 	if (!alloc_kmem_cache_cpus(s))
 		goto out;
 
-	/* Mutex is not taken during early boot */
-	if (slab_state <= UP) {
-		err = 0;
-		goto out;
-	}
+	err = 0;
 
-	err = sysfs_slab_add(s);
-	if (err)
+	/* Mutex is not taken during early boot */
+	if (slab_state <= UP)
 		goto out;
 
+	/*
+	 * Failing to create sysfs files is not critical to SLUB functionality.
+	 * If it fails, proceed with cache creation without these files.
+	 */
+	if (sysfs_slab_add(s))
+		pr_err("SLUB: Unable to add cache %s to sysfs\n", s->name);
+
 	if (s->flags & SLAB_STORE_USER)
 		debugfs_slab_add(s);
 
@@ -7120,7 +7280,8 @@ out_del_kobj:
 
 void sysfs_slab_unlink(struct kmem_cache *s)
 {
-	kobject_del(&s->kobj);
+	if (s->kobj.state_in_sysfs)
+		kobject_del(&s->kobj);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
@@ -7149,6 +7310,11 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
 		 * If we have a leftover link then remove it.
 		 */
 		sysfs_remove_link(&slab_kset->kobj, name);
+	/*
+	 * The original cache may have failed to generate sysfs file.
+	 * In that case, sysfs_create_link() returns -ENOENT and
+	 * symbolic link creation is skipped.
+	 */
 	return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
 }
 