linux/arch/arm64/include/asm/cache.h

141 lines
3.8 KiB
C
Raw Permalink Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_CACHE_H
#define __ASM_CACHE_H
#define L1_CACHE_SHIFT (6)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
#define CLIDR_LOUU_SHIFT 27
#define CLIDR_LOC_SHIFT 24
#define CLIDR_LOUIS_SHIFT 21
#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7)
#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
#define CLIDR_CTYPE(clidr, level) \
(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
/*
* Memory returned by kmalloc() may be used for DMA, so we must make
* sure that all such allocations are cache aligned. Otherwise,
* unrelated code may cause parts of the buffer to be read into the
* cache before the transfer is done, causing old data to be seen by
* the CPU.
*/
#define ARCH_DMA_MINALIGN (128)
arm64: allow kmalloc() caches aligned to the smaller cache_line_size() On arm64, ARCH_DMA_MINALIGN is 128, larger than the cache line size on most of the current platforms (typically 64). Define ARCH_KMALLOC_MINALIGN to 8 (the default for architectures without their own ARCH_DMA_MINALIGN) and override dma_get_cache_alignment() to return cache_line_size(), probed at run-time. The kmalloc() caches will be limited to the cache line size. This will allow the additional kmalloc-{64,192} caches on most arm64 platforms. Link: https://lkml.kernel.org/r/20230612153201.554742-12-catalin.marinas@arm.com Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Isaac J. Manjarres <isaacmanjarres@google.com> Cc: Will Deacon <will@kernel.org> Cc: Alasdair Kergon <agk@redhat.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@lst.de> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Jerry Snitselaar <jsnitsel@redhat.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Jonathan Cameron <jic23@kernel.org> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: Lars-Peter Clausen <lars@metafoo.de> Cc: Logan Gunthorpe <logang@deltatee.com> Cc: Marc Zyngier <maz@kernel.org> Cc: Mark Brown <broonie@kernel.org> Cc: Mike Snitzer <snitzer@kernel.org> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Robin Murphy <robin.murphy@arm.com> Cc: Saravana Kannan <saravanak@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-06-12 16:31:55 +01:00
#define ARCH_KMALLOC_MINALIGN (8)
#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO)
mm: make minimum slab alignment a runtime property When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum slab alignment to 16. This happens even if MTE is not supported in hardware or disabled via kasan=off, which creates an unnecessary memory overhead in those cases. Eliminate this overhead by making the minimum slab alignment a runtime property and only aligning to 16 if KASAN is enabled at runtime. On a DragonBoard 845c (non-MTE hardware) with a kernel built with CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android boot I see the following Slab measurements in /proc/meminfo (median of 3 reboots): Before: 169020 kB After: 167304 kB [akpm@linux-foundation.org: make slab alignment type `unsigned int' to avoid casting] Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead Link: https://lkml.kernel.org/r/20220427195820.1716975-2-pcc@google.com Signed-off-by: Peter Collingbourne <pcc@google.com> Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes <rientjes@google.com> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Pekka Enberg <penberg@kernel.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com> Cc: Alexander Potapenko <glider@google.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Kees Cook <keescook@chromium.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2022-05-09 18:20:53 -07:00
#include <linux/bitops.h>
#include <linux/kasan-enabled.h>
#include <asm/cputype.h>
#include <asm/mte-def.h>
#include <asm/sysreg.h>
#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
#elif defined(CONFIG_KASAN_HW_TAGS)
mm: make minimum slab alignment a runtime property When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum slab alignment to 16. This happens even if MTE is not supported in hardware or disabled via kasan=off, which creates an unnecessary memory overhead in those cases. Eliminate this overhead by making the minimum slab alignment a runtime property and only aligning to 16 if KASAN is enabled at runtime. On a DragonBoard 845c (non-MTE hardware) with a kernel built with CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android boot I see the following Slab measurements in /proc/meminfo (median of 3 reboots): Before: 169020 kB After: 167304 kB [akpm@linux-foundation.org: make slab alignment type `unsigned int' to avoid casting] Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead Link: https://lkml.kernel.org/r/20220427195820.1716975-2-pcc@google.com Signed-off-by: Peter Collingbourne <pcc@google.com> Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes <rientjes@google.com> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Pekka Enberg <penberg@kernel.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com> Cc: Alexander Potapenko <glider@google.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Kees Cook <keescook@chromium.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2022-05-09 18:20:53 -07:00
static inline unsigned int arch_slab_minalign(void)
{
return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
__alignof__(unsigned long long);
}
#define arch_slab_minalign() arch_slab_minalign()
#endif
#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
#define ICACHEF_ALIASING 0
extern unsigned long __icache_flags;
/*
* Whilst the D-side always behaves as PIPT on AArch64, aliasing is
* permitted in the I-cache.
*/
static inline int icache_is_aliasing(void)
{
return test_bit(ICACHEF_ALIASING, &__icache_flags);
}
static inline u32 cache_type_cwg(void)
{
return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype());
}
#define __read_mostly __section(".data..read_mostly")
static inline int cache_line_size_of_cpu(void)
{
u32 cwg = cache_type_cwg();
return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}
int cache_line_size(void);
arm64: allow kmalloc() caches aligned to the smaller cache_line_size() On arm64, ARCH_DMA_MINALIGN is 128, larger than the cache line size on most of the current platforms (typically 64). Define ARCH_KMALLOC_MINALIGN to 8 (the default for architectures without their own ARCH_DMA_MINALIGN) and override dma_get_cache_alignment() to return cache_line_size(), probed at run-time. The kmalloc() caches will be limited to the cache line size. This will allow the additional kmalloc-{64,192} caches on most arm64 platforms. Link: https://lkml.kernel.org/r/20230612153201.554742-12-catalin.marinas@arm.com Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Isaac J. Manjarres <isaacmanjarres@google.com> Cc: Will Deacon <will@kernel.org> Cc: Alasdair Kergon <agk@redhat.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Christoph Hellwig <hch@lst.de> Cc: Daniel Vetter <daniel@ffwll.ch> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Jerry Snitselaar <jsnitsel@redhat.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Jonathan Cameron <jic23@kernel.org> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: Lars-Peter Clausen <lars@metafoo.de> Cc: Logan Gunthorpe <logang@deltatee.com> Cc: Marc Zyngier <maz@kernel.org> Cc: Mark Brown <broonie@kernel.org> Cc: Mike Snitzer <snitzer@kernel.org> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Robin Murphy <robin.murphy@arm.com> Cc: Saravana Kannan <saravanak@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-06-12 16:31:55 +01:00
#define dma_get_cache_alignment cache_line_size
/* Compress a u64 MPIDR value into 32 bits. */
static inline u64 arch_compact_of_hwid(u64 id)
{
u64 aff3 = MPIDR_AFFINITY_LEVEL(id, 3);
/*
* These bits are expected to be RES0. If not, return a value with
* the upper 32 bits set to force the caller to give up on 32 bit
* cache ids.
*/
if (FIELD_GET(GENMASK_ULL(63, 40), id))
return id;
return (aff3 << 24) | FIELD_GET(GENMASK_ULL(23, 0), id);
}
#define arch_compact_of_hwid arch_compact_of_hwid
/*
* Read the effective value of CTR_EL0.
*
* According to ARM ARM for ARMv8-A (ARM DDI 0487C.a),
* section D10.2.33 "CTR_EL0, Cache Type Register" :
*
* CTR_EL0.IDC reports the data cache clean requirements for
* instruction to data coherence.
*
* 0 - dcache clean to PoU is required unless :
* (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0)
* 1 - dcache clean to PoU is not required for i-to-d coherence.
*
* This routine provides the CTR_EL0 with the IDC field updated to the
* effective state.
*/
static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
{
u32 ctr = read_cpuid_cachetype();
if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) {
u64 clidr = read_sysreg(clidr_el1);
if (CLIDR_LOC(clidr) == 0 ||
(CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0))
ctr |= BIT(CTR_EL0_IDC_SHIFT);
}
return ctr;
}
#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */
#endif