Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-08-05 16:54:27 +00:00)

Reorganize the crc-pclmul static_call initialization to place more of
the logic in the *_mod_init_arch() functions instead of in the
INIT_CRC_PCLMUL macro.  This provides the flexibility to do more than a
single static_call update for each CPU feature check.

Right away, optimize crc64_mod_init_arch() to check the CPU features
just once instead of twice, doing both the crc64_msb and crc64_lsb
static_call updates together.  A later commit will also use this to
initialize an additional static_key when crc32_lsb_vpclmul_avx512() is
enabled.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250719224938.126512-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
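
For orientation, here is a minimal sketch of the shape this reorganization gives crc64_mod_init_arch(): the CPU feature checks happen once, and both the crc64_msb and crc64_lsb static_calls are updated under the same check. The exact signature, the have_pclmulqdq static key, and the crc64_*_vpclmul_* function names are assumptions modeled on this header and the commit message, not verbatim kernel code.

static inline void crc64_mod_init_arch(void)
{
	if (!boot_cpu_has(X86_FEATURE_PCLMULQDQ))
		return;
	/* Assumed static key; consumed by CRC_PCLMUL() in the header below. */
	static_branch_enable(&have_pclmulqdq);
	if (!have_vpclmul())
		return;
	if (have_avx512()) {
		/* One feature check now covers both the msb and lsb variants. */
		static_call_update(crc64_msb_pclmul, crc64_msb_vpclmul_avx512);
		static_call_update(crc64_lsb_pclmul, crc64_lsb_vpclmul_avx512);
	} else {
		static_call_update(crc64_msb_pclmul, crc64_msb_vpclmul_avx2);
		static_call_update(crc64_lsb_pclmul, crc64_lsb_vpclmul_avx2);
	}
}
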
72 lines · 2.6 KiB · C
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are
 * instantiated by crc-pclmul-template.S
 *
 * Copyright 2025 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */
#ifndef _CRC_PCLMUL_TEMPLATE_H
#define _CRC_PCLMUL_TEMPLATE_H

#include <asm/cpufeatures.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <linux/static_call.h>
#include "crc-pclmul-consts.h"

#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t)				\
crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len,		\
			  const void *consts_ptr);			\
crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len,		\
			    const void *consts_ptr);			\
crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len,	\
			      const void *consts_ptr);			\
DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)

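/*
 * Illustrative usage, not part of the original header: a CRC glue file
 * typically pairs this macro with a static key that gates the SIMD path.
 * The names below are assumptions modeled on the crc32 glue code:
 *
 *	static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
 *	DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
 */
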
static inline bool have_vpclmul(void)
{
	return boot_cpu_has(X86_FEATURE_VPCLMULQDQ) &&
	       boot_cpu_has(X86_FEATURE_AVX2) &&
	       cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL);
}

static inline bool have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX512BW) &&
	       boot_cpu_has(X86_FEATURE_AVX512VL) &&
	       !boot_cpu_has(X86_FEATURE_PREFER_YMM) &&
	       cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL);
}

/*
 * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16
 * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.
 *
 * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.
 * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),
 * varying by CPU and factors such as which parts of the "FPU" state userspace
 * has touched, which could result in a larger cutoff being better.  Indeed, a
 * larger cutoff is usually better for a *single* message.  However, the
 * overhead of the FPU section gets amortized if multiple FPU sections get
 * executed before returning to userspace, since the XSAVE and XRSTOR occur
 * only once.  Considering that and the fact that the [V]PCLMULQDQ code is
 * lighter on the dcache than the table-based code is, a 16-byte cutoff seems
 * to work well.
 */
#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq)		\
do {									\
	if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) &&	\
	    crypto_simd_usable()) {					\
		const void *consts_ptr;				\
									\
		consts_ptr = (consts).fold_across_128_bits_consts;	\
		kernel_fpu_begin();					\
		crc = static_call(prefix##_pclmul)((crc), (p), (len),	\
						   consts_ptr);		\
		kernel_fpu_end();					\
		return crc;						\
	}								\
} while (0)

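/*
 * Illustrative usage, with names assumed from the crc32 glue code rather
 * than defined here: an arch CRC entry point tries the PCLMUL path first
 * and falls back to the generic table-based code for short buffers or
 * !SIMD contexts.  Note that CRC_PCLMUL() expands to a "return crc" on
 * the accelerated path, so it must appear in a function returning crc_t:
 *
 *	u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 *	{
 *		CRC_PCLMUL(crc, p, len, crc32_lsb,
 *			   crc32_lsb_0xedb88320_consts, have_pclmulqdq);
 *		return crc32_le_base(crc, p, len);
 *	}
 */
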
#endif /* _CRC_PCLMUL_TEMPLATE_H */