2023-04-26 15:11:46 -04:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
|
|
* Poly1305 authenticator algorithm, RFC7539.
|
|
|
|
*
|
|
|
|
* Copyright 2023- IBM Corp. All rights reserved.
|
|
|
|
*/
|
2025-04-28 12:56:16 +08:00
|
|
|
#include <asm/switch_to.h>
|
|
|
|
#include <crypto/internal/poly1305.h>
|
|
|
|
#include <linux/cpufeature.h>
|
|
|
|
#include <linux/jump_label.h>
|
2023-04-26 15:11:46 -04:00
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/module.h>
|
2024-10-01 15:35:57 -04:00
|
|
|
#include <linux/unaligned.h>
|
2023-04-26 15:11:46 -04:00
|
|
|
|
2025-04-28 12:56:16 +08:00
|
|
|
asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen);
|
|
|
|
asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit);
|
2025-05-10 13:10:22 +08:00
|
|
|
asmlinkage void poly1305_emit_64(const struct poly1305_state *state, const u32 nonce[4], u8 digest[POLY1305_DIGEST_SIZE]);
|
2023-04-26 15:11:46 -04:00
|
|
|
|
2025-04-12 21:54:13 -07:00
|
|
|
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
|
|
|
|
|
2023-04-26 15:11:46 -04:00
|
|
|
/*
 * Enter a region where kernel-mode VSX (vector) instructions may be used.
 *
 * Preemption is disabled first so the thread cannot be switched out while
 * the vector unit holds live kernel state; enable_kernel_vsx() (from
 * asm/switch_to.h) then makes the VSX facility available to kernel code.
 * Must be paired with vsx_end().
 */
static void vsx_begin(void)
{
	preempt_disable();
	enable_kernel_vsx();
}
|
|
|
|
|
|
|
|
/*
 * Leave a vsx_begin() region: release the VSX facility, then re-enable
 * preemption.  The order mirrors vsx_begin() exactly — VSX must be torn
 * down before the task becomes preemptible again.
 */
static void vsx_end(void)
{
	disable_kernel_vsx();
	preempt_enable();
}
|
|
|
|
|
2025-04-28 12:56:16 +08:00
|
|
|
void poly1305_block_init_arch(struct poly1305_block_state *dctx,
|
|
|
|
const u8 raw_key[POLY1305_BLOCK_SIZE])
|
2023-04-26 15:11:46 -04:00
|
|
|
{
|
2025-05-20 10:35:29 +08:00
|
|
|
if (!static_key_enabled(&have_p10))
|
2025-04-28 12:56:16 +08:00
|
|
|
return poly1305_block_init_generic(dctx, raw_key);
|
2025-04-12 21:54:13 -07:00
|
|
|
|
|
|
|
dctx->h = (struct poly1305_state){};
|
2025-04-28 12:56:16 +08:00
|
|
|
dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0);
|
|
|
|
dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(poly1305_block_init_arch);
|
|
|
|
|
|
|
|
/*
 * Process `len` bytes of message data into the Poly1305 state.
 *
 * Without P10 support this falls through to the generic implementation.
 * Otherwise, inside a vsx_begin()/vsx_end() section, whole 4-block
 * (64-byte) chunks are handled by the VSX asm routine and any remaining
 * whole blocks by the 64-bit asm routine, with `padbit` forwarded.
 */
void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
			  unsigned int len, u32 padbit)
{
	if (!static_key_enabled(&have_p10)) {
		poly1305_blocks_generic(state, src, len, padbit);
		return;
	}

	vsx_begin();
	if (len >= POLY1305_BLOCK_SIZE * 4) {
		/* Bytes consumed by the 4-block routine: all whole 64-byte chunks. */
		unsigned int bulk = len - len % (POLY1305_BLOCK_SIZE * 4);

		poly1305_p10le_4blocks(state, src, len);
		src += bulk;
		len -= bulk;
	}
	/* Feed any remaining whole 16-byte blocks one at a time. */
	for (; len >= POLY1305_BLOCK_SIZE;
	     src += POLY1305_BLOCK_SIZE, len -= POLY1305_BLOCK_SIZE)
		poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit);
	vsx_end();
}
EXPORT_SYMBOL_GPL(poly1305_blocks_arch);
|
|
|
|
|
2025-05-10 13:10:22 +08:00
|
|
|
void poly1305_emit_arch(const struct poly1305_state *state,
|
|
|
|
u8 digest[POLY1305_DIGEST_SIZE],
|
|
|
|
const u32 nonce[4])
|
|
|
|
{
|
2025-05-20 10:35:29 +08:00
|
|
|
if (!static_key_enabled(&have_p10))
|
2025-05-10 13:10:22 +08:00
|
|
|
return poly1305_emit_generic(state, digest, nonce);
|
|
|
|
poly1305_emit_64(state, nonce, digest);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(poly1305_emit_arch);
|
|
|
|
|
crypto: poly1305 - centralize the shash wrappers for arch code
Following the example of the crc32, crc32c, and chacha code, make the
crypto subsystem register both generic and architecture-optimized
poly1305 shash algorithms, both implemented on top of the appropriate
library functions. This eliminates the need for every architecture to
implement the same shash glue code.
Note that the poly1305 shash requires that the key be prepended to the
data, which differs from the library functions where the key is simply a
parameter to poly1305_init(). Previously this was handled at a fairly
low level, polluting the library code with shash-specific code.
Reorganize things so that the shash code handles this quirk itself.
Also, to register the architecture-optimized shashes only when
architecture-optimized code is actually being used, add a function
poly1305_is_arch_optimized() and make each arch implement it. Change
each architecture's Poly1305 module_init function to arch_initcall so
that the CPU feature detection is guaranteed to run before
poly1305_is_arch_optimized() gets called by crypto/poly1305.c. (In
cases where poly1305_is_arch_optimized() just returns true
unconditionally, using arch_initcall is not strictly needed, but it's
still good to be consistent across architectures.)
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2025-04-12 21:54:14 -07:00
|
|
|
/*
 * Report whether the architecture-optimized (P10) code paths are active,
 * i.e. whether poly1305_p10_init() enabled the have_p10 static key.
 * Used by crypto/poly1305.c to decide whether to register the
 * arch-optimized shash.
 */
bool poly1305_is_arch_optimized(void)
{
	return static_key_enabled(&have_p10);
}
EXPORT_SYMBOL(poly1305_is_arch_optimized);
|
|
|
|
|
2023-04-26 15:11:46 -04:00
|
|
|
/*
 * One-time init: flip the have_p10 static key when the CPU implements
 * ISA v3.1 (POWER10), selecting the asm code paths for all the arch
 * entry points above.  Runs at subsys_initcall time so the feature
 * check happens before the crypto layer queries
 * poly1305_is_arch_optimized().
 */
static int __init poly1305_p10_init(void)
{
	if (!cpu_has_feature(CPU_FTR_ARCH_31))
		return 0;

	static_branch_enable(&have_p10);
	return 0;
}
subsys_initcall(poly1305_p10_init);
|
2023-04-26 15:11:46 -04:00
|
|
|
|
2025-04-17 21:00:17 -07:00
|
|
|
/*
 * Module unload hook.  Nothing to undo — the static key is harmless to
 * leave as-is — but an exit handler must exist for the module to be
 * unloadable.
 */
static void __exit poly1305_p10_exit(void)
{
}
module_exit(poly1305_p10_exit);
|
|
|
|
|
2023-04-26 15:11:46 -04:00
|
|
|
/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
MODULE_DESCRIPTION("Optimized Poly1305 for P10");
|