/*
 * ChaCha and HChaCha functions (ARM64 optimized)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
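
/* Low-level ChaCha and HChaCha block routines, implemented in NEON assembly. */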
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
				      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       int nrounds, int bytes);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
				   u32 out[HCHACHA_OUT_WORDS], int nrounds);
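
/* Enabled at init time when the CPU supports Advanced SIMD (NEON). */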
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
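
/*
 * Encrypt or decrypt bytes of data.  Runs larger than one block go through
 * the multi-block NEON routine, which handles up to five blocks per call;
 * a final partial block is bounced through a stack buffer so the XOR never
 * reads or writes past the caller's buffers.  The block counter in
 * state->x[12] is advanced by the number of blocks consumed.
 */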
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  int bytes, int nrounds)
{
	while (bytes > 0) {
		int l = min(bytes, CHACHA_BLOCK_SIZE * 5);

		if (l <= CHACHA_BLOCK_SIZE) {
			u8 buf[CHACHA_BLOCK_SIZE];

			memcpy(buf, src, l);
			chacha_block_xor_neon(state, buf, buf, nrounds);
			memcpy(dst, buf, l);
			state->x[12] += 1;
			break;
		}
		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
}

void hchacha_block_arch(const struct chacha_state *state,
			u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
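	/* Fall back to the generic code when NEON is absent or unusable. */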
	if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
		hchacha_block_generic(state, out, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, out, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds)
{
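	/*
	 * Inputs of at most one block, or contexts where SIMD cannot be
	 * used, are handled by the generic code instead.
	 */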
	if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
	    !crypto_simd_usable())
		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
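
	/*
	 * Process at most 4 KiB between kernel_neon_begin() and
	 * kernel_neon_end() so that preemption is not disabled for too
	 * long (roughly 20 us at 5 cycles/byte on a 1 GHz core).
	 */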
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

bool chacha_is_arch_optimized(void)
{
	return static_key_enabled(&have_neon);
}
EXPORT_SYMBOL(chacha_is_arch_optimized);
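
/* Enable the NEON fast path once, at init time, if the CPU advertises Advanced SIMD. */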
static int __init chacha_simd_mod_init(void)
{
	if (cpu_have_named_feature(ASIMD))
		static_branch_enable(&have_neon);
	return 0;
}
subsys_initcall(chacha_simd_mod_init);

static void __exit chacha_simd_mod_exit(void)
{
}
module_exit(chacha_simd_mod_exit);

MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM64 optimized)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");