linux/lib/crypto/arm/sha1-ce-core.S
Eric Biggers 70cb6ca58f lib/crypto: arm/sha1: Migrate optimized code into library
Instead of exposing the arm-optimized SHA-1 code via arm-specific
crypto_shash algorithms, just implement the sha1_blocks() library
function.  This is much simpler, makes the SHA-1 library functions
arm-optimized, and fixes the longstanding issue where the
arm-optimized SHA-1 code was disabled by default.  SHA-1 remains
available through crypto_shash, but individual architectures no
longer need to handle it.

To match sha1_blocks(), change the type of the nblocks parameter of the
assembly functions from int to size_t.  The assembly functions actually
already treated it as size_t.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250712232329.818226-8-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2025-07-14 11:11:29 -07:00
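
The sha1_blocks() migration described in the commit message above replaces
per-arch crypto_shash glue with a small library wrapper around the assembly
routine in this file.  A minimal, hypothetical sketch of such a wrapper
follows; the static-key name have_sha1_ce, the sha1_blocks_generic()
fallback, and the header choices are illustrative assumptions, not a copy of
the actual kernel glue:

/*
 * Hypothetical wrapper, for illustration only.  "have_sha1_ce" and
 * sha1_blocks_generic() are assumed names, not taken from the tree.
 */
#include <asm/neon.h>
#include <crypto/internal/simd.h>
#include <crypto/sha1.h>		/* assumed home of struct sha1_block_state */
#include <linux/jump_label.h>
#include <linux/linkage.h>
#include <linux/types.h>

asmlinkage void sha1_ce_transform(struct sha1_block_state *state,
				  const u8 *data, size_t nblocks);

/* Assumed static key, enabled during boot-time CPU feature probing. */
static DEFINE_STATIC_KEY_FALSE(have_sha1_ce);

static void sha1_blocks(struct sha1_block_state *state,
			const u8 *data, size_t nblocks)
{
	if (static_branch_likely(&have_sha1_ce) && crypto_simd_usable()) {
		kernel_neon_begin();
		sha1_ce_transform(state, data, nblocks);
		kernel_neon_end();
	} else {
		sha1_blocks_generic(state, data, nblocks);
	}
}

NEON registers may only be touched between kernel_neon_begin() and
kernel_neon_end(), and crypto_simd_usable() rules out contexts where that is
not permitted, which is why a portable fallback path is kept.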

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

        .text
        .arch           armv8-a
        .fpu            crypto-neon-fp-armv8

        k0              .req    q0
        k1              .req    q1
        k2              .req    q2
        k3              .req    q3

        ta0             .req    q4
        ta1             .req    q5
        tb0             .req    q5
        tb1             .req    q4

        dga             .req    q6
        dgb             .req    q7
        dgbs            .req    s28

        dg0             .req    q12
        dg1a0           .req    q13
        dg1a1           .req    q14
        dg1b0           .req    q14
        dg1b1           .req    q13

        .macro          add_only, op, ev, rc, s0, dg1
        .ifnb           \s0
        vadd.u32        tb\ev, q\s0, \rc
        .endif
        sha1h.32        dg1b\ev, dg0
        .ifb            \dg1
        sha1\op\().32   dg0, dg1a\ev, ta\ev
        .else
        sha1\op\().32   dg0, \dg1, ta\ev
        .endif
        .endm

        .macro          add_update, op, ev, rc, s0, s1, s2, s3, dg1
        sha1su0.32      q\s0, q\s1, q\s2
        add_only        \op, \ev, \rc, \s1, \dg1
        sha1su1.32      q\s0, q\s3
        .endm

        .align          6
.Lsha1_rcon:
        .word           0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999
        .word           0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1
        .word           0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc
        .word           0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6

        /*
         * void sha1_ce_transform(struct sha1_block_state *state,
         *                        const u8 *data, size_t nblocks);
         */
ENTRY(sha1_ce_transform)
        /* load round constants */
        adr             ip, .Lsha1_rcon
        vld1.32         {k0-k1}, [ip, :128]!
        vld1.32         {k2-k3}, [ip, :128]

        /* load state */
        vld1.32         {dga}, [r0]
        vldr            dgbs, [r0, #16]

        /* load input */
0:      vld1.32         {q8-q9}, [r1]!
        vld1.32         {q10-q11}, [r1]!
        subs            r2, r2, #1

#ifndef CONFIG_CPU_BIG_ENDIAN
        vrev32.8        q8, q8
        vrev32.8        q9, q9
        vrev32.8        q10, q10
        vrev32.8        q11, q11
#endif

        vadd.u32        ta0, q8, k0
        vmov            dg0, dga

        add_update      c, 0, k0,  8,  9, 10, 11, dgb
        add_update      c, 1, k0,  9, 10, 11,  8
        add_update      c, 0, k0, 10, 11,  8,  9
        add_update      c, 1, k0, 11,  8,  9, 10
        add_update      c, 0, k1,  8,  9, 10, 11

        add_update      p, 1, k1,  9, 10, 11,  8
        add_update      p, 0, k1, 10, 11,  8,  9
        add_update      p, 1, k1, 11,  8,  9, 10
        add_update      p, 0, k1,  8,  9, 10, 11
        add_update      p, 1, k2,  9, 10, 11,  8

        add_update      m, 0, k2, 10, 11,  8,  9
        add_update      m, 1, k2, 11,  8,  9, 10
        add_update      m, 0, k2,  8,  9, 10, 11
        add_update      m, 1, k2,  9, 10, 11,  8
        add_update      m, 0, k3, 10, 11,  8,  9

        add_update      p, 1, k3, 11,  8,  9, 10
        add_only        p, 0, k3,  9
        add_only        p, 1, k3, 10
        add_only        p, 0, k3, 11
        add_only        p, 1

        /* update state */
        vadd.u32        dga, dga, dg0
        vadd.u32        dgb, dgb, dg1a0
        bne             0b

        /* store new state */
        vst1.32         {dga}, [r0]
        vstr            dgbs, [r0, #16]
        bx              lr
ENDPROC(sha1_ce_transform)
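
For reference, the 'c', 'p', and 'm' variants selected by the add_update and
add_only macros above expand to the sha1c.32, sha1p.32, and sha1m.32
instructions, which implement the Ch, Parity, and Maj round functions of
FIPS 180-4 paired with the four constants in .Lsha1_rcon.  A plain C sketch
of one block of the SHA-1 compression function, shown only to illustrate the
round structure the assembly follows (sha1_block_ref is a made-up name):

#include <stdint.h>

static uint32_t rol32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

/* One 64-byte block of the SHA-1 compression function (FIPS 180-4). */
static void sha1_block_ref(uint32_t state[5], const uint8_t data[64])
{
	uint32_t w[80];
	uint32_t a = state[0], b = state[1], c = state[2];
	uint32_t d = state[3], e = state[4];

	/* Load the 16 message words big-endian, then expand to 80 words. */
	for (int i = 0; i < 16; i++)
		w[i] = ((uint32_t)data[4 * i] << 24) |
		       ((uint32_t)data[4 * i + 1] << 16) |
		       ((uint32_t)data[4 * i + 2] << 8) |
		       (uint32_t)data[4 * i + 3];
	for (int i = 16; i < 80; i++)
		w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);

	for (int i = 0; i < 80; i++) {
		uint32_t f, k, tmp;

		if (i < 20) {		/* 'c' rounds: Ch, k0 */
			f = (b & c) | (~b & d);
			k = 0x5a827999;
		} else if (i < 40) {	/* 'p' rounds: Parity, k1 */
			f = b ^ c ^ d;
			k = 0x6ed9eba1;
		} else if (i < 60) {	/* 'm' rounds: Maj, k2 */
			f = (b & c) | (b & d) | (c & d);
			k = 0x8f1bbcdc;
		} else {		/* 'p' rounds: Parity, k3 */
			f = b ^ c ^ d;
			k = 0xca62c1d6;
		}

		tmp = rol32(a, 5) + f + e + k + w[i];
		e = d;
		d = c;
		c = rol32(b, 30);
		b = a;
		a = tmp;
	}

	/* Feed the working variables back into the chaining state. */
	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;
	state[4] += e;
}

The kernel keeps its own portable implementation under lib/crypto; the sketch
above only mirrors the round grouping (four groups of twenty rounds, one
constant each) that the macro sequence in this file unrolls four words at a
time using the ARMv8 Crypto Extensions.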