crypto: riscv - parallelize AES-CBC decryption

Since CBC decryption is parallelizable, make the RISC-V implementation
of AES-CBC decryption process multiple blocks at a time, instead of
processing the blocks one by one.  This should improve performance.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20240208060851.154129-1-ebiggers@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
Eric Biggers 2024-02-07 22:08:51 -08:00 committed by Palmer Dabbelt
parent 028d1aee1f
commit da215b089b
No known key found for this signature in database
GPG key ID: 2E1319F35FBB1889

View file

@ -139,19 +139,25 @@ SYM_FUNC_END(aes_ecb_decrypt_zvkned)
.endm .endm
// aes_cbc_decrypt: AES-CBC decryption loop; \keylen is passed through to
// the aes_decrypt macro.
//
// This is a side-by-side diff hunk: where a line contains two statements,
// the first is the previous (one 16-byte block per iteration) version and
// the second is the new (multiple blocks per iteration) version.  Lines
// with a single statement exist only in the new version.
//
// New version, per loop iteration:
//   - LEN has been converted to 32-bit words, so vsetvli (e32, m4) puts
//     the number of words handled this pass in t0.
//   - v20 receives the ciphertext; vslideup.vi by 4 elements writes it
//     into v16 after the first 4 words (the IV or the previous pass's last
//     ciphertext block), so v16 holds the "previous ciphertext" for every
//     block in v20.
//   - vslidedown.vx by t0 - 4 moves the last 4 words of v20 (the last
//     ciphertext block) to the start of v24 before v20 is decrypted in
//     place; v24 becomes the next pass's v16.
//   - INP/OUTP advance by t0 * 4 bytes and LEN drops by t0 words.
//
// NOTE(review): nothing here checks that LEN is a nonzero multiple of
// 16 bytes; presumably the caller guarantees it -- confirm.
// NOTE(review): LEN, INP, OUTP and IVP are defined outside this hunk, as is
// the vector configuration in effect for the initial IV load -- confirm.
.macro aes_cbc_decrypt keylen .macro aes_cbc_decrypt keylen
srli LEN, LEN, 2 // Convert LEN from bytes to words
vle32.v v16, (IVP) // Load IV vle32.v v16, (IVP) // Load IV
1: 1:
vle32.v v17, (INP) // Load ciphertext block vsetvli t0, LEN, e32, m4, ta, ma
vmv.v.v v18, v17 // Save ciphertext block vle32.v v20, (INP) // Load ciphertext blocks
aes_decrypt v17, \keylen // Decrypt vslideup.vi v16, v20, 4 // Setup prev ciphertext blocks
vxor.vv v17, v17, v16 // XOR with IV or prev ciphertext block addi t1, t0, -4
vse32.v v17, (OUTP) // Store plaintext block vslidedown.vx v24, v20, t1 // Save last ciphertext block
vmv.v.v v16, v18 // Next "IV" is prev ciphertext block aes_decrypt v20, \keylen // Decrypt the blocks
addi INP, INP, 16 vxor.vv v20, v20, v16 // XOR with prev ciphertext blocks
addi OUTP, OUTP, 16 vse32.v v20, (OUTP) // Store plaintext blocks
addi LEN, LEN, -16 vmv.v.v v16, v24 // Next "IV" is last ciphertext block
slli t1, t0, 2 // Words to bytes
add INP, INP, t1
add OUTP, OUTP, t1
sub LEN, LEN, t0
bnez LEN, 1b bnez LEN, 1b
// The loop leaves an m4 configuration active; switch back to 4 x e32 at m1
// so that exactly one 16-byte block (the next IV in v16) is stored to IVP.
vsetivli zero, 4, e32, m1, ta, ma
vse32.v v16, (IVP) // Store next IV vse32.v v16, (IVP) // Store next IV
ret ret
.endm .endm