mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	crypto: blake2s - x86_64 SIMD implementation
These implementations from Samuel Neves support AVX and AVX-512VL. Originally this used AVX-512F, but Skylake thermal throttling made AVX-512VL more attractive and possible to do with negligible difference. Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Signed-off-by: Samuel Neves <sneves@dei.uc.pt> Co-developed-by: Samuel Neves <sneves@dei.uc.pt> [ardb: move to arch/x86/crypto, wire into lib/crypto framework] Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
		
							parent
							
								
									7f9b088092
								
							
						
					
					
						commit
						ed0356eda1
					
				
					 4 changed files with 499 additions and 0 deletions
				
			
		|  | @ -48,6 +48,7 @@ ifeq ($(avx_supported),yes) | |||
| 	obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o | ||||
| 	obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o | ||||
| 	obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o | ||||
| 	obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o | ||||
| endif | ||||
| 
 | ||||
| # These modules require assembler to support AVX2.
 | ||||
|  | @ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o | |||
| aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o | ||||
| 
 | ||||
| nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o | ||||
| blake2s-x86_64-y := blake2s-core.o blake2s-glue.o | ||||
| 
 | ||||
| ifeq ($(avx_supported),yes) | ||||
| 	camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
 | ||||
|  |  | |||
							
								
								
									
										258
									
								
								arch/x86/crypto/blake2s-core.S
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										258
									
								
								arch/x86/crypto/blake2s-core.S
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,258 @@ | |||
| /* SPDX-License-Identifier: GPL-2.0 OR MIT */ | ||||
| /* | ||||
|  * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | ||||
|  * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
 | ||||
|  */ | ||||
| 
 | ||||
| #include <linux/linkage.h> | ||||
| 
 | ||||
/*
 * Read-only constants shared by the routines in this file.
 * All values are laid out little-endian, lowest-addressed element first.
 */

/* IV: eight 32-bit words, loaded as two 16-byte rows (IV and IV+0x10). */
.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
.align 32
IV:	.long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
	.long 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19

/*
 * pshufb mask: within each 32-bit lane, destination bytes 0..3 take source
 * bytes 2,3,0,1, i.e. every lane is rotated by 16 bits.
 */
.section .rodata.cst16.ROT16, "aM", @progbits, 16
.align 16
ROT16:	.byte  2,  3,  0,  1,  6,  7,  4,  5, 10, 11,  8,  9, 14, 15, 12, 13

/*
 * pshufb mask: within each 32-bit lane, destination bytes 0..3 take source
 * bytes 1,2,3,0, i.e. every lane is rotated right by 8 bits.
 */
.section .rodata.cst16.ROR328, "aM", @progbits, 16
.align 16
ROR328:	.byte  1,  2,  3,  0,  5,  6,  7,  4,  9, 10, 11,  8, 13, 14, 15, 12

/*
 * Message schedule for the SSSE3 routine: ten rows (one per round) of sixteen
 * byte-sized indices.  Each index selects a 32-bit word of the current input
 * block; within a row the indices are stored in the order the round consumes
 * them, four at a time.
 */
.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
.align 64
SIGMA:
.byte  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
.byte 14,  4,  9, 13, 10,  8, 15,  6,  5,  1,  0, 11,  3, 12,  2,  7
.byte 11, 12,  5, 15,  8,  0,  2, 13,  9, 10,  3,  7,  4, 14,  6,  1
.byte  7,  3, 13, 11,  9,  1, 12, 14, 15,  2,  5,  4,  8,  6, 10,  0
.byte  9,  5,  2, 10,  0,  7,  4, 15,  3, 14, 11,  6, 13,  1, 12,  8
.byte  2,  6,  0,  8, 12, 10, 11,  3,  1,  4,  7, 15,  9, 13,  5, 14
.byte 12,  1, 14,  4,  5, 15, 13, 10,  8,  0,  6,  9, 11,  7,  3,  2
.byte 13,  7, 12,  3, 11, 14,  1,  9,  2,  5, 15,  8, 10,  0,  4,  6
.byte  6, 14, 11,  0, 15,  9,  3,  8, 10, 12, 13,  1,  5,  2,  7,  4
.byte 10,  8,  7,  1,  2,  4,  6,  5, 13, 15,  9,  3,  0, 11, 14, 12
#ifdef CONFIG_AS_AVX512
/*
 * Message schedule for the AVX-512VL routine: ten rows of sixteen dword
 * indices used as vpermi2d selectors.  The routine overwrites its message
 * registers with each permuted result, so every row is applied to the word
 * order left behind by the previous round rather than to the original block.
 */
.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
.align 64
SIGMA2:
.long  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
.long  8,  2, 13, 15, 10,  9, 12,  3,  6,  4,  0, 14,  5, 11,  1,  7
.long 11, 13,  8,  6,  5, 10, 14,  3,  2,  4, 12, 15,  1,  0,  7,  9
.long 11, 10,  7,  0,  8, 15,  1, 13,  3,  6,  2, 12,  4, 14,  9,  5
.long  4, 10,  9, 14, 15,  0, 11,  8,  1,  7,  3, 13,  2,  5,  6, 12
.long  2, 11,  4, 15, 14,  3, 10,  8, 13,  6,  5,  7,  0, 12,  1,  9
.long  4,  8, 15,  9, 14, 11, 13,  5,  3,  2,  1, 12,  6, 10,  7,  0
.long  6, 13,  0, 14, 12,  2,  1, 11, 15,  4,  5,  8,  7,  9,  3, 10
.long 15,  5,  4, 13, 10,  7,  3, 11, 12,  2,  0,  6,  9,  8,  1, 14
.long  8,  7, 14, 11, 13, 15,  0, 12, 10,  4,  5,  6,  3,  2,  1,  9
#endif /* CONFIG_AS_AVX512 */
| 
 | ||||
| .text | ||||
| #ifdef CONFIG_AS_SSSE3 | ||||
/*
 * void blake2s_compress_ssse3(struct blake2s_state *state, const u8 *block,
 *			       const size_t nblocks, const u32 inc);
 *
 * ABI: System V AMD64 (AT&T syntax).  Leaf function, no stack usage.
 *   In:    %rdi = state, %rsi = block, %rdx = nblocks, %ecx = inc
 *   Out:   the first 0x30 bytes of *state are updated in place
 *   Clobb: %rax, %rcx, %rdx, %rsi, %r8, %xmm0-%xmm8, %xmm10-%xmm15, flags
 *
 * Register roles inside the loops:
 *   %xmm0/%xmm1   rows a/b of the working state; loaded from state+0x00 and
 *                 state+0x10 (the eight chaining words)
 *   %xmm2/%xmm3   rows c/d of the working state
 *   %xmm10/%xmm11 copy of the chaining words at the start of the block
 *   %xmm12/%xmm13 pshufb masks: rotate each dword by 16 / right by 8 bits
 *   %xmm14        the 16 bytes at state+0x20 (presumably the t[] counter and
 *                 f[] flags -- confirm against struct blake2s_state)
 *   %xmm15        inc, zero-extended; added to the low quadword of %xmm14
 *                 once per block
 *   %rcx          cursor into SIGMA (16 index bytes per round)
 *   %r8           end of SIGMA (10 rows * 16 bytes = 0xa0)
 *
 * If nblocks is zero the function returns without touching *state.
 */
ENTRY(blake2s_compress_ssse3)
	testq		%rdx,%rdx
	je		.Lendofloop
	movdqu		(%rdi),%xmm0
	movdqu		0x10(%rdi),%xmm1
	movdqa		ROT16(%rip),%xmm12
	movdqa		ROR328(%rip),%xmm13
	movdqu		0x20(%rdi),%xmm14
	/*
	 * inc is a 32-bit argument; the upper half of %rcx is not guaranteed
	 * to be zero by the calling convention, so take %ecx only.
	 */
	movd		%ecx,%xmm15
	leaq		SIGMA+0xa0(%rip),%r8
	jmp		.Lbeginofloop
	.align		32
.Lbeginofloop:
	/* Per-block setup: save h, bump the counter, build rows c and d. */
	movdqa		%xmm0,%xmm10
	movdqa		%xmm1,%xmm11
	paddq		%xmm15,%xmm14
	movdqa		IV(%rip),%xmm2
	movdqa		%xmm14,%xmm3
	pxor		IV+0x10(%rip),%xmm3
	leaq		SIGMA(%rip),%rcx
.Lroundloop:
	/* Gather the four message words selected by SIGMA bytes 0..3. */
	movzbl		(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm4
	movzbl		0x1(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm5
	movzbl		0x2(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm6
	movzbl		0x3(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm7
	punpckldq	%xmm5,%xmm4
	punpckldq	%xmm7,%xmm6
	punpcklqdq	%xmm6,%xmm4
	/* a += m + b; d = ror(d ^ a, 16); c += d; b = ror(b ^ c, 12) */
	paddd		%xmm4,%xmm0
	paddd		%xmm1,%xmm0
	pxor		%xmm0,%xmm3
	pshufb		%xmm12,%xmm3
	paddd		%xmm3,%xmm2
	pxor		%xmm2,%xmm1
	movdqa		%xmm1,%xmm8
	psrld		$0xc,%xmm1
	pslld		$0x14,%xmm8
	por		%xmm8,%xmm1
	/* Gather the words selected by SIGMA bytes 4..7. */
	movzbl		0x4(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm5
	movzbl		0x5(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm6
	movzbl		0x6(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm7
	movzbl		0x7(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm4
	punpckldq	%xmm6,%xmm5
	punpckldq	%xmm4,%xmm7
	punpcklqdq	%xmm7,%xmm5
	/* a += m + b; d = ror(d ^ a, 8); c += d; b = ror(b ^ c, 7) */
	paddd		%xmm5,%xmm0
	paddd		%xmm1,%xmm0
	pxor		%xmm0,%xmm3
	pshufb		%xmm13,%xmm3
	paddd		%xmm3,%xmm2
	pxor		%xmm2,%xmm1
	movdqa		%xmm1,%xmm8
	psrld		$0x7,%xmm1
	pslld		$0x19,%xmm8
	por		%xmm8,%xmm1
	/*
	 * Rotate the lanes of rows a, d and c so the second half of the
	 * round mixes the diagonals instead of the columns.
	 */
	pshufd		$0x93,%xmm0,%xmm0
	pshufd		$0x4e,%xmm3,%xmm3
	pshufd		$0x39,%xmm2,%xmm2
	/* Gather the words selected by SIGMA bytes 8..11. */
	movzbl		0x8(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm6
	movzbl		0x9(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm7
	movzbl		0xa(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm4
	movzbl		0xb(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm5
	punpckldq	%xmm7,%xmm6
	punpckldq	%xmm5,%xmm4
	punpcklqdq	%xmm4,%xmm6
	/* a += m + b; d = ror(d ^ a, 16); c += d; b = ror(b ^ c, 12) */
	paddd		%xmm6,%xmm0
	paddd		%xmm1,%xmm0
	pxor		%xmm0,%xmm3
	pshufb		%xmm12,%xmm3
	paddd		%xmm3,%xmm2
	pxor		%xmm2,%xmm1
	movdqa		%xmm1,%xmm8
	psrld		$0xc,%xmm1
	pslld		$0x14,%xmm8
	por		%xmm8,%xmm1
	/* Gather the words selected by SIGMA bytes 12..15. */
	movzbl		0xc(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm7
	movzbl		0xd(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm4
	movzbl		0xe(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm5
	movzbl		0xf(%rcx),%eax
	movd		(%rsi,%rax,4),%xmm6
	punpckldq	%xmm4,%xmm7
	punpckldq	%xmm6,%xmm5
	punpcklqdq	%xmm5,%xmm7
	/* a += m + b; d = ror(d ^ a, 8); c += d; b = ror(b ^ c, 7) */
	paddd		%xmm7,%xmm0
	paddd		%xmm1,%xmm0
	pxor		%xmm0,%xmm3
	pshufb		%xmm13,%xmm3
	paddd		%xmm3,%xmm2
	pxor		%xmm2,%xmm1
	movdqa		%xmm1,%xmm8
	psrld		$0x7,%xmm1
	pslld		$0x19,%xmm8
	por		%xmm8,%xmm1
	/* Undo the lane rotation applied before the diagonal half. */
	pshufd		$0x39,%xmm0,%xmm0
	pshufd		$0x4e,%xmm3,%xmm3
	pshufd		$0x93,%xmm2,%xmm2
	/* Next SIGMA row; stop after the tenth. */
	addq		$0x10,%rcx
	cmpq		%r8,%rcx
	jnz		.Lroundloop
	/* Feed-forward: h = h ^ (a,b) ^ (c,d). */
	pxor		%xmm2,%xmm0
	pxor		%xmm3,%xmm1
	pxor		%xmm10,%xmm0
	pxor		%xmm11,%xmm1
	/* Advance to the next 64-byte input block. */
	addq		$0x40,%rsi
	decq		%rdx
	jnz		.Lbeginofloop
	/* Write the chaining words and the updated state+0x20 row back. */
	movdqu		%xmm0,(%rdi)
	movdqu		%xmm1,0x10(%rdi)
	movdqu		%xmm14,0x20(%rdi)
.Lendofloop:
	ret
ENDPROC(blake2s_compress_ssse3)
| #endif /* CONFIG_AS_SSSE3 */ | ||||
| 
 | ||||
| #ifdef CONFIG_AS_AVX512 | ||||
/*
 * void blake2s_compress_avx512(struct blake2s_state *state, const u8 *block,
 *				const size_t nblocks, const u32 inc);
 *
 * ABI: System V AMD64 (AT&T syntax).  Leaf function, no stack usage.
 *   In:    %rdi = state, %rsi = block, %rdx = nblocks, %ecx = inc
 *   Out:   the first 0x30 bytes of *state are updated in place
 *   Clobb: %rax, %rcx, %rdx, %rsi, %xmm0-%xmm5, %ymm6-%ymm9, %xmm10, %xmm11,
 *          %xmm14, %xmm15, flags
 *
 * Register roles inside the loops:
 *   %xmm0/%xmm1   rows a/b of the working state; loaded from state+0x00 and
 *                 state+0x10 (the eight chaining words)
 *   %xmm2/%xmm3   rows c/d of the working state
 *   %xmm4         the 16 bytes at state+0x20 (presumably the t[] counter and
 *                 f[] flags -- confirm against struct blake2s_state)
 *   %xmm5         inc, zero-extended; added to the low quadword of %xmm4
 *                 once per block
 *   %ymm6/%ymm7   the sixteen message words in their current order
 *   %ymm8/%ymm9   SIGMA2 selectors, overwritten by vpermi2d with the words
 *                 for this round
 *   %xmm10/%xmm11 copy of the chaining words at the start of the block
 *   %xmm14/%xmm15 the two IV rows
 *   %rax          cursor into SIGMA2 (0x40 bytes per round)
 *   %cl           rounds remaining (10 per block)
 *
 * If nblocks is zero the function returns without touching *state.
 */
ENTRY(blake2s_compress_avx512)
	/*
	 * The block loop is bottom-tested with decq/jne, so a zero count
	 * would wrap around and run past the input.  Bail out first.
	 */
	testq		%rdx,%rdx
	je		.Lblake2s_compress_avx512_done
	vmovdqu		(%rdi),%xmm0
	vmovdqu		0x10(%rdi),%xmm1
	vmovdqu		0x20(%rdi),%xmm4
	/*
	 * inc is a 32-bit argument; the upper half of %rcx is not guaranteed
	 * to be zero by the calling convention, so take %ecx only.
	 */
	vmovd		%ecx,%xmm5
	vmovdqa		IV(%rip),%xmm14
	vmovdqa		IV+16(%rip),%xmm15
	jmp		.Lblake2s_compress_avx512_mainloop
.align 32
.Lblake2s_compress_avx512_mainloop:
	/* Per-block setup: save h, bump the counter, build rows c and d. */
	vmovdqa		%xmm0,%xmm10
	vmovdqa		%xmm1,%xmm11
	vpaddq		%xmm5,%xmm4,%xmm4
	vmovdqa		%xmm14,%xmm2
	vpxor		%xmm15,%xmm4,%xmm3
	/* Load the whole 64-byte block and step the input pointer. */
	vmovdqu		(%rsi),%ymm6
	vmovdqu		0x20(%rsi),%ymm7
	addq		$0x40,%rsi
	leaq		SIGMA2(%rip),%rax
	/* inc already lives in %xmm5, so %cl is free for the round count. */
	movb		$0xa,%cl
.Lblake2s_compress_avx512_roundloop:
	/*
	 * Permute the message words for this round.  The result replaces
	 * %ymm6/%ymm7, so the next SIGMA2 row is applied on top of it.
	 */
	addq		$0x40,%rax
	vmovdqa		-0x40(%rax),%ymm8
	vmovdqa		-0x20(%rax),%ymm9
	vpermi2d	%ymm7,%ymm6,%ymm8
	vpermi2d	%ymm7,%ymm6,%ymm9
	vmovdqa		%ymm8,%ymm6
	vmovdqa		%ymm9,%ymm7
	/* a += m + b; d = ror(d ^ a, 16); c += d; b = ror(b ^ c, 12) */
	vpaddd		%xmm8,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x10,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0xc,%xmm1,%xmm1
	/* Upper four words of %ymm8 feed the second step. */
	vextracti128	$0x1,%ymm8,%xmm8
	/* a += m + b; d = ror(d ^ a, 8); c += d; b = ror(b ^ c, 7) */
	vpaddd		%xmm8,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x8,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0x7,%xmm1,%xmm1
	/*
	 * Rotate the lanes of rows a, d and c so the second half of the
	 * round mixes the diagonals instead of the columns.
	 */
	vpshufd		$0x93,%xmm0,%xmm0
	vpshufd		$0x4e,%xmm3,%xmm3
	vpshufd		$0x39,%xmm2,%xmm2
	/* a += m + b; d = ror(d ^ a, 16); c += d; b = ror(b ^ c, 12) */
	vpaddd		%xmm9,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x10,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0xc,%xmm1,%xmm1
	/* Upper four words of %ymm9 feed the last step. */
	vextracti128	$0x1,%ymm9,%xmm9
	/* a += m + b; d = ror(d ^ a, 8); c += d; b = ror(b ^ c, 7) */
	vpaddd		%xmm9,%xmm0,%xmm0
	vpaddd		%xmm1,%xmm0,%xmm0
	vpxor		%xmm0,%xmm3,%xmm3
	vprord		$0x8,%xmm3,%xmm3
	vpaddd		%xmm3,%xmm2,%xmm2
	vpxor		%xmm2,%xmm1,%xmm1
	vprord		$0x7,%xmm1,%xmm1
	/* Undo the lane rotation applied before the diagonal half. */
	vpshufd		$0x39,%xmm0,%xmm0
	vpshufd		$0x4e,%xmm3,%xmm3
	vpshufd		$0x93,%xmm2,%xmm2
	decb		%cl
	jne		.Lblake2s_compress_avx512_roundloop
	/* Feed-forward: h = h ^ (a,b) ^ (c,d). */
	vpxor		%xmm10,%xmm0,%xmm0
	vpxor		%xmm11,%xmm1,%xmm1
	vpxor		%xmm2,%xmm0,%xmm0
	vpxor		%xmm3,%xmm1,%xmm1
	decq		%rdx
	jne		.Lblake2s_compress_avx512_mainloop
	/* Write the chaining words and the updated state+0x20 row back. */
	vmovdqu		%xmm0,(%rdi)
	vmovdqu		%xmm1,0x10(%rdi)
	vmovdqu		%xmm4,0x20(%rdi)
	/* %ymm registers were written above; clear their upper halves. */
	vzeroupper
.Lblake2s_compress_avx512_done:
	retq
ENDPROC(blake2s_compress_avx512)
| #endif /* CONFIG_AS_AVX512 */ | ||||
							
								
								
									
										233
									
								
								arch/x86/crypto/blake2s-glue.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										233
									
								
								arch/x86/crypto/blake2s-glue.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,233 @@ | |||
| // SPDX-License-Identifier: GPL-2.0 OR MIT
 | ||||
| /*
 | ||||
|  * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include <crypto/internal/blake2s.h> | ||||
| #include <crypto/internal/simd.h> | ||||
| #include <crypto/internal/hash.h> | ||||
| 
 | ||||
| #include <linux/types.h> | ||||
| #include <linux/jump_label.h> | ||||
| #include <linux/kernel.h> | ||||
| #include <linux/module.h> | ||||
| 
 | ||||
| #include <asm/cpufeature.h> | ||||
| #include <asm/fpu/api.h> | ||||
| #include <asm/processor.h> | ||||
| #include <asm/simd.h> | ||||
| 
 | ||||
| asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, | ||||
| 				       const u8 *block, const size_t nblocks, | ||||
| 				       const u32 inc); | ||||
| asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, | ||||
| 					const u8 *block, const size_t nblocks, | ||||
| 					const u32 inc); | ||||
| 
 | ||||
| static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); | ||||
| static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); | ||||
| 
 | ||||
| void blake2s_compress_arch(struct blake2s_state *state, | ||||
| 			   const u8 *block, size_t nblocks, | ||||
| 			   const u32 inc) | ||||
| { | ||||
| 	/* SIMD disables preemption, so relax after processing each page. */ | ||||
| 	BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); | ||||
| 
 | ||||
| 	if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { | ||||
| 		blake2s_compress_generic(state, block, nblocks, inc); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		const size_t blocks = min_t(size_t, nblocks, | ||||
| 					    PAGE_SIZE / BLAKE2S_BLOCK_SIZE); | ||||
| 
 | ||||
| 		kernel_fpu_begin(); | ||||
| 		if (IS_ENABLED(CONFIG_AS_AVX512) && | ||||
| 		    static_branch_likely(&blake2s_use_avx512)) | ||||
| 			blake2s_compress_avx512(state, block, blocks, inc); | ||||
| 		else | ||||
| 			blake2s_compress_ssse3(state, block, blocks, inc); | ||||
| 		kernel_fpu_end(); | ||||
| 
 | ||||
| 		nblocks -= blocks; | ||||
| 		if (!nblocks) | ||||
| 			break; | ||||
| 		block += blocks * BLAKE2S_BLOCK_SIZE; | ||||
| 	} | ||||
| } | ||||
| EXPORT_SYMBOL(blake2s_compress_arch); | ||||
| 
 | ||||
| static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, | ||||
| 				 unsigned int keylen) | ||||
| { | ||||
| 	struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); | ||||
| 
 | ||||
| 	if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { | ||||
| 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||||
| 		return -EINVAL; | ||||
| 	} | ||||
| 
 | ||||
| 	memcpy(tctx->key, key, keylen); | ||||
| 	tctx->keylen = keylen; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int crypto_blake2s_init(struct shash_desc *desc) | ||||
| { | ||||
| 	struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); | ||||
| 	struct blake2s_state *state = shash_desc_ctx(desc); | ||||
| 	const int outlen = crypto_shash_digestsize(desc->tfm); | ||||
| 
 | ||||
| 	if (tctx->keylen) | ||||
| 		blake2s_init_key(state, outlen, tctx->key, tctx->keylen); | ||||
| 	else | ||||
| 		blake2s_init(state, outlen); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, | ||||
| 				 unsigned int inlen) | ||||
| { | ||||
| 	struct blake2s_state *state = shash_desc_ctx(desc); | ||||
| 	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; | ||||
| 
 | ||||
| 	if (unlikely(!inlen)) | ||||
| 		return 0; | ||||
| 	if (inlen > fill) { | ||||
| 		memcpy(state->buf + state->buflen, in, fill); | ||||
| 		blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); | ||||
| 		state->buflen = 0; | ||||
| 		in += fill; | ||||
| 		inlen -= fill; | ||||
| 	} | ||||
| 	if (inlen > BLAKE2S_BLOCK_SIZE) { | ||||
| 		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); | ||||
| 		/* Hash one less (full) block than strictly possible */ | ||||
| 		blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); | ||||
| 		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); | ||||
| 		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); | ||||
| 	} | ||||
| 	memcpy(state->buf + state->buflen, in, inlen); | ||||
| 	state->buflen += inlen; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) | ||||
| { | ||||
| 	struct blake2s_state *state = shash_desc_ctx(desc); | ||||
| 
 | ||||
| 	blake2s_set_lastblock(state); | ||||
| 	memset(state->buf + state->buflen, 0, | ||||
| 	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ | ||||
| 	blake2s_compress_arch(state, state->buf, 1, state->buflen); | ||||
| 	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); | ||||
| 	memcpy(out, state->h, state->outlen); | ||||
| 	memzero_explicit(state, sizeof(*state)); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static struct shash_alg blake2s_algs[] = {{ | ||||
| 	.base.cra_name		= "blake2s-128", | ||||
| 	.base.cra_driver_name	= "blake2s-128-x86", | ||||
| 	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY, | ||||
| 	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), | ||||
| 	.base.cra_priority	= 200, | ||||
| 	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE, | ||||
| 	.base.cra_module	= THIS_MODULE, | ||||
| 
 | ||||
| 	.digestsize		= BLAKE2S_128_HASH_SIZE, | ||||
| 	.setkey			= crypto_blake2s_setkey, | ||||
| 	.init			= crypto_blake2s_init, | ||||
| 	.update			= crypto_blake2s_update, | ||||
| 	.final			= crypto_blake2s_final, | ||||
| 	.descsize		= sizeof(struct blake2s_state), | ||||
| }, { | ||||
| 	.base.cra_name		= "blake2s-160", | ||||
| 	.base.cra_driver_name	= "blake2s-160-x86", | ||||
| 	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY, | ||||
| 	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), | ||||
| 	.base.cra_priority	= 200, | ||||
| 	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE, | ||||
| 	.base.cra_module	= THIS_MODULE, | ||||
| 
 | ||||
| 	.digestsize		= BLAKE2S_160_HASH_SIZE, | ||||
| 	.setkey			= crypto_blake2s_setkey, | ||||
| 	.init			= crypto_blake2s_init, | ||||
| 	.update			= crypto_blake2s_update, | ||||
| 	.final			= crypto_blake2s_final, | ||||
| 	.descsize		= sizeof(struct blake2s_state), | ||||
| }, { | ||||
| 	.base.cra_name		= "blake2s-224", | ||||
| 	.base.cra_driver_name	= "blake2s-224-x86", | ||||
| 	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY, | ||||
| 	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), | ||||
| 	.base.cra_priority	= 200, | ||||
| 	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE, | ||||
| 	.base.cra_module	= THIS_MODULE, | ||||
| 
 | ||||
| 	.digestsize		= BLAKE2S_224_HASH_SIZE, | ||||
| 	.setkey			= crypto_blake2s_setkey, | ||||
| 	.init			= crypto_blake2s_init, | ||||
| 	.update			= crypto_blake2s_update, | ||||
| 	.final			= crypto_blake2s_final, | ||||
| 	.descsize		= sizeof(struct blake2s_state), | ||||
| }, { | ||||
| 	.base.cra_name		= "blake2s-256", | ||||
| 	.base.cra_driver_name	= "blake2s-256-x86", | ||||
| 	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY, | ||||
| 	.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), | ||||
| 	.base.cra_priority	= 200, | ||||
| 	.base.cra_blocksize     = BLAKE2S_BLOCK_SIZE, | ||||
| 	.base.cra_module	= THIS_MODULE, | ||||
| 
 | ||||
| 	.digestsize		= BLAKE2S_256_HASH_SIZE, | ||||
| 	.setkey			= crypto_blake2s_setkey, | ||||
| 	.init			= crypto_blake2s_init, | ||||
| 	.update			= crypto_blake2s_update, | ||||
| 	.final			= crypto_blake2s_final, | ||||
| 	.descsize		= sizeof(struct blake2s_state), | ||||
| }}; | ||||
| 
 | ||||
| static int __init blake2s_mod_init(void) | ||||
| { | ||||
| 	if (!boot_cpu_has(X86_FEATURE_SSSE3)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	static_branch_enable(&blake2s_use_ssse3); | ||||
| 
 | ||||
| 	if (IS_ENABLED(CONFIG_AS_AVX512) && | ||||
| 	    boot_cpu_has(X86_FEATURE_AVX) && | ||||
| 	    boot_cpu_has(X86_FEATURE_AVX2) && | ||||
| 	    boot_cpu_has(X86_FEATURE_AVX512F) && | ||||
| 	    boot_cpu_has(X86_FEATURE_AVX512VL) && | ||||
| 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | | ||||
| 			      XFEATURE_MASK_AVX512, NULL)) | ||||
| 		static_branch_enable(&blake2s_use_avx512); | ||||
| 
 | ||||
| 	return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); | ||||
| } | ||||
| 
 | ||||
| static void __exit blake2s_mod_exit(void) | ||||
| { | ||||
| 	if (boot_cpu_has(X86_FEATURE_SSSE3)) | ||||
| 		crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); | ||||
| } | ||||
| 
 | ||||
| module_init(blake2s_mod_init); | ||||
| module_exit(blake2s_mod_exit); | ||||
| 
 | ||||
| MODULE_ALIAS_CRYPTO("blake2s-128"); | ||||
| MODULE_ALIAS_CRYPTO("blake2s-128-x86"); | ||||
| MODULE_ALIAS_CRYPTO("blake2s-160"); | ||||
| MODULE_ALIAS_CRYPTO("blake2s-160-x86"); | ||||
| MODULE_ALIAS_CRYPTO("blake2s-224"); | ||||
| MODULE_ALIAS_CRYPTO("blake2s-224-x86"); | ||||
| MODULE_ALIAS_CRYPTO("blake2s-256"); | ||||
| MODULE_ALIAS_CRYPTO("blake2s-256-x86"); | ||||
| MODULE_LICENSE("GPL v2"); | ||||
|  | @ -674,6 +674,12 @@ config CRYPTO_BLAKE2S | |||
| 
 | ||||
| 	  See https://blake2.net for further information. | ||||
| 
 | ||||
| config CRYPTO_BLAKE2S_X86 | ||||
| 	tristate "BLAKE2s digest algorithm (x86 accelerated version)" | ||||
| 	depends on X86 && 64BIT | ||||
| 	select CRYPTO_LIB_BLAKE2S_GENERIC | ||||
| 	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S | ||||
| 
 | ||||
| config CRYPTO_CRCT10DIF | ||||
| 	tristate "CRCT10DIF algorithm" | ||||
| 	select CRYPTO_HASH | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Jason A. Donenfeld
						Jason A. Donenfeld