/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * Copyright (c) 2010, Intel Corporation.
 *
 *    Ported x86_64 version to x86:
 *    Author: Mathias Krause <minipli@googlemail.com>
 */

#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/frame.h>
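
/*
 * The defines below give symbolic names to the fixed SSE and
 * general-purpose registers used throughout this file.  Note that the
 * general-purpose assignments differ between the x86_64 and i386 builds
 * (see the __x86_64__ conditional below).
 */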
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3

#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12

#define GF128MUL_MASK %xmm7

#ifdef __x86_64__
#define AREG	%rax
#define KEYP	%rdi
#define OUTP	%rsi
#define UKEYP	OUTP
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
#define TCTR_LOW T2
#else
#define AREG	%eax
#define KEYP	%edi
#define OUTP	AREG
#define UKEYP	OUTP
#define INP	%edx
#define LEN	%esi
#define IVP	%ebp
#define KLEN	%ebx
#define T1	%ecx
#define TKEYP	T1
#endif
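
/*
 * Key-schedule helpers used by aesni_set_key below.  By convention the
 * caller leaves the aeskeygenassist result in %xmm1, the previous round
 * key(s) in %xmm0 (and %xmm2 for the 192/256-bit schedules), keeps
 * %xmm4 zeroed, and points TKEYP at the next free slot of the expanded
 * key.  The pshufd broadcasts the relevant word of the aeskeygenassist
 * output, and the shufps/pxor sequence implements the cumulative
 * word-by-word XOR of the AES key schedule; each helper stores the
 * resulting round key(s) at (TKEYP) and advances TKEYP.
 */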
SYM_FUNC_START_LOCAL(_key_expansion_256a)
	pshufd $0b11111111, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0
	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
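/*
 * _key_expansion_128 can alias _key_expansion_256a because the AES-128
 * schedule and the even steps of the AES-256 schedule both consume
 * RotWord(SubWord(w)) xor rcon, which aeskeygenassist leaves in the
 * high dword selected by the pshufd $0b11111111 above.
 */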

SYM_FUNC_START_LOCAL(_key_expansion_192a)
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	movaps %xmm2, %xmm6
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6
	movaps %xmm6, (TKEYP)
	shufps $0b01001110, %xmm2, %xmm1
	movaps %xmm1, 0x10(TKEYP)
	add $0x20, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192a)

SYM_FUNC_START_LOCAL(_key_expansion_192b)
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192b)

SYM_FUNC_START_LOCAL(_key_expansion_256b)
	pshufd $0b10101010, %xmm1, %xmm1
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256b)

/*
 * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                    unsigned int key_len)
 */
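/*
 * The hard-coded offsets used below assume the struct crypto_aes_ctx
 * layout: 240 bytes of encryption round keys at offset 0, 240 bytes of
 * decryption round keys at offset 240, and key_length at offset 480.
 */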
SYM_FUNC_START(aesni_set_key)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	movl (FRAME_OFFSET+8)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+12)(%esp), UKEYP	# in_key
	movl (FRAME_OFFSET+16)(%esp), %edx	# key_len
#endif
	movups (UKEYP), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (KEYP)
	lea 0x10(KEYP), TKEYP		# key addr
	movl %edx, 480(KEYP)
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192
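	/*
	 * Fall-through path: 256-bit keys.  The second 16 key bytes become
	 * round key 1, then seven aeskeygenassist steps with round constants
	 * 0x01..0x40 alternate _key_expansion_256a/_key_expansion_256b to
	 * produce the remaining round keys (15 in total for AES-256).
	 */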
	movups 0x10(UKEYP), %xmm2	# other user key
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key
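	/*
	 * 192-bit keys: the 8 key bytes beyond the first 16 are loaded into
	 * %xmm2, then eight aeskeygenassist steps with round constants
	 * 0x01..0x80 alternate _key_expansion_192a/_key_expansion_192b to
	 * complete the 13 round keys (52 words) of the AES-192 schedule.
	 */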
.Lenc_key192:
	movq 0x10(UKEYP), %xmm2		# other user key
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
	call _key_expansion_192b
	jmp .Ldec_key
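	/*
	 * 128-bit keys: each of the ten steps appends one 16-byte round key,
	 * using the standard AES-128 round-constant sequence 0x01, 0x02, ...,
	 * 0x80, 0x1b, 0x36 (the rcon values wrap in GF(2^8) after 0x80),
	 * for 11 round keys in total.
	 */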
|
|
|
|
.Lenc_key128:
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
|
2009-01-18 16:28:34 +11:00
|
|
|
call _key_expansion_128
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
|
2009-01-18 16:28:34 +11:00
|
|
|
call _key_expansion_128
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
|
2009-01-18 16:28:34 +11:00
|
|
|
call _key_expansion_128
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
|
2009-01-18 16:28:34 +11:00
|
|
|
call _key_expansion_128
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
	aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
	call _key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
	call _key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
	call _key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
	call _key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
	call _key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
	call _key_expansion_128
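	/*
	 * Build the decryption key schedule for the Equivalent Inverse
	 * Cipher: the first and last encryption round keys are swapped into
	 * the decryption half of the context (240 bytes further on), and
	 * every round key in between is passed through AESIMC (inverse
	 * MixColumns) in reverse order.
	 */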
.Ldec_key:
	sub $0x10, TKEYP
	movaps (KEYP), %xmm0
	movaps (TKEYP), %xmm1
	movaps %xmm0, 240(TKEYP)
	movaps %xmm1, 240(KEYP)
	add $0x10, KEYP
	lea 240-16(TKEYP), UKEYP
	.align 4
.Ldec_key_loop:
	movaps (KEYP), %xmm0
	aesimc %xmm0, %xmm1
	movaps %xmm1, (UKEYP)
	add $0x10, KEYP
	sub $0x10, UKEYP
	cmp TKEYP, KEYP
	jb .Ldec_key_loop
#ifndef __x86_64__
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_set_key)

/*
 * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
 */
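/*
 * Single-block primitive: encrypts the 16 bytes at @src into @dst with the
 * key schedule that aesni_set_key() expanded into @ctx.
 */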
SYM_FUNC_START(aesni_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
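	/*
	 * The key length in bytes (16/24/32) is kept at offset 480 of the
	 * key context; _aesni_enc1 uses it to pick the number of rounds.
	 */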
	movl 480(KEYP), KLEN		# key length
	movups (INP), STATE		# input
	call _aesni_enc1
	movups STATE, (OUTP)		# output
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_enc)

/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_enc1)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	add $0x30, TKEYP
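	/*
	 * Dispatch on the key length: 32-byte (AES-256) keys run all of the
	 * aesenc rounds below, 24-byte (AES-192) keys skip the first two,
	 * and 16-byte (AES-128) keys jump straight to .Lenc128 for the final
	 * nine rounds plus aesenclast.
	 */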
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP
	je .Lenc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE
	.align 4
.Lenc192:
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE
	.align 4
.Lenc128:
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps (TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE
	RET
SYM_FUNC_END(_aesni_enc1)

/*
 * _aesni_enc4:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
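/*
 * _aesni_enc4 runs the same round sequence as _aesni_enc1 but interleaves
 * four independent blocks per round key, which helps hide the latency of
 * the aesenc instruction on pipelined implementations.
 */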
SYM_FUNC_START_LOCAL(_aesni_enc4)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP
	je .L4enc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc192:
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc128:
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps (TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE1		# last round
	aesenclast KEY, STATE2
	aesenclast KEY, STATE3
	aesenclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_enc4)

/*
 * void aesni_dec(const void *ctx, u8 *dst, const u8 *src)
 */
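/*
 * Single-block primitive: decrypts the 16 bytes at @src into @dst using the
 * inverse key schedule prepared by aesni_set_key() in @ctx.
 */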
SYM_FUNC_START(aesni_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	mov 480(KEYP), KLEN		# key length
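	/*
	 * Advance KEYP past the 240-byte encryption schedule so it points at
	 * the decryption round keys generated by the .Ldec_key loop in
	 * aesni_set_key.
	 */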
	add $240, KEYP
	movups (INP), STATE		# input
	call _aesni_dec1
	movups STATE, (OUTP)		# output
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_dec)

/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
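/*
 * _aesni_dec1 mirrors _aesni_enc1: the same key-length dispatch, but each
 * round uses aesdec over the inverse key schedule instead of aesenc.
 */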
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_START_LOCAL(_aesni_dec1)
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps (KEYP), KEY # key
|
|
|
|
mov KEYP, TKEYP
|
|
|
|
pxor KEY, STATE # round 0
|
|
|
|
add $0x30, TKEYP
|
|
|
|
cmp $24, KLEN
|
|
|
|
jb .Ldec128
|
|
|
|
lea 0x20(TKEYP), TKEYP
|
|
|
|
je .Ldec192
|
|
|
|
add $0x20, TKEYP
|
|
|
|
movaps -0x60(TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps -0x50(TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
.align 4
|
|
|
|
.Ldec192:
|
|
|
|
movaps -0x40(TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps -0x30(TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
.align 4
|
|
|
|
.Ldec128:
|
|
|
|
movaps -0x20(TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps -0x10(TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps (TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x10(TKEYP), KEY
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x20(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x30(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x40(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x50(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x60(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x70(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdeclast KEY, STATE
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_END(_aesni_dec1)
|
2009-01-18 16:28:34 +11:00
|
|
|
|
|
|
|
/*
|
|
|
|
* _aesni_dec4: internal ABI
|
|
|
|
* input:
|
|
|
|
* KEYP: key struct pointer
|
|
|
|
* KLEN: key length
|
|
|
|
* STATE1: initial state (input)
|
|
|
|
* STATE2
|
|
|
|
* STATE3
|
|
|
|
* STATE4
|
|
|
|
* output:
|
|
|
|
* STATE1: final state (output)
|
|
|
|
* STATE2
|
|
|
|
* STATE3
|
|
|
|
* STATE4
|
|
|
|
* changed:
|
|
|
|
* KEY
|
|
|
|
* TKEYP (T1)
|
|
|
|
*/
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_START_LOCAL(_aesni_dec4)
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps (KEYP), KEY # key
|
|
|
|
mov KEYP, TKEYP
|
|
|
|
pxor KEY, STATE1 # round 0
|
|
|
|
pxor KEY, STATE2
|
|
|
|
pxor KEY, STATE3
|
|
|
|
pxor KEY, STATE4
|
|
|
|
add $0x30, TKEYP
|
|
|
|
cmp $24, KLEN
|
|
|
|
jb .L4dec128
|
|
|
|
lea 0x20(TKEYP), TKEYP
|
|
|
|
je .L4dec192
|
|
|
|
add $0x20, TKEYP
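# Key-length dispatch: KLEN holds the key length in bytes (16/24/32).
# AES-128 jumps to .L4dec128 (10 rounds), AES-192 enters at .L4dec192
# (12 rounds), and AES-256 falls through to run two extra rounds first
# (14 rounds).  TKEYP is biased so the aesdeclast key is always at
# 0x70(TKEYP).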
|
|
|
|
movaps -0x60(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps -0x50(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
.align 4
|
|
|
|
.L4dec192:
|
|
|
|
movaps -0x40(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps -0x30(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
.align 4
|
|
|
|
.L4dec128:
|
|
|
|
movaps -0x20(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps -0x10(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps (TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x10(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x20(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x30(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x40(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x50(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x60(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdec KEY, STATE1
|
|
|
|
aesdec KEY, STATE2
|
|
|
|
aesdec KEY, STATE3
|
|
|
|
aesdec KEY, STATE4
|
2009-01-18 16:28:34 +11:00
|
|
|
movaps 0x70(TKEYP), KEY
|
2020-07-09 17:08:57 +02:00
|
|
|
aesdeclast KEY, STATE1 # last round
|
|
|
|
aesdeclast KEY, STATE2
|
|
|
|
aesdeclast KEY, STATE3
|
|
|
|
aesdeclast KEY, STATE4
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_END(_aesni_dec4)
|
2009-01-18 16:28:34 +11:00
|
|
|
|
|
|
|
/*
|
|
|
|
* void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
|
|
|
|
* size_t len)
|
|
|
|
*/
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_START(aesni_ecb_enc)
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_BEGIN
|
crypto: aesni-intel - Ported implementation to x86-32
The AES-NI instructions are also available in legacy mode so the 32-bit
architecture may profit from those, too.
To illustrate the performance gain here's a short summary of a dm-crypt
speed test on a Core i7 M620 running at 2.67GHz comparing both assembler
implementations:
x86: i586 aes-ni delta
ECB, 256 bit: 93.8 MB/s 123.3 MB/s +31.4%
CBC, 256 bit: 84.8 MB/s 262.3 MB/s +209.3%
LRW, 256 bit: 108.6 MB/s 222.1 MB/s +104.5%
XTS, 256 bit: 105.0 MB/s 205.5 MB/s +95.7%
Additionally, due to some minor optimizations, the 64-bit version also
got a minor performance gain as seen below:
x86-64: old impl. new impl. delta
ECB, 256 bit: 121.1 MB/s 123.0 MB/s +1.5%
CBC, 256 bit: 285.3 MB/s 290.8 MB/s +1.9%
LRW, 256 bit: 263.7 MB/s 265.3 MB/s +0.6%
XTS, 256 bit: 251.1 MB/s 255.3 MB/s +1.7%
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
pushl LEN
|
|
|
|
pushl KEYP
|
|
|
|
pushl KLEN
|
2016-01-21 16:49:19 -06:00
|
|
|
movl (FRAME_OFFSET+16)(%esp), KEYP # ctx
|
|
|
|
movl (FRAME_OFFSET+20)(%esp), OUTP # dst
|
|
|
|
movl (FRAME_OFFSET+24)(%esp), INP # src
|
|
|
|
movl (FRAME_OFFSET+28)(%esp), LEN # len
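# i386 only: the arguments arrive on the stack, so they are copied into
# the register aliases here (FRAME_OFFSET plus 16 skips the optional
# frame pointer, the return address and the three registers pushed
# above).  On x86-64 the SysV calling convention already passes
# ctx/dst/src/len in the registers these macros name, so no loads are
# needed.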
|
2010-11-27 16:34:46 +08:00
|
|
|
#endif
|
2009-01-18 16:28:34 +11:00
|
|
|
test LEN, LEN # check length
|
|
|
|
jz .Lecb_enc_ret
|
|
|
|
mov 480(KEYP), KLEN
|
|
|
|
cmp $16, LEN
|
|
|
|
jb .Lecb_enc_ret
|
|
|
|
cmp $64, LEN
|
|
|
|
jb .Lecb_enc_loop1
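# Bulk path: encrypt four 16-byte blocks per iteration with _aesni_enc4;
# any remaining whole blocks are handled one at a time in .Lecb_enc_loop1.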
|
|
|
|
.align 4
|
|
|
|
.Lecb_enc_loop4:
|
|
|
|
movups (INP), STATE1
|
|
|
|
movups 0x10(INP), STATE2
|
|
|
|
movups 0x20(INP), STATE3
|
|
|
|
movups 0x30(INP), STATE4
|
|
|
|
call _aesni_enc4
|
|
|
|
movups STATE1, (OUTP)
|
|
|
|
movups STATE2, 0x10(OUTP)
|
|
|
|
movups STATE3, 0x20(OUTP)
|
|
|
|
movups STATE4, 0x30(OUTP)
|
|
|
|
sub $64, LEN
|
|
|
|
add $64, INP
|
|
|
|
add $64, OUTP
|
|
|
|
cmp $64, LEN
|
|
|
|
jge .Lecb_enc_loop4
|
|
|
|
cmp $16, LEN
|
|
|
|
jb .Lecb_enc_ret
|
|
|
|
.align 4
|
|
|
|
.Lecb_enc_loop1:
|
|
|
|
movups (INP), STATE1
|
|
|
|
call _aesni_enc1
|
|
|
|
movups STATE1, (OUTP)
|
|
|
|
sub $16, LEN
|
|
|
|
add $16, INP
|
|
|
|
add $16, OUTP
|
|
|
|
cmp $16, LEN
|
|
|
|
jge .Lecb_enc_loop1
|
|
|
|
.Lecb_enc_ret:
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
popl KLEN
|
|
|
|
popl KEYP
|
|
|
|
popl LEN
|
|
|
|
#endif
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_END
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_END(aesni_ecb_enc)
|
2009-01-18 16:28:34 +11:00
|
|
|
|
|
|
|
/*
|
|
|
|
* void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
|
|
|
|
* size_t len);
|
|
|
|
*/
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_START(aesni_ecb_dec)
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_BEGIN
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
pushl LEN
|
|
|
|
pushl KEYP
|
|
|
|
pushl KLEN
|
2016-01-21 16:49:19 -06:00
|
|
|
movl (FRAME_OFFSET+16)(%esp), KEYP # ctx
|
|
|
|
movl (FRAME_OFFSET+20)(%esp), OUTP # dst
|
|
|
|
movl (FRAME_OFFSET+24)(%esp), INP # src
|
|
|
|
movl (FRAME_OFFSET+28)(%esp), LEN # len
|
2010-11-27 16:34:46 +08:00
|
|
|
#endif
|
2009-01-18 16:28:34 +11:00
|
|
|
test LEN, LEN
|
|
|
|
jz .Lecb_dec_ret
|
|
|
|
mov 480(KEYP), KLEN
|
|
|
|
add $240, KEYP
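# Decryption uses the inverse key schedule: crypto_aes_ctx stores key_enc
# at offset 0 and key_dec at offset 240, so KEYP is advanced past the
# encryption keys before _aesni_dec1/_aesni_dec4 are called.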
|
|
|
|
cmp $16, LEN
|
|
|
|
jb .Lecb_dec_ret
|
|
|
|
cmp $64, LEN
|
|
|
|
jb .Lecb_dec_loop1
|
|
|
|
.align 4
|
|
|
|
.Lecb_dec_loop4:
|
|
|
|
movups (INP), STATE1
|
|
|
|
movups 0x10(INP), STATE2
|
|
|
|
movups 0x20(INP), STATE3
|
|
|
|
movups 0x30(INP), STATE4
|
|
|
|
call _aesni_dec4
|
|
|
|
movups STATE1, (OUTP)
|
|
|
|
movups STATE2, 0x10(OUTP)
|
|
|
|
movups STATE3, 0x20(OUTP)
|
|
|
|
movups STATE4, 0x30(OUTP)
|
|
|
|
sub $64, LEN
|
|
|
|
add $64, INP
|
|
|
|
add $64, OUTP
|
|
|
|
cmp $64, LEN
|
|
|
|
jge .Lecb_dec_loop4
|
|
|
|
cmp $16, LEN
|
|
|
|
jb .Lecb_dec_ret
|
|
|
|
.align 4
|
|
|
|
.Lecb_dec_loop1:
|
|
|
|
movups (INP), STATE1
|
|
|
|
call _aesni_dec1
|
|
|
|
movups STATE1, (OUTP)
|
|
|
|
sub $16, LEN
|
|
|
|
add $16, INP
|
|
|
|
add $16, OUTP
|
|
|
|
cmp $16, LEN
|
|
|
|
jge .Lecb_dec_loop1
|
|
|
|
.Lecb_dec_ret:
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
popl KLEN
|
|
|
|
popl KEYP
|
|
|
|
popl LEN
|
|
|
|
#endif
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_END
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_END(aesni_ecb_dec)
|
2009-01-18 16:28:34 +11:00
|
|
|
|
|
|
|
/*
|
|
|
|
* void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
|
|
|
|
* size_t len, u8 *iv)
|
|
|
|
*/
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_START(aesni_cbc_enc)
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_BEGIN
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
pushl IVP
|
|
|
|
pushl LEN
|
|
|
|
pushl KEYP
|
|
|
|
pushl KLEN
|
2016-01-21 16:49:19 -06:00
|
|
|
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
|
|
|
|
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
|
|
|
|
movl (FRAME_OFFSET+28)(%esp), INP # src
|
|
|
|
movl (FRAME_OFFSET+32)(%esp), LEN # len
|
|
|
|
movl (FRAME_OFFSET+36)(%esp), IVP # iv
|
2010-11-27 16:34:46 +08:00
|
|
|
#endif
|
2009-01-18 16:28:34 +11:00
|
|
|
cmp $16, LEN
|
|
|
|
jb .Lcbc_enc_ret
|
|
|
|
mov 480(KEYP), KLEN
|
|
|
|
movups (IVP), STATE # load iv as initial state
|
|
|
|
.align 4
|
|
|
|
.Lcbc_enc_loop:
|
|
|
|
movups (INP), IN # load input
|
|
|
|
pxor IN, STATE
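# CBC chaining: STATE still holds the previous ciphertext block (the IV on
# the first iteration), so this XOR computes P[i] ^ C[i-1] before the
# block is encrypted.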
|
|
|
|
call _aesni_enc1
|
|
|
|
movups STATE, (OUTP) # store output
|
|
|
|
sub $16, LEN
|
|
|
|
add $16, INP
|
|
|
|
add $16, OUTP
|
|
|
|
cmp $16, LEN
|
|
|
|
jge .Lcbc_enc_loop
|
|
|
|
movups STATE, (IVP)
|
|
|
|
.Lcbc_enc_ret:
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
popl KLEN
|
|
|
|
popl KEYP
|
|
|
|
popl LEN
|
|
|
|
popl IVP
|
|
|
|
#endif
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_END
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_END(aesni_cbc_enc)
|
2009-01-18 16:28:34 +11:00
|
|
|
|
|
|
|
/*
|
|
|
|
* void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
|
|
|
|
* size_t len, u8 *iv)
|
|
|
|
*/
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_START(aesni_cbc_dec)
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_BEGIN
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
pushl IVP
|
|
|
|
pushl LEN
|
|
|
|
pushl KEYP
|
|
|
|
pushl KLEN
|
2016-01-21 16:49:19 -06:00
|
|
|
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
|
|
|
|
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
|
|
|
|
movl (FRAME_OFFSET+28)(%esp), INP # src
|
|
|
|
movl (FRAME_OFFSET+32)(%esp), LEN # len
|
|
|
|
movl (FRAME_OFFSET+36)(%esp), IVP # iv
|
2010-11-27 16:34:46 +08:00
|
|
|
#endif
|
2009-01-18 16:28:34 +11:00
|
|
|
cmp $16, LEN
|
2009-06-18 19:33:57 +08:00
|
|
|
jb .Lcbc_dec_just_ret
|
2009-01-18 16:28:34 +11:00
|
|
|
mov 480(KEYP), KLEN
|
|
|
|
add $240, KEYP
|
|
|
|
movups (IVP), IV
|
|
|
|
cmp $64, LEN
|
|
|
|
jb .Lcbc_dec_loop1
|
|
|
|
.align 4
|
|
|
|
.Lcbc_dec_loop4:
|
|
|
|
movups (INP), IN1
|
|
|
|
movaps IN1, STATE1
|
|
|
|
movups 0x10(INP), IN2
|
|
|
|
movaps IN2, STATE2
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifdef __x86_64__
|
2009-01-18 16:28:34 +11:00
|
|
|
movups 0x20(INP), IN3
|
|
|
|
movaps IN3, STATE3
|
|
|
|
movups 0x30(INP), IN4
|
|
|
|
movaps IN4, STATE4
|
2010-11-27 16:34:46 +08:00
|
|
|
#else
|
|
|
|
movups 0x20(INP), IN1
|
|
|
|
movaps IN1, STATE3
|
|
|
|
movups 0x30(INP), IN2
|
|
|
|
movaps IN2, STATE4
|
|
|
|
#endif
|
2009-01-18 16:28:34 +11:00
|
|
|
call _aesni_dec4
|
|
|
|
pxor IV, STATE1
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifdef __x86_64__
|
2009-01-18 16:28:34 +11:00
|
|
|
pxor IN1, STATE2
|
|
|
|
pxor IN2, STATE3
|
|
|
|
pxor IN3, STATE4
|
|
|
|
movaps IN4, IV
|
2010-11-27 16:34:46 +08:00
|
|
|
#else
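# i386 path: with only eight XMM registers, IN1/IN2 were reused above for
# ciphertext blocks 2 and 3; blocks 0 and 1 are re-read from memory below
# to finish the CBC XORs into STATE2 and STATE3.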
|
|
|
|
pxor IN1, STATE4
|
|
|
|
movaps IN2, IV
|
2012-05-30 01:43:08 +02:00
|
|
|
movups (INP), IN1
|
|
|
|
pxor IN1, STATE2
|
|
|
|
movups 0x10(INP), IN2
|
|
|
|
pxor IN2, STATE3
|
2010-11-27 16:34:46 +08:00
|
|
|
#endif
|
2009-01-18 16:28:34 +11:00
|
|
|
movups STATE1, (OUTP)
|
|
|
|
movups STATE2, 0x10(OUTP)
|
|
|
|
movups STATE3, 0x20(OUTP)
|
|
|
|
movups STATE4, 0x30(OUTP)
|
|
|
|
sub $64, LEN
|
|
|
|
add $64, INP
|
|
|
|
add $64, OUTP
|
|
|
|
cmp $64, LEN
|
|
|
|
jge .Lcbc_dec_loop4
|
|
|
|
cmp $16, LEN
|
|
|
|
jb .Lcbc_dec_ret
|
|
|
|
.align 4
|
|
|
|
.Lcbc_dec_loop1:
|
|
|
|
movups (INP), IN
|
|
|
|
movaps IN, STATE
|
|
|
|
call _aesni_dec1
|
|
|
|
pxor IV, STATE
|
|
|
|
movups STATE, (OUTP)
|
|
|
|
movaps IN, IV
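# The saved ciphertext block becomes the chaining value (IV) for the next
# iteration.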
|
|
|
|
sub $16, LEN
|
|
|
|
add $16, INP
|
|
|
|
add $16, OUTP
|
|
|
|
cmp $16, LEN
|
|
|
|
jge .Lcbc_dec_loop1
|
|
|
|
.Lcbc_dec_ret:
|
2009-06-18 19:33:57 +08:00
|
|
|
movups IV, (IVP)
|
|
|
|
.Lcbc_dec_just_ret:
|
2010-11-27 16:34:46 +08:00
|
|
|
#ifndef __x86_64__
|
|
|
|
popl KLEN
|
|
|
|
popl KEYP
|
|
|
|
popl LEN
|
|
|
|
popl IVP
|
|
|
|
#endif
|
2016-01-21 16:49:19 -06:00
|
|
|
FRAME_END
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:51:04 +02:00
|
|
|
SYM_FUNC_END(aesni_cbc_dec)
|
2010-03-10 18:28:55 +08:00
|
|
|
|
2020-12-08 00:34:02 +01:00
|
|
|
/*
|
|
|
|
* void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
|
|
|
|
* size_t len, u8 *iv)
|
|
|
|
*/
|
|
|
|
SYM_FUNC_START(aesni_cts_cbc_enc)
|
|
|
|
FRAME_BEGIN
|
|
|
|
#ifndef __x86_64__
|
|
|
|
pushl IVP
|
|
|
|
pushl LEN
|
|
|
|
pushl KEYP
|
|
|
|
pushl KLEN
|
|
|
|
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
|
|
|
|
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
|
|
|
|
movl (FRAME_OFFSET+28)(%esp), INP # src
|
|
|
|
movl (FRAME_OFFSET+32)(%esp), LEN # len
|
|
|
|
movl (FRAME_OFFSET+36)(%esp), IVP # iv
|
|
|
|
lea .Lcts_permute_table, T1
|
|
|
|
#else
|
|
|
|
lea .Lcts_permute_table(%rip), T1
|
|
|
|
#endif
|
|
|
|
mov 480(KEYP), KLEN
|
|
|
|
movups (IVP), STATE
|
|
|
|
sub $16, LEN
|
|
|
|
mov T1, IVP
|
|
|
|
add $32, IVP
|
|
|
|
add LEN, T1
|
|
|
|
sub LEN, IVP
|
|
|
|
movups (T1), %xmm4
|
|
|
|
movups (IVP), %xmm5
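# Ciphertext stealing setup: LEN now holds the length of the final partial
# block.  %xmm4 and %xmm5 are pshufb masks loaded from .Lcts_permute_table
# at offsets LEN and 32-LEN; they move the tail bytes into place when the
# last two blocks are swapped.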
|
|
|
|
|
|
|
|
movups (INP), IN1
|
|
|
|
add LEN, INP
|
|
|
|
movups (INP), IN2
|
|
|
|
|
|
|
|
pxor IN1, STATE
|
|
|
|
call _aesni_enc1
|
|
|
|
|
|
|
|
pshufb %xmm5, IN2
|
|
|
|
pxor STATE, IN2
|
|
|
|
pshufb %xmm4, STATE
|
|
|
|
add OUTP, LEN
|
|
|
|
movups STATE, (LEN)
|
|
|
|
|
|
|
|
movaps IN2, STATE
|
|
|
|
call _aesni_enc1
|
|
|
|
movups STATE, (OUTP)
|
|
|
|
|
|
|
|
#ifndef __x86_64__
|
|
|
|
popl KLEN
|
|
|
|
popl KEYP
|
|
|
|
popl LEN
|
|
|
|
popl IVP
|
|
|
|
#endif
|
|
|
|
FRAME_END
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2020-12-08 00:34:02 +01:00
|
|
|
SYM_FUNC_END(aesni_cts_cbc_enc)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
|
|
|
|
* size_t len, u8 *iv)
|
|
|
|
*/
|
|
|
|
SYM_FUNC_START(aesni_cts_cbc_dec)
|
|
|
|
FRAME_BEGIN
|
|
|
|
#ifndef __x86_64__
|
|
|
|
pushl IVP
|
|
|
|
pushl LEN
|
|
|
|
pushl KEYP
|
|
|
|
pushl KLEN
|
|
|
|
movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
|
|
|
|
movl (FRAME_OFFSET+24)(%esp), OUTP # dst
|
|
|
|
movl (FRAME_OFFSET+28)(%esp), INP # src
|
|
|
|
movl (FRAME_OFFSET+32)(%esp), LEN # len
|
|
|
|
movl (FRAME_OFFSET+36)(%esp), IVP # iv
|
|
|
|
lea .Lcts_permute_table, T1
|
|
|
|
#else
|
|
|
|
lea .Lcts_permute_table(%rip), T1
|
|
|
|
#endif
|
|
|
|
mov 480(KEYP), KLEN
|
|
|
|
add $240, KEYP
|
|
|
|
movups (IVP), IV
|
|
|
|
sub $16, LEN
|
|
|
|
mov T1, IVP
|
|
|
|
add $32, IVP
|
|
|
|
add LEN, T1
|
|
|
|
sub LEN, IVP
|
|
|
|
movups (T1), %xmm4
|
|
|
|
|
|
|
|
movups (INP), STATE
|
|
|
|
add LEN, INP
|
|
|
|
movups (INP), IN1
|
|
|
|
|
|
|
|
call _aesni_dec1
|
|
|
|
movaps STATE, IN2
|
|
|
|
pshufb %xmm4, STATE
|
|
|
|
pxor IN1, STATE
|
|
|
|
|
|
|
|
add OUTP, LEN
|
|
|
|
movups STATE, (LEN)
|
|
|
|
|
|
|
|
movups (IVP), %xmm0
|
|
|
|
pshufb %xmm0, IN1
|
|
|
|
pblendvb IN2, IN1
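# Note: the legacy (non-VEX) encoding of pblendvb takes %xmm0 as an
# implicit mask operand, which is why the permute-table mask was loaded
# into %xmm0 above.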
|
|
|
|
movaps IN1, STATE
|
|
|
|
call _aesni_dec1
|
|
|
|
|
|
|
|
pxor IV, STATE
|
|
|
|
movups STATE, (OUTP)
|
|
|
|
|
|
|
|
#ifndef __x86_64__
|
|
|
|
popl KLEN
|
|
|
|
popl KEYP
|
|
|
|
popl LEN
|
|
|
|
popl IVP
|
|
|
|
#endif
|
|
|
|
FRAME_END
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2020-12-08 00:34:02 +01:00
|
|
|
SYM_FUNC_END(aesni_cts_cbc_dec)
|
|
|
|
|
x86/asm/crypto: Move .Lbswap_mask data to .rodata section
stacktool reports the following warning:
stacktool: arch/x86/crypto/aesni-intel_asm.o: _aesni_inc_init(): can't find starting instruction
stacktool gets confused when it tries to disassemble the following data
in the .text section:
.Lbswap_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
Move it to .rodata which is a more appropriate section for read-only
data.
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bernd Petrovitsch <bernd@petrovitsch.priv.at>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Chris J Arges <chris.j.arges@canonical.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Pedro Alves <palves@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/b6a2f3f8bda705143e127c025edb2b53c86e6eb4.1453405861.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-01-21 16:49:15 -06:00
|
|
|
.pushsection .rodata
|
2010-03-10 18:28:55 +08:00
|
|
|
.align 16
|
2020-12-08 00:34:02 +01:00
|
|
|
.Lcts_permute_table:
|
|
|
|
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
|
|
|
|
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
|
|
|
|
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
|
|
|
|
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
|
|
|
|
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
|
|
|
|
.byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
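# Table layout: 16 bytes of 0x80, the identity indices 0x00-0x0f, then 16
# more 0x80 bytes.  A 16-byte load from any offset in this table gives a
# pshufb mask that selects one contiguous run of source bytes and zeroes
# the rest (pshufb zeroes a byte whose index has bit 7 set), which is how
# the CTS code shifts the partial final block into position.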
|
|
|
|
#ifdef __x86_64__
|
2010-03-10 18:28:55 +08:00
|
|
|
.Lbswap_mask:
|
|
|
|
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
2020-12-08 00:34:02 +01:00
|
|
|
#endif
|
2016-01-21 16:49:15 -06:00
|
|
|
.popsection
|
2010-03-10 18:28:55 +08:00
|
|
|
|
2020-12-08 00:34:02 +01:00
|
|
|
#ifdef __x86_64__
|
2010-03-10 18:28:55 +08:00
|
|
|
/*
|
|
|
|
* _aesni_inc_init: internal ABI
|
|
|
|
* setup registers used by _aesni_inc
|
|
|
|
* input:
|
|
|
|
* IV
|
|
|
|
* output:
|
|
|
|
* CTR: == IV, in little endian
|
|
|
|
* TCTR_LOW: == lower qword of CTR
|
|
|
|
* INC: == 1, in little endian
|
|
|
|
* BSWAP_MASK == endian swapping mask
|
|
|
|
*/
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_START_LOCAL(_aesni_inc_init)
|
2023-04-12 13:00:24 +02:00
|
|
|
movaps .Lbswap_mask(%rip), BSWAP_MASK
|
2010-03-10 18:28:55 +08:00
|
|
|
movaps IV, CTR
|
2020-07-09 17:08:57 +02:00
|
|
|
pshufb BSWAP_MASK, CTR
|
2010-03-10 18:28:55 +08:00
|
|
|
mov $1, TCTR_LOW
|
2020-07-09 17:08:57 +02:00
|
|
|
movq TCTR_LOW, INC
|
|
|
|
movq CTR, TCTR_LOW
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_END(_aesni_inc_init)
|
2010-03-10 18:28:55 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* _aesni_inc: internal ABI
|
|
|
|
* Increase IV by 1, IV is in big endian
|
|
|
|
* input:
|
|
|
|
* IV
|
|
|
|
* CTR: == IV, in little endian
|
|
|
|
* TCTR_LOW: == lower qword of CTR
|
|
|
|
* INC: == 1, in little endian
|
|
|
|
* BSWAP_MASK == endian swapping mask
|
|
|
|
* output:
|
|
|
|
* IV: Increased by 1
|
|
|
|
* changed:
|
|
|
|
* CTR: == output IV, in little endian
|
|
|
|
* TCTR_LOW: == lower qword of CTR
|
|
|
|
*/
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_START_LOCAL(_aesni_inc)
|
2010-03-10 18:28:55 +08:00
|
|
|
paddq INC, CTR
|
|
|
|
add $1, TCTR_LOW
|
|
|
|
jnc .Linc_low
|
|
|
|
pslldq $8, INC
|
|
|
|
paddq INC, CTR
|
|
|
|
psrldq $8, INC
|
|
|
|
.Linc_low:
|
|
|
|
movaps CTR, IV
|
crypto: x86 - Remove include/asm/inst.h
Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.
Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.
The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-07-09 17:08:57 +02:00
|
|
|
pshufb BSWAP_MASK, IV
|
2021-12-04 14:43:40 +01:00
|
|
|
RET
|
2019-10-11 13:50:46 +02:00
|
|
|
SYM_FUNC_END(_aesni_inc)
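/*
 * Note on the carry handling above: paddq does not update EFLAGS, so the
 * counter's low quadword is mirrored in the scalar TCTR_LOW register and
 * incremented with a plain add, which does set the carry flag.  When the
 * low quadword wraps (e.g. 0xffffffffffffffff + 1 -> 0), INC is shifted
 * into the high quadword (pslldq $8), added to CTR to propagate the
 * carry, and shifted back (psrldq $8).
 */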

/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 */
SYM_FUNC_START(aesni_ctr_enc)
	ANNOTATE_NOENDBR
	FRAME_BEGIN
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
	.align 4
.Lctr_enc_loop4:
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jge .Lctr_enc_loop4
	cmp $16, LEN
	jb .Lctr_enc_ret
	.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jge .Lctr_enc_loop1
.Lctr_enc_ret:
	movups IV, (IVP)
.Lctr_enc_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(aesni_ctr_enc)
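/*
 * Structure of aesni_ctr_enc, as implemented above: the main loop
 * encrypts four counter blocks at a time with _aesni_enc4 and XORs them
 * into 64 bytes of data; the single-block loop then handles the remaining
 * whole blocks.  Only multiples of 16 bytes are processed here; if fewer
 * than 16 bytes remain they are left untouched, and the updated
 * (big-endian) counter is written back through IVP so the caller can
 * continue the stream.
 */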

#endif

.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
	.octa 0x00000000000000010000000000000087
.previous
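/*
 * The 16-byte mask above encodes the two values needed when doubling an
 * XTS tweak in GF(2^128): the low quadword 0x87 is the reduction constant
 * of the polynomial x^128 + x^7 + x^2 + x + 1, and the high quadword 0x1
 * is XORed into bit 64 to carry a bit from the low quadword into the high
 * one, since paddq cannot carry across quadwords.
 */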

/*
 * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	KEY:	== temporary value
 */
.macro _aesni_gf128mul_x_ble
	pshufd $0x13, IV, KEY
	paddq IV, IV
	psrad $31, KEY
	pand GF128MUL_MASK, KEY
	pxor KEY, IV
.endm
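/*
 * How the branchless doubling above works: pshufd $0x13 copies the dwords
 * holding IV's bit 127 and bit 63 into the lanes that line up with the
 * 0x87 and 0x01 parts of GF128MUL_MASK, psrad $31 turns each copied sign
 * bit into an all-ones or all-zeroes dword, and pand keeps 0x87 only if
 * bit 127 was set and 0x01 only if bit 63 was set.  paddq shifts each
 * quadword of IV left by one, and the final pxor folds in the reduction
 * constant and the inter-quadword carry.  Worked example:
 * 0x80000000000000000000000000000001 doubles to
 * 0x00000000000000000000000000000085, i.e. (1 << 1) ^ 0x87.
 */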

.macro _aesni_xts_crypt	enc
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN
.if !\enc
	add $240, KEYP

	test $15, LEN
	jz .Lxts_loop4\@
	sub $16, LEN
.endif
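	/*
	 * Decryption uses the inverse key schedule, which in struct
	 * crypto_aes_ctx sits 240 bytes after the encryption schedule that
	 * KEYP points to on entry; key_length itself is loaded from offset
	 * 480 above.  When the length is not a multiple of 16 bytes, one
	 * full block is also held back here so the ciphertext-stealing
	 * tail below can process it together with the final partial block.
	 */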

.Lxts_loop4\@:
	sub $64, LEN
	jl .Lxts_1x\@

	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

.if \enc
	call _aesni_enc4
.else
	call _aesni_dec4
.endif

	movdqu 0x00(OUTP), IN
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_loop4\@

.Lxts_ret_iv\@:
	movups IV, (IVP)

.Lxts_ret\@:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET

.Lxts_1x\@:
	add $64, LEN
	jz .Lxts_ret_iv\@
.if \enc
	sub $16, LEN
	jl .Lxts_cts4\@
.endif

.Lxts_loop1\@:
	movdqu (INP), STATE
.if \enc
	pxor IV, STATE
	call _aesni_enc1
.else
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
	pxor IV, STATE
	call _aesni_dec1
.endif
	pxor IV, STATE
	_aesni_gf128mul_x_ble

	test LEN, LEN
	jz .Lxts_out\@

.if \enc
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
.endif

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_loop1\@

.Lxts_out\@:
	movdqu STATE, (OUTP)
	jmp .Lxts_ret_iv\@

.if \enc
.Lxts_cts4\@:
	movdqa STATE4, STATE
	sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
	movdqa IV, STATE4
	_aesni_gf128mul_x_ble

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
.endif
#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP		/* rewind input pointer */
	add $16, LEN		/* # bytes in final block */
	movups (INP), IN1

	mov T1, IVP
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)

	movups (IVP), %xmm0
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE

.if \enc
	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE
.else
	pxor STATE4, STATE
	call _aesni_dec1
	pxor STATE4, STATE
.endif

	movups STATE, (OUTP)
	jmp .Lxts_ret\@
.endm
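/*
 * Tail handling in _aesni_xts_crypt: lengths that are not a multiple of
 * 16 bytes are finished with ciphertext stealing rather than padding.
 * The last full block and the trailing partial block are combined using
 * byte shuffles driven by .Lcts_permute_table plus pblendvb, and the
 * recombined block is run through _aesni_enc1/_aesni_dec1 one more time,
 * so the output is exactly as long as the input.
 */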

/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
SYM_FUNC_START(aesni_xts_enc)
	_aesni_xts_crypt 1
SYM_FUNC_END(aesni_xts_enc)

/*
 * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
SYM_FUNC_START(aesni_xts_dec)
	_aesni_xts_crypt 0
SYM_FUNC_END(aesni_xts_dec)
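/*
 * Both entry points above are thin wrappers: the \enc argument of
 * _aesni_xts_crypt selects _aesni_enc1/_aesni_enc4 versus
 * _aesni_dec1/_aesni_dec4 and, for decryption, the switch to the inverse
 * key schedule at the top of the macro.
 */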