/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Cryptographic scatter and gather helpers.
 *
 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
 * Copyright (c) 2002 Adam J. Richter <adam@yggdrasil.com>
 * Copyright (c) 2004 Jean-Luc Cooke <jlcooke@certainkey.com>
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#ifndef _CRYPTO_SCATTERWALK_H
#define _CRYPTO_SCATTERWALK_H

#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/types.h>

struct scatter_walk {
        /* Must be the first member, see struct skcipher_walk. */
        union {
                void *const addr;

                /* Private API field, do not touch. */
                union crypto_no_such_thing *__addr;
        };
        struct scatterlist *sg;
        unsigned int offset;
};

struct skcipher_walk {
        union {
                /* Virtual address of the source. */
                struct {
                        struct {
                                const void *const addr;
                        } virt;
                } src;

                /* Private field for the API, do not use. */
                struct scatter_walk in;
        };

        union {
                /* Virtual address of the destination. */
                struct {
                        struct {
                                void *const addr;
                        } virt;
                } dst;

                /* Private field for the API, do not use. */
                struct scatter_walk out;
        };

        unsigned int nbytes;
        unsigned int total;

        u8 *page;
        u8 *buffer;
        u8 *oiv;
        void *iv;

        unsigned int ivsize;

        int flags;
        unsigned int blocksize;
        unsigned int stride;
        unsigned int alignmask;
};

static inline void scatterwalk_crypto_chain(struct scatterlist *head,
                                            struct scatterlist *sg, int num)
{
        if (sg)
                sg_chain(head, num, sg);
        else
                sg_mark_end(head);
}

static inline void scatterwalk_start(struct scatter_walk *walk,
                                     struct scatterlist *sg)
{
        walk->sg = sg;
        walk->offset = sg->offset;
}

/*
 * This is equivalent to scatterwalk_start(walk, sg) followed by
 * scatterwalk_skip(walk, pos).
 */
static inline void scatterwalk_start_at_pos(struct scatter_walk *walk,
                                            struct scatterlist *sg,
                                            unsigned int pos)
{
        while (pos > sg->length) {
                pos -= sg->length;
                sg = sg_next(sg);
        }
        walk->sg = sg;
        walk->offset = sg->offset + pos;
}

static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk,
                                             unsigned int nbytes)
{
        unsigned int len_this_sg;
        unsigned int limit;

        if (walk->offset >= walk->sg->offset + walk->sg->length)
                scatterwalk_start(walk, sg_next(walk->sg));
        len_this_sg = walk->sg->offset + walk->sg->length - walk->offset;

        /*
         * HIGHMEM case: the page may have to be mapped into memory.  To avoid
         * the complexity of having to map multiple pages at once per sg entry,
         * clamp the returned length to not cross a page boundary.
         *
         * !HIGHMEM case: no mapping is needed; all pages of the sg entry are
         * already mapped contiguously in the kernel's direct map.  For improved
         * performance, allow the walker to return data segments that cross a
         * page boundary.  Do still cap the length to PAGE_SIZE, since some
         * users rely on that to avoid disabling preemption for too long when
         * using SIMD.  It's also needed for when skcipher_walk uses a bounce
         * page due to the data not being aligned to the algorithm's alignmask.
         */
        if (IS_ENABLED(CONFIG_HIGHMEM))
                limit = PAGE_SIZE - offset_in_page(walk->offset);
        else
                limit = PAGE_SIZE;

        return min3(nbytes, len_this_sg, limit);
}

/*
 * Create a scatterlist that represents the remaining data in a walk.  Uses
 * chaining to reference the original scatterlist, so this uses at most two
 * entries in @sg_out regardless of the number of entries in the original list.
 * Assumes that sg_init_table() was already done.
 */
static inline void scatterwalk_get_sglist(struct scatter_walk *walk,
                                          struct scatterlist sg_out[2])
{
        if (walk->offset >= walk->sg->offset + walk->sg->length)
                scatterwalk_start(walk, sg_next(walk->sg));
        sg_set_page(sg_out, sg_page(walk->sg),
                    walk->sg->offset + walk->sg->length - walk->offset,
                    walk->offset);
        scatterwalk_crypto_chain(sg_out, sg_next(walk->sg), 2);
}

static inline void scatterwalk_map(struct scatter_walk *walk)
{
        struct page *base_page = sg_page(walk->sg);
        unsigned int offset = walk->offset;
        void *addr;

        if (IS_ENABLED(CONFIG_HIGHMEM)) {
                struct page *page;

                page = nth_page(base_page, offset >> PAGE_SHIFT);
                offset = offset_in_page(offset);
                addr = kmap_local_page(page) + offset;
        } else {
                /*
                 * When !HIGHMEM we allow the walker to return segments that
                 * span a page boundary; see scatterwalk_clamp().  To make it
                 * clear that in this case we're working in the linear buffer of
                 * the whole sg entry in the kernel's direct map rather than
                 * within the mapped buffer of a single page, compute the
                 * address as an offset from the page_address() of the first
                 * page of the sg entry.  Either way the result is the address
                 * in the direct map, but this makes it clearer what is really
                 * going on.
                 */
                addr = page_address(base_page) + offset;
        }

        walk->__addr = addr;
}

/**
 * scatterwalk_next() - Get the next data buffer in a scatterlist walk
 * @walk: the scatter_walk
 * @total: the total number of bytes remaining, > 0
 *
 * A virtual address for the next segment of data from the scatterlist will
 * be placed into @walk->addr.  The caller must call scatterwalk_done_src()
 * or scatterwalk_done_dst() when it is done using this virtual address.
 *
 * Returns: the next number of bytes available, <= @total
 */
static inline unsigned int scatterwalk_next(struct scatter_walk *walk,
                                            unsigned int total)
{
        unsigned int nbytes = scatterwalk_clamp(walk, total);

        scatterwalk_map(walk);
        return nbytes;
}
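
/*
 * Example (illustrative sketch, not taken from any in-tree user): one step of
 * a source walk.  'walk' is assumed to have been started with
 * scatterwalk_start(); 'buf' and 'remaining' are hypothetical caller-owned
 * values.
 *
 *      unsigned int n = scatterwalk_next(walk, remaining);
 *
 *      memcpy(buf, walk->addr, n);
 *      scatterwalk_done_src(walk, n);
 *      remaining -= n;
 *
 * For a destination walk, write to walk->addr instead and finish the step
 * with scatterwalk_done_dst(walk, n).
 */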

static inline void scatterwalk_unmap(struct scatter_walk *walk)
{
        if (IS_ENABLED(CONFIG_HIGHMEM))
                kunmap_local(walk->__addr);
}

static inline void scatterwalk_advance(struct scatter_walk *walk,
                                       unsigned int nbytes)
{
        walk->offset += nbytes;
}

/**
 * scatterwalk_done_src() - Finish one step of a walk of source scatterlist
 * @walk: the scatter_walk
 * @nbytes: the number of bytes processed this step, less than or equal to the
 *          number of bytes that scatterwalk_next() returned.
 *
 * Use this if the mapped address was not written to, i.e. it is source data.
 */
static inline void scatterwalk_done_src(struct scatter_walk *walk,
                                        unsigned int nbytes)
{
        scatterwalk_unmap(walk);
        scatterwalk_advance(walk, nbytes);
}

/**
 * scatterwalk_done_dst() - Finish one step of a walk of destination scatterlist
 * @walk: the scatter_walk
 * @nbytes: the number of bytes processed this step, less than or equal to the
 *          number of bytes that scatterwalk_next() returned.
 *
 * Use this if the mapped address may have been written to, i.e. it is
 * destination data.
 */
static inline void scatterwalk_done_dst(struct scatter_walk *walk,
                                        unsigned int nbytes)
{
        scatterwalk_unmap(walk);
        /*
         * Explicitly check ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE instead of just
         * relying on flush_dcache_page() being a no-op when not implemented,
         * since otherwise the BUG_ON in sg_page() does not get optimized out.
         * This also avoids having to consider whether the loop below would get
         * reliably optimized out or not.
         */
        if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) {
                struct page *base_page;
                unsigned int offset;
                int start, end, i;

                base_page = sg_page(walk->sg);
                offset = walk->offset;
                start = offset >> PAGE_SHIFT;
                end = start + (nbytes >> PAGE_SHIFT);
                end += (offset_in_page(offset) + offset_in_page(nbytes) +
                        PAGE_SIZE - 1) >> PAGE_SHIFT;
                for (i = start; i < end; i++)
                        flush_dcache_page(nth_page(base_page, i));
        }
        scatterwalk_advance(walk, nbytes);
}
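
/*
 * Example (illustrative sketch only; memcpy_to_scatterwalk() declared below
 * provides this functionality as a helper): copying a linear buffer into the
 * data described by a scatter_walk.  'walk' is assumed to have been set up
 * with scatterwalk_start(); 'const u8 *buf' and 'nbytes' are hypothetical
 * caller values.
 *
 *      while (nbytes) {
 *              unsigned int n = scatterwalk_next(walk, nbytes);
 *
 *              memcpy(walk->addr, buf, n);
 *              scatterwalk_done_dst(walk, n);
 *              buf += n;
 *              nbytes -= n;
 *      }
 */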

void scatterwalk_skip(struct scatter_walk *walk, unsigned int nbytes);

void memcpy_from_scatterwalk(void *buf, struct scatter_walk *walk,
                             unsigned int nbytes);

void memcpy_to_scatterwalk(struct scatter_walk *walk, const void *buf,
                           unsigned int nbytes);

void memcpy_from_sglist(void *buf, struct scatterlist *sg,
                        unsigned int start, unsigned int nbytes);

void memcpy_to_sglist(struct scatterlist *sg, unsigned int start,
                      const void *buf, unsigned int nbytes);

void memcpy_sglist(struct scatterlist *dst, struct scatterlist *src,
                   unsigned int nbytes);

/* In new code, please use memcpy_{from,to}_sglist() directly instead. */
static inline void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
                                            unsigned int start,
                                            unsigned int nbytes, int out)
{
        if (out)
                memcpy_to_sglist(sg, start, buf, nbytes);
        else
                memcpy_from_sglist(buf, sg, start, nbytes);
}
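
/*
 * Example (illustrative sketch): copying a 16-byte tag out of a scatterlist
 * 'sg' at a hypothetical byte offset 'pos', using the preferred direct helper
 * rather than the legacy wrapper above.
 *
 *      u8 tag[16];
 *
 *      memcpy_from_sglist(tag, sg, pos, sizeof(tag));
 *
 * The equivalent legacy call would be
 * scatterwalk_map_and_copy(tag, sg, pos, sizeof(tag), 0).
 */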

struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
                                     struct scatterlist *src,
                                     unsigned int len);

int skcipher_walk_first(struct skcipher_walk *walk, bool atomic);
int skcipher_walk_done(struct skcipher_walk *walk, int res);

static inline void skcipher_walk_abort(struct skcipher_walk *walk)
{
        skcipher_walk_done(walk, -ECANCELED);
}
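
/*
 * Example (illustrative sketch): the typical consumer loop for a struct
 * skcipher_walk.  It assumes the walk has already been started by an
 * initialization helper such as skcipher_walk_virt() (declared in the
 * internal skcipher header); 'encrypt_blocks' is a hypothetical cipher
 * routine that processes a multiple of the block size, and 'ctx' is its
 * hypothetical key context.
 *
 *      while (walk.nbytes) {
 *              unsigned int n = walk.nbytes;
 *
 *              if (n < walk.total)
 *                      n = round_down(n, walk.blocksize);
 *
 *              encrypt_blocks(ctx, walk.dst.virt.addr, walk.src.virt.addr, n);
 *              err = skcipher_walk_done(&walk, walk.nbytes - n);
 *      }
 */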

#endif  /* _CRYPTO_SCATTERWALK_H */