crypto: tegra - Fix HASH intermediate result handling

The intermediate hash values generated during an update task were
handled incorrectly in the driver. The values have a defined format for
each algorithm. Blindly copying them out of and back into the
HASH_RESULT registers therefore does not work for all the supported
algorithms. This incorrect handling causes failures when there is a
context switch between multiple operations.
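
As an illustration (a sketch, not code from this patch; request setup,
error handling and async completion are elided), the kind of sequence
that goes wrong when two requests share the engine:

	#include <crypto/hash.h>

	/* 'a' and 'b' are two in-flight requests on the same SE hash engine */
	static void interleaved_hashes(struct ahash_request *a, struct ahash_request *b)
	{
		crypto_ahash_update(a);	/* a's intermediate state lands in HASH_RESULT */
		crypto_ahash_update(b);	/* HASH_RESULT now holds b's intermediate state */
		crypto_ahash_final(a);	/* must resume from a's saved state, not the registers */
	}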

To handle the expected format correctly, add a separate buffer for
storing the intermediate results for each request. Remove the previous
copy/paste functions, which read/wrote the registers directly. Instead,
configure the hardware to copy the intermediate result to the buffer
and use the host1x path to restore the intermediate hash results.
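
For example, restoring a saved SHA-384/512 intermediate result involves
both swapping each pair of 32-bit words and converting from big-endian
storage. A simplified sketch of that fix-up (hypothetical helper,
SHA-1/SHA-2 family only; kernel-style types assumed):

	#include <linux/types.h>

	/* Convert a saved intermediate result into HASH_RESULT register values */
	static void replay_intermediate(const void *saved, u32 regs[16], bool sha384_512)
	{
		const __be32 *res = saved;	/* SHA-1/2 words are stored big-endian */
		int j;

		for (j = 0; j < 16; j++) {	/* HASH_RESULT_0 .. HASH_RESULT_15 */
			int idx = j;

			/* SHA-384/512 keep each pair of 32-bit words swapped */
			if (sha384_512)
				idx = (j % 2) ? j - 1 : j + 1;

			regs[j] = be32_to_cpu(res[idx]);
		}
	}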

Fixes: 0880bb3b00 ("crypto: tegra - Add Tegra Security Engine driver")
Signed-off-by: Akhil R <akhilrajeev@nvidia.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Author:    Akhil R <akhilrajeev@nvidia.com>, 2025-02-24 14:46:06 +05:30
Committer: Herbert Xu
Commit:    ff4b7df0b5 (parent 97ee15ea10)

2 changed files with 98 additions and 52 deletions

--- a/drivers/crypto/tegra/tegra-se-hash.c
+++ b/drivers/crypto/tegra/tegra-se-hash.c

@@ -34,6 +34,7 @@ struct tegra_sha_reqctx {
 	struct tegra_se_datbuf datbuf;
 	struct tegra_se_datbuf residue;
 	struct tegra_se_datbuf digest;
+	struct tegra_se_datbuf intr_res;
 	unsigned int alg;
 	unsigned int config;
 	unsigned int total_len;
@@ -211,9 +212,62 @@ static int tegra_sha_fallback_export(struct ahash_request *req, void *out)
 	return crypto_ahash_export(&rctx->fallback_req, out);
 }
 
-static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr,
+static int tegra_se_insert_hash_result(struct tegra_sha_ctx *ctx, u32 *cpuvaddr,
+				       struct tegra_sha_reqctx *rctx)
+{
+	__be32 *res_be = (__be32 *)rctx->intr_res.buf;
+	u32 *res = (u32 *)rctx->intr_res.buf;
+	int i = 0, j;
+
+	cpuvaddr[i++] = 0;
+	cpuvaddr[i++] = host1x_opcode_setpayload(HASH_RESULT_REG_COUNT);
+	cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_HASH_RESULT);
+
+	for (j = 0; j < HASH_RESULT_REG_COUNT; j++) {
+		int idx = j;
+
+		/*
+		 * The initial, intermediate and final hash value of SHA-384, SHA-512
+		 * in SHA_HASH_RESULT registers follow the below layout of bytes.
+		 *
+		 * +---------------+------------+
+		 * | HASH_RESULT_0 | B4...B7    |
+		 * +---------------+------------+
+		 * | HASH_RESULT_1 | B0...B3    |
+		 * +---------------+------------+
+		 * | HASH_RESULT_2 | B12...B15  |
+		 * +---------------+------------+
+		 * | HASH_RESULT_3 | B8...B11   |
+		 * +---------------+------------+
+		 * |          ......            |
+		 * +---------------+------------+
+		 * | HASH_RESULT_14| B60...B63  |
+		 * +---------------+------------+
+		 * | HASH_RESULT_15| B56...B59  |
+		 * +---------------+------------+
+		 */
+		if (ctx->alg == SE_ALG_SHA384 || ctx->alg == SE_ALG_SHA512)
+			idx = (j % 2) ? j - 1 : j + 1;
+
+		/* For SHA-1, SHA-224, SHA-256, SHA-384, SHA-512 the initial
+		 * intermediate and final hash value when stored in
+		 * SHA_HASH_RESULT registers, the byte order is NOT in
+		 * little-endian.
+		 */
+		if (ctx->alg <= SE_ALG_SHA512)
+			cpuvaddr[i++] = be32_to_cpu(res_be[idx]);
+		else
+			cpuvaddr[i++] = res[idx];
+	}
+
+	return i;
+}
+
+static int tegra_sha_prep_cmd(struct tegra_sha_ctx *ctx, u32 *cpuvaddr,
 			      struct tegra_sha_reqctx *rctx)
 {
+	struct tegra_se *se = ctx->se;
 	u64 msg_len, msg_left;
 	int i = 0;
@@ -241,7 +295,7 @@ static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr,
 	cpuvaddr[i++] = upper_32_bits(msg_left);
 	cpuvaddr[i++] = 0;
 	cpuvaddr[i++] = 0;
-	cpuvaddr[i++] = host1x_opcode_setpayload(6);
+	cpuvaddr[i++] = host1x_opcode_setpayload(2);
 	cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_CFG);
 	cpuvaddr[i++] = rctx->config;
@@ -249,15 +303,29 @@ static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr,
 		cpuvaddr[i++] = SE_SHA_TASK_HASH_INIT;
 		rctx->task &= ~SHA_FIRST;
 	} else {
-		cpuvaddr[i++] = 0;
+		/*
+		 * If it isn't the first task, program the HASH_RESULT register
+		 * with the intermediate result from the previous task
+		 */
+		i += tegra_se_insert_hash_result(ctx, cpuvaddr + i, rctx);
 	}
 
+	cpuvaddr[i++] = host1x_opcode_setpayload(4);
+	cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_IN_ADDR);
 	cpuvaddr[i++] = rctx->datbuf.addr;
 	cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) |
 			      SE_ADDR_HI_SZ(rctx->datbuf.size));
-	cpuvaddr[i++] = rctx->digest.addr;
-	cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) |
-			      SE_ADDR_HI_SZ(rctx->digest.size));
+
+	if (rctx->task & SHA_UPDATE) {
+		cpuvaddr[i++] = rctx->intr_res.addr;
+		cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->intr_res.addr)) |
+				      SE_ADDR_HI_SZ(rctx->intr_res.size));
+	} else {
+		cpuvaddr[i++] = rctx->digest.addr;
+		cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) |
+				      SE_ADDR_HI_SZ(rctx->digest.size));
+	}
 
 	if (rctx->key_id) {
 		cpuvaddr[i++] = host1x_opcode_setpayload(1);
 		cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_CRYPTO_CFG);
@@ -266,36 +334,18 @@ static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr,
 	cpuvaddr[i++] = host1x_opcode_setpayload(1);
 	cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_OPERATION);
-	cpuvaddr[i++] = SE_SHA_OP_WRSTALL |
-			SE_SHA_OP_START |
+	cpuvaddr[i++] = SE_SHA_OP_WRSTALL | SE_SHA_OP_START |
 			SE_SHA_OP_LASTBUF;
 	cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
 	cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
 			host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
 
-	dev_dbg(se->dev, "msg len %llu msg left %llu cfg %#x",
-		msg_len, msg_left, rctx->config);
+	dev_dbg(se->dev, "msg len %llu msg left %llu sz %lu cfg %#x",
+		msg_len, msg_left, rctx->datbuf.size, rctx->config);
 
 	return i;
 }
 
-static void tegra_sha_copy_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx)
-{
-	int i;
-
-	for (i = 0; i < HASH_RESULT_REG_COUNT; i++)
-		rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4));
-}
-
-static void tegra_sha_paste_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx)
-{
-	int i;
-
-	for (i = 0; i < HASH_RESULT_REG_COUNT; i++)
-		writel(rctx->result[i],
-		       se->base + se->hw->regs->result + (i * 4));
-}
-
 static int tegra_sha_do_init(struct ahash_request *req)
 {
 	struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
@@ -325,8 +375,17 @@ static int tegra_sha_do_init(struct ahash_request *req)
 	if (!rctx->residue.buf)
 		goto resbuf_fail;
 
+	rctx->intr_res.size = HASH_RESULT_REG_COUNT * 4;
+	rctx->intr_res.buf = dma_alloc_coherent(se->dev, rctx->intr_res.size,
+						&rctx->intr_res.addr, GFP_KERNEL);
+	if (!rctx->intr_res.buf)
+		goto intr_res_fail;
+
 	return 0;
 
+intr_res_fail:
+	dma_free_coherent(se->dev, rctx->residue.size, rctx->residue.buf,
+			  rctx->residue.addr);
 resbuf_fail:
 	dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf,
 			  rctx->digest.addr);
@@ -356,7 +415,6 @@ static int tegra_sha_do_update(struct ahash_request *req)
 	rctx->src_sg = req->src;
 	rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue;
-	rctx->total_len += rctx->datbuf.size;
 
 	/*
 	 * If nbytes are less than a block size, copy it residue and
@@ -365,12 +423,12 @@ static int tegra_sha_do_update(struct ahash_request *req)
 	if (nblks < 1) {
 		scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size,
 					 rctx->src_sg, 0, req->nbytes, 0);
 		rctx->residue.size += req->nbytes;
 
 		return 0;
 	}
 
-	rctx->datbuf.buf = dma_alloc_coherent(ctx->se->dev, rctx->datbuf.size,
+	rctx->datbuf.buf = dma_alloc_coherent(se->dev, rctx->datbuf.size,
 					      &rctx->datbuf.addr, GFP_KERNEL);
 	if (!rctx->datbuf.buf)
 		return -ENOMEM;
@@ -387,31 +445,15 @@ static int tegra_sha_do_update(struct ahash_request *req)
 	/* Update residue value with the residue after current block */
 	rctx->residue.size = nresidue;
+	rctx->total_len += rctx->datbuf.size;
 
 	rctx->config = tegra_sha_get_config(rctx->alg) |
-		       SE_SHA_DST_HASH_REG;
-
-	/*
-	 * If this is not the first 'update' call, paste the previous copied
-	 * intermediate results to the registers so that it gets picked up.
-	 * This is to support the import/export functionality.
-	 */
-	if (!(rctx->task & SHA_FIRST))
-		tegra_sha_paste_hash_result(se, rctx);
-
-	size = tegra_sha_prep_cmd(se, cpuvaddr, rctx);
+		       SE_SHA_DST_MEMORY;
 
+	size = tegra_sha_prep_cmd(ctx, cpuvaddr, rctx);
 	ret = tegra_se_host1x_submit(se, se->cmdbuf, size);
 
-	/*
-	 * If this is not the final update, copy the intermediate results
-	 * from the registers so that it can be used in the next 'update'
-	 * call. This is to support the import/export functionality.
-	 */
-	if (!(rctx->task & SHA_FINAL))
-		tegra_sha_copy_hash_result(se, rctx);
-
-	dma_free_coherent(ctx->se->dev, rctx->datbuf.size,
+	dma_free_coherent(se->dev, rctx->datbuf.size,
 			  rctx->datbuf.buf, rctx->datbuf.addr);
 
 	return ret;
@@ -443,8 +485,7 @@ static int tegra_sha_do_final(struct ahash_request *req)
 	rctx->config = tegra_sha_get_config(rctx->alg) |
 		       SE_SHA_DST_MEMORY;
 
-	size = tegra_sha_prep_cmd(se, cpuvaddr, rctx);
-
+	size = tegra_sha_prep_cmd(ctx, cpuvaddr, rctx);
 	ret = tegra_se_host1x_submit(se, se->cmdbuf, size);
 	if (ret)
 		goto out;
@@ -461,6 +502,10 @@ out_free:
 		rctx->residue.buf, rctx->residue.addr);
 	dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf,
 			  rctx->digest.addr);
+
+	dma_free_coherent(se->dev, rctx->intr_res.size, rctx->intr_res.buf,
+			  rctx->intr_res.addr);
+
 	return ret;
 }
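
Note the leading zero word emitted by tegra_se_insert_hash_result(): it
completes the two-word SE_SHA_CFG burst opened by the caller before the
HASH_RESULT burst begins. Schematically, the command stream for a
non-first task looks like this (condensed from the code above):

	/*
	 * setpayload(2)  incr_w(SE_SHA_CFG)
	 *	rctx->config		-- SE_SHA_CFG
	 *	0			-- SE_SHA_TASK_CONFIG (no HASH_INIT)
	 * setpayload(16) incr_w(SE_SHA_HASH_RESULT)
	 *	word0 .. word15		-- restored intermediate result
	 * setpayload(4)  incr_w(SE_SHA_IN_ADDR)
	 *	IN/OUT address pairs	-- OUT = intr_res (update) or digest (final)
	 */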

--- a/drivers/crypto/tegra/tegra-se.h
+++ b/drivers/crypto/tegra/tegra-se.h

@@ -24,6 +24,7 @@
 #define SE_STREAM_ID			0x90
 
 #define SE_SHA_CFG			0x4004
 #define SE_SHA_IN_ADDR			0x400c
 #define SE_SHA_KEY_ADDR			0x4094
 #define SE_SHA_KEY_DATA			0x4098
 #define SE_SHA_KEYMANIFEST		0x409c
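
Taken together, the hunks give each hash request the lifecycle below (a
schematic summary of the code above, not text from the patch):

	/*
	 * tegra_sha_do_init():
	 *	allocate rctx->intr_res (HASH_RESULT_REG_COUNT * 4 bytes, DMA coherent)
	 *
	 * tegra_sha_do_update(), first task (SHA_FIRST):
	 *	SE_SHA_TASK_HASH_INIT; hash with SE_SHA_DST_MEMORY, OUT = intr_res
	 *
	 * tegra_sha_do_update(), later tasks:
	 *	replay intr_res into SE_SHA_HASH_RESULT; OUT = intr_res again
	 *
	 * tegra_sha_do_final():
	 *	replay intr_res; hash with SE_SHA_DST_MEMORY, OUT = digest;
	 *	free intr_res
	 */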