author     Linus Torvalds <torvalds@linux-foundation.org>  2025-12-02 18:53:50 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-12-02 18:53:50 -0800
commit     f617d24606553159a271f43e36d1c71a4c317e48 (patch)
tree       f39957a87580dcc1835e2c1f0bfad5adeef93483
parent     906003e15160642658358153e7598302d1b38166 (diff)
parent     5dc8d277520be6f0be11f36712e557167b3964c8 (diff)
Merge tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull arm64 FPSIMD on-stack buffer updates from Eric Biggers:
 "This is a core arm64 change. However, I was asked to take this
  because most uses of kernel-mode FPSIMD are in crypto or CRC code.

  In v6.8, the size of task_struct on arm64 increased by 528 bytes due
  to the new 'kernel_fpsimd_state' field. This field was added to allow
  kernel-mode FPSIMD code to be preempted. Unfortunately, 528 bytes is
  kind of a lot for task_struct. This regression in the task_struct
  size was noticed and reported.

  Recover that space by making this state be allocated on the stack at
  the beginning of each kernel-mode FPSIMD section. To make it easier
  for all the users of kernel-mode FPSIMD to do that correctly,
  introduce and use a 'scoped_ksimd' abstraction"

* tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (23 commits)
  lib/crypto: arm64: Move remaining algorithms to scoped ksimd API
  lib/crypto: arm/blake2b: Move to scoped ksimd API
  arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
  arm64/fpu: Enforce task-context only for generic kernel mode FPU
  net/mlx5: Switch to more abstract scoped ksimd guard API on arm64
  arm64/xorblocks: Switch to 'ksimd' scoped guard API
  crypto/arm64: sm4 - Switch to 'ksimd' scoped guard API
  crypto/arm64: sm3 - Switch to 'ksimd' scoped guard API
  crypto/arm64: sha3 - Switch to 'ksimd' scoped guard API
  crypto/arm64: polyval - Switch to 'ksimd' scoped guard API
  crypto/arm64: nhpoly1305 - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-gcm - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-blk - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-ccm - Switch to 'ksimd' scoped guard API
  raid6: Move to more abstract 'ksimd' guard API
  crypto: aegis128-neon - Move to more abstract 'ksimd' guard API
  crypto/arm64: sm4-ce-gcm - Avoid pointless yield of the NEON unit
  crypto/arm64: sm4-ce-ccm - Avoid pointless yield of the NEON unit
  crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit
  lib/crc: Switch ARM and arm64 to 'ksimd' scoped guard API
  ...
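The conversion pattern applied throughout the diff below can be sketched as follows. This is only an illustration; do_simd_work() is a hypothetical stand-in for whatever NEON routine a given caller invokes, not a function from this series:

    /* Before: explicit begin/end pairing; the kernel-mode FPSIMD save
     * buffer lived permanently in task_struct (528 bytes per task).
     */
    kernel_neon_begin();
    do_simd_work(dst, src, len);
    kernel_neon_end();

    /* After: the scoped guard pairs begin/end automatically. On arm64,
     * scoped_ksimd() expands to
     * scoped_guard(ksimd, &(struct user_fpsimd_state){}), so the save
     * buffer is an anonymous on-stack object that exists only for the
     * duration of the guarded scope.
     */
    scoped_ksimd()
        do_simd_work(dst, src, len);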
-rw-r--r--  arch/arm/include/asm/simd.h                     7
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-glue.c             116
-rw-r--r--  arch/arm64/crypto/aes-ce-glue.c                 87
-rw-r--r--  arch/arm64/crypto/aes-glue.c                    139
-rw-r--r--  arch/arm64/crypto/aes-neonbs-glue.c             150
-rw-r--r--  arch/arm64/crypto/ghash-ce-glue.c               27
-rw-r--r--  arch/arm64/crypto/nhpoly1305-neon-glue.c        5
-rw-r--r--  arch/arm64/crypto/sm3-ce-glue.c                 15
-rw-r--r--  arch/arm64/crypto/sm3-neon-glue.c               16
-rw-r--r--  arch/arm64/crypto/sm4-ce-ccm-glue.c             49
-rw-r--r--  arch/arm64/crypto/sm4-ce-cipher-glue.c          10
-rw-r--r--  arch/arm64/crypto/sm4-ce-gcm-glue.c             62
-rw-r--r--  arch/arm64/crypto/sm4-ce-glue.c                 214
-rw-r--r--  arch/arm64/crypto/sm4-neon-glue.c               25
-rw-r--r--  arch/arm64/include/asm/fpu.h                    16
-rw-r--r--  arch/arm64/include/asm/neon.h                   4
-rw-r--r--  arch/arm64/include/asm/processor.h              7
-rw-r--r--  arch/arm64/include/asm/simd.h                   10
-rw-r--r--  arch/arm64/include/asm/xor.h                    22
-rw-r--r--  arch/arm64/kernel/fpsimd.c                      54
-rw-r--r--  crypto/aegis128-neon.c                          33
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/wc.c    19
-rw-r--r--  lib/crc/arm/crc-t10dif.h                        19
-rw-r--r--  lib/crc/arm/crc32.h                             11
-rw-r--r--  lib/crc/arm64/crc-t10dif.h                      19
-rw-r--r--  lib/crc/arm64/crc32.h                           16
-rw-r--r--  lib/crypto/arm/blake2b.h                        5
-rw-r--r--  lib/crypto/arm/chacha.h                         11
-rw-r--r--  lib/crypto/arm/curve25519.h                     5
-rw-r--r--  lib/crypto/arm/poly1305.h                       6
-rw-r--r--  lib/crypto/arm/sha1.h                           13
-rw-r--r--  lib/crypto/arm/sha256.h                         12
-rw-r--r--  lib/crypto/arm/sha512.h                         5
-rw-r--r--  lib/crypto/arm64/chacha.h                       11
-rw-r--r--  lib/crypto/arm64/poly1305.h                     6
-rw-r--r--  lib/crypto/arm64/polyval.h                      24
-rw-r--r--  lib/crypto/arm64/sha1.h                         7
-rw-r--r--  lib/crypto/arm64/sha256.h                       19
-rw-r--r--  lib/crypto/arm64/sha3.h                         13
-rw-r--r--  lib/crypto/arm64/sha512.h                       8
-rw-r--r--  lib/raid6/neon.c                                17
-rw-r--r--  lib/raid6/recov_neon.c                          15
42 files changed, 617 insertions, 712 deletions
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
index be08a8da046f92..8549fa8b7253bd 100644
--- a/arch/arm/include/asm/simd.h
+++ b/arch/arm/include/asm/simd.h
@@ -2,14 +2,21 @@
#ifndef _ASM_SIMD_H
#define _ASM_SIMD_H
+#include <linux/cleanup.h>
#include <linux/compiler_attributes.h>
#include <linux/preempt.h>
#include <linux/types.h>
+#include <asm/neon.h>
+
static __must_check inline bool may_use_simd(void)
{
return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq()
&& !irqs_disabled();
}
+DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+
+#define scoped_ksimd() scoped_guard(ksimd)
+
#endif /* _ASM_SIMD_H */
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 2d791d51891b46..c4fd648471f181 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -8,7 +8,6 @@
* Author: Ard Biesheuvel <ardb@kernel.org>
*/
-#include <asm/neon.h>
#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/scatterwalk.h>
@@ -16,6 +15,8 @@
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
+#include <asm/simd.h>
+
#include "aes-ce-setkey.h"
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
@@ -114,11 +115,8 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
in += adv;
abytes -= adv;
- if (unlikely(rem)) {
- kernel_neon_end();
- kernel_neon_begin();
+ if (unlikely(rem))
macp = 0;
- }
} else {
u32 l = min(AES_BLOCK_SIZE - macp, abytes);
@@ -187,40 +185,38 @@ static int ccm_encrypt(struct aead_request *req)
if (unlikely(err))
return err;
- kernel_neon_begin();
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *final_iv = NULL;
- do {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- u8 buf[AES_BLOCK_SIZE];
- u8 *final_iv = NULL;
-
- if (walk.nbytes == walk.total) {
- tail = 0;
- final_iv = orig_iv;
- }
-
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
- src, walk.nbytes);
+ if (walk.nbytes == walk.total) {
+ tail = 0;
+ final_iv = orig_iv;
+ }
- ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
- ctx->key_enc, num_rounds(ctx),
- mac, walk.iv, final_iv);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+ src, walk.nbytes);
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- memcpy(walk.dst.virt.addr, dst, walk.nbytes);
+ ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
+ ctx->key_enc, num_rounds(ctx),
+ mac, walk.iv, final_iv);
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- }
- } while (walk.nbytes);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr, dst, walk.nbytes);
- kernel_neon_end();
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, tail);
+ }
+ } while (walk.nbytes);
+ }
if (unlikely(err))
return err;
@@ -254,40 +250,38 @@ static int ccm_decrypt(struct aead_request *req)
if (unlikely(err))
return err;
- kernel_neon_begin();
-
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- do {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- u8 buf[AES_BLOCK_SIZE];
- u8 *final_iv = NULL;
-
- if (walk.nbytes == walk.total) {
- tail = 0;
- final_iv = orig_iv;
- }
+ do {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *final_iv = NULL;
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
- src, walk.nbytes);
+ if (walk.nbytes == walk.total) {
+ tail = 0;
+ final_iv = orig_iv;
+ }
- ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
- ctx->key_enc, num_rounds(ctx),
- mac, walk.iv, final_iv);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+ src, walk.nbytes);
- if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
- memcpy(walk.dst.virt.addr, dst, walk.nbytes);
+ ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
+ ctx->key_enc, num_rounds(ctx),
+ mac, walk.iv, final_iv);
- if (walk.nbytes) {
- err = skcipher_walk_done(&walk, tail);
- }
- } while (walk.nbytes);
+ if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr, dst, walk.nbytes);
- kernel_neon_end();
+ if (walk.nbytes) {
+ err = skcipher_walk_done(&walk, tail);
+ }
+ } while (walk.nbytes);
+ }
if (unlikely(err))
return err;
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index 00b8749013c5bf..a4dad370991df6 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -52,9 +52,8 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
- kernel_neon_begin();
- __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
- kernel_neon_end();
+ scoped_ksimd()
+ __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
@@ -66,9 +65,8 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
- kernel_neon_begin();
- __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
- kernel_neon_end();
+ scoped_ksimd()
+ __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
}
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -94,47 +92,48 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
for (i = 0; i < kwords; i++)
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
- kernel_neon_begin();
- for (i = 0; i < sizeof(rcon); i++) {
- u32 *rki = ctx->key_enc + (i * kwords);
- u32 *rko = rki + kwords;
-
- rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
- rko[1] = rko[0] ^ rki[1];
- rko[2] = rko[1] ^ rki[2];
- rko[3] = rko[2] ^ rki[3];
-
- if (key_len == AES_KEYSIZE_192) {
- if (i >= 7)
- break;
- rko[4] = rko[3] ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- } else if (key_len == AES_KEYSIZE_256) {
- if (i >= 6)
- break;
- rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
- rko[5] = rko[4] ^ rki[5];
- rko[6] = rko[5] ^ rki[6];
- rko[7] = rko[6] ^ rki[7];
+ scoped_ksimd() {
+ for (i = 0; i < sizeof(rcon); i++) {
+ u32 *rki = ctx->key_enc + (i * kwords);
+ u32 *rko = rki + kwords;
+
+ rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
+ rcon[i] ^ rki[0];
+ rko[1] = rko[0] ^ rki[1];
+ rko[2] = rko[1] ^ rki[2];
+ rko[3] = rko[2] ^ rki[3];
+
+ if (key_len == AES_KEYSIZE_192) {
+ if (i >= 7)
+ break;
+ rko[4] = rko[3] ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ } else if (key_len == AES_KEYSIZE_256) {
+ if (i >= 6)
+ break;
+ rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ rko[6] = rko[5] ^ rki[6];
+ rko[7] = rko[6] ^ rki[7];
+ }
}
- }
- /*
- * Generate the decryption keys for the Equivalent Inverse Cipher.
- * This involves reversing the order of the round keys, and applying
- * the Inverse Mix Columns transformation on all but the first and
- * the last one.
- */
- key_enc = (struct aes_block *)ctx->key_enc;
- key_dec = (struct aes_block *)ctx->key_dec;
- j = num_rounds(ctx);
-
- key_dec[0] = key_enc[j];
- for (i = 1, j--; j > 0; i++, j--)
- __aes_ce_invert(key_dec + i, key_enc + j);
- key_dec[i] = key_enc[0];
+ /*
+ * Generate the decryption keys for the Equivalent Inverse
+ * Cipher. This involves reversing the order of the round
+ * keys, and applying the Inverse Mix Columns transformation on
+ * all but the first and the last one.
+ */
+ key_enc = (struct aes_block *)ctx->key_enc;
+ key_dec = (struct aes_block *)ctx->key_dec;
+ j = num_rounds(ctx);
+
+ key_dec[0] = key_enc[j];
+ for (i = 1, j--; j > 0; i++, j--)
+ __aes_ce_invert(key_dec + i, key_enc + j);
+ key_dec[i] = key_enc[0];
+ }
- kernel_neon_end();
return 0;
}
EXPORT_SYMBOL(ce_aes_expandkey);
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 5e207ff34482f5..b087b900d2790b 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -5,8 +5,6 @@
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/hash.h>
@@ -20,6 +18,9 @@
#include <linux/module.h>
#include <linux/string.h>
+#include <asm/hwcap.h>
+#include <asm/simd.h>
+
#include "aes-ce-setkey.h"
#ifdef USE_V8_CRYPTO_EXTENSIONS
@@ -186,10 +187,9 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -206,10 +206,9 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -224,10 +223,9 @@ static int cbc_encrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
- ctx->key_enc, rounds, blocks, walk->iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
+ ctx->key_enc, rounds, blocks, walk->iv);
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -253,10 +251,9 @@ static int cbc_decrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
- kernel_neon_begin();
- aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
- ctx->key_dec, rounds, blocks, walk->iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
+ ctx->key_dec, rounds, blocks, walk->iv);
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -322,10 +319,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, rounds, walk.nbytes, walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, walk.nbytes, walk.iv);
return skcipher_walk_done(&walk, 0);
}
@@ -379,10 +375,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, rounds, walk.nbytes, walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, walk.nbytes, walk.iv);
return skcipher_walk_done(&walk, 0);
}
@@ -399,11 +394,11 @@ static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
- kernel_neon_begin();
- aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, blocks,
- req->iv, ctx->key2.key_enc);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_essiv_cbc_encrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, blocks,
+ req->iv, ctx->key2.key_enc);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_encrypt_walk(req, &walk);
@@ -421,11 +416,11 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
- kernel_neon_begin();
- aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, blocks,
- req->iv, ctx->key2.key_enc);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_essiv_cbc_decrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, blocks,
+ req->iv, ctx->key2.key_enc);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_decrypt_walk(req, &walk);
@@ -461,10 +456,9 @@ static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
- walk.iv, byte_ctr);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+ walk.iv, byte_ctr);
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@@ -506,10 +500,9 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
+ walk.iv);
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@@ -562,11 +555,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, nbytes,
+ ctx->key2.key_enc, walk.iv, first);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -584,11 +576,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (err)
return err;
- kernel_neon_begin();
- aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, walk.nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
return skcipher_walk_done(&walk, 0);
}
@@ -634,11 +625,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
- kernel_neon_begin();
- aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, nbytes,
+ ctx->key2.key_enc, walk.iv, first);
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -657,11 +647,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
return err;
- kernel_neon_begin();
- aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, walk.nbytes,
- ctx->key2.key_enc, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
return skcipher_walk_done(&walk, 0);
}
@@ -808,10 +797,9 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
return err;
/* encrypt the zero vector */
- kernel_neon_begin();
- aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
- rounds, 1);
- kernel_neon_end();
+ scoped_ksimd()
+ aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){},
+ ctx->key.key_enc, rounds, 1);
cmac_gf128_mul_by_x(consts, consts);
cmac_gf128_mul_by_x(consts + 1, consts);
@@ -837,10 +825,10 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
if (err)
return err;
- kernel_neon_begin();
- aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
- aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
- kernel_neon_end();
+ scoped_ksimd() {
+ aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
+ aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
+ }
return cbcmac_setkey(tfm, key, sizeof(key));
}
@@ -860,10 +848,9 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
int rem;
do {
- kernel_neon_begin();
- rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
- dg, enc_before, !enc_before);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
+ dg, enc_before, !enc_before);
in += (blocks - rem) * AES_BLOCK_SIZE;
blocks = rem;
} while (blocks);
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index c4a623e865934b..d496effb0a5b77 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -85,9 +85,8 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
ctx->rounds = 6 + key_len / 4;
- kernel_neon_begin();
- aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
return 0;
}
@@ -110,10 +109,9 @@ static int __ecb_crypt(struct skcipher_request *req,
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
- kernel_neon_begin();
- fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
- ctx->rounds, blocks);
- kernel_neon_end();
+ scoped_ksimd()
+ fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
+ ctx->rounds, blocks);
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@@ -146,9 +144,8 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
- kernel_neon_begin();
- aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
memzero_explicit(&rk, sizeof(rk));
return 0;
@@ -167,11 +164,11 @@ static int cbc_encrypt(struct skcipher_request *req)
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
/* fall back to the non-bitsliced NEON implementation */
- kernel_neon_begin();
- neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->enc, ctx->key.rounds, blocks,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ neon_aes_cbc_encrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ ctx->enc, ctx->key.rounds, blocks,
+ walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@@ -193,11 +190,10 @@ static int cbc_decrypt(struct skcipher_request *req)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
- kernel_neon_begin();
- aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key.rk, ctx->key.rounds, blocks,
- walk.iv);
- kernel_neon_end();
+ scoped_ksimd()
+ aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@@ -220,30 +216,32 @@ static int ctr_encrypt(struct skcipher_request *req)
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- kernel_neon_begin();
- if (blocks >= 8) {
- aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
- blocks, walk.iv);
- dst += blocks * AES_BLOCK_SIZE;
- src += blocks * AES_BLOCK_SIZE;
- }
- if (nbytes && walk.nbytes == walk.total) {
- u8 buf[AES_BLOCK_SIZE];
- u8 *d = dst;
-
- if (unlikely(nbytes < AES_BLOCK_SIZE))
- src = dst = memcpy(buf + sizeof(buf) - nbytes,
- src, nbytes);
-
- neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
- nbytes, walk.iv);
+ scoped_ksimd() {
+ if (blocks >= 8) {
+ aesbs_ctr_encrypt(dst, src, ctx->key.rk,
+ ctx->key.rounds, blocks,
+ walk.iv);
+ dst += blocks * AES_BLOCK_SIZE;
+ src += blocks * AES_BLOCK_SIZE;
+ }
+ if (nbytes && walk.nbytes == walk.total) {
+ u8 buf[AES_BLOCK_SIZE];
+ u8 *d = dst;
+
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf + sizeof(buf) -
+ nbytes, src, nbytes);
+
+ neon_aes_ctr_encrypt(dst, src, ctx->enc,
+ ctx->key.rounds, nbytes,
+ walk.iv);
- if (unlikely(nbytes < AES_BLOCK_SIZE))
- memcpy(d, dst, nbytes);
+ if (unlikely(nbytes < AES_BLOCK_SIZE))
+ memcpy(d, dst, nbytes);
- nbytes = 0;
+ nbytes = 0;
+ }
}
- kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
@@ -320,33 +318,33 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
- kernel_neon_begin();
- if (blocks >= 8) {
- if (first == 1)
- neon_aes_ecb_encrypt(walk.iv, walk.iv,
- ctx->twkey,
- ctx->key.rounds, 1);
- first = 2;
-
- fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
- walk.iv);
-
- out += blocks * AES_BLOCK_SIZE;
- in += blocks * AES_BLOCK_SIZE;
- nbytes -= blocks * AES_BLOCK_SIZE;
+ scoped_ksimd() {
+ if (blocks >= 8) {
+ if (first == 1)
+ neon_aes_ecb_encrypt(walk.iv, walk.iv,
+ ctx->twkey,
+ ctx->key.rounds, 1);
+ first = 2;
+
+ fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
+
+ out += blocks * AES_BLOCK_SIZE;
+ in += blocks * AES_BLOCK_SIZE;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ }
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+ ctx->key.rounds, nbytes,
+ ctx->twkey, walk.iv, first);
+ nbytes = first = 0;
+ }
}
- if (walk.nbytes == walk.total && nbytes > 0) {
- if (encrypt)
- neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
- ctx->key.rounds, nbytes,
- ctx->twkey, walk.iv, first);
- else
- neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
- ctx->key.rounds, nbytes,
- ctx->twkey, walk.iv, first);
- nbytes = first = 0;
- }
- kernel_neon_end();
err = skcipher_walk_done(&walk, nbytes);
}
@@ -369,14 +367,16 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
- kernel_neon_begin();
- if (encrypt)
- neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first);
- else
- neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
- nbytes, ctx->twkey, walk.iv, first);
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
+ ctx->key.rounds, nbytes, ctx->twkey,
+ walk.iv, first);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
+ ctx->key.rounds, nbytes, ctx->twkey,
+ walk.iv, first);
+ }
return skcipher_walk_done(&walk, 0);
}
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 4995b6e2233501..7951557a285a9d 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -5,7 +5,6 @@
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
@@ -22,6 +21,8 @@
#include <linux/string.h>
#include <linux/unaligned.h>
+#include <asm/simd.h>
+
MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -74,9 +75,8 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
u64 const h[][2],
const char *head))
{
- kernel_neon_begin();
- simd_update(blocks, dg, src, key->h, head);
- kernel_neon_end();
+ scoped_ksimd()
+ simd_update(blocks, dg, src, key->h, head);
}
/* avoid hogging the CPU for too long */
@@ -329,11 +329,10 @@ static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
- kernel_neon_begin();
- pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
- dg, iv, ctx->aes_key.key_enc, nrounds,
- tag);
- kernel_neon_end();
+ scoped_ksimd()
+ pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc, nrounds,
+ tag);
if (unlikely(!nbytes))
break;
@@ -399,11 +398,11 @@ static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
- kernel_neon_begin();
- ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
- dg, iv, ctx->aes_key.key_enc,
- nrounds, tag, otag, authsize);
- kernel_neon_end();
+ scoped_ksimd()
+ ret = pmull_gcm_decrypt(nbytes, dst, src,
+ ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc,
+ nrounds, tag, otag, authsize);
if (unlikely(!nbytes))
break;
diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c
index e4a0b463f080e0..013de6ac569a14 100644
--- a/arch/arm64/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -25,9 +25,8 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
do {
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
- kernel_neon_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
- kernel_neon_end();
+ scoped_ksimd()
+ crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
src += n;
srclen -= n;
} while (srclen);
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index eac6f5fa0abe3c..24c1fcfae0721e 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -5,7 +5,6 @@
* Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@@ -13,6 +12,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <asm/simd.h>
+
MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -25,18 +26,18 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
{
int remain;
- kernel_neon_begin();
- remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
- kernel_neon_end();
+ scoped_ksimd() {
+ remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
+ }
return remain;
}
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- kernel_neon_begin();
- sm3_base_do_finup(desc, data, len, sm3_ce_transform);
- kernel_neon_end();
+ scoped_ksimd() {
+ sm3_base_do_finup(desc, data, len, sm3_ce_transform);
+ }
return sm3_base_finish(desc, out);
}
diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c
index 6c4611a503a345..15f30cc24f32b3 100644
--- a/arch/arm64/crypto/sm3-neon-glue.c
+++ b/arch/arm64/crypto/sm3-neon-glue.c
@@ -5,7 +5,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@@ -20,20 +20,16 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- int remain;
-
- kernel_neon_begin();
- remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform);
- kernel_neon_end();
- return remain;
+ scoped_ksimd()
+ return sm3_base_do_update_blocks(desc, data, len,
+ sm3_neon_transform);
}
static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- kernel_neon_begin();
- sm3_base_do_finup(desc, data, len, sm3_neon_transform);
- kernel_neon_end();
+ scoped_ksimd()
+ sm3_base_do_finup(desc, data, len, sm3_neon_transform);
return sm3_base_finish(desc, out);
}
diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c
index e9cc1c1364ec62..332f02167a96f9 100644
--- a/arch/arm64/crypto/sm4-ce-ccm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c
@@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
@@ -35,10 +35,9 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -167,39 +166,23 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
crypto_inc(walk->iv, SM4_BLOCK_SIZE);
- kernel_neon_begin();
+ scoped_ksimd() {
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
- if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
-
- while (walk->nbytes && walk->nbytes != walk->total) {
- unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
-
- sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
- walk->src.virt.addr, walk->iv,
- walk->nbytes - tail, mac);
-
- kernel_neon_end();
-
- err = skcipher_walk_done(walk, tail);
-
- kernel_neon_begin();
- }
-
- if (walk->nbytes) {
- sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
- walk->src.virt.addr, walk->iv,
- walk->nbytes, mac);
+ while (walk->nbytes) {
+ unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
- sm4_ce_ccm_final(rkey_enc, ctr0, mac);
+ if (walk->nbytes == walk->total)
+ tail = 0;
- kernel_neon_end();
+ sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
+ walk->src.virt.addr, walk->iv,
+ walk->nbytes - tail, mac);
- err = skcipher_walk_done(walk, 0);
- } else {
+ err = skcipher_walk_done(walk, tail);
+ }
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
-
- kernel_neon_end();
}
return err;
diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c
index c31d76fb5a1770..bceec833ef4e78 100644
--- a/arch/arm64/crypto/sm4-ce-cipher-glue.c
+++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c
@@ -32,9 +32,8 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
- kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_enc, out, in);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_do_crypt(ctx->rkey_enc, out, in);
}
}
@@ -45,9 +44,8 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
- kernel_neon_begin();
- sm4_ce_do_crypt(ctx->rkey_dec, out, in);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_do_crypt(ctx->rkey_dec, out, in);
}
}
diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c
index c2ea3d5f690b32..ef06f4f768a1dd 100644
--- a/arch/arm64/crypto/sm4-ce-gcm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c
@@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
@@ -48,13 +48,11 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
+ }
return 0;
}
@@ -149,44 +147,28 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
- kernel_neon_begin();
+ scoped_ksimd() {
+ if (req->assoclen)
+ gcm_calculate_auth_mac(req, ghash);
- if (req->assoclen)
- gcm_calculate_auth_mac(req, ghash);
+ do {
+ unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+ const u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+ const u8 *l = NULL;
- while (walk->nbytes) {
- unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
- const u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
+ if (walk->nbytes == walk->total) {
+ l = (const u8 *)&lengths;
+ tail = 0;
+ }
- if (walk->nbytes == walk->total) {
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
- walk->nbytes, ghash,
- ctx->ghash_table,
- (const u8 *)&lengths);
-
- kernel_neon_end();
-
- return skcipher_walk_done(walk, 0);
- }
+ walk->nbytes - tail, ghash,
+ ctx->ghash_table, l);
- sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
- walk->nbytes - tail, ghash,
- ctx->ghash_table, NULL);
-
- kernel_neon_end();
-
- err = skcipher_walk_done(walk, tail);
-
- kernel_neon_begin();
+ err = skcipher_walk_done(walk, tail);
+ } while (walk->nbytes);
}
-
- sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv,
- walk->nbytes, ghash, ctx->ghash_table,
- (const u8 *)&lengths);
-
- kernel_neon_end();
-
return err;
}
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 7a60e7b559dc96..5569cece5a0b85 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -8,7 +8,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
-#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
@@ -74,10 +74,9 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -94,12 +93,12 @@ static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (ret)
return ret;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->key1.rkey_enc,
- ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
- ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key1.rkey_enc,
+ ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
+ ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+ }
return 0;
}
@@ -117,16 +116,14 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
- kernel_neon_begin();
-
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_crypt(rkey, dst, src, nblks);
- nbytes -= nblks * SM4_BLOCK_SIZE;
+ scoped_ksimd() {
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_crypt(rkey, dst, src, nblks);
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
}
- kernel_neon_end();
-
err = skcipher_walk_done(&walk, nbytes);
}
@@ -167,16 +164,14 @@ static int sm4_cbc_crypt(struct skcipher_request *req,
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
- walk.iv, nblocks);
- else
- sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
+ else
+ sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
+ }
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -249,16 +244,14 @@ static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes);
- else
- sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+ else
+ sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes);
+ }
return skcipher_walk_done(&walk, 0);
}
@@ -288,28 +281,26 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
- kernel_neon_begin();
-
- nblks = BYTES2BLKS(nbytes);
- if (nblks) {
- sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
- dst += nblks * SM4_BLOCK_SIZE;
- src += nblks * SM4_BLOCK_SIZE;
- nbytes -= nblks * SM4_BLOCK_SIZE;
- }
-
- /* tail */
- if (walk.nbytes == walk.total && nbytes > 0) {
- u8 keystream[SM4_BLOCK_SIZE];
-
- sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
- crypto_inc(walk.iv, SM4_BLOCK_SIZE);
- crypto_xor_cpy(dst, src, keystream, nbytes);
- nbytes = 0;
+ scoped_ksimd() {
+ nblks = BYTES2BLKS(nbytes);
+ if (nblks) {
+ sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
+ dst += nblks * SM4_BLOCK_SIZE;
+ src += nblks * SM4_BLOCK_SIZE;
+ nbytes -= nblks * SM4_BLOCK_SIZE;
+ }
+
+ /* tail */
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ u8 keystream[SM4_BLOCK_SIZE];
+
+ sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv);
+ crypto_inc(walk.iv, SM4_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, keystream, nbytes);
+ nbytes = 0;
+ }
}
- kernel_neon_end();
-
err = skcipher_walk_done(&walk, nbytes);
}
@@ -359,18 +350,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (nbytes < walk.total)
nbytes &= ~(SM4_BLOCK_SIZE - 1);
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, nbytes,
- rkey2_enc);
- else
- sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, nbytes,
- rkey2_enc);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, nbytes,
+ rkey2_enc);
+ }
rkey2_enc = NULL;
@@ -395,18 +384,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
- kernel_neon_begin();
-
- if (encrypt)
- sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes,
- rkey2_enc);
- else
- sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
- walk.src.virt.addr, walk.iv, walk.nbytes,
- rkey2_enc);
-
- kernel_neon_end();
+ scoped_ksimd() {
+ if (encrypt)
+ sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+ else
+ sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.iv, walk.nbytes,
+ rkey2_enc);
+ }
return skcipher_walk_done(&walk, 0);
}
@@ -510,11 +497,9 @@ static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
- kernel_neon_end();
-
+ scoped_ksimd()
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
return 0;
}
@@ -530,15 +515,13 @@ static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
memset(consts, 0, SM4_BLOCK_SIZE);
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
- /* encrypt the zero block */
- sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
-
- kernel_neon_end();
+ /* encrypt the zero block */
+ sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
+ }
/* gf(2^128) multiply zero-ciphertext with u and u^2 */
a = be64_to_cpu(consts[0].a);
@@ -568,18 +551,16 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
- kernel_neon_begin();
-
- sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
+ scoped_ksimd() {
+ sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
- sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
- sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
+ sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
+ sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
- sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
- crypto_sm4_fk, crypto_sm4_ck);
-
- kernel_neon_end();
+ sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
+ crypto_sm4_fk, crypto_sm4_ck);
+ }
return 0;
}
@@ -600,10 +581,9 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
unsigned int nblocks = len / SM4_BLOCK_SIZE;
len %= SM4_BLOCK_SIZE;
- kernel_neon_begin();
- sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
- nblocks, false, true);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+ nblocks, false, true);
return len;
}
@@ -619,10 +599,9 @@ static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src,
ctx->digest[len] ^= 0x80;
consts += SM4_BLOCK_SIZE;
}
- kernel_neon_begin();
- sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
- false, true);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
+ false, true);
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
}
@@ -635,10 +614,9 @@ static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src,
if (len) {
crypto_xor(ctx->digest, src, len);
- kernel_neon_begin();
- sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
- ctx->digest);
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
+ ctx->digest);
}
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c
index e3500aca2d18bd..e944c2a2efb025 100644
--- a/arch/arm64/crypto/sm4-neon-glue.c
+++ b/arch/arm64/crypto/sm4-neon-glue.c
@@ -48,11 +48,8 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_crypt(rkey, dst, src, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_crypt(rkey, dst, src, nblocks);
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -126,12 +123,9 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
+ walk.iv, nblocks);
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@@ -157,12 +151,9 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
- kernel_neon_begin();
-
- sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
- walk.iv, nblocks);
-
- kernel_neon_end();
+ scoped_ksimd()
+ sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
+ walk.iv, nblocks);
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
index 2ae50bdce59b43..751e88a96734e6 100644
--- a/arch/arm64/include/asm/fpu.h
+++ b/arch/arm64/include/asm/fpu.h
@@ -6,10 +6,22 @@
#ifndef __ASM_FPU_H
#define __ASM_FPU_H
+#include <linux/preempt.h>
#include <asm/neon.h>
#define kernel_fpu_available() cpu_has_neon()
-#define kernel_fpu_begin() kernel_neon_begin()
-#define kernel_fpu_end() kernel_neon_end()
+
+static inline void kernel_fpu_begin(void)
+{
+ BUG_ON(!in_task());
+ preempt_disable();
+ kernel_neon_begin(NULL);
+}
+
+static inline void kernel_fpu_end(void)
+{
+ kernel_neon_end(NULL);
+ preempt_enable();
+}
#endif /* ! __ASM_FPU_H */
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index d4b1d172a79b67..acebee4605b5c5 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,7 +13,7 @@
#define cpu_has_neon() system_supports_fpsimd()
-void kernel_neon_begin(void);
-void kernel_neon_end(void);
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5acce7962228ec..e30c4c8e3a7a7d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,12 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
- struct user_fpsimd_state kernel_fpsimd_state;
+ /*
+ * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
+ * address of a caller provided buffer that will be used to preserve a
+ * task's kernel mode FPSIMD state while it is scheduled out.
+ */
+ struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index abd642c92f863e..0941f6f58a1463 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -6,12 +6,15 @@
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
+#include <linux/cleanup.h>
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
+#include <asm/neon.h>
+
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -40,4 +43,11 @@ static __must_check inline bool may_use_simd(void) {
#endif /* ! CONFIG_KERNEL_MODE_NEON */
+DEFINE_LOCK_GUARD_1(ksimd,
+ struct user_fpsimd_state,
+ kernel_neon_begin(_T->lock),
+ kernel_neon_end(_T->lock))
+
+#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
+
#endif
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
index befcd8a7abc98d..c38e3d017a79ec 100644
--- a/arch/arm64/include/asm/xor.h
+++ b/arch/arm64/include/asm/xor.h
@@ -9,7 +9,7 @@
#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -19,9 +19,8 @@ static void
xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_2(bytes, p1, p2);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_2(bytes, p1, p2);
}
static void
@@ -29,9 +28,8 @@ xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_3(bytes, p1, p2, p3);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_3(bytes, p1, p2, p3);
}
static void
@@ -40,9 +38,8 @@ xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
}
static void
@@ -52,9 +49,8 @@ xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p4,
const unsigned long * __restrict p5)
{
- kernel_neon_begin();
- xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
- kernel_neon_end();
+ scoped_ksimd()
+ xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
}
static struct xor_block_template xor_block_arm64 = {
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 6d956ed23bd216..c154f72634e028 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1502,21 +1502,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1787,6 +1789,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1846,12 +1849,19 @@ void fpsimd_save_and_flush_cpu_state(void)
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
+ *
+ * Unless called from non-preemptible task context, @state must point to a
+ * caller provided buffer that will be used to preserve the task's kernel mode
+ * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
+ * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
+ WARN_ON((preemptible() || in_serving_softirq()) && !state);
+
BUG_ON(!may_use_simd());
get_cpu_fpsimd_context();
@@ -1859,7 +1869,7 @@ void kernel_neon_begin(void)
/* Save unsaved fpsimd state, if any: */
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
- fpsimd_save_kernel_state(current);
+ fpsimd_save_state(state);
} else {
fpsimd_save_user_state();
@@ -1880,8 +1890,16 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+ current->thread.kernel_fpsimd_state = state;
set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
}
/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1899,22 +1917,30 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
+ *
+ * The value of @state must match the value passed to the preceding call to
+ * kernel_neon_begin().
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
+ if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+ return;
+
/*
* If we are returning from a nested use of kernel mode FPSIMD, restore
* the task context kernel mode FPSIMD state. This can only happen when
* running in softirq context on non-PREEMPT_RT.
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
- test_thread_flag(TIF_KERNEL_FPSTATE))
- fpsimd_load_kernel_state(current);
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+ fpsimd_load_state(state);
+ } else {
clear_thread_flag(TIF_KERNEL_FPSTATE);
+ WARN_ON(current->thread.kernel_fpsimd_state != state);
+ current->thread.kernel_fpsimd_state = NULL;
+ }
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
@@ -1948,7 +1974,7 @@ void __efi_fpsimd_begin(void)
return;
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
WARN_ON(preemptible());
@@ -1999,7 +2025,7 @@ void __efi_fpsimd_end(void)
return;
if (!efi_fpsimd_state_used) {
- kernel_neon_end();
+ kernel_neon_end(&efi_fpsimd_state);
} else {
if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
index 9ee50549e8231e..b41807e63bd3e7 100644
--- a/crypto/aegis128-neon.c
+++ b/crypto/aegis128-neon.c
@@ -4,7 +4,7 @@
*/
#include <asm/cpufeature.h>
-#include <asm/neon.h>
+#include <asm/simd.h>
#include "aegis.h"
#include "aegis-neon.h"
@@ -24,32 +24,28 @@ void crypto_aegis128_init_simd(struct aegis_state *state,
const union aegis_block *key,
const u8 *iv)
{
- kernel_neon_begin();
- crypto_aegis128_init_neon(state, key, iv);
- kernel_neon_end();
+ scoped_ksimd()
+ crypto_aegis128_init_neon(state, key, iv);
}
void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg)
{
- kernel_neon_begin();
- crypto_aegis128_update_neon(state, msg);
- kernel_neon_end();
+ scoped_ksimd()
+ crypto_aegis128_update_neon(state, msg);
}
void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
- kernel_neon_begin();
- crypto_aegis128_encrypt_chunk_neon(state, dst, src, size);
- kernel_neon_end();
+ scoped_ksimd()
+ crypto_aegis128_encrypt_chunk_neon(state, dst, src, size);
}
void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
- kernel_neon_begin();
- crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
- kernel_neon_end();
+ scoped_ksimd()
+ crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
}
int crypto_aegis128_final_simd(struct aegis_state *state,
@@ -58,12 +54,7 @@ int crypto_aegis128_final_simd(struct aegis_state *state,
unsigned int cryptlen,
unsigned int authsize)
{
- int ret;
-
- kernel_neon_begin();
- ret = crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen,
- authsize);
- kernel_neon_end();
-
- return ret;
+ scoped_ksimd()
+ return crypto_aegis128_final_neon(state, tag_xor, assoclen,
+ cryptlen, authsize);
}
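The aegis128 conversion above shows both forms of the wrapper: a bare scoped_ksimd() guarding a single statement, and scoped_ksimd() { ... } guarding a block, including an early return from inside the scope (the CRC hunks below rely on the same property). The actual definition lives in <asm/simd.h> and is not part of these hunks; a minimal sketch of how such a guard could be layered on the begin/end calls using the kernel's cleanup.h scoped-guard machinery, not necessarily the in-tree definition, might look like:

	#include <linux/cleanup.h>
	#include <asm/neon.h>

	/*
	 * Sketch only: allocate the kernel-mode FP/SIMD save buffer in the guarded
	 * scope and bracket the scope with kernel_neon_begin()/kernel_neon_end().
	 * Because the guard is cleanup-based, the NEON unit is released on any exit
	 * from the scope, including an early 'return'.
	 */
	DEFINE_LOCK_GUARD_1(ksimd, struct user_fpsimd_state,
			    kernel_neon_begin(_T->lock),
			    kernel_neon_end(_T->lock))

	#define scoped_ksimd()	\
		scoped_guard(ksimd, &(struct user_fpsimd_state){})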
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
index 05e5fd777d4f41..815a7c97d6b09e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
@@ -9,6 +9,7 @@
#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64)
#include <asm/neon.h>
+#include <asm/simd.h>
#endif
#define TEST_WC_NUM_WQES 255
@@ -264,15 +265,15 @@ static void mlx5_iowrite64_copy(struct mlx5_wc_sq *sq, __be32 mmio_wqe[16],
{
#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64)
if (cpu_has_neon()) {
- kernel_neon_begin();
- asm volatile
- (".arch_extension simd\n\t"
- "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t"
- "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]"
- :
- : "r"(mmio_wqe), "r"(sq->bfreg.map + offset)
- : "memory", "v0", "v1", "v2", "v3");
- kernel_neon_end();
+ scoped_ksimd() {
+ asm volatile(
+ ".arch_extension simd\n\t"
+ "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t"
+ "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]"
+ :
+ : "r"(mmio_wqe), "r"(sq->bfreg.map + offset)
+ : "memory", "v0", "v1", "v2", "v3");
+ }
return;
}
#endif
diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
index 63441de5e3f161..afc0ebf97f19c9 100644
--- a/lib/crc/arm/crc-t10dif.h
+++ b/lib/crc/arm/crc-t10dif.h
@@ -5,7 +5,6 @@
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -19,22 +18,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
if (static_branch_likely(&have_pmull)) {
- if (likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
+ scoped_ksimd()
+ return crc_t10dif_pmull64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
- static_branch_likely(&have_neon) &&
- likely(may_use_simd())) {
+ static_branch_likely(&have_neon)) {
u8 buf[16] __aligned(16);
- kernel_neon_begin();
- crc_t10dif_pmull8(crc, data, length, buf);
- kernel_neon_end();
+ scoped_ksimd()
+ crc_t10dif_pmull8(crc, data, length, buf);
return crc_t10dif_generic(0, buf, sizeof(buf));
}
diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h
index 7b76f52f6907db..f33de6b22cd49c 100644
--- a/lib/crc/arm/crc32.h
+++ b/lib/crc/arm/crc32.h
@@ -8,7 +8,6 @@
#include <linux/cpufeature.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
@@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32_pmull_le(p, n, crc);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_pmull_le(p, n, crc);
p += n;
len -= n;
}
@@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32c_pmull_le(p, n, crc);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32c_pmull_le(p, n, crc);
p += n;
len -= n;
}
diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h
index f88db297180516..b8338139ed7738 100644
--- a/lib/crc/arm64/crc-t10dif.h
+++ b/lib/crc/arm64/crc-t10dif.h
@@ -7,7 +7,6 @@
#include <linux/cpufeature.h>
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
@@ -21,22 +20,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
if (static_branch_likely(&have_pmull)) {
- if (likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull_p64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
+ scoped_ksimd()
+ return crc_t10dif_pmull_p64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
- static_branch_likely(&have_asimd) &&
- likely(may_use_simd())) {
+ static_branch_likely(&have_asimd)) {
u8 buf[16];
- kernel_neon_begin();
- crc_t10dif_pmull_p8(crc, data, length, buf);
- kernel_neon_end();
+ scoped_ksimd()
+ crc_t10dif_pmull_p8(crc, data, length, buf);
return crc_t10dif_generic(0, buf, sizeof(buf));
}
diff --git a/lib/crc/arm64/crc32.h b/lib/crc/arm64/crc32.h
index 31e649cd40a2fd..1939a5dee47729 100644
--- a/lib/crc/arm64/crc32.h
+++ b/lib/crc/arm64/crc32.h
@@ -2,7 +2,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <asm/neon.h>
#include <asm/simd.h>
// The minimum input length to consider the 4-way interleaved code path
@@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32_le_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_le_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
@@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32c_le_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32c_le_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
@@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32_be_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_be_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
diff --git a/lib/crypto/arm/blake2b.h b/lib/crypto/arm/blake2b.h
index 1b9154d119db43..5c76498521e67e 100644
--- a/lib/crypto/arm/blake2b.h
+++ b/lib/crypto/arm/blake2b.h
@@ -24,9 +24,8 @@ static void blake2b_compress(struct blake2b_ctx *ctx,
const size_t blocks = min_t(size_t, nblocks,
SZ_4K / BLAKE2B_BLOCK_SIZE);
- kernel_neon_begin();
- blake2b_compress_neon(ctx, data, blocks, inc);
- kernel_neon_end();
+ scoped_ksimd()
+ blake2b_compress_neon(ctx, data, blocks, inc);
data += blocks * BLAKE2B_BLOCK_SIZE;
nblocks -= blocks;
diff --git a/lib/crypto/arm/chacha.h b/lib/crypto/arm/chacha.h
index 0cae30f8ee5d15..836e49088e9837 100644
--- a/lib/crypto/arm/chacha.h
+++ b/lib/crypto/arm/chacha.h
@@ -12,7 +12,6 @@
#include <asm/cputype.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -68,9 +67,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
hchacha_block_arm(state, out, nrounds);
} else {
- kernel_neon_begin();
- hchacha_block_neon(state, out, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ hchacha_block_neon(state, out, nrounds);
}
}
@@ -87,9 +85,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
- kernel_neon_begin();
- chacha_doneon(state, dst, src, todo, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ chacha_doneon(state, dst, src, todo, nrounds);
bytes -= todo;
src += todo;
diff --git a/lib/crypto/arm/curve25519.h b/lib/crypto/arm/curve25519.h
index f6d66494eb8f88..b1a566885e95bd 100644
--- a/lib/crypto/arm/curve25519.h
+++ b/lib/crypto/arm/curve25519.h
@@ -25,9 +25,8 @@ static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
const u8 point[CURVE25519_KEY_SIZE])
{
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
- kernel_neon_begin();
- curve25519_neon(out, scalar, point);
- kernel_neon_end();
+ scoped_ksimd()
+ curve25519_neon(out, scalar, point);
} else {
curve25519_generic(out, scalar, point);
}
diff --git a/lib/crypto/arm/poly1305.h b/lib/crypto/arm/poly1305.h
index 0021cf368307c7..0fe903d8de5563 100644
--- a/lib/crypto/arm/poly1305.h
+++ b/lib/crypto/arm/poly1305.h
@@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -32,9 +31,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
- kernel_neon_begin();
- poly1305_blocks_neon(state, src, todo, padbit);
- kernel_neon_end();
+ scoped_ksimd()
+ poly1305_blocks_neon(state, src, todo, padbit);
len -= todo;
src += todo;
diff --git a/lib/crypto/arm/sha1.h b/lib/crypto/arm/sha1.h
index 29f8bcad0447c3..3e2d8c7cab9f10 100644
--- a/lib/crypto/arm/sha1.h
+++ b/lib/crypto/arm/sha1.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -22,12 +21,12 @@ static void sha1_blocks(struct sha1_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
- kernel_neon_begin();
- if (static_branch_likely(&have_ce))
- sha1_ce_transform(state, data, nblocks);
- else
- sha1_transform_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd() {
+ if (static_branch_likely(&have_ce))
+ sha1_ce_transform(state, data, nblocks);
+ else
+ sha1_transform_neon(state, data, nblocks);
+ }
} else {
sha1_block_data_order(state, data, nblocks);
}
diff --git a/lib/crypto/arm/sha256.h b/lib/crypto/arm/sha256.h
index 7556457b3094b4..ae7e52dd6e3b62 100644
--- a/lib/crypto/arm/sha256.h
+++ b/lib/crypto/arm/sha256.h
@@ -22,12 +22,12 @@ static void sha256_blocks(struct sha256_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
- kernel_neon_begin();
- if (static_branch_likely(&have_ce))
- sha256_ce_transform(state, data, nblocks);
- else
- sha256_block_data_order_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd() {
+ if (static_branch_likely(&have_ce))
+ sha256_ce_transform(state, data, nblocks);
+ else
+ sha256_block_data_order_neon(state, data, nblocks);
+ }
} else {
sha256_block_data_order(state, data, nblocks);
}
diff --git a/lib/crypto/arm/sha512.h b/lib/crypto/arm/sha512.h
index d1b485dd275db8..ed9bd81d6d78d3 100644
--- a/lib/crypto/arm/sha512.h
+++ b/lib/crypto/arm/sha512.h
@@ -19,9 +19,8 @@ static void sha512_blocks(struct sha512_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
- kernel_neon_begin();
- sha512_block_data_order_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ sha512_block_data_order_neon(state, data, nblocks);
} else {
sha512_block_data_order(state, data, nblocks);
}
diff --git a/lib/crypto/arm64/chacha.h b/lib/crypto/arm64/chacha.h
index ba6c22d4608633..ca8c6a8b057829 100644
--- a/lib/crypto/arm64/chacha.h
+++ b/lib/crypto/arm64/chacha.h
@@ -23,7 +23,6 @@
#include <linux/kernel.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -65,9 +64,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
hchacha_block_generic(state, out, nrounds);
} else {
- kernel_neon_begin();
- hchacha_block_neon(state, out, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ hchacha_block_neon(state, out, nrounds);
}
}
@@ -81,9 +79,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
- kernel_neon_begin();
- chacha_doneon(state, dst, src, todo, nrounds);
- kernel_neon_end();
+ scoped_ksimd()
+ chacha_doneon(state, dst, src, todo, nrounds);
bytes -= todo;
src += todo;
diff --git a/lib/crypto/arm64/poly1305.h b/lib/crypto/arm64/poly1305.h
index aed5921ccd9a12..b77669767cd6c3 100644
--- a/lib/crypto/arm64/poly1305.h
+++ b/lib/crypto/arm64/poly1305.h
@@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -31,9 +30,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
- kernel_neon_begin();
- poly1305_blocks_neon(state, src, todo, padbit);
- kernel_neon_end();
+ scoped_ksimd()
+ poly1305_blocks_neon(state, src, todo, padbit);
len -= todo;
src += todo;
diff --git a/lib/crypto/arm64/polyval.h b/lib/crypto/arm64/polyval.h
index 2486e80750d079..a39763395e9bf2 100644
--- a/lib/crypto/arm64/polyval.h
+++ b/lib/crypto/arm64/polyval.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -24,13 +23,14 @@ static void polyval_preparekey_arch(struct polyval_key *key,
static_assert(ARRAY_SIZE(key->h_powers) == NUM_H_POWERS);
memcpy(&key->h_powers[NUM_H_POWERS - 1], raw_key, POLYVAL_BLOCK_SIZE);
if (static_branch_likely(&have_pmull) && may_use_simd()) {
- kernel_neon_begin();
- for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
- key->h_powers[i] = key->h_powers[i + 1];
- polyval_mul_pmull(&key->h_powers[i],
- &key->h_powers[NUM_H_POWERS - 1]);
+ scoped_ksimd() {
+ for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
+ key->h_powers[i] = key->h_powers[i + 1];
+ polyval_mul_pmull(
+ &key->h_powers[i],
+ &key->h_powers[NUM_H_POWERS - 1]);
+ }
}
- kernel_neon_end();
} else {
for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
key->h_powers[i] = key->h_powers[i + 1];
@@ -44,9 +44,8 @@ static void polyval_mul_arch(struct polyval_elem *acc,
const struct polyval_key *key)
{
if (static_branch_likely(&have_pmull) && may_use_simd()) {
- kernel_neon_begin();
- polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
- kernel_neon_end();
+ scoped_ksimd()
+ polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
} else {
polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]);
}
@@ -62,9 +61,8 @@ static void polyval_blocks_arch(struct polyval_elem *acc,
size_t n = min_t(size_t, nblocks,
4096 / POLYVAL_BLOCK_SIZE);
- kernel_neon_begin();
- polyval_blocks_pmull(acc, key, data, n);
- kernel_neon_end();
+ scoped_ksimd()
+ polyval_blocks_pmull(acc, key, data, n);
data += n * POLYVAL_BLOCK_SIZE;
nblocks -= n;
} while (nblocks);
diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h
index aaef4ebfc5e34a..bc7071f1be0962 100644
--- a/lib/crypto/arm64/sha1.h
+++ b/lib/crypto/arm64/sha1.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -20,9 +19,9 @@ static void sha1_blocks(struct sha1_block_state *state,
do {
size_t rem;
- kernel_neon_begin();
- rem = __sha1_ce_transform(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = __sha1_ce_transform(state, data, nblocks);
+
data += (nblocks - rem) * SHA1_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);
diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h
index 80d06df27d3a39..568dff0f276af4 100644
--- a/lib/crypto/arm64/sha256.h
+++ b/lib/crypto/arm64/sha256.h
@@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -27,17 +26,16 @@ static void sha256_blocks(struct sha256_block_state *state,
do {
size_t rem;
- kernel_neon_begin();
- rem = __sha256_ce_transform(state,
- data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = __sha256_ce_transform(state, data,
+ nblocks);
+
data += (nblocks - rem) * SHA256_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);
} else {
- kernel_neon_begin();
- sha256_block_neon(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ sha256_block_neon(state, data, nblocks);
}
} else {
sha256_block_data_order(state, data, nblocks);
@@ -66,9 +64,8 @@ static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE &&
len <= 65536 && likely(may_use_simd())) {
- kernel_neon_begin();
- sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
- kernel_neon_end();
+ scoped_ksimd()
+ sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
return true;
diff --git a/lib/crypto/arm64/sha3.h b/lib/crypto/arm64/sha3.h
index 6dd5183056da40..b602f1b3b2822e 100644
--- a/lib/crypto/arm64/sha3.h
+++ b/lib/crypto/arm64/sha3.h
@@ -7,7 +7,6 @@
* published by the Free Software Foundation.
*/
-#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -23,10 +22,9 @@ static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data,
do {
size_t rem;
- kernel_neon_begin();
- rem = sha3_ce_transform(state, data, nblocks,
- block_size);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = sha3_ce_transform(state, data, nblocks,
+ block_size);
data += (nblocks - rem) * block_size;
nblocks = rem;
} while (nblocks);
@@ -46,9 +44,8 @@ static void sha3_keccakf(struct sha3_state *state)
*/
static const u8 zeroes[SHA3_512_BLOCK_SIZE];
- kernel_neon_begin();
- sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
- kernel_neon_end();
+ scoped_ksimd()
+ sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
} else {
sha3_keccakf_generic(state);
}
diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h
index ddb0d256f73aa3..7eb7ef04d2687a 100644
--- a/lib/crypto/arm64/sha512.h
+++ b/lib/crypto/arm64/sha512.h
@@ -4,7 +4,7 @@
*
* Copyright 2025 Google LLC
*/
-#include <asm/neon.h>
+
#include <asm/simd.h>
#include <linux/cpufeature.h>
@@ -24,9 +24,9 @@ static void sha512_blocks(struct sha512_block_state *state,
do {
size_t rem;
- kernel_neon_begin();
- rem = __sha512_ce_transform(state, data, nblocks);
- kernel_neon_end();
+ scoped_ksimd()
+ rem = __sha512_ce_transform(state, data, nblocks);
+
data += (nblocks - rem) * SHA512_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 0a2e76035ea960..6d9474ce6da917 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -8,10 +8,9 @@
#include <linux/raid/pq.h>
#ifdef __KERNEL__
-#include <asm/neon.h>
+#include <asm/simd.h>
#else
-#define kernel_neon_begin()
-#define kernel_neon_end()
+#define scoped_ksimd()
#define cpu_has_neon() (1)
#endif
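Outside the kernel (the userspace raid6 test harness), the fallback above makes scoped_ksimd() expand to nothing, so the guarded statement simply runs unguarded. A standalone illustration of that expansion, using made-up names:

	#include <stdio.h>

	#define scoped_ksimd()		/* no-op outside the kernel, as in the fallback above */
	#define cpu_has_neon()	(1)

	static void fake_gen_syndrome(int disks) { printf("syndrome for %d disks\n", disks); }

	int main(void)
	{
		/* With the empty macro, this is identical to a bare fake_gen_syndrome(8); */
		if (cpu_has_neon())
			scoped_ksimd()
				fake_gen_syndrome(8);
		return 0;
	}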
@@ -32,10 +31,9 @@
{ \
void raid6_neon ## _n ## _gen_syndrome_real(int, \
unsigned long, void**); \
- kernel_neon_begin(); \
- raid6_neon ## _n ## _gen_syndrome_real(disks, \
+ scoped_ksimd() \
+ raid6_neon ## _n ## _gen_syndrome_real(disks, \
(unsigned long)bytes, ptrs); \
- kernel_neon_end(); \
} \
static void raid6_neon ## _n ## _xor_syndrome(int disks, \
int start, int stop, \
@@ -43,10 +41,9 @@
{ \
void raid6_neon ## _n ## _xor_syndrome_real(int, \
int, int, unsigned long, void**); \
- kernel_neon_begin(); \
- raid6_neon ## _n ## _xor_syndrome_real(disks, \
- start, stop, (unsigned long)bytes, ptrs); \
- kernel_neon_end(); \
+ scoped_ksimd() \
+ raid6_neon ## _n ## _xor_syndrome_real(disks, \
+ start, stop, (unsigned long)bytes, ptrs);\
} \
struct raid6_calls const raid6_neonx ## _n = { \
raid6_neon ## _n ## _gen_syndrome, \
diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c
index 70e1404c151292..9d99aeabd31a90 100644
--- a/lib/raid6/recov_neon.c
+++ b/lib/raid6/recov_neon.c
@@ -7,11 +7,10 @@
#include <linux/raid/pq.h>
#ifdef __KERNEL__
-#include <asm/neon.h>
+#include <asm/simd.h>
#include "neon.h"
#else
-#define kernel_neon_begin()
-#define kernel_neon_end()
+#define scoped_ksimd()
#define cpu_has_neon() (1)
#endif
@@ -55,9 +54,8 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
raid6_gfexp[failb]]];
- kernel_neon_begin();
- __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
- kernel_neon_end();
+ scoped_ksimd()
+ __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
}
static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
@@ -86,9 +84,8 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
/* Now, pick the proper data tables */
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
- kernel_neon_begin();
- __raid6_datap_recov_neon(bytes, p, q, dq, qmul);
- kernel_neon_end();
+ scoped_ksimd()
+ __raid6_datap_recov_neon(bytes, p, q, dq, qmul);
}
const struct raid6_recov_calls raid6_recov_neon = {