| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-02 18:53:50 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-02 18:53:50 -0800 |
| commit | f617d24606553159a271f43e36d1c71a4c317e48 | |
| tree | f39957a87580dcc1835e2c1f0bfad5adeef93483 | |
| parent | 906003e15160642658358153e7598302d1b38166 | |
| parent | 5dc8d277520be6f0be11f36712e557167b3964c8 | |
| download | linux-f617d24606553159a271f43e36d1c71a4c317e48.tar.gz | |
Merge tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull arm64 FPSIMD on-stack buffer updates from Eric Biggers:
"This is a core arm64 change. However, I was asked to take this because
most uses of kernel-mode FPSIMD are in crypto or CRC code.
In v6.8, the size of task_struct on arm64 increased by 528 bytes due
to the new 'kernel_fpsimd_state' field. This field was added to allow
kernel-mode FPSIMD code to be preempted.
Unfortunately, 528 bytes is kind of a lot for task_struct. This
regression in the task_struct size was noticed and reported.
Recover that space by making this state be allocated on the stack at
the beginning of each kernel-mode FPSIMD section.
To make it easier for all the users of kernel-mode FPSIMD to do that
correctly, introduce and use a 'scoped_ksimd' abstraction"
* tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (23 commits)
lib/crypto: arm64: Move remaining algorithms to scoped ksimd API
lib/crypto: arm/blake2b: Move to scoped ksimd API
arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
arm64/fpu: Enforce task-context only for generic kernel mode FPU
net/mlx5: Switch to more abstract scoped ksimd guard API on arm64
arm64/xorblocks: Switch to 'ksimd' scoped guard API
crypto/arm64: sm4 - Switch to 'ksimd' scoped guard API
crypto/arm64: sm3 - Switch to 'ksimd' scoped guard API
crypto/arm64: sha3 - Switch to 'ksimd' scoped guard API
crypto/arm64: polyval - Switch to 'ksimd' scoped guard API
crypto/arm64: nhpoly1305 - Switch to 'ksimd' scoped guard API
crypto/arm64: aes-gcm - Switch to 'ksimd' scoped guard API
crypto/arm64: aes-blk - Switch to 'ksimd' scoped guard API
crypto/arm64: aes-ccm - Switch to 'ksimd' scoped guard API
raid6: Move to more abstract 'ksimd' guard API
crypto: aegis128-neon - Move to more abstract 'ksimd' guard API
crypto/arm64: sm4-ce-gcm - Avoid pointless yield of the NEON unit
crypto/arm64: sm4-ce-ccm - Avoid pointless yield of the NEON unit
crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit
lib/crc: Switch ARM and arm64 to 'ksimd' scoped guard API
...
42 files changed, 617 insertions, 712 deletions
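For reference, the heart of the series is the pair of guard definitions added to the two `<asm/simd.h>` headers; they are reproduced here, with whitespace restored, from the corresponding hunks in the diff below (each definition lives in its own arch header, not in one file):

```c
/* arch/arm/include/asm/simd.h: nothing to preserve, plain 0-argument guard */
DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())

#define scoped_ksimd()  scoped_guard(ksimd)
```

```c
/* arch/arm64/include/asm/simd.h: the guard owns a struct user_fpsimd_state
 * on the caller's stack and passes its address to the re-prototyped
 * kernel_neon_begin()/kernel_neon_end().
 */
DEFINE_LOCK_GUARD_1(ksimd,
                    struct user_fpsimd_state,
                    kernel_neon_begin(_T->lock),
                    kernel_neon_end(_T->lock))

#define scoped_ksimd()  scoped_guard(ksimd, &(struct user_fpsimd_state){})
```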
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h index be08a8da046f92..8549fa8b7253bd 100644 --- a/arch/arm/include/asm/simd.h +++ b/arch/arm/include/asm/simd.h @@ -2,14 +2,21 @@ #ifndef _ASM_SIMD_H #define _ASM_SIMD_H +#include <linux/cleanup.h> #include <linux/compiler_attributes.h> #include <linux/preempt.h> #include <linux/types.h> +#include <asm/neon.h> + static __must_check inline bool may_use_simd(void) { return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq() && !irqs_disabled(); } +DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end()) + +#define scoped_ksimd() scoped_guard(ksimd) + #endif /* _ASM_SIMD_H */ diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 2d791d51891b46..c4fd648471f181 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -8,7 +8,6 @@ * Author: Ard Biesheuvel <ardb@kernel.org> */ -#include <asm/neon.h> #include <linux/unaligned.h> #include <crypto/aes.h> #include <crypto/scatterwalk.h> @@ -16,6 +15,8 @@ #include <crypto/internal/skcipher.h> #include <linux/module.h> +#include <asm/simd.h> + #include "aes-ce-setkey.h" MODULE_IMPORT_NS("CRYPTO_INTERNAL"); @@ -114,11 +115,8 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, in += adv; abytes -= adv; - if (unlikely(rem)) { - kernel_neon_end(); - kernel_neon_begin(); + if (unlikely(rem)) macp = 0; - } } else { u32 l = min(AES_BLOCK_SIZE - macp, abytes); @@ -187,40 +185,38 @@ static int ccm_encrypt(struct aead_request *req) if (unlikely(err)) return err; - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + u8 *final_iv = NULL; - do { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - u8 buf[AES_BLOCK_SIZE]; - u8 *final_iv = NULL; - - if (walk.nbytes == walk.total) { - tail = 0; - final_iv = orig_iv; - } - - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], - src, walk.nbytes); + if (walk.nbytes == walk.total) { + tail = 0; + final_iv = orig_iv; + } - ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail, - ctx->key_enc, num_rounds(ctx), - mac, walk.iv, final_iv); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], + src, walk.nbytes); - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, dst, walk.nbytes); + ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail, + ctx->key_enc, num_rounds(ctx), + mac, walk.iv, final_iv); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - } - } while (walk.nbytes); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, dst, walk.nbytes); - kernel_neon_end(); + if (walk.nbytes) { + err = skcipher_walk_done(&walk, tail); + } + } while (walk.nbytes); + } if (unlikely(err)) return err; @@ -254,40 +250,38 @@ static int ccm_decrypt(struct aead_request *req) if (unlikely(err)) return err; - kernel_neon_begin(); - - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - do { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - u8 buf[AES_BLOCK_SIZE]; - u8 *final_iv = NULL; 
- - if (walk.nbytes == walk.total) { - tail = 0; - final_iv = orig_iv; - } + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + u8 *final_iv = NULL; - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], - src, walk.nbytes); + if (walk.nbytes == walk.total) { + tail = 0; + final_iv = orig_iv; + } - ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, - ctx->key_enc, num_rounds(ctx), - mac, walk.iv, final_iv); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes], + src, walk.nbytes); - if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, dst, walk.nbytes); + ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail, + ctx->key_enc, num_rounds(ctx), + mac, walk.iv, final_iv); - if (walk.nbytes) { - err = skcipher_walk_done(&walk, tail); - } - } while (walk.nbytes); + if (unlikely(walk.nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, dst, walk.nbytes); - kernel_neon_end(); + if (walk.nbytes) { + err = skcipher_walk_done(&walk, tail); + } + } while (walk.nbytes); + } if (unlikely(err)) return err; diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index 00b8749013c5bf..a4dad370991df6 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -52,9 +52,8 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) return; } - kernel_neon_begin(); - __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); - kernel_neon_end(); + scoped_ksimd() + __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); } static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) @@ -66,9 +65,8 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) return; } - kernel_neon_begin(); - __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); - kernel_neon_end(); + scoped_ksimd() + __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); } int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -94,47 +92,48 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, for (i = 0; i < kwords; i++) ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32)); - kernel_neon_begin(); - for (i = 0; i < sizeof(rcon); i++) { - u32 *rki = ctx->key_enc + (i * kwords); - u32 *rko = rki + kwords; - - rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; - rko[1] = rko[0] ^ rki[1]; - rko[2] = rko[1] ^ rki[2]; - rko[3] = rko[2] ^ rki[3]; - - if (key_len == AES_KEYSIZE_192) { - if (i >= 7) - break; - rko[4] = rko[3] ^ rki[4]; - rko[5] = rko[4] ^ rki[5]; - } else if (key_len == AES_KEYSIZE_256) { - if (i >= 6) - break; - rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; - rko[5] = rko[4] ^ rki[5]; - rko[6] = rko[5] ^ rki[6]; - rko[7] = rko[6] ^ rki[7]; + scoped_ksimd() { + for (i = 0; i < sizeof(rcon); i++) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ + rcon[i] ^ rki[0]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = __aes_ce_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } } - } - /* - * Generate the 
decryption keys for the Equivalent Inverse Cipher. - * This involves reversing the order of the round keys, and applying - * the Inverse Mix Columns transformation on all but the first and - * the last one. - */ - key_enc = (struct aes_block *)ctx->key_enc; - key_dec = (struct aes_block *)ctx->key_dec; - j = num_rounds(ctx); - - key_dec[0] = key_enc[j]; - for (i = 1, j--; j > 0; i++, j--) - __aes_ce_invert(key_dec + i, key_enc + j); - key_dec[i] = key_enc[0]; + /* + * Generate the decryption keys for the Equivalent Inverse + * Cipher. This involves reversing the order of the round + * keys, and applying the Inverse Mix Columns transformation on + * all but the first and the last one. + */ + key_enc = (struct aes_block *)ctx->key_enc; + key_dec = (struct aes_block *)ctx->key_dec; + j = num_rounds(ctx); + + key_dec[0] = key_enc[j]; + for (i = 1, j--; j > 0; i++, j--) + __aes_ce_invert(key_dec + i, key_enc + j); + key_dec[i] = key_enc[0]; + } - kernel_neon_end(); return 0; } EXPORT_SYMBOL(ce_aes_expandkey); diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5e207ff34482f5..b087b900d2790b 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -5,8 +5,6 @@ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/hwcap.h> -#include <asm/neon.h> #include <crypto/aes.h> #include <crypto/ctr.h> #include <crypto/internal/hash.h> @@ -20,6 +18,9 @@ #include <linux/module.h> #include <linux/string.h> +#include <asm/hwcap.h> +#include <asm/simd.h> + #include "aes-ce-setkey.h" #ifdef USE_V8_CRYPTO_EXTENSIONS @@ -186,10 +187,9 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -206,10 +206,9 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, blocks); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -224,10 +223,9 @@ static int cbc_encrypt_walk(struct skcipher_request *req, unsigned int blocks; while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, - ctx->key_enc, rounds, blocks, walk->iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_enc, rounds, blocks, walk->iv); err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; @@ -253,10 +251,9 @@ static int cbc_decrypt_walk(struct skcipher_request *req, unsigned int blocks; while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, - ctx->key_dec, rounds, blocks, walk->iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_dec, rounds, blocks, walk->iv); err = 
skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; @@ -322,10 +319,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, walk.nbytes, walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, rounds, walk.nbytes, walk.iv); return skcipher_walk_done(&walk, 0); } @@ -379,10 +375,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, walk.nbytes, walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, rounds, walk.nbytes, walk.iv); return skcipher_walk_done(&walk, 0); } @@ -399,11 +394,11 @@ static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req) blocks = walk.nbytes / AES_BLOCK_SIZE; if (blocks) { - kernel_neon_begin(); - aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, blocks, - req->iv, ctx->key2.key_enc); - kernel_neon_end(); + scoped_ksimd() + aes_essiv_cbc_encrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->key1.key_enc, rounds, blocks, + req->iv, ctx->key2.key_enc); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err ?: cbc_encrypt_walk(req, &walk); @@ -421,11 +416,11 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req) blocks = walk.nbytes / AES_BLOCK_SIZE; if (blocks) { - kernel_neon_begin(); - aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, blocks, - req->iv, ctx->key2.key_enc); - kernel_neon_end(); + scoped_ksimd() + aes_essiv_cbc_decrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->key1.key_dec, rounds, blocks, + req->iv, ctx->key2.key_enc); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err ?: cbc_decrypt_walk(req, &walk); @@ -461,10 +456,9 @@ static int __maybe_unused xctr_encrypt(struct skcipher_request *req) else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv, byte_ctr); - kernel_neon_end(); + scoped_ksimd() + aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, + walk.iv, byte_ctr); if (unlikely(nbytes < AES_BLOCK_SIZE)) memcpy(walk.dst.virt.addr, @@ -506,10 +500,9 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req) else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, + walk.iv); if (unlikely(nbytes < AES_BLOCK_SIZE)) memcpy(walk.dst.virt.addr, @@ -562,11 +555,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req) if (walk.nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, nbytes, + ctx->key2.key_enc, walk.iv, first); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -584,11 +576,10 @@ static int __maybe_unused xts_encrypt(struct 
skcipher_request *req) if (err) return err; - kernel_neon_begin(); - aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, walk.nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); return skcipher_walk_done(&walk, 0); } @@ -634,11 +625,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) if (walk.nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); - kernel_neon_begin(); - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, nbytes, + ctx->key2.key_enc, walk.iv, first); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -657,11 +647,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) return err; - kernel_neon_begin(); - aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, walk.nbytes, - ctx->key2.key_enc, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); return skcipher_walk_done(&walk, 0); } @@ -808,10 +797,9 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key, return err; /* encrypt the zero vector */ - kernel_neon_begin(); - aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc, - rounds, 1); - kernel_neon_end(); + scoped_ksimd() + aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, + ctx->key.key_enc, rounds, 1); cmac_gf128_mul_by_x(consts, consts); cmac_gf128_mul_by_x(consts + 1, consts); @@ -837,10 +825,10 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key, if (err) return err; - kernel_neon_begin(); - aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); - aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); - kernel_neon_end(); + scoped_ksimd() { + aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1); + aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2); + } return cbcmac_setkey(tfm, key, sizeof(key)); } @@ -860,10 +848,9 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, int rem; do { - kernel_neon_begin(); - rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, - dg, enc_before, !enc_before); - kernel_neon_end(); + scoped_ksimd() + rem = aes_mac_update(in, ctx->key_enc, rounds, blocks, + dg, enc_before, !enc_before); in += (blocks - rem) * AES_BLOCK_SIZE; blocks = rem; } while (blocks); diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index c4a623e865934b..d496effb0a5b77 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -85,9 +85,8 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, ctx->rounds = 6 + key_len / 4; - kernel_neon_begin(); - aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); - kernel_neon_end(); + scoped_ksimd() + aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); return 0; } @@ -110,10 +109,9 @@ static int __ecb_crypt(struct skcipher_request *req, blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - kernel_neon_begin(); - fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, - ctx->rounds, blocks); - 
kernel_neon_end(); + scoped_ksimd() + fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, + ctx->rounds, blocks); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } @@ -146,9 +144,8 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key, memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); - kernel_neon_begin(); - aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); - kernel_neon_end(); + scoped_ksimd() + aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); memzero_explicit(&rk, sizeof(rk)); return 0; @@ -167,11 +164,11 @@ static int cbc_encrypt(struct skcipher_request *req) unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; /* fall back to the non-bitsliced NEON implementation */ - kernel_neon_begin(); - neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->enc, ctx->key.rounds, blocks, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + neon_aes_cbc_encrypt(walk.dst.virt.addr, + walk.src.virt.addr, + ctx->enc, ctx->key.rounds, blocks, + walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; @@ -193,11 +190,10 @@ static int cbc_decrypt(struct skcipher_request *req) blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); - kernel_neon_begin(); - aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key.rk, ctx->key.rounds, blocks, - walk.iv); - kernel_neon_end(); + scoped_ksimd() + aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } @@ -220,30 +216,32 @@ static int ctr_encrypt(struct skcipher_request *req) const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - kernel_neon_begin(); - if (blocks >= 8) { - aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds, - blocks, walk.iv); - dst += blocks * AES_BLOCK_SIZE; - src += blocks * AES_BLOCK_SIZE; - } - if (nbytes && walk.nbytes == walk.total) { - u8 buf[AES_BLOCK_SIZE]; - u8 *d = dst; - - if (unlikely(nbytes < AES_BLOCK_SIZE)) - src = dst = memcpy(buf + sizeof(buf) - nbytes, - src, nbytes); - - neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, - nbytes, walk.iv); + scoped_ksimd() { + if (blocks >= 8) { + aesbs_ctr_encrypt(dst, src, ctx->key.rk, + ctx->key.rounds, blocks, + walk.iv); + dst += blocks * AES_BLOCK_SIZE; + src += blocks * AES_BLOCK_SIZE; + } + if (nbytes && walk.nbytes == walk.total) { + u8 buf[AES_BLOCK_SIZE]; + u8 *d = dst; + + if (unlikely(nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - + nbytes, src, nbytes); + + neon_aes_ctr_encrypt(dst, src, ctx->enc, + ctx->key.rounds, nbytes, + walk.iv); - if (unlikely(nbytes < AES_BLOCK_SIZE)) - memcpy(d, dst, nbytes); + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(d, dst, nbytes); - nbytes = 0; + nbytes = 0; + } } - kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } return err; @@ -320,33 +318,33 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in = walk.src.virt.addr; nbytes = walk.nbytes; - kernel_neon_begin(); - if (blocks >= 8) { - if (first == 1) - neon_aes_ecb_encrypt(walk.iv, walk.iv, - ctx->twkey, - ctx->key.rounds, 1); - first = 2; - - fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, - walk.iv); - - out += blocks * AES_BLOCK_SIZE; - in += blocks * AES_BLOCK_SIZE; - nbytes -= blocks * AES_BLOCK_SIZE; + scoped_ksimd() { + if (blocks >= 8) { + if (first == 1) + neon_aes_ecb_encrypt(walk.iv, walk.iv, + ctx->twkey, + ctx->key.rounds, 1); + first = 2; + + 
fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + + out += blocks * AES_BLOCK_SIZE; + in += blocks * AES_BLOCK_SIZE; + nbytes -= blocks * AES_BLOCK_SIZE; + } + if (walk.nbytes == walk.total && nbytes > 0) { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + nbytes = first = 0; + } } - if (walk.nbytes == walk.total && nbytes > 0) { - if (encrypt) - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, - ctx->key.rounds, nbytes, - ctx->twkey, walk.iv, first); - else - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, - ctx->key.rounds, nbytes, - ctx->twkey, walk.iv, first); - nbytes = first = 0; - } - kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } @@ -369,14 +367,16 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in = walk.src.virt.addr; nbytes = walk.nbytes; - kernel_neon_begin(); - if (encrypt) - neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first); - else - neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first); - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, ctx->twkey, + walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, ctx->twkey, + walk.iv, first); + } return skcipher_walk_done(&walk, 0); } diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 4995b6e2233501..7951557a285a9d 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -5,7 +5,6 @@ * Copyright (C) 2014 - 2018 Linaro Ltd. 
<ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <crypto/aes.h> #include <crypto/b128ops.h> #include <crypto/gcm.h> @@ -22,6 +21,8 @@ #include <linux/string.h> #include <linux/unaligned.h> +#include <asm/simd.h> + MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -74,9 +75,8 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src, u64 const h[][2], const char *head)) { - kernel_neon_begin(); - simd_update(blocks, dg, src, key->h, head); - kernel_neon_end(); + scoped_ksimd() + simd_update(blocks, dg, src, key->h, head); } /* avoid hogging the CPU for too long */ @@ -329,11 +329,10 @@ static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen) tag = NULL; } - kernel_neon_begin(); - pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, nrounds, - tag); - kernel_neon_end(); + scoped_ksimd() + pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, nrounds, + tag); if (unlikely(!nbytes)) break; @@ -399,11 +398,11 @@ static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen) tag = NULL; } - kernel_neon_begin(); - ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, - nrounds, tag, otag, authsize); - kernel_neon_end(); + scoped_ksimd() + ret = pmull_gcm_decrypt(nbytes, dst, src, + ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, + nrounds, tag, otag, authsize); if (unlikely(!nbytes)) break; diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index e4a0b463f080e0..013de6ac569a14 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -25,9 +25,8 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, do { unsigned int n = min_t(unsigned int, srclen, SZ_4K); - kernel_neon_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); - kernel_neon_end(); + scoped_ksimd() + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); src += n; srclen -= n; } while (srclen); diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index eac6f5fa0abe3c..24c1fcfae0721e 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -5,7 +5,6 @@ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <crypto/internal/hash.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> @@ -13,6 +12,8 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <asm/simd.h> + MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); @@ -25,18 +26,18 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data, { int remain; - kernel_neon_begin(); - remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); - kernel_neon_end(); + scoped_ksimd() { + remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform); + } return remain; } static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - kernel_neon_begin(); - sm3_base_do_finup(desc, data, len, sm3_ce_transform); - kernel_neon_end(); + scoped_ksimd() { + sm3_base_do_finup(desc, data, len, sm3_ce_transform); + } return sm3_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c index 
6c4611a503a345..15f30cc24f32b3 100644 --- a/arch/arm64/crypto/sm3-neon-glue.c +++ b/arch/arm64/crypto/sm3-neon-glue.c @@ -5,7 +5,7 @@ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/internal/hash.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> @@ -20,20 +20,16 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src, static int sm3_neon_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - int remain; - - kernel_neon_begin(); - remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform); - kernel_neon_end(); - return remain; + scoped_ksimd() + return sm3_base_do_update_blocks(desc, data, len, + sm3_neon_transform); } static int sm3_neon_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - kernel_neon_begin(); - sm3_base_do_finup(desc, data, len, sm3_neon_transform); - kernel_neon_end(); + scoped_ksimd() + sm3_base_do_finup(desc, data, len, sm3_neon_transform); return sm3_base_finish(desc, out); } diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c index e9cc1c1364ec62..332f02167a96f9 100644 --- a/arch/arm64/crypto/sm4-ce-ccm-glue.c +++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c @@ -11,7 +11,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/cpufeature.h> -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> @@ -35,10 +35,9 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); return 0; } @@ -167,39 +166,23 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk, memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); crypto_inc(walk->iv, SM4_BLOCK_SIZE); - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); - - while (walk->nbytes && walk->nbytes != walk->total) { - unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, - walk->src.virt.addr, walk->iv, - walk->nbytes - tail, mac); - - kernel_neon_end(); - - err = skcipher_walk_done(walk, tail); - - kernel_neon_begin(); - } - - if (walk->nbytes) { - sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, - walk->src.virt.addr, walk->iv, - walk->nbytes, mac); + while (walk->nbytes) { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - sm4_ce_ccm_final(rkey_enc, ctr0, mac); + if (walk->nbytes == walk->total) + tail = 0; - kernel_neon_end(); + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, + walk->src.virt.addr, walk->iv, + walk->nbytes - tail, mac); - err = skcipher_walk_done(walk, 0); - } else { + err = skcipher_walk_done(walk, tail); + } sm4_ce_ccm_final(rkey_enc, ctr0, mac); - - kernel_neon_end(); } return err; diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c index c31d76fb5a1770..bceec833ef4e78 100644 --- a/arch/arm64/crypto/sm4-ce-cipher-glue.c +++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c @@ -32,9 +32,8 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) if (!crypto_simd_usable()) { sm4_crypt_block(ctx->rkey_enc, out, in); } 
else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_enc, out, in); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_do_crypt(ctx->rkey_enc, out, in); } } @@ -45,9 +44,8 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) if (!crypto_simd_usable()) { sm4_crypt_block(ctx->rkey_dec, out, in); } else { - kernel_neon_begin(); - sm4_ce_do_crypt(ctx->rkey_dec, out, in); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_do_crypt(ctx->rkey_dec, out, in); } } diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c index c2ea3d5f690b32..ef06f4f768a1dd 100644 --- a/arch/arm64/crypto/sm4-ce-gcm-glue.c +++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c @@ -11,7 +11,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/cpufeature.h> -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/b128ops.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> @@ -48,13 +48,11 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); - - kernel_neon_end(); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); + } return 0; } @@ -149,44 +147,28 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk, memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(2, iv + GCM_IV_SIZE); - kernel_neon_begin(); + scoped_ksimd() { + if (req->assoclen) + gcm_calculate_auth_mac(req, ghash); - if (req->assoclen) - gcm_calculate_auth_mac(req, ghash); + do { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; + const u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + const u8 *l = NULL; - while (walk->nbytes) { - unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; - const u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; + if (walk->nbytes == walk->total) { + l = (const u8 *)&lengths; + tail = 0; + } - if (walk->nbytes == walk->total) { sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, - walk->nbytes, ghash, - ctx->ghash_table, - (const u8 *)&lengths); - - kernel_neon_end(); - - return skcipher_walk_done(walk, 0); - } + walk->nbytes - tail, ghash, + ctx->ghash_table, l); - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, - walk->nbytes - tail, ghash, - ctx->ghash_table, NULL); - - kernel_neon_end(); - - err = skcipher_walk_done(walk, tail); - - kernel_neon_begin(); + err = skcipher_walk_done(walk, tail); + } while (walk->nbytes); } - - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv, - walk->nbytes, ghash, ctx->ghash_table, - (const u8 *)&lengths); - - kernel_neon_end(); - return err; } diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 7a60e7b559dc96..5569cece5a0b85 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -8,7 +8,7 @@ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ -#include <asm/neon.h> +#include <asm/simd.h> #include <crypto/b128ops.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> @@ -74,10 +74,9 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, - 
crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); return 0; } @@ -94,12 +93,12 @@ static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, if (ret) return ret; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->key1.rkey_enc, - ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); - sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, - ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key1.rkey_enc, + ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, + ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + } return 0; } @@ -117,16 +116,14 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) u8 *dst = walk.dst.virt.addr; unsigned int nblks; - kernel_neon_begin(); - - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_crypt(rkey, dst, src, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; + scoped_ksimd() { + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_crypt(rkey, dst, src, nblks); + nbytes -= nblks * SM4_BLOCK_SIZE; + } } - kernel_neon_end(); - err = skcipher_walk_done(&walk, nbytes); } @@ -167,16 +164,14 @@ static int sm4_cbc_crypt(struct skcipher_request *req, nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - if (encrypt) - sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, - walk.iv, nblocks); - else - sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, - walk.iv, nblocks); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, + walk.iv, nblocks); + else + sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); + } } err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); @@ -249,16 +244,14 @@ static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt) if (err) return err; - kernel_neon_begin(); - - if (encrypt) - sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes); - else - sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + else + sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + } return skcipher_walk_done(&walk, 0); } @@ -288,28 +281,26 @@ static int sm4_ctr_crypt(struct skcipher_request *req) u8 *dst = walk.dst.virt.addr; unsigned int nblks; - kernel_neon_begin(); - - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } - - /* tail */ - if (walk.nbytes == walk.total && nbytes > 0) { - u8 keystream[SM4_BLOCK_SIZE]; - - sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); - crypto_inc(walk.iv, SM4_BLOCK_SIZE); - crypto_xor_cpy(dst, src, keystream, nbytes); - nbytes = 0; + scoped_ksimd() { + nblks = BYTES2BLKS(nbytes); + if (nblks) { + sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); + dst += nblks * SM4_BLOCK_SIZE; + src += nblks * SM4_BLOCK_SIZE; + nbytes -= nblks * SM4_BLOCK_SIZE; + } + + /* tail */ + if (walk.nbytes == walk.total && nbytes > 0) { + u8 keystream[SM4_BLOCK_SIZE]; + + sm4_ce_crypt_block(ctx->rkey_enc, keystream, walk.iv); + crypto_inc(walk.iv, SM4_BLOCK_SIZE); + 
crypto_xor_cpy(dst, src, keystream, nbytes); + nbytes = 0; + } } - kernel_neon_end(); - err = skcipher_walk_done(&walk, nbytes); } @@ -359,18 +350,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt) if (nbytes < walk.total) nbytes &= ~(SM4_BLOCK_SIZE - 1); - kernel_neon_begin(); - - if (encrypt) - sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, nbytes, - rkey2_enc); - else - sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, nbytes, - rkey2_enc); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + } rkey2_enc = NULL; @@ -395,18 +384,16 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt) if (err) return err; - kernel_neon_begin(); - - if (encrypt) - sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes, - rkey2_enc); - else - sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, - walk.src.virt.addr, walk.iv, walk.nbytes, - rkey2_enc); - - kernel_neon_end(); + scoped_ksimd() { + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + } return skcipher_walk_done(&walk, 0); } @@ -510,11 +497,9 @@ static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - kernel_neon_end(); - + scoped_ksimd() + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); return 0; } @@ -530,15 +515,13 @@ static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key, memset(consts, 0, SM4_BLOCK_SIZE); - kernel_neon_begin(); - - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); - /* encrypt the zero block */ - sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); - - kernel_neon_end(); + /* encrypt the zero block */ + sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); + } /* gf(2^128) multiply zero-ciphertext with u and u^2 */ a = be64_to_cpu(consts[0].a); @@ -568,18 +551,16 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; - kernel_neon_begin(); - - sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); + scoped_ksimd() { + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); - sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); - sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); + sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); + sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); - sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, - crypto_sm4_fk, crypto_sm4_ck); - - kernel_neon_end(); + sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + } return 0; } @@ -600,10 +581,9 @@ static int sm4_mac_update(struct 
shash_desc *desc, const u8 *p, unsigned int nblocks = len / SM4_BLOCK_SIZE; len %= SM4_BLOCK_SIZE; - kernel_neon_begin(); - sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, - nblocks, false, true); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, + nblocks, false, true); return len; } @@ -619,10 +599,9 @@ static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src, ctx->digest[len] ^= 0x80; consts += SM4_BLOCK_SIZE; } - kernel_neon_begin(); - sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, - false, true); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, + false, true); memcpy(out, ctx->digest, SM4_BLOCK_SIZE); return 0; } @@ -635,10 +614,9 @@ static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src, if (len) { crypto_xor(ctx->digest, src, len); - kernel_neon_begin(); - sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, - ctx->digest); - kernel_neon_end(); + scoped_ksimd() + sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, + ctx->digest); } memcpy(out, ctx->digest, SM4_BLOCK_SIZE); return 0; diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c index e3500aca2d18bd..e944c2a2efb025 100644 --- a/arch/arm64/crypto/sm4-neon-glue.c +++ b/arch/arm64/crypto/sm4-neon-glue.c @@ -48,11 +48,8 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - sm4_neon_crypt(rkey, dst, src, nblocks); - - kernel_neon_end(); + scoped_ksimd() + sm4_neon_crypt(rkey, dst, src, nblocks); } err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); @@ -126,12 +123,9 @@ static int sm4_cbc_decrypt(struct skcipher_request *req) nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, - walk.iv, nblocks); - - kernel_neon_end(); + scoped_ksimd() + sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); } err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); @@ -157,12 +151,9 @@ static int sm4_ctr_crypt(struct skcipher_request *req) nblocks = nbytes / SM4_BLOCK_SIZE; if (nblocks) { - kernel_neon_begin(); - - sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, - walk.iv, nblocks); - - kernel_neon_end(); + scoped_ksimd() + sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, + walk.iv, nblocks); dst += nblocks * SM4_BLOCK_SIZE; src += nblocks * SM4_BLOCK_SIZE; diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h index 2ae50bdce59b43..751e88a96734e6 100644 --- a/arch/arm64/include/asm/fpu.h +++ b/arch/arm64/include/asm/fpu.h @@ -6,10 +6,22 @@ #ifndef __ASM_FPU_H #define __ASM_FPU_H +#include <linux/preempt.h> #include <asm/neon.h> #define kernel_fpu_available() cpu_has_neon() -#define kernel_fpu_begin() kernel_neon_begin() -#define kernel_fpu_end() kernel_neon_end() + +static inline void kernel_fpu_begin(void) +{ + BUG_ON(!in_task()); + preempt_disable(); + kernel_neon_begin(NULL); +} + +static inline void kernel_fpu_end(void) +{ + kernel_neon_end(NULL); + preempt_enable(); +} #endif /* ! 
__ASM_FPU_H */ diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index d4b1d172a79b67..acebee4605b5c5 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -13,7 +13,7 @@ #define cpu_has_neon() system_supports_fpsimd() -void kernel_neon_begin(void); -void kernel_neon_end(void); +void kernel_neon_begin(struct user_fpsimd_state *); +void kernel_neon_end(struct user_fpsimd_state *); #endif /* ! __ASM_NEON_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5acce7962228ec..e30c4c8e3a7a7d 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -172,7 +172,12 @@ struct thread_struct { unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ - struct user_fpsimd_state kernel_fpsimd_state; + /* + * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the + * address of a caller provided buffer that will be used to preserve a + * task's kernel mode FPSIMD state while it is scheduled out. + */ + struct user_fpsimd_state *kernel_fpsimd_state; unsigned int kernel_fpsimd_cpu; #ifdef CONFIG_ARM64_PTR_AUTH struct ptrauth_keys_user keys_user; diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index abd642c92f863e..0941f6f58a1463 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -6,12 +6,15 @@ #ifndef __ASM_SIMD_H #define __ASM_SIMD_H +#include <linux/cleanup.h> #include <linux/compiler.h> #include <linux/irqflags.h> #include <linux/percpu.h> #include <linux/preempt.h> #include <linux/types.h> +#include <asm/neon.h> + #ifdef CONFIG_KERNEL_MODE_NEON /* @@ -40,4 +43,11 @@ static __must_check inline bool may_use_simd(void) { #endif /* ! CONFIG_KERNEL_MODE_NEON */ +DEFINE_LOCK_GUARD_1(ksimd, + struct user_fpsimd_state, + kernel_neon_begin(_T->lock), + kernel_neon_end(_T->lock)) + +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){}) + #endif diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index befcd8a7abc98d..c38e3d017a79ec 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -9,7 +9,7 @@ #include <linux/hardirq.h> #include <asm-generic/xor.h> #include <asm/hwcap.h> -#include <asm/neon.h> +#include <asm/simd.h> #ifdef CONFIG_KERNEL_MODE_NEON @@ -19,9 +19,8 @@ static void xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { - kernel_neon_begin(); - xor_block_inner_neon.do_2(bytes, p1, p2); - kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_2(bytes, p1, p2); } static void @@ -29,9 +28,8 @@ xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { - kernel_neon_begin(); - xor_block_inner_neon.do_3(bytes, p1, p2, p3); - kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_3(bytes, p1, p2, p3); } static void @@ -40,9 +38,8 @@ xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p3, const unsigned long * __restrict p4) { - kernel_neon_begin(); - xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); - kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); } static void @@ -52,9 +49,8 @@ xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4, const unsigned long * __restrict p5) { - kernel_neon_begin(); - xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); - 
kernel_neon_end(); + scoped_ksimd() + xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); } static struct xor_block_template xor_block_arm64 = { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 6d956ed23bd216..c154f72634e028 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1502,21 +1502,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task) * Elide the load if this CPU holds the most recent kernel mode * FPSIMD context of the current task. */ - if (last->st == &task->thread.kernel_fpsimd_state && + if (last->st == task->thread.kernel_fpsimd_state && task->thread.kernel_fpsimd_cpu == smp_processor_id()) return; - fpsimd_load_state(&task->thread.kernel_fpsimd_state); + fpsimd_load_state(task->thread.kernel_fpsimd_state); } static void fpsimd_save_kernel_state(struct task_struct *task) { struct cpu_fp_state cpu_fp_state = { - .st = &task->thread.kernel_fpsimd_state, + .st = task->thread.kernel_fpsimd_state, .to_save = FP_STATE_FPSIMD, }; - fpsimd_save_state(&task->thread.kernel_fpsimd_state); + BUG_ON(!cpu_fp_state.st); + + fpsimd_save_state(task->thread.kernel_fpsimd_state); fpsimd_bind_state_to_cpu(&cpu_fp_state); task->thread.kernel_fpsimd_cpu = smp_processor_id(); @@ -1787,6 +1789,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) void fpsimd_flush_task_state(struct task_struct *t) { t->thread.fpsimd_cpu = NR_CPUS; + t->thread.kernel_fpsimd_state = NULL; /* * If we don't support fpsimd, bail out after we have * reset the fpsimd_cpu for this task and clear the @@ -1846,12 +1849,19 @@ void fpsimd_save_and_flush_cpu_state(void) * * The caller may freely use the FPSIMD registers until kernel_neon_end() is * called. + * + * Unless called from non-preemptible task context, @state must point to a + * caller provided buffer that will be used to preserve the task's kernel mode + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel + * mode FPSIMD occurring in softirq context. May be %NULL otherwise. */ -void kernel_neon_begin(void) +void kernel_neon_begin(struct user_fpsimd_state *state) { if (WARN_ON(!system_supports_fpsimd())) return; + WARN_ON((preemptible() || in_serving_softirq()) && !state); + BUG_ON(!may_use_simd()); get_cpu_fpsimd_context(); @@ -1859,7 +1869,7 @@ void kernel_neon_begin(void) /* Save unsaved fpsimd state, if any: */ if (test_thread_flag(TIF_KERNEL_FPSTATE)) { BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()); - fpsimd_save_kernel_state(current); + fpsimd_save_state(state); } else { fpsimd_save_user_state(); @@ -1880,8 +1890,16 @@ void kernel_neon_begin(void) * mode in task context. So in this case, setting the flag here * is always appropriate. */ - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) { + /* + * Record the caller provided buffer as the kernel mode + * FP/SIMD buffer for this task, so that the state can + * be preserved and restored on a context switch. + */ + WARN_ON(current->thread.kernel_fpsimd_state != NULL); + current->thread.kernel_fpsimd_state = state; set_thread_flag(TIF_KERNEL_FPSTATE); + } } /* Invalidate any task state remaining in the fpsimd regs: */ @@ -1899,22 +1917,30 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin); * * The caller must not use the FPSIMD registers after this function is called, * unless kernel_neon_begin() is called again in the meantime. 
+ * + * The value of @state must match the value passed to the preceding call to + * kernel_neon_begin(). */ -void kernel_neon_end(void) +void kernel_neon_end(struct user_fpsimd_state *state) { if (!system_supports_fpsimd()) return; + if (!test_thread_flag(TIF_KERNEL_FPSTATE)) + return; + /* * If we are returning from a nested use of kernel mode FPSIMD, restore * the task context kernel mode FPSIMD state. This can only happen when * running in softirq context on non-PREEMPT_RT. */ - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() && - test_thread_flag(TIF_KERNEL_FPSTATE)) - fpsimd_load_kernel_state(current); - else + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) { + fpsimd_load_state(state); + } else { clear_thread_flag(TIF_KERNEL_FPSTATE); + WARN_ON(current->thread.kernel_fpsimd_state != state); + current->thread.kernel_fpsimd_state = NULL; + } } EXPORT_SYMBOL_GPL(kernel_neon_end); @@ -1948,7 +1974,7 @@ void __efi_fpsimd_begin(void) return; if (may_use_simd()) { - kernel_neon_begin(); + kernel_neon_begin(&efi_fpsimd_state); } else { WARN_ON(preemptible()); @@ -1999,7 +2025,7 @@ void __efi_fpsimd_end(void) return; if (!efi_fpsimd_state_used) { - kernel_neon_end(); + kernel_neon_end(&efi_fpsimd_state); } else { if (system_supports_sve() && efi_sve_state_used) { bool ffr = true; diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c index 9ee50549e8231e..b41807e63bd3e7 100644 --- a/crypto/aegis128-neon.c +++ b/crypto/aegis128-neon.c @@ -4,7 +4,7 @@ */ #include <asm/cpufeature.h> -#include <asm/neon.h> +#include <asm/simd.h> #include "aegis.h" #include "aegis-neon.h" @@ -24,32 +24,28 @@ void crypto_aegis128_init_simd(struct aegis_state *state, const union aegis_block *key, const u8 *iv) { - kernel_neon_begin(); - crypto_aegis128_init_neon(state, key, iv); - kernel_neon_end(); + scoped_ksimd() + crypto_aegis128_init_neon(state, key, iv); } void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg) { - kernel_neon_begin(); - crypto_aegis128_update_neon(state, msg); - kernel_neon_end(); + scoped_ksimd() + crypto_aegis128_update_neon(state, msg); } void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size) { - kernel_neon_begin(); - crypto_aegis128_encrypt_chunk_neon(state, dst, src, size); - kernel_neon_end(); + scoped_ksimd() + crypto_aegis128_encrypt_chunk_neon(state, dst, src, size); } void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst, const u8 *src, unsigned int size) { - kernel_neon_begin(); - crypto_aegis128_decrypt_chunk_neon(state, dst, src, size); - kernel_neon_end(); + scoped_ksimd() + crypto_aegis128_decrypt_chunk_neon(state, dst, src, size); } int crypto_aegis128_final_simd(struct aegis_state *state, @@ -58,12 +54,7 @@ int crypto_aegis128_final_simd(struct aegis_state *state, unsigned int cryptlen, unsigned int authsize) { - int ret; - - kernel_neon_begin(); - ret = crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen, - authsize); - kernel_neon_end(); - - return ret; + scoped_ksimd() + return crypto_aegis128_final_neon(state, tag_xor, assoclen, + cryptlen, authsize); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c index 05e5fd777d4f41..815a7c97d6b09e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -9,6 +9,7 @@ #if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) #include <asm/neon.h> +#include <asm/simd.h> #endif 
#define TEST_WC_NUM_WQES 255 @@ -264,15 +265,15 @@ static void mlx5_iowrite64_copy(struct mlx5_wc_sq *sq, __be32 mmio_wqe[16], { #if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) if (cpu_has_neon()) { - kernel_neon_begin(); - asm volatile - (".arch_extension simd\n\t" - "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t" - "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]" - : - : "r"(mmio_wqe), "r"(sq->bfreg.map + offset) - : "memory", "v0", "v1", "v2", "v3"); - kernel_neon_end(); + scoped_ksimd() { + asm volatile( + ".arch_extension simd\n\t" + "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t" + "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]" + : + : "r"(mmio_wqe), "r"(sq->bfreg.map + offset) + : "memory", "v0", "v1", "v2", "v3"); + } return; } #endif diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h index 63441de5e3f161..afc0ebf97f19c9 100644 --- a/lib/crc/arm/crc-t10dif.h +++ b/lib/crc/arm/crc-t10dif.h @@ -5,7 +5,6 @@ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <asm/simd.h> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); @@ -19,22 +18,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len, static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { if (static_branch_likely(&have_pmull)) { - if (likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc_t10dif_pmull64(crc, data, length); - kernel_neon_end(); - return crc; - } + scoped_ksimd() + return crc_t10dif_pmull64(crc, data, length); } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_neon) && - likely(may_use_simd())) { + static_branch_likely(&have_neon)) { u8 buf[16] __aligned(16); - kernel_neon_begin(); - crc_t10dif_pmull8(crc, data, length, buf); - kernel_neon_end(); + scoped_ksimd() + crc_t10dif_pmull8(crc, data, length, buf); return crc_t10dif_generic(0, buf, sizeof(buf)); } diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h index 7b76f52f6907db..f33de6b22cd49c 100644 --- a/lib/crc/arm/crc32.h +++ b/lib/crc/arm/crc32.h @@ -8,7 +8,6 @@ #include <linux/cpufeature.h> #include <asm/hwcap.h> -#include <asm/neon.h> #include <asm/simd.h> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); @@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) len -= n; } n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32_pmull_le(p, n, crc); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_pmull_le(p, n, crc); p += n; len -= n; } @@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) len -= n; } n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32c_pmull_le(p, n, crc); - kernel_neon_end(); + scoped_ksimd() + crc = crc32c_pmull_le(p, n, crc); p += n; len -= n; } diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h index f88db297180516..b8338139ed7738 100644 --- a/lib/crc/arm64/crc-t10dif.h +++ b/lib/crc/arm64/crc-t10dif.h @@ -7,7 +7,6 @@ #include <linux/cpufeature.h> -#include <asm/neon.h> #include <asm/simd.h> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); @@ -21,22 +20,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { if 
(static_branch_likely(&have_pmull)) { - if (likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc_t10dif_pmull_p64(crc, data, length); - kernel_neon_end(); - return crc; - } + scoped_ksimd() + return crc_t10dif_pmull_p64(crc, data, length); } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_asimd) && - likely(may_use_simd())) { + static_branch_likely(&have_asimd)) { u8 buf[16]; - kernel_neon_begin(); - crc_t10dif_pmull_p8(crc, data, length, buf); - kernel_neon_end(); + scoped_ksimd() + crc_t10dif_pmull_p8(crc, data, length, buf); return crc_t10dif_generic(0, buf, sizeof(buf)); } diff --git a/lib/crc/arm64/crc32.h b/lib/crc/arm64/crc32.h index 31e649cd40a2fd..1939a5dee47729 100644 --- a/lib/crc/arm64/crc32.h +++ b/lib/crc/arm64/crc32.h @@ -2,7 +2,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <asm/neon.h> #include <asm/simd.h> // The minimum input length to consider the 4-way interleaved code path @@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32_le_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_le_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; @@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32c_le_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32c_le_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; @@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32_be_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_be_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; diff --git a/lib/crypto/arm/blake2b.h b/lib/crypto/arm/blake2b.h index 1b9154d119db43..5c76498521e67e 100644 --- a/lib/crypto/arm/blake2b.h +++ b/lib/crypto/arm/blake2b.h @@ -24,9 +24,8 @@ static void blake2b_compress(struct blake2b_ctx *ctx, const size_t blocks = min_t(size_t, nblocks, SZ_4K / BLAKE2B_BLOCK_SIZE); - kernel_neon_begin(); - blake2b_compress_neon(ctx, data, blocks, inc); - kernel_neon_end(); + scoped_ksimd() + blake2b_compress_neon(ctx, data, blocks, inc); data += blocks * BLAKE2B_BLOCK_SIZE; nblocks -= blocks; diff --git a/lib/crypto/arm/chacha.h b/lib/crypto/arm/chacha.h index 0cae30f8ee5d15..836e49088e9837 100644 --- a/lib/crypto/arm/chacha.h +++ b/lib/crypto/arm/chacha.h @@ -12,7 +12,6 @@ #include <asm/cputype.h> #include <asm/hwcap.h> -#include <asm/neon.h> #include <asm/simd.h> asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, @@ -68,9 +67,8 @@ static void hchacha_block_arch(const struct chacha_state *state, if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { hchacha_block_arm(state, out, nrounds); } else { - kernel_neon_begin(); - hchacha_block_neon(state, out, nrounds); - kernel_neon_end(); + scoped_ksimd() + hchacha_block_neon(state, out, nrounds); } } @@ -87,9 +85,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, do { unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - kernel_neon_begin(); - chacha_doneon(state, dst, src, todo, nrounds); - kernel_neon_end(); + scoped_ksimd() + chacha_doneon(state, dst, src, todo, nrounds); bytes -= 
todo; src += todo; diff --git a/lib/crypto/arm/curve25519.h b/lib/crypto/arm/curve25519.h index f6d66494eb8f88..b1a566885e95bd 100644 --- a/lib/crypto/arm/curve25519.h +++ b/lib/crypto/arm/curve25519.h @@ -25,9 +25,8 @@ static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], const u8 point[CURVE25519_KEY_SIZE]) { if (static_branch_likely(&have_neon) && crypto_simd_usable()) { - kernel_neon_begin(); - curve25519_neon(out, scalar, point); - kernel_neon_end(); + scoped_ksimd() + curve25519_neon(out, scalar, point); } else { curve25519_generic(out, scalar, point); } diff --git a/lib/crypto/arm/poly1305.h b/lib/crypto/arm/poly1305.h index 0021cf368307c7..0fe903d8de5563 100644 --- a/lib/crypto/arm/poly1305.h +++ b/lib/crypto/arm/poly1305.h @@ -6,7 +6,6 @@ */ #include <asm/hwcap.h> -#include <asm/neon.h> #include <asm/simd.h> #include <linux/cpufeature.h> #include <linux/jump_label.h> @@ -32,9 +31,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, do { unsigned int todo = min_t(unsigned int, len, SZ_4K); - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); + scoped_ksimd() + poly1305_blocks_neon(state, src, todo, padbit); len -= todo; src += todo; diff --git a/lib/crypto/arm/sha1.h b/lib/crypto/arm/sha1.h index 29f8bcad0447c3..3e2d8c7cab9f10 100644 --- a/lib/crypto/arm/sha1.h +++ b/lib/crypto/arm/sha1.h @@ -4,7 +4,6 @@ * * Copyright 2025 Google LLC */ -#include <asm/neon.h> #include <asm/simd.h> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); @@ -22,12 +21,12 @@ static void sha1_blocks(struct sha1_block_state *state, { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && static_branch_likely(&have_neon) && likely(may_use_simd())) { - kernel_neon_begin(); - if (static_branch_likely(&have_ce)) - sha1_ce_transform(state, data, nblocks); - else - sha1_transform_neon(state, data, nblocks); - kernel_neon_end(); + scoped_ksimd() { + if (static_branch_likely(&have_ce)) + sha1_ce_transform(state, data, nblocks); + else + sha1_transform_neon(state, data, nblocks); + } } else { sha1_block_data_order(state, data, nblocks); } diff --git a/lib/crypto/arm/sha256.h b/lib/crypto/arm/sha256.h index 7556457b3094b4..ae7e52dd6e3b62 100644 --- a/lib/crypto/arm/sha256.h +++ b/lib/crypto/arm/sha256.h @@ -22,12 +22,12 @@ static void sha256_blocks(struct sha256_block_state *state, { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && static_branch_likely(&have_neon) && likely(may_use_simd())) { - kernel_neon_begin(); - if (static_branch_likely(&have_ce)) - sha256_ce_transform(state, data, nblocks); - else - sha256_block_data_order_neon(state, data, nblocks); - kernel_neon_end(); + scoped_ksimd() { + if (static_branch_likely(&have_ce)) + sha256_ce_transform(state, data, nblocks); + else + sha256_block_data_order_neon(state, data, nblocks); + } } else { sha256_block_data_order(state, data, nblocks); } diff --git a/lib/crypto/arm/sha512.h b/lib/crypto/arm/sha512.h index d1b485dd275db8..ed9bd81d6d78d3 100644 --- a/lib/crypto/arm/sha512.h +++ b/lib/crypto/arm/sha512.h @@ -19,9 +19,8 @@ static void sha512_blocks(struct sha512_block_state *state, { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && static_branch_likely(&have_neon) && likely(may_use_simd())) { - kernel_neon_begin(); - sha512_block_data_order_neon(state, data, nblocks); - kernel_neon_end(); + scoped_ksimd() + sha512_block_data_order_neon(state, data, nblocks); } else { sha512_block_data_order(state, data, nblocks); } diff --git a/lib/crypto/arm64/chacha.h b/lib/crypto/arm64/chacha.h index 
ba6c22d4608633..ca8c6a8b057829 100644 --- a/lib/crypto/arm64/chacha.h +++ b/lib/crypto/arm64/chacha.h @@ -23,7 +23,6 @@ #include <linux/kernel.h> #include <asm/hwcap.h> -#include <asm/neon.h> #include <asm/simd.h> asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, @@ -65,9 +64,8 @@ static void hchacha_block_arch(const struct chacha_state *state, if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { hchacha_block_generic(state, out, nrounds); } else { - kernel_neon_begin(); - hchacha_block_neon(state, out, nrounds); - kernel_neon_end(); + scoped_ksimd() + hchacha_block_neon(state, out, nrounds); } } @@ -81,9 +79,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, do { unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - kernel_neon_begin(); - chacha_doneon(state, dst, src, todo, nrounds); - kernel_neon_end(); + scoped_ksimd() + chacha_doneon(state, dst, src, todo, nrounds); bytes -= todo; src += todo; diff --git a/lib/crypto/arm64/poly1305.h b/lib/crypto/arm64/poly1305.h index aed5921ccd9a12..b77669767cd6c3 100644 --- a/lib/crypto/arm64/poly1305.h +++ b/lib/crypto/arm64/poly1305.h @@ -6,7 +6,6 @@ */ #include <asm/hwcap.h> -#include <asm/neon.h> #include <asm/simd.h> #include <linux/cpufeature.h> #include <linux/jump_label.h> @@ -31,9 +30,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, do { unsigned int todo = min_t(unsigned int, len, SZ_4K); - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); + scoped_ksimd() + poly1305_blocks_neon(state, src, todo, padbit); len -= todo; src += todo; diff --git a/lib/crypto/arm64/polyval.h b/lib/crypto/arm64/polyval.h index 2486e80750d079..a39763395e9bf2 100644 --- a/lib/crypto/arm64/polyval.h +++ b/lib/crypto/arm64/polyval.h @@ -4,7 +4,6 @@ * * Copyright 2025 Google LLC */ -#include <asm/neon.h> #include <asm/simd.h> #include <linux/cpufeature.h> @@ -24,13 +23,14 @@ static void polyval_preparekey_arch(struct polyval_key *key, static_assert(ARRAY_SIZE(key->h_powers) == NUM_H_POWERS); memcpy(&key->h_powers[NUM_H_POWERS - 1], raw_key, POLYVAL_BLOCK_SIZE); if (static_branch_likely(&have_pmull) && may_use_simd()) { - kernel_neon_begin(); - for (int i = NUM_H_POWERS - 2; i >= 0; i--) { - key->h_powers[i] = key->h_powers[i + 1]; - polyval_mul_pmull(&key->h_powers[i], - &key->h_powers[NUM_H_POWERS - 1]); + scoped_ksimd() { + for (int i = NUM_H_POWERS - 2; i >= 0; i--) { + key->h_powers[i] = key->h_powers[i + 1]; + polyval_mul_pmull( + &key->h_powers[i], + &key->h_powers[NUM_H_POWERS - 1]); + } } - kernel_neon_end(); } else { for (int i = NUM_H_POWERS - 2; i >= 0; i--) { key->h_powers[i] = key->h_powers[i + 1]; @@ -44,9 +44,8 @@ static void polyval_mul_arch(struct polyval_elem *acc, const struct polyval_key *key) { if (static_branch_likely(&have_pmull) && may_use_simd()) { - kernel_neon_begin(); - polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]); - kernel_neon_end(); + scoped_ksimd() + polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]); } else { polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]); } @@ -62,9 +61,8 @@ static void polyval_blocks_arch(struct polyval_elem *acc, size_t n = min_t(size_t, nblocks, 4096 / POLYVAL_BLOCK_SIZE); - kernel_neon_begin(); - polyval_blocks_pmull(acc, key, data, n); - kernel_neon_end(); + scoped_ksimd() + polyval_blocks_pmull(acc, key, data, n); data += n * POLYVAL_BLOCK_SIZE; nblocks -= n; } while (nblocks); diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h index 
aaef4ebfc5e34a..bc7071f1be0962 100644 --- a/lib/crypto/arm64/sha1.h +++ b/lib/crypto/arm64/sha1.h @@ -4,7 +4,6 @@ * * Copyright 2025 Google LLC */ -#include <asm/neon.h> #include <asm/simd.h> #include <linux/cpufeature.h> @@ -20,9 +19,9 @@ static void sha1_blocks(struct sha1_block_state *state, do { size_t rem; - kernel_neon_begin(); - rem = __sha1_ce_transform(state, data, nblocks); - kernel_neon_end(); + scoped_ksimd() + rem = __sha1_ce_transform(state, data, nblocks); + data += (nblocks - rem) * SHA1_BLOCK_SIZE; nblocks = rem; } while (nblocks); diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h index 80d06df27d3a39..568dff0f276af4 100644 --- a/lib/crypto/arm64/sha256.h +++ b/lib/crypto/arm64/sha256.h @@ -4,7 +4,6 @@ * * Copyright 2025 Google LLC */ -#include <asm/neon.h> #include <asm/simd.h> #include <linux/cpufeature.h> @@ -27,17 +26,16 @@ static void sha256_blocks(struct sha256_block_state *state, do { size_t rem; - kernel_neon_begin(); - rem = __sha256_ce_transform(state, - data, nblocks); - kernel_neon_end(); + scoped_ksimd() + rem = __sha256_ce_transform(state, data, + nblocks); + data += (nblocks - rem) * SHA256_BLOCK_SIZE; nblocks = rem; } while (nblocks); } else { - kernel_neon_begin(); - sha256_block_neon(state, data, nblocks); - kernel_neon_end(); + scoped_ksimd() + sha256_block_neon(state, data, nblocks); } } else { sha256_block_data_order(state, data, nblocks); @@ -66,9 +64,8 @@ static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx, if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE && len <= 65536 && likely(may_use_simd())) { - kernel_neon_begin(); - sha256_ce_finup2x(ctx, data1, data2, len, out1, out2); - kernel_neon_end(); + scoped_ksimd() + sha256_ce_finup2x(ctx, data1, data2, len, out1, out2); kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE); kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE); return true; diff --git a/lib/crypto/arm64/sha3.h b/lib/crypto/arm64/sha3.h index 6dd5183056da40..b602f1b3b2822e 100644 --- a/lib/crypto/arm64/sha3.h +++ b/lib/crypto/arm64/sha3.h @@ -7,7 +7,6 @@ * published by the Free Software Foundation. 
*/ -#include <asm/neon.h> #include <asm/simd.h> #include <linux/cpufeature.h> @@ -23,10 +22,9 @@ static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data, do { size_t rem; - kernel_neon_begin(); - rem = sha3_ce_transform(state, data, nblocks, - block_size); - kernel_neon_end(); + scoped_ksimd() + rem = sha3_ce_transform(state, data, nblocks, + block_size); data += (nblocks - rem) * block_size; nblocks = rem; } while (nblocks); @@ -46,9 +44,8 @@ static void sha3_keccakf(struct sha3_state *state) */ static const u8 zeroes[SHA3_512_BLOCK_SIZE]; - kernel_neon_begin(); - sha3_ce_transform(state, zeroes, 1, sizeof(zeroes)); - kernel_neon_end(); + scoped_ksimd() + sha3_ce_transform(state, zeroes, 1, sizeof(zeroes)); } else { sha3_keccakf_generic(state); } diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h index ddb0d256f73aa3..7eb7ef04d2687a 100644 --- a/lib/crypto/arm64/sha512.h +++ b/lib/crypto/arm64/sha512.h @@ -4,7 +4,7 @@ * * Copyright 2025 Google LLC */ -#include <asm/neon.h> + #include <asm/simd.h> #include <linux/cpufeature.h> @@ -24,9 +24,9 @@ static void sha512_blocks(struct sha512_block_state *state, do { size_t rem; - kernel_neon_begin(); - rem = __sha512_ce_transform(state, data, nblocks); - kernel_neon_end(); + scoped_ksimd() + rem = __sha512_ce_transform(state, data, nblocks); + data += (nblocks - rem) * SHA512_BLOCK_SIZE; nblocks = rem; } while (nblocks); diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 0a2e76035ea960..6d9474ce6da917 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c @@ -8,10 +8,9 @@ #include <linux/raid/pq.h> #ifdef __KERNEL__ -#include <asm/neon.h> +#include <asm/simd.h> #else -#define kernel_neon_begin() -#define kernel_neon_end() +#define scoped_ksimd() #define cpu_has_neon() (1) #endif @@ -32,10 +31,9 @@ { \ void raid6_neon ## _n ## _gen_syndrome_real(int, \ unsigned long, void**); \ - kernel_neon_begin(); \ - raid6_neon ## _n ## _gen_syndrome_real(disks, \ + scoped_ksimd() \ + raid6_neon ## _n ## _gen_syndrome_real(disks, \ (unsigned long)bytes, ptrs); \ - kernel_neon_end(); \ } \ static void raid6_neon ## _n ## _xor_syndrome(int disks, \ int start, int stop, \ @@ -43,10 +41,9 @@ { \ void raid6_neon ## _n ## _xor_syndrome_real(int, \ int, int, unsigned long, void**); \ - kernel_neon_begin(); \ - raid6_neon ## _n ## _xor_syndrome_real(disks, \ - start, stop, (unsigned long)bytes, ptrs); \ - kernel_neon_end(); \ + scoped_ksimd() \ + raid6_neon ## _n ## _xor_syndrome_real(disks, \ + start, stop, (unsigned long)bytes, ptrs);\ } \ struct raid6_calls const raid6_neonx ## _n = { \ raid6_neon ## _n ## _gen_syndrome, \ diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c index 70e1404c151292..9d99aeabd31a90 100644 --- a/lib/raid6/recov_neon.c +++ b/lib/raid6/recov_neon.c @@ -7,11 +7,10 @@ #include <linux/raid/pq.h> #ifdef __KERNEL__ -#include <asm/neon.h> +#include <asm/simd.h> #include "neon.h" #else -#define kernel_neon_begin() -#define kernel_neon_end() +#define scoped_ksimd() #define cpu_has_neon() (1) #endif @@ -55,9 +54,8 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; - kernel_neon_begin(); - __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); - kernel_neon_end(); + scoped_ksimd() + __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); } static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, @@ -86,9 +84,8 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, /* 
Now, pick the proper data tables */
 	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
 
-	kernel_neon_begin();
-	__raid6_datap_recov_neon(bytes, p, q, dq, qmul);
-	kernel_neon_end();
+	scoped_ksimd()
+		__raid6_datap_recov_neon(bytes, p, q, dq, qmul);
 }
 
 const struct raid6_recov_calls raid6_recov_neon = {
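
The conversions above all follow one pattern, so a minimal sketch of a new caller is shown below. It is illustrative only: my_neon_kernel() and my_transform() are hypothetical names, the scalar fallback is elided, and only APIs that appear in this diff (may_use_simd(), scoped_ksimd(), and the reworked kernel_neon_begin()/kernel_neon_end()) are assumed.

#include <linux/linkage.h>
#include <linux/types.h>
#include <asm/simd.h>	/* may_use_simd(), scoped_ksimd() */

/* Hypothetical NEON routine, standing in for the asm helpers converted above. */
asmlinkage void my_neon_kernel(u8 *dst, const u8 *src, size_t len);

static void my_transform(u8 *dst, const u8 *src, size_t len)
{
	if (!may_use_simd()) {
		/* scalar fallback would go here */
		return;
	}

	/*
	 * The guard pairs kernel_neon_begin() with kernel_neon_end(); on
	 * arm64 the buffer used to preserve kernel-mode FPSIMD state across
	 * preemption now comes from the caller's stack rather than from
	 * task_struct.
	 */
	scoped_ksimd()
		my_neon_kernel(dst, src, len);
}

Callers that still open-code the begin/end pair on arm64 must now supply the struct user_fpsimd_state buffer themselves, as the EFI hunks above do with &efi_fpsimd_state; scoped_ksimd() hides that bookkeeping.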
