Diffstat (limited to 'arch')
43 files changed, 1 insertion, 5800 deletions
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 6915c766923a2f..84070e9698e8cc 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -364,7 +364,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_CRYPTO_AES_ARM_BS=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRYPTO_DEV_EXYNOS_RNG=y
 CONFIG_CRYPTO_DEV_S5P=y
 CONFIG_DMA_CMA=y
diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index a3be0b2ede09c7..a2995eb390c603 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -101,7 +101,6 @@ CONFIG_CRYPTO_GHASH_ARM_CE=m
 CONFIG_CRYPTO_AES_ARM=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_AES_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=64
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index f2822eeefb9577..cc0e0e4a879cb1 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1291,7 +1291,6 @@ CONFIG_CRYPTO_GHASH_ARM_CE=m
 CONFIG_CRYPTO_AES_ARM=m
 CONFIG_CRYPTO_AES_ARM_BS=m
 CONFIG_CRYPTO_AES_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRYPTO_DEV_SUN4I_SS=m
 CONFIG_CRYPTO_DEV_FSL_CAAM=m
 CONFIG_CRYPTO_DEV_EXYNOS_RNG=m
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 939913ed9a73bd..1d5f752417398c 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -708,7 +708,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=y
 CONFIG_CRYPTO_GHASH_ARM_CE=m
 CONFIG_CRYPTO_AES_ARM=m
 CONFIG_CRYPTO_AES_ARM_BS=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRYPTO_DEV_OMAP=m
 CONFIG_CRYPTO_DEV_OMAP_SHAM=m
 CONFIG_CRYPTO_DEV_OMAP_AES=m
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 1e5f3cdf691c4f..c436eec22d86ca 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -2,19 +2,6 @@
 
 menu "Accelerated Cryptographic Algorithms for CPU (arm)"
 
-config CRYPTO_CURVE25519_NEON
-	tristate
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_KPP
-	select CRYPTO_LIB_CURVE25519_GENERIC
-	select CRYPTO_ARCH_HAVE_LIB_CURVE25519
-	default CRYPTO_LIB_CURVE25519_INTERNAL
-	help
-	  Curve25519 algorithm
-
-	  Architecture: arm with
-	  - NEON (Advanced SIMD) extensions
-
 config CRYPTO_GHASH_ARM_CE
 	tristate "Hash functions: GHASH (PMULL/NEON/ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 4f23999ae17dfe..6346a73effc06a 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -7,7 +7,6 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
-obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
 obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
 
@@ -18,4 +17,3 @@ blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
 aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
-curve25519-neon-y := curve25519-core.o curve25519-glue.o
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
deleted file mode 100644
index b697fa5d059a23..00000000000000
--- a/arch/arm/crypto/curve25519-core.S
+++ /dev/null
@@
-1,2062 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * - * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This - * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been - * manually reworked for use in kernel space. - */ - -#include <linux/linkage.h> - -.text -.arch armv7-a -.fpu neon -.align 4 - -ENTRY(curve25519_neon) - push {r4-r11, lr} - mov ip, sp - sub r3, sp, #704 - and r3, r3, #0xfffffff0 - mov sp, r3 - movw r4, #0 - movw r5, #254 - vmov.i32 q0, #1 - vshr.u64 q1, q0, #7 - vshr.u64 q0, q0, #8 - vmov.i32 d4, #19 - vmov.i32 d5, #38 - add r6, sp, #480 - vst1.8 {d2-d3}, [r6, : 128]! - vst1.8 {d0-d1}, [r6, : 128]! - vst1.8 {d4-d5}, [r6, : 128] - add r6, r3, #0 - vmov.i32 q2, #0 - vst1.8 {d4-d5}, [r6, : 128]! - vst1.8 {d4-d5}, [r6, : 128]! - vst1.8 d4, [r6, : 64] - add r6, r3, #0 - movw r7, #960 - sub r7, r7, #2 - neg r7, r7 - sub r7, r7, r7, LSL #7 - str r7, [r6] - add r6, sp, #672 - vld1.8 {d4-d5}, [r1]! - vld1.8 {d6-d7}, [r1] - vst1.8 {d4-d5}, [r6, : 128]! - vst1.8 {d6-d7}, [r6, : 128] - sub r1, r6, #16 - ldrb r6, [r1] - and r6, r6, #248 - strb r6, [r1] - ldrb r6, [r1, #31] - and r6, r6, #127 - orr r6, r6, #64 - strb r6, [r1, #31] - vmov.i64 q2, #0xffffffff - vshr.u64 q3, q2, #7 - vshr.u64 q2, q2, #6 - vld1.8 {d8}, [r2] - vld1.8 {d10}, [r2] - add r2, r2, #6 - vld1.8 {d12}, [r2] - vld1.8 {d14}, [r2] - add r2, r2, #6 - vld1.8 {d16}, [r2] - add r2, r2, #4 - vld1.8 {d18}, [r2] - vld1.8 {d20}, [r2] - add r2, r2, #6 - vld1.8 {d22}, [r2] - add r2, r2, #2 - vld1.8 {d24}, [r2] - vld1.8 {d26}, [r2] - vshr.u64 q5, q5, #26 - vshr.u64 q6, q6, #3 - vshr.u64 q7, q7, #29 - vshr.u64 q8, q8, #6 - vshr.u64 q10, q10, #25 - vshr.u64 q11, q11, #3 - vshr.u64 q12, q12, #12 - vshr.u64 q13, q13, #38 - vand q4, q4, q2 - vand q6, q6, q2 - vand q8, q8, q2 - vand q10, q10, q2 - vand q2, q12, q2 - vand q5, q5, q3 - vand q7, q7, q3 - vand q9, q9, q3 - vand q11, q11, q3 - vand q3, q13, q3 - add r2, r3, #48 - vadd.i64 q12, q4, q1 - vadd.i64 q13, q10, q1 - vshr.s64 q12, q12, #26 - vshr.s64 q13, q13, #26 - vadd.i64 q5, q5, q12 - vshl.i64 q12, q12, #26 - vadd.i64 q14, q5, q0 - vadd.i64 q11, q11, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q15, q11, q0 - vsub.i64 q4, q4, q12 - vshr.s64 q12, q14, #25 - vsub.i64 q10, q10, q13 - vshr.s64 q13, q15, #25 - vadd.i64 q6, q6, q12 - vshl.i64 q12, q12, #25 - vadd.i64 q14, q6, q1 - vadd.i64 q2, q2, q13 - vsub.i64 q5, q5, q12 - vshr.s64 q12, q14, #26 - vshl.i64 q13, q13, #25 - vadd.i64 q14, q2, q1 - vadd.i64 q7, q7, q12 - vshl.i64 q12, q12, #26 - vadd.i64 q15, q7, q0 - vsub.i64 q11, q11, q13 - vshr.s64 q13, q14, #26 - vsub.i64 q6, q6, q12 - vshr.s64 q12, q15, #25 - vadd.i64 q3, q3, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q14, q3, q0 - vadd.i64 q8, q8, q12 - vshl.i64 q12, q12, #25 - vadd.i64 q15, q8, q1 - add r2, r2, #8 - vsub.i64 q2, q2, q13 - vshr.s64 q13, q14, #25 - vsub.i64 q7, q7, q12 - vshr.s64 q12, q15, #26 - vadd.i64 q14, q13, q13 - vadd.i64 q9, q9, q12 - vtrn.32 d12, d14 - vshl.i64 q12, q12, #26 - vtrn.32 d13, d15 - vadd.i64 q0, q9, q0 - vadd.i64 q4, q4, q14 - vst1.8 d12, [r2, : 64]! 
- vshl.i64 q6, q13, #4 - vsub.i64 q7, q8, q12 - vshr.s64 q0, q0, #25 - vadd.i64 q4, q4, q6 - vadd.i64 q6, q10, q0 - vshl.i64 q0, q0, #25 - vadd.i64 q8, q6, q1 - vadd.i64 q4, q4, q13 - vshl.i64 q10, q13, #25 - vadd.i64 q1, q4, q1 - vsub.i64 q0, q9, q0 - vshr.s64 q8, q8, #26 - vsub.i64 q3, q3, q10 - vtrn.32 d14, d0 - vshr.s64 q1, q1, #26 - vtrn.32 d15, d1 - vadd.i64 q0, q11, q8 - vst1.8 d14, [r2, : 64] - vshl.i64 q7, q8, #26 - vadd.i64 q5, q5, q1 - vtrn.32 d4, d6 - vshl.i64 q1, q1, #26 - vtrn.32 d5, d7 - vsub.i64 q3, q6, q7 - add r2, r2, #16 - vsub.i64 q1, q4, q1 - vst1.8 d4, [r2, : 64] - vtrn.32 d6, d0 - vtrn.32 d7, d1 - sub r2, r2, #8 - vtrn.32 d2, d10 - vtrn.32 d3, d11 - vst1.8 d6, [r2, : 64] - sub r2, r2, #24 - vst1.8 d2, [r2, : 64] - add r2, r3, #96 - vmov.i32 q0, #0 - vmov.i64 d2, #0xff - vmov.i64 d3, #0 - vshr.u32 q1, q1, #7 - vst1.8 {d2-d3}, [r2, : 128]! - vst1.8 {d0-d1}, [r2, : 128]! - vst1.8 d0, [r2, : 64] - add r2, r3, #144 - vmov.i32 q0, #0 - vst1.8 {d0-d1}, [r2, : 128]! - vst1.8 {d0-d1}, [r2, : 128]! - vst1.8 d0, [r2, : 64] - add r2, r3, #240 - vmov.i32 q0, #0 - vmov.i64 d2, #0xff - vmov.i64 d3, #0 - vshr.u32 q1, q1, #7 - vst1.8 {d2-d3}, [r2, : 128]! - vst1.8 {d0-d1}, [r2, : 128]! - vst1.8 d0, [r2, : 64] - add r2, r3, #48 - add r6, r3, #192 - vld1.8 {d0-d1}, [r2, : 128]! - vld1.8 {d2-d3}, [r2, : 128]! - vld1.8 {d4}, [r2, : 64] - vst1.8 {d0-d1}, [r6, : 128]! - vst1.8 {d2-d3}, [r6, : 128]! - vst1.8 d4, [r6, : 64] -.Lmainloop: - mov r2, r5, LSR #3 - and r6, r5, #7 - ldrb r2, [r1, r2] - mov r2, r2, LSR r6 - and r2, r2, #1 - str r5, [sp, #456] - eor r4, r4, r2 - str r2, [sp, #460] - neg r2, r4 - add r4, r3, #96 - add r5, r3, #192 - add r6, r3, #144 - vld1.8 {d8-d9}, [r4, : 128]! - add r7, r3, #240 - vld1.8 {d10-d11}, [r5, : 128]! - veor q6, q4, q5 - vld1.8 {d14-d15}, [r6, : 128]! - vdup.i32 q8, r2 - vld1.8 {d18-d19}, [r7, : 128]! - veor q10, q7, q9 - vld1.8 {d22-d23}, [r4, : 128]! - vand q6, q6, q8 - vld1.8 {d24-d25}, [r5, : 128]! - vand q10, q10, q8 - vld1.8 {d26-d27}, [r6, : 128]! - veor q4, q4, q6 - vld1.8 {d28-d29}, [r7, : 128]! - veor q5, q5, q6 - vld1.8 {d0}, [r4, : 64] - veor q6, q7, q10 - vld1.8 {d2}, [r5, : 64] - veor q7, q9, q10 - vld1.8 {d4}, [r6, : 64] - veor q9, q11, q12 - vld1.8 {d6}, [r7, : 64] - veor q10, q0, q1 - sub r2, r4, #32 - vand q9, q9, q8 - sub r4, r5, #32 - vand q10, q10, q8 - sub r5, r6, #32 - veor q11, q11, q9 - sub r6, r7, #32 - veor q0, q0, q10 - veor q9, q12, q9 - veor q1, q1, q10 - veor q10, q13, q14 - veor q12, q2, q3 - vand q10, q10, q8 - vand q8, q12, q8 - veor q12, q13, q10 - veor q2, q2, q8 - veor q10, q14, q10 - veor q3, q3, q8 - vadd.i32 q8, q4, q6 - vsub.i32 q4, q4, q6 - vst1.8 {d16-d17}, [r2, : 128]! - vadd.i32 q6, q11, q12 - vst1.8 {d8-d9}, [r5, : 128]! - vsub.i32 q4, q11, q12 - vst1.8 {d12-d13}, [r2, : 128]! - vadd.i32 q6, q0, q2 - vst1.8 {d8-d9}, [r5, : 128]! - vsub.i32 q0, q0, q2 - vst1.8 d12, [r2, : 64] - vadd.i32 q2, q5, q7 - vst1.8 d0, [r5, : 64] - vsub.i32 q0, q5, q7 - vst1.8 {d4-d5}, [r4, : 128]! - vadd.i32 q2, q9, q10 - vst1.8 {d0-d1}, [r6, : 128]! - vsub.i32 q0, q9, q10 - vst1.8 {d4-d5}, [r4, : 128]! - vadd.i32 q2, q1, q3 - vst1.8 {d0-d1}, [r6, : 128]! - vsub.i32 q0, q1, q3 - vst1.8 d4, [r4, : 64] - vst1.8 d0, [r6, : 64] - add r2, sp, #512 - add r4, r3, #96 - add r5, r3, #144 - vld1.8 {d0-d1}, [r2, : 128] - vld1.8 {d2-d3}, [r4, : 128]! - vld1.8 {d4-d5}, [r5, : 128]! - vzip.i32 q1, q2 - vld1.8 {d6-d7}, [r4, : 128]! - vld1.8 {d8-d9}, [r5, : 128]! 
- vshl.i32 q5, q1, #1 - vzip.i32 q3, q4 - vshl.i32 q6, q2, #1 - vld1.8 {d14}, [r4, : 64] - vshl.i32 q8, q3, #1 - vld1.8 {d15}, [r5, : 64] - vshl.i32 q9, q4, #1 - vmul.i32 d21, d7, d1 - vtrn.32 d14, d15 - vmul.i32 q11, q4, q0 - vmul.i32 q0, q7, q0 - vmull.s32 q12, d2, d2 - vmlal.s32 q12, d11, d1 - vmlal.s32 q12, d12, d0 - vmlal.s32 q12, d13, d23 - vmlal.s32 q12, d16, d22 - vmlal.s32 q12, d7, d21 - vmull.s32 q10, d2, d11 - vmlal.s32 q10, d4, d1 - vmlal.s32 q10, d13, d0 - vmlal.s32 q10, d6, d23 - vmlal.s32 q10, d17, d22 - vmull.s32 q13, d10, d4 - vmlal.s32 q13, d11, d3 - vmlal.s32 q13, d13, d1 - vmlal.s32 q13, d16, d0 - vmlal.s32 q13, d17, d23 - vmlal.s32 q13, d8, d22 - vmull.s32 q1, d10, d5 - vmlal.s32 q1, d11, d4 - vmlal.s32 q1, d6, d1 - vmlal.s32 q1, d17, d0 - vmlal.s32 q1, d8, d23 - vmull.s32 q14, d10, d6 - vmlal.s32 q14, d11, d13 - vmlal.s32 q14, d4, d4 - vmlal.s32 q14, d17, d1 - vmlal.s32 q14, d18, d0 - vmlal.s32 q14, d9, d23 - vmull.s32 q11, d10, d7 - vmlal.s32 q11, d11, d6 - vmlal.s32 q11, d12, d5 - vmlal.s32 q11, d8, d1 - vmlal.s32 q11, d19, d0 - vmull.s32 q15, d10, d8 - vmlal.s32 q15, d11, d17 - vmlal.s32 q15, d12, d6 - vmlal.s32 q15, d13, d5 - vmlal.s32 q15, d19, d1 - vmlal.s32 q15, d14, d0 - vmull.s32 q2, d10, d9 - vmlal.s32 q2, d11, d8 - vmlal.s32 q2, d12, d7 - vmlal.s32 q2, d13, d6 - vmlal.s32 q2, d14, d1 - vmull.s32 q0, d15, d1 - vmlal.s32 q0, d10, d14 - vmlal.s32 q0, d11, d19 - vmlal.s32 q0, d12, d8 - vmlal.s32 q0, d13, d17 - vmlal.s32 q0, d6, d6 - add r2, sp, #480 - vld1.8 {d18-d19}, [r2, : 128]! - vmull.s32 q3, d16, d7 - vmlal.s32 q3, d10, d15 - vmlal.s32 q3, d11, d14 - vmlal.s32 q3, d12, d9 - vmlal.s32 q3, d13, d8 - vld1.8 {d8-d9}, [r2, : 128] - vadd.i64 q5, q12, q9 - vadd.i64 q6, q15, q9 - vshr.s64 q5, q5, #26 - vshr.s64 q6, q6, #26 - vadd.i64 q7, q10, q5 - vshl.i64 q5, q5, #26 - vadd.i64 q8, q7, q4 - vadd.i64 q2, q2, q6 - vshl.i64 q6, q6, #26 - vadd.i64 q10, q2, q4 - vsub.i64 q5, q12, q5 - vshr.s64 q8, q8, #25 - vsub.i64 q6, q15, q6 - vshr.s64 q10, q10, #25 - vadd.i64 q12, q13, q8 - vshl.i64 q8, q8, #25 - vadd.i64 q13, q12, q9 - vadd.i64 q0, q0, q10 - vsub.i64 q7, q7, q8 - vshr.s64 q8, q13, #26 - vshl.i64 q10, q10, #25 - vadd.i64 q13, q0, q9 - vadd.i64 q1, q1, q8 - vshl.i64 q8, q8, #26 - vadd.i64 q15, q1, q4 - vsub.i64 q2, q2, q10 - vshr.s64 q10, q13, #26 - vsub.i64 q8, q12, q8 - vshr.s64 q12, q15, #25 - vadd.i64 q3, q3, q10 - vshl.i64 q10, q10, #26 - vadd.i64 q13, q3, q4 - vadd.i64 q14, q14, q12 - add r2, r3, #288 - vshl.i64 q12, q12, #25 - add r4, r3, #336 - vadd.i64 q15, q14, q9 - add r2, r2, #8 - vsub.i64 q0, q0, q10 - add r4, r4, #8 - vshr.s64 q10, q13, #25 - vsub.i64 q1, q1, q12 - vshr.s64 q12, q15, #26 - vadd.i64 q13, q10, q10 - vadd.i64 q11, q11, q12 - vtrn.32 d16, d2 - vshl.i64 q12, q12, #26 - vtrn.32 d17, d3 - vadd.i64 q1, q11, q4 - vadd.i64 q4, q5, q13 - vst1.8 d16, [r2, : 64]! - vshl.i64 q5, q10, #4 - vst1.8 d17, [r4, : 64]! 
- vsub.i64 q8, q14, q12 - vshr.s64 q1, q1, #25 - vadd.i64 q4, q4, q5 - vadd.i64 q5, q6, q1 - vshl.i64 q1, q1, #25 - vadd.i64 q6, q5, q9 - vadd.i64 q4, q4, q10 - vshl.i64 q10, q10, #25 - vadd.i64 q9, q4, q9 - vsub.i64 q1, q11, q1 - vshr.s64 q6, q6, #26 - vsub.i64 q3, q3, q10 - vtrn.32 d16, d2 - vshr.s64 q9, q9, #26 - vtrn.32 d17, d3 - vadd.i64 q1, q2, q6 - vst1.8 d16, [r2, : 64] - vshl.i64 q2, q6, #26 - vst1.8 d17, [r4, : 64] - vadd.i64 q6, q7, q9 - vtrn.32 d0, d6 - vshl.i64 q7, q9, #26 - vtrn.32 d1, d7 - vsub.i64 q2, q5, q2 - add r2, r2, #16 - vsub.i64 q3, q4, q7 - vst1.8 d0, [r2, : 64] - add r4, r4, #16 - vst1.8 d1, [r4, : 64] - vtrn.32 d4, d2 - vtrn.32 d5, d3 - sub r2, r2, #8 - sub r4, r4, #8 - vtrn.32 d6, d12 - vtrn.32 d7, d13 - vst1.8 d4, [r2, : 64] - vst1.8 d5, [r4, : 64] - sub r2, r2, #24 - sub r4, r4, #24 - vst1.8 d6, [r2, : 64] - vst1.8 d7, [r4, : 64] - add r2, r3, #240 - add r4, r3, #96 - vld1.8 {d0-d1}, [r4, : 128]! - vld1.8 {d2-d3}, [r4, : 128]! - vld1.8 {d4}, [r4, : 64] - add r4, r3, #144 - vld1.8 {d6-d7}, [r4, : 128]! - vtrn.32 q0, q3 - vld1.8 {d8-d9}, [r4, : 128]! - vshl.i32 q5, q0, #4 - vtrn.32 q1, q4 - vshl.i32 q6, q3, #4 - vadd.i32 q5, q5, q0 - vadd.i32 q6, q6, q3 - vshl.i32 q7, q1, #4 - vld1.8 {d5}, [r4, : 64] - vshl.i32 q8, q4, #4 - vtrn.32 d4, d5 - vadd.i32 q7, q7, q1 - vadd.i32 q8, q8, q4 - vld1.8 {d18-d19}, [r2, : 128]! - vshl.i32 q10, q2, #4 - vld1.8 {d22-d23}, [r2, : 128]! - vadd.i32 q10, q10, q2 - vld1.8 {d24}, [r2, : 64] - vadd.i32 q5, q5, q0 - add r2, r3, #192 - vld1.8 {d26-d27}, [r2, : 128]! - vadd.i32 q6, q6, q3 - vld1.8 {d28-d29}, [r2, : 128]! - vadd.i32 q8, q8, q4 - vld1.8 {d25}, [r2, : 64] - vadd.i32 q10, q10, q2 - vtrn.32 q9, q13 - vadd.i32 q7, q7, q1 - vadd.i32 q5, q5, q0 - vtrn.32 q11, q14 - vadd.i32 q6, q6, q3 - add r2, sp, #528 - vadd.i32 q10, q10, q2 - vtrn.32 d24, d25 - vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q6, q13, #1 - vst1.8 {d20-d21}, [r2, : 128]! - vshl.i32 q10, q14, #1 - vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q15, q12, #1 - vadd.i32 q8, q8, q4 - vext.32 d10, d31, d30, #0 - vadd.i32 q7, q7, q1 - vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q8, d18, d5 - vmlal.s32 q8, d26, d4 - vmlal.s32 q8, d19, d9 - vmlal.s32 q8, d27, d3 - vmlal.s32 q8, d22, d8 - vmlal.s32 q8, d28, d2 - vmlal.s32 q8, d23, d7 - vmlal.s32 q8, d29, d1 - vmlal.s32 q8, d24, d6 - vmlal.s32 q8, d25, d0 - vst1.8 {d14-d15}, [r2, : 128]! - vmull.s32 q2, d18, d4 - vmlal.s32 q2, d12, d9 - vmlal.s32 q2, d13, d8 - vmlal.s32 q2, d19, d3 - vmlal.s32 q2, d22, d2 - vmlal.s32 q2, d23, d1 - vmlal.s32 q2, d24, d0 - vst1.8 {d20-d21}, [r2, : 128]! - vmull.s32 q7, d18, d9 - vmlal.s32 q7, d26, d3 - vmlal.s32 q7, d19, d8 - vmlal.s32 q7, d27, d2 - vmlal.s32 q7, d22, d7 - vmlal.s32 q7, d28, d1 - vmlal.s32 q7, d23, d6 - vmlal.s32 q7, d29, d0 - vst1.8 {d10-d11}, [r2, : 128]! 
- vmull.s32 q5, d18, d3 - vmlal.s32 q5, d19, d2 - vmlal.s32 q5, d22, d1 - vmlal.s32 q5, d23, d0 - vmlal.s32 q5, d12, d8 - vst1.8 {d16-d17}, [r2, : 128] - vmull.s32 q4, d18, d8 - vmlal.s32 q4, d26, d2 - vmlal.s32 q4, d19, d7 - vmlal.s32 q4, d27, d1 - vmlal.s32 q4, d22, d6 - vmlal.s32 q4, d28, d0 - vmull.s32 q8, d18, d7 - vmlal.s32 q8, d26, d1 - vmlal.s32 q8, d19, d6 - vmlal.s32 q8, d27, d0 - add r2, sp, #544 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q7, d24, d21 - vmlal.s32 q7, d25, d20 - vmlal.s32 q4, d23, d21 - vmlal.s32 q4, d29, d20 - vmlal.s32 q8, d22, d21 - vmlal.s32 q8, d28, d20 - vmlal.s32 q5, d24, d20 - vst1.8 {d14-d15}, [r2, : 128] - vmull.s32 q7, d18, d6 - vmlal.s32 q7, d26, d0 - add r2, sp, #624 - vld1.8 {d30-d31}, [r2, : 128] - vmlal.s32 q2, d30, d21 - vmlal.s32 q7, d19, d21 - vmlal.s32 q7, d27, d20 - add r2, sp, #592 - vld1.8 {d26-d27}, [r2, : 128] - vmlal.s32 q4, d25, d27 - vmlal.s32 q8, d29, d27 - vmlal.s32 q8, d25, d26 - vmlal.s32 q7, d28, d27 - vmlal.s32 q7, d29, d26 - add r2, sp, #576 - vld1.8 {d28-d29}, [r2, : 128] - vmlal.s32 q4, d24, d29 - vmlal.s32 q8, d23, d29 - vmlal.s32 q8, d24, d28 - vmlal.s32 q7, d22, d29 - vmlal.s32 q7, d23, d28 - vst1.8 {d8-d9}, [r2, : 128] - add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vmlal.s32 q7, d24, d9 - vmlal.s32 q7, d25, d31 - vmull.s32 q1, d18, d2 - vmlal.s32 q1, d19, d1 - vmlal.s32 q1, d22, d0 - vmlal.s32 q1, d24, d27 - vmlal.s32 q1, d23, d20 - vmlal.s32 q1, d12, d7 - vmlal.s32 q1, d13, d6 - vmull.s32 q6, d18, d1 - vmlal.s32 q6, d19, d0 - vmlal.s32 q6, d23, d27 - vmlal.s32 q6, d22, d20 - vmlal.s32 q6, d24, d26 - vmull.s32 q0, d18, d0 - vmlal.s32 q0, d22, d27 - vmlal.s32 q0, d23, d26 - vmlal.s32 q0, d24, d31 - vmlal.s32 q0, d19, d20 - add r2, sp, #608 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q2, d18, d7 - vmlal.s32 q5, d18, d6 - vmlal.s32 q1, d18, d21 - vmlal.s32 q0, d18, d28 - vmlal.s32 q6, d18, d29 - vmlal.s32 q2, d19, d6 - vmlal.s32 q5, d19, d21 - vmlal.s32 q1, d19, d29 - vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d19, d28 - add r2, sp, #560 - vld1.8 {d18-d19}, [r2, : 128] - add r2, sp, #480 - vld1.8 {d22-d23}, [r2, : 128] - vmlal.s32 q5, d19, d7 - vmlal.s32 q0, d18, d21 - vmlal.s32 q0, d19, d29 - vmlal.s32 q6, d18, d6 - add r2, sp, #496 - vld1.8 {d6-d7}, [r2, : 128] - vmlal.s32 q6, d19, d21 - add r2, sp, #544 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q0, d30, d8 - add r2, sp, #640 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q5, d30, d29 - add r2, sp, #576 - vld1.8 {d24-d25}, [r2, : 128] - vmlal.s32 q1, d30, d28 - vadd.i64 q13, q0, q11 - vadd.i64 q14, q5, q11 - vmlal.s32 q6, d30, d9 - vshr.s64 q4, q13, #26 - vshr.s64 q13, q14, #26 - vadd.i64 q7, q7, q4 - vshl.i64 q4, q4, #26 - vadd.i64 q14, q7, q3 - vadd.i64 q9, q9, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q15, q9, q3 - vsub.i64 q0, q0, q4 - vshr.s64 q4, q14, #25 - vsub.i64 q5, q5, q13 - vshr.s64 q13, q15, #25 - vadd.i64 q6, q6, q4 - vshl.i64 q4, q4, #25 - vadd.i64 q14, q6, q11 - vadd.i64 q2, q2, q13 - vsub.i64 q4, q7, q4 - vshr.s64 q7, q14, #26 - vshl.i64 q13, q13, #25 - vadd.i64 q14, q2, q11 - vadd.i64 q8, q8, q7 - vshl.i64 q7, q7, #26 - vadd.i64 q15, q8, q3 - vsub.i64 q9, q9, q13 - vshr.s64 q13, q14, #26 - vsub.i64 q6, q6, q7 - vshr.s64 q7, q15, #25 - vadd.i64 q10, q10, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q14, q10, q3 - vadd.i64 q1, q1, q7 - add r2, r3, #144 - vshl.i64 q7, q7, #25 - add r4, r3, #96 - vadd.i64 q15, q1, q11 - add r2, r2, #8 - vsub.i64 q2, q2, q13 - add r4, r4, #8 - vshr.s64 q13, q14, #25 - vsub.i64 q7, q8, q7 - vshr.s64 q8, q15, #26 - vadd.i64 q14, q13, q13 
- vadd.i64 q12, q12, q8 - vtrn.32 d12, d14 - vshl.i64 q8, q8, #26 - vtrn.32 d13, d15 - vadd.i64 q3, q12, q3 - vadd.i64 q0, q0, q14 - vst1.8 d12, [r2, : 64]! - vshl.i64 q7, q13, #4 - vst1.8 d13, [r4, : 64]! - vsub.i64 q1, q1, q8 - vshr.s64 q3, q3, #25 - vadd.i64 q0, q0, q7 - vadd.i64 q5, q5, q3 - vshl.i64 q3, q3, #25 - vadd.i64 q6, q5, q11 - vadd.i64 q0, q0, q13 - vshl.i64 q7, q13, #25 - vadd.i64 q8, q0, q11 - vsub.i64 q3, q12, q3 - vshr.s64 q6, q6, #26 - vsub.i64 q7, q10, q7 - vtrn.32 d2, d6 - vshr.s64 q8, q8, #26 - vtrn.32 d3, d7 - vadd.i64 q3, q9, q6 - vst1.8 d2, [r2, : 64] - vshl.i64 q6, q6, #26 - vst1.8 d3, [r4, : 64] - vadd.i64 q1, q4, q8 - vtrn.32 d4, d14 - vshl.i64 q4, q8, #26 - vtrn.32 d5, d15 - vsub.i64 q5, q5, q6 - add r2, r2, #16 - vsub.i64 q0, q0, q4 - vst1.8 d4, [r2, : 64] - add r4, r4, #16 - vst1.8 d5, [r4, : 64] - vtrn.32 d10, d6 - vtrn.32 d11, d7 - sub r2, r2, #8 - sub r4, r4, #8 - vtrn.32 d0, d2 - vtrn.32 d1, d3 - vst1.8 d10, [r2, : 64] - vst1.8 d11, [r4, : 64] - sub r2, r2, #24 - sub r4, r4, #24 - vst1.8 d0, [r2, : 64] - vst1.8 d1, [r4, : 64] - add r2, r3, #288 - add r4, r3, #336 - vld1.8 {d0-d1}, [r2, : 128]! - vld1.8 {d2-d3}, [r4, : 128]! - vsub.i32 q0, q0, q1 - vld1.8 {d2-d3}, [r2, : 128]! - vld1.8 {d4-d5}, [r4, : 128]! - vsub.i32 q1, q1, q2 - add r5, r3, #240 - vld1.8 {d4}, [r2, : 64] - vld1.8 {d6}, [r4, : 64] - vsub.i32 q2, q2, q3 - vst1.8 {d0-d1}, [r5, : 128]! - vst1.8 {d2-d3}, [r5, : 128]! - vst1.8 d4, [r5, : 64] - add r2, r3, #144 - add r4, r3, #96 - add r5, r3, #144 - add r6, r3, #192 - vld1.8 {d0-d1}, [r2, : 128]! - vld1.8 {d2-d3}, [r4, : 128]! - vsub.i32 q2, q0, q1 - vadd.i32 q0, q0, q1 - vld1.8 {d2-d3}, [r2, : 128]! - vld1.8 {d6-d7}, [r4, : 128]! - vsub.i32 q4, q1, q3 - vadd.i32 q1, q1, q3 - vld1.8 {d6}, [r2, : 64] - vld1.8 {d10}, [r4, : 64] - vsub.i32 q6, q3, q5 - vadd.i32 q3, q3, q5 - vst1.8 {d4-d5}, [r5, : 128]! - vst1.8 {d0-d1}, [r6, : 128]! - vst1.8 {d8-d9}, [r5, : 128]! - vst1.8 {d2-d3}, [r6, : 128]! - vst1.8 d12, [r5, : 64] - vst1.8 d6, [r6, : 64] - add r2, r3, #0 - add r4, r3, #240 - vld1.8 {d0-d1}, [r4, : 128]! - vld1.8 {d2-d3}, [r4, : 128]! - vld1.8 {d4}, [r4, : 64] - add r4, r3, #336 - vld1.8 {d6-d7}, [r4, : 128]! - vtrn.32 q0, q3 - vld1.8 {d8-d9}, [r4, : 128]! - vshl.i32 q5, q0, #4 - vtrn.32 q1, q4 - vshl.i32 q6, q3, #4 - vadd.i32 q5, q5, q0 - vadd.i32 q6, q6, q3 - vshl.i32 q7, q1, #4 - vld1.8 {d5}, [r4, : 64] - vshl.i32 q8, q4, #4 - vtrn.32 d4, d5 - vadd.i32 q7, q7, q1 - vadd.i32 q8, q8, q4 - vld1.8 {d18-d19}, [r2, : 128]! - vshl.i32 q10, q2, #4 - vld1.8 {d22-d23}, [r2, : 128]! - vadd.i32 q10, q10, q2 - vld1.8 {d24}, [r2, : 64] - vadd.i32 q5, q5, q0 - add r2, r3, #288 - vld1.8 {d26-d27}, [r2, : 128]! - vadd.i32 q6, q6, q3 - vld1.8 {d28-d29}, [r2, : 128]! - vadd.i32 q8, q8, q4 - vld1.8 {d25}, [r2, : 64] - vadd.i32 q10, q10, q2 - vtrn.32 q9, q13 - vadd.i32 q7, q7, q1 - vadd.i32 q5, q5, q0 - vtrn.32 q11, q14 - vadd.i32 q6, q6, q3 - add r2, sp, #528 - vadd.i32 q10, q10, q2 - vtrn.32 d24, d25 - vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q6, q13, #1 - vst1.8 {d20-d21}, [r2, : 128]! - vshl.i32 q10, q14, #1 - vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q15, q12, #1 - vadd.i32 q8, q8, q4 - vext.32 d10, d31, d30, #0 - vadd.i32 q7, q7, q1 - vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q8, d18, d5 - vmlal.s32 q8, d26, d4 - vmlal.s32 q8, d19, d9 - vmlal.s32 q8, d27, d3 - vmlal.s32 q8, d22, d8 - vmlal.s32 q8, d28, d2 - vmlal.s32 q8, d23, d7 - vmlal.s32 q8, d29, d1 - vmlal.s32 q8, d24, d6 - vmlal.s32 q8, d25, d0 - vst1.8 {d14-d15}, [r2, : 128]! 
- vmull.s32 q2, d18, d4 - vmlal.s32 q2, d12, d9 - vmlal.s32 q2, d13, d8 - vmlal.s32 q2, d19, d3 - vmlal.s32 q2, d22, d2 - vmlal.s32 q2, d23, d1 - vmlal.s32 q2, d24, d0 - vst1.8 {d20-d21}, [r2, : 128]! - vmull.s32 q7, d18, d9 - vmlal.s32 q7, d26, d3 - vmlal.s32 q7, d19, d8 - vmlal.s32 q7, d27, d2 - vmlal.s32 q7, d22, d7 - vmlal.s32 q7, d28, d1 - vmlal.s32 q7, d23, d6 - vmlal.s32 q7, d29, d0 - vst1.8 {d10-d11}, [r2, : 128]! - vmull.s32 q5, d18, d3 - vmlal.s32 q5, d19, d2 - vmlal.s32 q5, d22, d1 - vmlal.s32 q5, d23, d0 - vmlal.s32 q5, d12, d8 - vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q4, d18, d8 - vmlal.s32 q4, d26, d2 - vmlal.s32 q4, d19, d7 - vmlal.s32 q4, d27, d1 - vmlal.s32 q4, d22, d6 - vmlal.s32 q4, d28, d0 - vmull.s32 q8, d18, d7 - vmlal.s32 q8, d26, d1 - vmlal.s32 q8, d19, d6 - vmlal.s32 q8, d27, d0 - add r2, sp, #544 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q7, d24, d21 - vmlal.s32 q7, d25, d20 - vmlal.s32 q4, d23, d21 - vmlal.s32 q4, d29, d20 - vmlal.s32 q8, d22, d21 - vmlal.s32 q8, d28, d20 - vmlal.s32 q5, d24, d20 - vst1.8 {d14-d15}, [r2, : 128] - vmull.s32 q7, d18, d6 - vmlal.s32 q7, d26, d0 - add r2, sp, #624 - vld1.8 {d30-d31}, [r2, : 128] - vmlal.s32 q2, d30, d21 - vmlal.s32 q7, d19, d21 - vmlal.s32 q7, d27, d20 - add r2, sp, #592 - vld1.8 {d26-d27}, [r2, : 128] - vmlal.s32 q4, d25, d27 - vmlal.s32 q8, d29, d27 - vmlal.s32 q8, d25, d26 - vmlal.s32 q7, d28, d27 - vmlal.s32 q7, d29, d26 - add r2, sp, #576 - vld1.8 {d28-d29}, [r2, : 128] - vmlal.s32 q4, d24, d29 - vmlal.s32 q8, d23, d29 - vmlal.s32 q8, d24, d28 - vmlal.s32 q7, d22, d29 - vmlal.s32 q7, d23, d28 - vst1.8 {d8-d9}, [r2, : 128] - add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vmlal.s32 q7, d24, d9 - vmlal.s32 q7, d25, d31 - vmull.s32 q1, d18, d2 - vmlal.s32 q1, d19, d1 - vmlal.s32 q1, d22, d0 - vmlal.s32 q1, d24, d27 - vmlal.s32 q1, d23, d20 - vmlal.s32 q1, d12, d7 - vmlal.s32 q1, d13, d6 - vmull.s32 q6, d18, d1 - vmlal.s32 q6, d19, d0 - vmlal.s32 q6, d23, d27 - vmlal.s32 q6, d22, d20 - vmlal.s32 q6, d24, d26 - vmull.s32 q0, d18, d0 - vmlal.s32 q0, d22, d27 - vmlal.s32 q0, d23, d26 - vmlal.s32 q0, d24, d31 - vmlal.s32 q0, d19, d20 - add r2, sp, #608 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q2, d18, d7 - vmlal.s32 q5, d18, d6 - vmlal.s32 q1, d18, d21 - vmlal.s32 q0, d18, d28 - vmlal.s32 q6, d18, d29 - vmlal.s32 q2, d19, d6 - vmlal.s32 q5, d19, d21 - vmlal.s32 q1, d19, d29 - vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d19, d28 - add r2, sp, #560 - vld1.8 {d18-d19}, [r2, : 128] - add r2, sp, #480 - vld1.8 {d22-d23}, [r2, : 128] - vmlal.s32 q5, d19, d7 - vmlal.s32 q0, d18, d21 - vmlal.s32 q0, d19, d29 - vmlal.s32 q6, d18, d6 - add r2, sp, #496 - vld1.8 {d6-d7}, [r2, : 128] - vmlal.s32 q6, d19, d21 - add r2, sp, #544 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q0, d30, d8 - add r2, sp, #640 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q5, d30, d29 - add r2, sp, #576 - vld1.8 {d24-d25}, [r2, : 128] - vmlal.s32 q1, d30, d28 - vadd.i64 q13, q0, q11 - vadd.i64 q14, q5, q11 - vmlal.s32 q6, d30, d9 - vshr.s64 q4, q13, #26 - vshr.s64 q13, q14, #26 - vadd.i64 q7, q7, q4 - vshl.i64 q4, q4, #26 - vadd.i64 q14, q7, q3 - vadd.i64 q9, q9, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q15, q9, q3 - vsub.i64 q0, q0, q4 - vshr.s64 q4, q14, #25 - vsub.i64 q5, q5, q13 - vshr.s64 q13, q15, #25 - vadd.i64 q6, q6, q4 - vshl.i64 q4, q4, #25 - vadd.i64 q14, q6, q11 - vadd.i64 q2, q2, q13 - vsub.i64 q4, q7, q4 - vshr.s64 q7, q14, #26 - vshl.i64 q13, q13, #25 - vadd.i64 q14, q2, q11 - vadd.i64 q8, q8, q7 - vshl.i64 q7, q7, #26 - vadd.i64 q15, q8, q3 
- vsub.i64 q9, q9, q13 - vshr.s64 q13, q14, #26 - vsub.i64 q6, q6, q7 - vshr.s64 q7, q15, #25 - vadd.i64 q10, q10, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q14, q10, q3 - vadd.i64 q1, q1, q7 - add r2, r3, #288 - vshl.i64 q7, q7, #25 - add r4, r3, #96 - vadd.i64 q15, q1, q11 - add r2, r2, #8 - vsub.i64 q2, q2, q13 - add r4, r4, #8 - vshr.s64 q13, q14, #25 - vsub.i64 q7, q8, q7 - vshr.s64 q8, q15, #26 - vadd.i64 q14, q13, q13 - vadd.i64 q12, q12, q8 - vtrn.32 d12, d14 - vshl.i64 q8, q8, #26 - vtrn.32 d13, d15 - vadd.i64 q3, q12, q3 - vadd.i64 q0, q0, q14 - vst1.8 d12, [r2, : 64]! - vshl.i64 q7, q13, #4 - vst1.8 d13, [r4, : 64]! - vsub.i64 q1, q1, q8 - vshr.s64 q3, q3, #25 - vadd.i64 q0, q0, q7 - vadd.i64 q5, q5, q3 - vshl.i64 q3, q3, #25 - vadd.i64 q6, q5, q11 - vadd.i64 q0, q0, q13 - vshl.i64 q7, q13, #25 - vadd.i64 q8, q0, q11 - vsub.i64 q3, q12, q3 - vshr.s64 q6, q6, #26 - vsub.i64 q7, q10, q7 - vtrn.32 d2, d6 - vshr.s64 q8, q8, #26 - vtrn.32 d3, d7 - vadd.i64 q3, q9, q6 - vst1.8 d2, [r2, : 64] - vshl.i64 q6, q6, #26 - vst1.8 d3, [r4, : 64] - vadd.i64 q1, q4, q8 - vtrn.32 d4, d14 - vshl.i64 q4, q8, #26 - vtrn.32 d5, d15 - vsub.i64 q5, q5, q6 - add r2, r2, #16 - vsub.i64 q0, q0, q4 - vst1.8 d4, [r2, : 64] - add r4, r4, #16 - vst1.8 d5, [r4, : 64] - vtrn.32 d10, d6 - vtrn.32 d11, d7 - sub r2, r2, #8 - sub r4, r4, #8 - vtrn.32 d0, d2 - vtrn.32 d1, d3 - vst1.8 d10, [r2, : 64] - vst1.8 d11, [r4, : 64] - sub r2, r2, #24 - sub r4, r4, #24 - vst1.8 d0, [r2, : 64] - vst1.8 d1, [r4, : 64] - add r2, sp, #512 - add r4, r3, #144 - add r5, r3, #192 - vld1.8 {d0-d1}, [r2, : 128] - vld1.8 {d2-d3}, [r4, : 128]! - vld1.8 {d4-d5}, [r5, : 128]! - vzip.i32 q1, q2 - vld1.8 {d6-d7}, [r4, : 128]! - vld1.8 {d8-d9}, [r5, : 128]! - vshl.i32 q5, q1, #1 - vzip.i32 q3, q4 - vshl.i32 q6, q2, #1 - vld1.8 {d14}, [r4, : 64] - vshl.i32 q8, q3, #1 - vld1.8 {d15}, [r5, : 64] - vshl.i32 q9, q4, #1 - vmul.i32 d21, d7, d1 - vtrn.32 d14, d15 - vmul.i32 q11, q4, q0 - vmul.i32 q0, q7, q0 - vmull.s32 q12, d2, d2 - vmlal.s32 q12, d11, d1 - vmlal.s32 q12, d12, d0 - vmlal.s32 q12, d13, d23 - vmlal.s32 q12, d16, d22 - vmlal.s32 q12, d7, d21 - vmull.s32 q10, d2, d11 - vmlal.s32 q10, d4, d1 - vmlal.s32 q10, d13, d0 - vmlal.s32 q10, d6, d23 - vmlal.s32 q10, d17, d22 - vmull.s32 q13, d10, d4 - vmlal.s32 q13, d11, d3 - vmlal.s32 q13, d13, d1 - vmlal.s32 q13, d16, d0 - vmlal.s32 q13, d17, d23 - vmlal.s32 q13, d8, d22 - vmull.s32 q1, d10, d5 - vmlal.s32 q1, d11, d4 - vmlal.s32 q1, d6, d1 - vmlal.s32 q1, d17, d0 - vmlal.s32 q1, d8, d23 - vmull.s32 q14, d10, d6 - vmlal.s32 q14, d11, d13 - vmlal.s32 q14, d4, d4 - vmlal.s32 q14, d17, d1 - vmlal.s32 q14, d18, d0 - vmlal.s32 q14, d9, d23 - vmull.s32 q11, d10, d7 - vmlal.s32 q11, d11, d6 - vmlal.s32 q11, d12, d5 - vmlal.s32 q11, d8, d1 - vmlal.s32 q11, d19, d0 - vmull.s32 q15, d10, d8 - vmlal.s32 q15, d11, d17 - vmlal.s32 q15, d12, d6 - vmlal.s32 q15, d13, d5 - vmlal.s32 q15, d19, d1 - vmlal.s32 q15, d14, d0 - vmull.s32 q2, d10, d9 - vmlal.s32 q2, d11, d8 - vmlal.s32 q2, d12, d7 - vmlal.s32 q2, d13, d6 - vmlal.s32 q2, d14, d1 - vmull.s32 q0, d15, d1 - vmlal.s32 q0, d10, d14 - vmlal.s32 q0, d11, d19 - vmlal.s32 q0, d12, d8 - vmlal.s32 q0, d13, d17 - vmlal.s32 q0, d6, d6 - add r2, sp, #480 - vld1.8 {d18-d19}, [r2, : 128]! 
- vmull.s32 q3, d16, d7 - vmlal.s32 q3, d10, d15 - vmlal.s32 q3, d11, d14 - vmlal.s32 q3, d12, d9 - vmlal.s32 q3, d13, d8 - vld1.8 {d8-d9}, [r2, : 128] - vadd.i64 q5, q12, q9 - vadd.i64 q6, q15, q9 - vshr.s64 q5, q5, #26 - vshr.s64 q6, q6, #26 - vadd.i64 q7, q10, q5 - vshl.i64 q5, q5, #26 - vadd.i64 q8, q7, q4 - vadd.i64 q2, q2, q6 - vshl.i64 q6, q6, #26 - vadd.i64 q10, q2, q4 - vsub.i64 q5, q12, q5 - vshr.s64 q8, q8, #25 - vsub.i64 q6, q15, q6 - vshr.s64 q10, q10, #25 - vadd.i64 q12, q13, q8 - vshl.i64 q8, q8, #25 - vadd.i64 q13, q12, q9 - vadd.i64 q0, q0, q10 - vsub.i64 q7, q7, q8 - vshr.s64 q8, q13, #26 - vshl.i64 q10, q10, #25 - vadd.i64 q13, q0, q9 - vadd.i64 q1, q1, q8 - vshl.i64 q8, q8, #26 - vadd.i64 q15, q1, q4 - vsub.i64 q2, q2, q10 - vshr.s64 q10, q13, #26 - vsub.i64 q8, q12, q8 - vshr.s64 q12, q15, #25 - vadd.i64 q3, q3, q10 - vshl.i64 q10, q10, #26 - vadd.i64 q13, q3, q4 - vadd.i64 q14, q14, q12 - add r2, r3, #144 - vshl.i64 q12, q12, #25 - add r4, r3, #192 - vadd.i64 q15, q14, q9 - add r2, r2, #8 - vsub.i64 q0, q0, q10 - add r4, r4, #8 - vshr.s64 q10, q13, #25 - vsub.i64 q1, q1, q12 - vshr.s64 q12, q15, #26 - vadd.i64 q13, q10, q10 - vadd.i64 q11, q11, q12 - vtrn.32 d16, d2 - vshl.i64 q12, q12, #26 - vtrn.32 d17, d3 - vadd.i64 q1, q11, q4 - vadd.i64 q4, q5, q13 - vst1.8 d16, [r2, : 64]! - vshl.i64 q5, q10, #4 - vst1.8 d17, [r4, : 64]! - vsub.i64 q8, q14, q12 - vshr.s64 q1, q1, #25 - vadd.i64 q4, q4, q5 - vadd.i64 q5, q6, q1 - vshl.i64 q1, q1, #25 - vadd.i64 q6, q5, q9 - vadd.i64 q4, q4, q10 - vshl.i64 q10, q10, #25 - vadd.i64 q9, q4, q9 - vsub.i64 q1, q11, q1 - vshr.s64 q6, q6, #26 - vsub.i64 q3, q3, q10 - vtrn.32 d16, d2 - vshr.s64 q9, q9, #26 - vtrn.32 d17, d3 - vadd.i64 q1, q2, q6 - vst1.8 d16, [r2, : 64] - vshl.i64 q2, q6, #26 - vst1.8 d17, [r4, : 64] - vadd.i64 q6, q7, q9 - vtrn.32 d0, d6 - vshl.i64 q7, q9, #26 - vtrn.32 d1, d7 - vsub.i64 q2, q5, q2 - add r2, r2, #16 - vsub.i64 q3, q4, q7 - vst1.8 d0, [r2, : 64] - add r4, r4, #16 - vst1.8 d1, [r4, : 64] - vtrn.32 d4, d2 - vtrn.32 d5, d3 - sub r2, r2, #8 - sub r4, r4, #8 - vtrn.32 d6, d12 - vtrn.32 d7, d13 - vst1.8 d4, [r2, : 64] - vst1.8 d5, [r4, : 64] - sub r2, r2, #24 - sub r4, r4, #24 - vst1.8 d6, [r2, : 64] - vst1.8 d7, [r4, : 64] - add r2, r3, #336 - add r4, r3, #288 - vld1.8 {d0-d1}, [r2, : 128]! - vld1.8 {d2-d3}, [r4, : 128]! - vadd.i32 q0, q0, q1 - vld1.8 {d2-d3}, [r2, : 128]! - vld1.8 {d4-d5}, [r4, : 128]! - vadd.i32 q1, q1, q2 - add r5, r3, #288 - vld1.8 {d4}, [r2, : 64] - vld1.8 {d6}, [r4, : 64] - vadd.i32 q2, q2, q3 - vst1.8 {d0-d1}, [r5, : 128]! - vst1.8 {d2-d3}, [r5, : 128]! - vst1.8 d4, [r5, : 64] - add r2, r3, #48 - add r4, r3, #144 - vld1.8 {d0-d1}, [r4, : 128]! - vld1.8 {d2-d3}, [r4, : 128]! - vld1.8 {d4}, [r4, : 64] - add r4, r3, #288 - vld1.8 {d6-d7}, [r4, : 128]! - vtrn.32 q0, q3 - vld1.8 {d8-d9}, [r4, : 128]! - vshl.i32 q5, q0, #4 - vtrn.32 q1, q4 - vshl.i32 q6, q3, #4 - vadd.i32 q5, q5, q0 - vadd.i32 q6, q6, q3 - vshl.i32 q7, q1, #4 - vld1.8 {d5}, [r4, : 64] - vshl.i32 q8, q4, #4 - vtrn.32 d4, d5 - vadd.i32 q7, q7, q1 - vadd.i32 q8, q8, q4 - vld1.8 {d18-d19}, [r2, : 128]! - vshl.i32 q10, q2, #4 - vld1.8 {d22-d23}, [r2, : 128]! - vadd.i32 q10, q10, q2 - vld1.8 {d24}, [r2, : 64] - vadd.i32 q5, q5, q0 - add r2, r3, #240 - vld1.8 {d26-d27}, [r2, : 128]! - vadd.i32 q6, q6, q3 - vld1.8 {d28-d29}, [r2, : 128]! 
- vadd.i32 q8, q8, q4 - vld1.8 {d25}, [r2, : 64] - vadd.i32 q10, q10, q2 - vtrn.32 q9, q13 - vadd.i32 q7, q7, q1 - vadd.i32 q5, q5, q0 - vtrn.32 q11, q14 - vadd.i32 q6, q6, q3 - add r2, sp, #528 - vadd.i32 q10, q10, q2 - vtrn.32 d24, d25 - vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q6, q13, #1 - vst1.8 {d20-d21}, [r2, : 128]! - vshl.i32 q10, q14, #1 - vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q15, q12, #1 - vadd.i32 q8, q8, q4 - vext.32 d10, d31, d30, #0 - vadd.i32 q7, q7, q1 - vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q8, d18, d5 - vmlal.s32 q8, d26, d4 - vmlal.s32 q8, d19, d9 - vmlal.s32 q8, d27, d3 - vmlal.s32 q8, d22, d8 - vmlal.s32 q8, d28, d2 - vmlal.s32 q8, d23, d7 - vmlal.s32 q8, d29, d1 - vmlal.s32 q8, d24, d6 - vmlal.s32 q8, d25, d0 - vst1.8 {d14-d15}, [r2, : 128]! - vmull.s32 q2, d18, d4 - vmlal.s32 q2, d12, d9 - vmlal.s32 q2, d13, d8 - vmlal.s32 q2, d19, d3 - vmlal.s32 q2, d22, d2 - vmlal.s32 q2, d23, d1 - vmlal.s32 q2, d24, d0 - vst1.8 {d20-d21}, [r2, : 128]! - vmull.s32 q7, d18, d9 - vmlal.s32 q7, d26, d3 - vmlal.s32 q7, d19, d8 - vmlal.s32 q7, d27, d2 - vmlal.s32 q7, d22, d7 - vmlal.s32 q7, d28, d1 - vmlal.s32 q7, d23, d6 - vmlal.s32 q7, d29, d0 - vst1.8 {d10-d11}, [r2, : 128]! - vmull.s32 q5, d18, d3 - vmlal.s32 q5, d19, d2 - vmlal.s32 q5, d22, d1 - vmlal.s32 q5, d23, d0 - vmlal.s32 q5, d12, d8 - vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q4, d18, d8 - vmlal.s32 q4, d26, d2 - vmlal.s32 q4, d19, d7 - vmlal.s32 q4, d27, d1 - vmlal.s32 q4, d22, d6 - vmlal.s32 q4, d28, d0 - vmull.s32 q8, d18, d7 - vmlal.s32 q8, d26, d1 - vmlal.s32 q8, d19, d6 - vmlal.s32 q8, d27, d0 - add r2, sp, #544 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q7, d24, d21 - vmlal.s32 q7, d25, d20 - vmlal.s32 q4, d23, d21 - vmlal.s32 q4, d29, d20 - vmlal.s32 q8, d22, d21 - vmlal.s32 q8, d28, d20 - vmlal.s32 q5, d24, d20 - vst1.8 {d14-d15}, [r2, : 128] - vmull.s32 q7, d18, d6 - vmlal.s32 q7, d26, d0 - add r2, sp, #624 - vld1.8 {d30-d31}, [r2, : 128] - vmlal.s32 q2, d30, d21 - vmlal.s32 q7, d19, d21 - vmlal.s32 q7, d27, d20 - add r2, sp, #592 - vld1.8 {d26-d27}, [r2, : 128] - vmlal.s32 q4, d25, d27 - vmlal.s32 q8, d29, d27 - vmlal.s32 q8, d25, d26 - vmlal.s32 q7, d28, d27 - vmlal.s32 q7, d29, d26 - add r2, sp, #576 - vld1.8 {d28-d29}, [r2, : 128] - vmlal.s32 q4, d24, d29 - vmlal.s32 q8, d23, d29 - vmlal.s32 q8, d24, d28 - vmlal.s32 q7, d22, d29 - vmlal.s32 q7, d23, d28 - vst1.8 {d8-d9}, [r2, : 128] - add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vmlal.s32 q7, d24, d9 - vmlal.s32 q7, d25, d31 - vmull.s32 q1, d18, d2 - vmlal.s32 q1, d19, d1 - vmlal.s32 q1, d22, d0 - vmlal.s32 q1, d24, d27 - vmlal.s32 q1, d23, d20 - vmlal.s32 q1, d12, d7 - vmlal.s32 q1, d13, d6 - vmull.s32 q6, d18, d1 - vmlal.s32 q6, d19, d0 - vmlal.s32 q6, d23, d27 - vmlal.s32 q6, d22, d20 - vmlal.s32 q6, d24, d26 - vmull.s32 q0, d18, d0 - vmlal.s32 q0, d22, d27 - vmlal.s32 q0, d23, d26 - vmlal.s32 q0, d24, d31 - vmlal.s32 q0, d19, d20 - add r2, sp, #608 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q2, d18, d7 - vmlal.s32 q5, d18, d6 - vmlal.s32 q1, d18, d21 - vmlal.s32 q0, d18, d28 - vmlal.s32 q6, d18, d29 - vmlal.s32 q2, d19, d6 - vmlal.s32 q5, d19, d21 - vmlal.s32 q1, d19, d29 - vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d19, d28 - add r2, sp, #560 - vld1.8 {d18-d19}, [r2, : 128] - add r2, sp, #480 - vld1.8 {d22-d23}, [r2, : 128] - vmlal.s32 q5, d19, d7 - vmlal.s32 q0, d18, d21 - vmlal.s32 q0, d19, d29 - vmlal.s32 q6, d18, d6 - add r2, sp, #496 - vld1.8 {d6-d7}, [r2, : 128] - vmlal.s32 q6, d19, d21 - add r2, sp, #544 - vld1.8 {d18-d19}, [r2, : 
128] - vmlal.s32 q0, d30, d8 - add r2, sp, #640 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q5, d30, d29 - add r2, sp, #576 - vld1.8 {d24-d25}, [r2, : 128] - vmlal.s32 q1, d30, d28 - vadd.i64 q13, q0, q11 - vadd.i64 q14, q5, q11 - vmlal.s32 q6, d30, d9 - vshr.s64 q4, q13, #26 - vshr.s64 q13, q14, #26 - vadd.i64 q7, q7, q4 - vshl.i64 q4, q4, #26 - vadd.i64 q14, q7, q3 - vadd.i64 q9, q9, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q15, q9, q3 - vsub.i64 q0, q0, q4 - vshr.s64 q4, q14, #25 - vsub.i64 q5, q5, q13 - vshr.s64 q13, q15, #25 - vadd.i64 q6, q6, q4 - vshl.i64 q4, q4, #25 - vadd.i64 q14, q6, q11 - vadd.i64 q2, q2, q13 - vsub.i64 q4, q7, q4 - vshr.s64 q7, q14, #26 - vshl.i64 q13, q13, #25 - vadd.i64 q14, q2, q11 - vadd.i64 q8, q8, q7 - vshl.i64 q7, q7, #26 - vadd.i64 q15, q8, q3 - vsub.i64 q9, q9, q13 - vshr.s64 q13, q14, #26 - vsub.i64 q6, q6, q7 - vshr.s64 q7, q15, #25 - vadd.i64 q10, q10, q13 - vshl.i64 q13, q13, #26 - vadd.i64 q14, q10, q3 - vadd.i64 q1, q1, q7 - add r2, r3, #240 - vshl.i64 q7, q7, #25 - add r4, r3, #144 - vadd.i64 q15, q1, q11 - add r2, r2, #8 - vsub.i64 q2, q2, q13 - add r4, r4, #8 - vshr.s64 q13, q14, #25 - vsub.i64 q7, q8, q7 - vshr.s64 q8, q15, #26 - vadd.i64 q14, q13, q13 - vadd.i64 q12, q12, q8 - vtrn.32 d12, d14 - vshl.i64 q8, q8, #26 - vtrn.32 d13, d15 - vadd.i64 q3, q12, q3 - vadd.i64 q0, q0, q14 - vst1.8 d12, [r2, : 64]! - vshl.i64 q7, q13, #4 - vst1.8 d13, [r4, : 64]! - vsub.i64 q1, q1, q8 - vshr.s64 q3, q3, #25 - vadd.i64 q0, q0, q7 - vadd.i64 q5, q5, q3 - vshl.i64 q3, q3, #25 - vadd.i64 q6, q5, q11 - vadd.i64 q0, q0, q13 - vshl.i64 q7, q13, #25 - vadd.i64 q8, q0, q11 - vsub.i64 q3, q12, q3 - vshr.s64 q6, q6, #26 - vsub.i64 q7, q10, q7 - vtrn.32 d2, d6 - vshr.s64 q8, q8, #26 - vtrn.32 d3, d7 - vadd.i64 q3, q9, q6 - vst1.8 d2, [r2, : 64] - vshl.i64 q6, q6, #26 - vst1.8 d3, [r4, : 64] - vadd.i64 q1, q4, q8 - vtrn.32 d4, d14 - vshl.i64 q4, q8, #26 - vtrn.32 d5, d15 - vsub.i64 q5, q5, q6 - add r2, r2, #16 - vsub.i64 q0, q0, q4 - vst1.8 d4, [r2, : 64] - add r4, r4, #16 - vst1.8 d5, [r4, : 64] - vtrn.32 d10, d6 - vtrn.32 d11, d7 - sub r2, r2, #8 - sub r4, r4, #8 - vtrn.32 d0, d2 - vtrn.32 d1, d3 - vst1.8 d10, [r2, : 64] - vst1.8 d11, [r4, : 64] - sub r2, r2, #24 - sub r4, r4, #24 - vst1.8 d0, [r2, : 64] - vst1.8 d1, [r4, : 64] - ldr r2, [sp, #456] - ldr r4, [sp, #460] - subs r5, r2, #1 - bge .Lmainloop - add r1, r3, #144 - add r2, r3, #336 - vld1.8 {d0-d1}, [r1, : 128]! - vld1.8 {d2-d3}, [r1, : 128]! - vld1.8 {d4}, [r1, : 64] - vst1.8 {d0-d1}, [r2, : 128]! - vst1.8 {d2-d3}, [r2, : 128]! - vst1.8 d4, [r2, : 64] - movw r1, #0 -.Linvertloop: - add r2, r3, #144 - movw r4, #0 - movw r5, #2 - cmp r1, #1 - moveq r5, #1 - addeq r2, r3, #336 - addeq r4, r3, #48 - cmp r1, #2 - moveq r5, #1 - addeq r2, r3, #48 - cmp r1, #3 - moveq r5, #5 - addeq r4, r3, #336 - cmp r1, #4 - moveq r5, #10 - cmp r1, #5 - moveq r5, #20 - cmp r1, #6 - moveq r5, #10 - addeq r2, r3, #336 - addeq r4, r3, #336 - cmp r1, #7 - moveq r5, #50 - cmp r1, #8 - moveq r5, #100 - cmp r1, #9 - moveq r5, #50 - addeq r2, r3, #336 - cmp r1, #10 - moveq r5, #5 - addeq r2, r3, #48 - cmp r1, #11 - moveq r5, #0 - addeq r2, r3, #96 - add r6, r3, #144 - add r7, r3, #288 - vld1.8 {d0-d1}, [r6, : 128]! - vld1.8 {d2-d3}, [r6, : 128]! - vld1.8 {d4}, [r6, : 64] - vst1.8 {d0-d1}, [r7, : 128]! - vst1.8 {d2-d3}, [r7, : 128]! 
- vst1.8 d4, [r7, : 64] - cmp r5, #0 - beq .Lskipsquaringloop -.Lsquaringloop: - add r6, r3, #288 - add r7, r3, #288 - add r8, r3, #288 - vmov.i32 q0, #19 - vmov.i32 q1, #0 - vmov.i32 q2, #1 - vzip.i32 q1, q2 - vld1.8 {d4-d5}, [r7, : 128]! - vld1.8 {d6-d7}, [r7, : 128]! - vld1.8 {d9}, [r7, : 64] - vld1.8 {d10-d11}, [r6, : 128]! - add r7, sp, #384 - vld1.8 {d12-d13}, [r6, : 128]! - vmul.i32 q7, q2, q0 - vld1.8 {d8}, [r6, : 64] - vext.32 d17, d11, d10, #1 - vmul.i32 q9, q3, q0 - vext.32 d16, d10, d8, #1 - vshl.u32 q10, q5, q1 - vext.32 d22, d14, d4, #1 - vext.32 d24, d18, d6, #1 - vshl.u32 q13, q6, q1 - vshl.u32 d28, d8, d2 - vrev64.i32 d22, d22 - vmul.i32 d1, d9, d1 - vrev64.i32 d24, d24 - vext.32 d29, d8, d13, #1 - vext.32 d0, d1, d9, #1 - vrev64.i32 d0, d0 - vext.32 d2, d9, d1, #1 - vext.32 d23, d15, d5, #1 - vmull.s32 q4, d20, d4 - vrev64.i32 d23, d23 - vmlal.s32 q4, d21, d1 - vrev64.i32 d2, d2 - vmlal.s32 q4, d26, d19 - vext.32 d3, d5, d15, #1 - vmlal.s32 q4, d27, d18 - vrev64.i32 d3, d3 - vmlal.s32 q4, d28, d15 - vext.32 d14, d12, d11, #1 - vmull.s32 q5, d16, d23 - vext.32 d15, d13, d12, #1 - vmlal.s32 q5, d17, d4 - vst1.8 d8, [r7, : 64]! - vmlal.s32 q5, d14, d1 - vext.32 d12, d9, d8, #0 - vmlal.s32 q5, d15, d19 - vmov.i64 d13, #0 - vmlal.s32 q5, d29, d18 - vext.32 d25, d19, d7, #1 - vmlal.s32 q6, d20, d5 - vrev64.i32 d25, d25 - vmlal.s32 q6, d21, d4 - vst1.8 d11, [r7, : 64]! - vmlal.s32 q6, d26, d1 - vext.32 d9, d10, d10, #0 - vmlal.s32 q6, d27, d19 - vmov.i64 d8, #0 - vmlal.s32 q6, d28, d18 - vmlal.s32 q4, d16, d24 - vmlal.s32 q4, d17, d5 - vmlal.s32 q4, d14, d4 - vst1.8 d12, [r7, : 64]! - vmlal.s32 q4, d15, d1 - vext.32 d10, d13, d12, #0 - vmlal.s32 q4, d29, d19 - vmov.i64 d11, #0 - vmlal.s32 q5, d20, d6 - vmlal.s32 q5, d21, d5 - vmlal.s32 q5, d26, d4 - vext.32 d13, d8, d8, #0 - vmlal.s32 q5, d27, d1 - vmov.i64 d12, #0 - vmlal.s32 q5, d28, d19 - vst1.8 d9, [r7, : 64]! - vmlal.s32 q6, d16, d25 - vmlal.s32 q6, d17, d6 - vst1.8 d10, [r7, : 64] - vmlal.s32 q6, d14, d5 - vext.32 d8, d11, d10, #0 - vmlal.s32 q6, d15, d4 - vmov.i64 d9, #0 - vmlal.s32 q6, d29, d1 - vmlal.s32 q4, d20, d7 - vmlal.s32 q4, d21, d6 - vmlal.s32 q4, d26, d5 - vext.32 d11, d12, d12, #0 - vmlal.s32 q4, d27, d4 - vmov.i64 d10, #0 - vmlal.s32 q4, d28, d1 - vmlal.s32 q5, d16, d0 - sub r6, r7, #32 - vmlal.s32 q5, d17, d7 - vmlal.s32 q5, d14, d6 - vext.32 d30, d9, d8, #0 - vmlal.s32 q5, d15, d5 - vld1.8 {d31}, [r6, : 64]! - vmlal.s32 q5, d29, d4 - vmlal.s32 q15, d20, d0 - vext.32 d0, d6, d18, #1 - vmlal.s32 q15, d21, d25 - vrev64.i32 d0, d0 - vmlal.s32 q15, d26, d24 - vext.32 d1, d7, d19, #1 - vext.32 d7, d10, d10, #0 - vmlal.s32 q15, d27, d23 - vrev64.i32 d1, d1 - vld1.8 {d6}, [r6, : 64] - vmlal.s32 q15, d28, d22 - vmlal.s32 q3, d16, d4 - add r6, r6, #24 - vmlal.s32 q3, d17, d2 - vext.32 d4, d31, d30, #0 - vmov d17, d11 - vmlal.s32 q3, d14, d1 - vext.32 d11, d13, d13, #0 - vext.32 d13, d30, d30, #0 - vmlal.s32 q3, d15, d0 - vext.32 d1, d8, d8, #0 - vmlal.s32 q3, d29, d3 - vld1.8 {d5}, [r6, : 64] - sub r6, r6, #16 - vext.32 d10, d6, d6, #0 - vmov.i32 q1, #0xffffffff - vshl.i64 q4, q1, #25 - add r7, sp, #480 - vld1.8 {d14-d15}, [r7, : 128] - vadd.i64 q9, q2, q7 - vshl.i64 q1, q1, #26 - vshr.s64 q10, q9, #26 - vld1.8 {d0}, [r6, : 64]! - vadd.i64 q5, q5, q10 - vand q9, q9, q1 - vld1.8 {d16}, [r6, : 64]! 
- add r6, sp, #496 - vld1.8 {d20-d21}, [r6, : 128] - vadd.i64 q11, q5, q10 - vsub.i64 q2, q2, q9 - vshr.s64 q9, q11, #25 - vext.32 d12, d5, d4, #0 - vand q11, q11, q4 - vadd.i64 q0, q0, q9 - vmov d19, d7 - vadd.i64 q3, q0, q7 - vsub.i64 q5, q5, q11 - vshr.s64 q11, q3, #26 - vext.32 d18, d11, d10, #0 - vand q3, q3, q1 - vadd.i64 q8, q8, q11 - vadd.i64 q11, q8, q10 - vsub.i64 q0, q0, q3 - vshr.s64 q3, q11, #25 - vand q11, q11, q4 - vadd.i64 q3, q6, q3 - vadd.i64 q6, q3, q7 - vsub.i64 q8, q8, q11 - vshr.s64 q11, q6, #26 - vand q6, q6, q1 - vadd.i64 q9, q9, q11 - vadd.i64 d25, d19, d21 - vsub.i64 q3, q3, q6 - vshr.s64 d23, d25, #25 - vand q4, q12, q4 - vadd.i64 d21, d23, d23 - vshl.i64 d25, d23, #4 - vadd.i64 d21, d21, d23 - vadd.i64 d25, d25, d21 - vadd.i64 d4, d4, d25 - vzip.i32 q0, q8 - vadd.i64 d12, d4, d14 - add r6, r8, #8 - vst1.8 d0, [r6, : 64] - vsub.i64 d19, d19, d9 - add r6, r6, #16 - vst1.8 d16, [r6, : 64] - vshr.s64 d22, d12, #26 - vand q0, q6, q1 - vadd.i64 d10, d10, d22 - vzip.i32 q3, q9 - vsub.i64 d4, d4, d0 - sub r6, r6, #8 - vst1.8 d6, [r6, : 64] - add r6, r6, #16 - vst1.8 d18, [r6, : 64] - vzip.i32 q2, q5 - sub r6, r6, #32 - vst1.8 d4, [r6, : 64] - subs r5, r5, #1 - bhi .Lsquaringloop -.Lskipsquaringloop: - mov r2, r2 - add r5, r3, #288 - add r6, r3, #144 - vmov.i32 q0, #19 - vmov.i32 q1, #0 - vmov.i32 q2, #1 - vzip.i32 q1, q2 - vld1.8 {d4-d5}, [r5, : 128]! - vld1.8 {d6-d7}, [r5, : 128]! - vld1.8 {d9}, [r5, : 64] - vld1.8 {d10-d11}, [r2, : 128]! - add r5, sp, #384 - vld1.8 {d12-d13}, [r2, : 128]! - vmul.i32 q7, q2, q0 - vld1.8 {d8}, [r2, : 64] - vext.32 d17, d11, d10, #1 - vmul.i32 q9, q3, q0 - vext.32 d16, d10, d8, #1 - vshl.u32 q10, q5, q1 - vext.32 d22, d14, d4, #1 - vext.32 d24, d18, d6, #1 - vshl.u32 q13, q6, q1 - vshl.u32 d28, d8, d2 - vrev64.i32 d22, d22 - vmul.i32 d1, d9, d1 - vrev64.i32 d24, d24 - vext.32 d29, d8, d13, #1 - vext.32 d0, d1, d9, #1 - vrev64.i32 d0, d0 - vext.32 d2, d9, d1, #1 - vext.32 d23, d15, d5, #1 - vmull.s32 q4, d20, d4 - vrev64.i32 d23, d23 - vmlal.s32 q4, d21, d1 - vrev64.i32 d2, d2 - vmlal.s32 q4, d26, d19 - vext.32 d3, d5, d15, #1 - vmlal.s32 q4, d27, d18 - vrev64.i32 d3, d3 - vmlal.s32 q4, d28, d15 - vext.32 d14, d12, d11, #1 - vmull.s32 q5, d16, d23 - vext.32 d15, d13, d12, #1 - vmlal.s32 q5, d17, d4 - vst1.8 d8, [r5, : 64]! - vmlal.s32 q5, d14, d1 - vext.32 d12, d9, d8, #0 - vmlal.s32 q5, d15, d19 - vmov.i64 d13, #0 - vmlal.s32 q5, d29, d18 - vext.32 d25, d19, d7, #1 - vmlal.s32 q6, d20, d5 - vrev64.i32 d25, d25 - vmlal.s32 q6, d21, d4 - vst1.8 d11, [r5, : 64]! - vmlal.s32 q6, d26, d1 - vext.32 d9, d10, d10, #0 - vmlal.s32 q6, d27, d19 - vmov.i64 d8, #0 - vmlal.s32 q6, d28, d18 - vmlal.s32 q4, d16, d24 - vmlal.s32 q4, d17, d5 - vmlal.s32 q4, d14, d4 - vst1.8 d12, [r5, : 64]! - vmlal.s32 q4, d15, d1 - vext.32 d10, d13, d12, #0 - vmlal.s32 q4, d29, d19 - vmov.i64 d11, #0 - vmlal.s32 q5, d20, d6 - vmlal.s32 q5, d21, d5 - vmlal.s32 q5, d26, d4 - vext.32 d13, d8, d8, #0 - vmlal.s32 q5, d27, d1 - vmov.i64 d12, #0 - vmlal.s32 q5, d28, d19 - vst1.8 d9, [r5, : 64]! 
- vmlal.s32 q6, d16, d25 - vmlal.s32 q6, d17, d6 - vst1.8 d10, [r5, : 64] - vmlal.s32 q6, d14, d5 - vext.32 d8, d11, d10, #0 - vmlal.s32 q6, d15, d4 - vmov.i64 d9, #0 - vmlal.s32 q6, d29, d1 - vmlal.s32 q4, d20, d7 - vmlal.s32 q4, d21, d6 - vmlal.s32 q4, d26, d5 - vext.32 d11, d12, d12, #0 - vmlal.s32 q4, d27, d4 - vmov.i64 d10, #0 - vmlal.s32 q4, d28, d1 - vmlal.s32 q5, d16, d0 - sub r2, r5, #32 - vmlal.s32 q5, d17, d7 - vmlal.s32 q5, d14, d6 - vext.32 d30, d9, d8, #0 - vmlal.s32 q5, d15, d5 - vld1.8 {d31}, [r2, : 64]! - vmlal.s32 q5, d29, d4 - vmlal.s32 q15, d20, d0 - vext.32 d0, d6, d18, #1 - vmlal.s32 q15, d21, d25 - vrev64.i32 d0, d0 - vmlal.s32 q15, d26, d24 - vext.32 d1, d7, d19, #1 - vext.32 d7, d10, d10, #0 - vmlal.s32 q15, d27, d23 - vrev64.i32 d1, d1 - vld1.8 {d6}, [r2, : 64] - vmlal.s32 q15, d28, d22 - vmlal.s32 q3, d16, d4 - add r2, r2, #24 - vmlal.s32 q3, d17, d2 - vext.32 d4, d31, d30, #0 - vmov d17, d11 - vmlal.s32 q3, d14, d1 - vext.32 d11, d13, d13, #0 - vext.32 d13, d30, d30, #0 - vmlal.s32 q3, d15, d0 - vext.32 d1, d8, d8, #0 - vmlal.s32 q3, d29, d3 - vld1.8 {d5}, [r2, : 64] - sub r2, r2, #16 - vext.32 d10, d6, d6, #0 - vmov.i32 q1, #0xffffffff - vshl.i64 q4, q1, #25 - add r5, sp, #480 - vld1.8 {d14-d15}, [r5, : 128] - vadd.i64 q9, q2, q7 - vshl.i64 q1, q1, #26 - vshr.s64 q10, q9, #26 - vld1.8 {d0}, [r2, : 64]! - vadd.i64 q5, q5, q10 - vand q9, q9, q1 - vld1.8 {d16}, [r2, : 64]! - add r2, sp, #496 - vld1.8 {d20-d21}, [r2, : 128] - vadd.i64 q11, q5, q10 - vsub.i64 q2, q2, q9 - vshr.s64 q9, q11, #25 - vext.32 d12, d5, d4, #0 - vand q11, q11, q4 - vadd.i64 q0, q0, q9 - vmov d19, d7 - vadd.i64 q3, q0, q7 - vsub.i64 q5, q5, q11 - vshr.s64 q11, q3, #26 - vext.32 d18, d11, d10, #0 - vand q3, q3, q1 - vadd.i64 q8, q8, q11 - vadd.i64 q11, q8, q10 - vsub.i64 q0, q0, q3 - vshr.s64 q3, q11, #25 - vand q11, q11, q4 - vadd.i64 q3, q6, q3 - vadd.i64 q6, q3, q7 - vsub.i64 q8, q8, q11 - vshr.s64 q11, q6, #26 - vand q6, q6, q1 - vadd.i64 q9, q9, q11 - vadd.i64 d25, d19, d21 - vsub.i64 q3, q3, q6 - vshr.s64 d23, d25, #25 - vand q4, q12, q4 - vadd.i64 d21, d23, d23 - vshl.i64 d25, d23, #4 - vadd.i64 d21, d21, d23 - vadd.i64 d25, d25, d21 - vadd.i64 d4, d4, d25 - vzip.i32 q0, q8 - vadd.i64 d12, d4, d14 - add r2, r6, #8 - vst1.8 d0, [r2, : 64] - vsub.i64 d19, d19, d9 - add r2, r2, #16 - vst1.8 d16, [r2, : 64] - vshr.s64 d22, d12, #26 - vand q0, q6, q1 - vadd.i64 d10, d10, d22 - vzip.i32 q3, q9 - vsub.i64 d4, d4, d0 - sub r2, r2, #8 - vst1.8 d6, [r2, : 64] - add r2, r2, #16 - vst1.8 d18, [r2, : 64] - vzip.i32 q2, q5 - sub r2, r2, #32 - vst1.8 d4, [r2, : 64] - cmp r4, #0 - beq .Lskippostcopy - add r2, r3, #144 - mov r4, r4 - vld1.8 {d0-d1}, [r2, : 128]! - vld1.8 {d2-d3}, [r2, : 128]! - vld1.8 {d4}, [r2, : 64] - vst1.8 {d0-d1}, [r4, : 128]! - vst1.8 {d2-d3}, [r4, : 128]! - vst1.8 d4, [r4, : 64] -.Lskippostcopy: - cmp r1, #1 - bne .Lskipfinalcopy - add r2, r3, #288 - add r4, r3, #144 - vld1.8 {d0-d1}, [r2, : 128]! - vld1.8 {d2-d3}, [r2, : 128]! - vld1.8 {d4}, [r2, : 64] - vst1.8 {d0-d1}, [r4, : 128]! - vst1.8 {d2-d3}, [r4, : 128]! 
-	vst1.8 d4, [r4, : 64]
-.Lskipfinalcopy:
-	add r1, r1, #1
-	cmp r1, #12
-	blo .Linvertloop
-	add r1, r3, #144
-	ldr r2, [r1], #4
-	ldr r3, [r1], #4
-	ldr r4, [r1], #4
-	ldr r5, [r1], #4
-	ldr r6, [r1], #4
-	ldr r7, [r1], #4
-	ldr r8, [r1], #4
-	ldr r9, [r1], #4
-	ldr r10, [r1], #4
-	ldr r1, [r1]
-	add r11, r1, r1, LSL #4
-	add r11, r11, r1, LSL #1
-	add r11, r11, #16777216
-	mov r11, r11, ASR #25
-	add r11, r11, r2
-	mov r11, r11, ASR #26
-	add r11, r11, r3
-	mov r11, r11, ASR #25
-	add r11, r11, r4
-	mov r11, r11, ASR #26
-	add r11, r11, r5
-	mov r11, r11, ASR #25
-	add r11, r11, r6
-	mov r11, r11, ASR #26
-	add r11, r11, r7
-	mov r11, r11, ASR #25
-	add r11, r11, r8
-	mov r11, r11, ASR #26
-	add r11, r11, r9
-	mov r11, r11, ASR #25
-	add r11, r11, r10
-	mov r11, r11, ASR #26
-	add r11, r11, r1
-	mov r11, r11, ASR #25
-	add r2, r2, r11
-	add r2, r2, r11, LSL #1
-	add r2, r2, r11, LSL #4
-	mov r11, r2, ASR #26
-	add r3, r3, r11
-	sub r2, r2, r11, LSL #26
-	mov r11, r3, ASR #25
-	add r4, r4, r11
-	sub r3, r3, r11, LSL #25
-	mov r11, r4, ASR #26
-	add r5, r5, r11
-	sub r4, r4, r11, LSL #26
-	mov r11, r5, ASR #25
-	add r6, r6, r11
-	sub r5, r5, r11, LSL #25
-	mov r11, r6, ASR #26
-	add r7, r7, r11
-	sub r6, r6, r11, LSL #26
-	mov r11, r7, ASR #25
-	add r8, r8, r11
-	sub r7, r7, r11, LSL #25
-	mov r11, r8, ASR #26
-	add r9, r9, r11
-	sub r8, r8, r11, LSL #26
-	mov r11, r9, ASR #25
-	add r10, r10, r11
-	sub r9, r9, r11, LSL #25
-	mov r11, r10, ASR #26
-	add r1, r1, r11
-	sub r10, r10, r11, LSL #26
-	mov r11, r1, ASR #25
-	sub r1, r1, r11, LSL #25
-	add r2, r2, r3, LSL #26
-	mov r3, r3, LSR #6
-	add r3, r3, r4, LSL #19
-	mov r4, r4, LSR #13
-	add r4, r4, r5, LSL #13
-	mov r5, r5, LSR #19
-	add r5, r5, r6, LSL #6
-	add r6, r7, r8, LSL #25
-	mov r7, r8, LSR #7
-	add r7, r7, r9, LSL #19
-	mov r8, r9, LSR #13
-	add r8, r8, r10, LSL #12
-	mov r9, r10, LSR #20
-	add r1, r9, r1, LSL #6
-	str r2, [r0]
-	str r3, [r0, #4]
-	str r4, [r0, #8]
-	str r5, [r0, #12]
-	str r6, [r0, #16]
-	str r7, [r0, #20]
-	str r8, [r0, #24]
-	str r1, [r0, #28]
-	movw r0, #0
-	mov sp, ip
-	pop {r4-r11, pc}
-ENDPROC(curve25519_neon)
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
deleted file mode 100644
index e7b87e09dd99f4..00000000000000
--- a/arch/arm/crypto/curve25519-glue.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- *
- * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
- * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
- * manually reworked for use in kernel space.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/kpp.h>
-#include <crypto/internal/simd.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/jump_label.h>
-#include <linux/scatterlist.h>
-#include <crypto/curve25519.h>
-
-asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
-				const u8 secret[CURVE25519_KEY_SIZE],
-				const u8 basepoint[CURVE25519_KEY_SIZE]);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-
-void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
-		     const u8 scalar[CURVE25519_KEY_SIZE],
-		     const u8 point[CURVE25519_KEY_SIZE])
-{
-	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
-		kernel_neon_begin();
-		curve25519_neon(out, scalar, point);
-		kernel_neon_end();
-	} else {
-		curve25519_generic(out, scalar, point);
-	}
-}
-EXPORT_SYMBOL(curve25519_arch);
-
-void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
-			  const u8 secret[CURVE25519_KEY_SIZE])
-{
-	return curve25519_arch(pub, secret, curve25519_base_point);
-}
-EXPORT_SYMBOL(curve25519_base_arch);
-
-static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
-				 unsigned int len)
-{
-	u8 *secret = kpp_tfm_ctx(tfm);
-
-	if (!len)
-		curve25519_generate_secret(secret);
-	else if (len == CURVE25519_KEY_SIZE &&
-		 crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
-		memcpy(secret, buf, CURVE25519_KEY_SIZE);
-	else
-		return -EINVAL;
-	return 0;
-}
-
-static int curve25519_compute_value(struct kpp_request *req)
-{
-	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
-	const u8 *secret = kpp_tfm_ctx(tfm);
-	u8 public_key[CURVE25519_KEY_SIZE];
-	u8 buf[CURVE25519_KEY_SIZE];
-	int copied, nbytes;
-	u8 const *bp;
-
-	if (req->src) {
-		copied = sg_copy_to_buffer(req->src,
-					   sg_nents_for_len(req->src,
-							    CURVE25519_KEY_SIZE),
-					   public_key, CURVE25519_KEY_SIZE);
-		if (copied != CURVE25519_KEY_SIZE)
-			return -EINVAL;
-		bp = public_key;
-	} else {
-		bp = curve25519_base_point;
-	}
-
-	curve25519_arch(buf, secret, bp);
-
-	/* might want less than we've got */
-	nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
-	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
-								nbytes),
-				     buf, nbytes);
-	if (copied != nbytes)
-		return -EINVAL;
-	return 0;
-}
-
-static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
-{
-	return CURVE25519_KEY_SIZE;
-}
-
-static struct kpp_alg curve25519_alg = {
-	.base.cra_name		= "curve25519",
-	.base.cra_driver_name	= "curve25519-neon",
-	.base.cra_priority	= 200,
-	.base.cra_module	= THIS_MODULE,
-	.base.cra_ctxsize	= CURVE25519_KEY_SIZE,
-
-	.set_secret		= curve25519_set_secret,
-	.generate_public_key	= curve25519_compute_value,
-	.compute_shared_secret	= curve25519_compute_value,
-	.max_size		= curve25519_max_size,
-};
-
-static int __init arm_curve25519_init(void)
-{
-	if (elf_hwcap & HWCAP_NEON) {
-		static_branch_enable(&have_neon);
-		return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
- crypto_register_kpp(&curve25519_alg) : 0; - } - return 0; -} - -static void __exit arm_curve25519_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) - crypto_unregister_kpp(&curve25519_alg); -} - -module_init(arm_curve25519_init); -module_exit(arm_curve25519_exit); - -MODULE_ALIAS_CRYPTO("curve25519"); -MODULE_ALIAS_CRYPTO("curve25519-neon"); -MODULE_DESCRIPTION("Public key crypto: Curve25519 (NEON-accelerated)"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index b5546a3ac9c5bd..5bc610c1e13a5b 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -559,7 +559,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 4ea0d686e28eaa..0f239c56bfce26 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -516,7 +516,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 0698d9d4b04e8c..917d222d02c40b 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -536,7 +536,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 45d1ee0860e531..ba58a395fa038c 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -508,7 +508,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index e5794b906b655f..81273266ad83f9 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -518,7 +518,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index fb84ba4c135006..81ec471d7c84d1 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -535,7 +535,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 9a05e05523fc65..76166baed5bf5f 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -622,7 +622,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 0e30aa574a2b49..8ebe527e7777a8 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -508,7 
+508,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index d6f5600d941065..82e59dc8f4f3fe 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -509,7 +509,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 16f0adff4ada7f..135848c83a7dab 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -525,7 +525,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5b0e273be74bdb..2194b5a059d490 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -506,7 +506,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 3851d6720fac45..208e1a19621ac6 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -506,7 +506,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile index 2a59265788413e..ab84ede0cbe0e8 100644 --- a/arch/mips/cavium-octeon/Makefile +++ b/arch/mips/cavium-octeon/Makefile @@ -11,9 +11,9 @@ obj-y := cpu.o setup.o octeon-platform.o octeon-irq.o csrc-octeon.o obj-y += dma-octeon.o +obj-y += octeon-crypto.o obj-y += octeon-memcpy.o obj-y += executive/ -obj-y += crypto/ obj-$(CONFIG_MTD) += flash_setup.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile deleted file mode 100644 index 83f2f5dd93cccd..00000000000000 --- a/arch/mips/cavium-octeon/crypto/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# OCTEON-specific crypto modules. -# - -obj-y += octeon-crypto.o - -obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c deleted file mode 100644 index a8ce831e2cebd9..00000000000000 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Cryptographic API. - * - * MD5 Message Digest Algorithm (RFC1321). - * - * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. - * - * Based on crypto/md5.c, which is: - * - * Derived from cryptoapi implementation, originally based on the - * public domain implementation written by Colin Plumb in 1993. - * - * Copyright (c) Cryptoapi developers. 
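
The arm curve25519 glue removed earlier above follows the kernel's standard pattern for SIMD dispatch: a static key is flipped once at init time when the CPU advertises NEON, and the accelerated path is taken only when SIMD is also usable in the current context. A minimal sketch of that pattern, reusing the helpers from the deleted file (curve25519_dispatch_sketch is a hypothetical name, not the removed function):

#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/curve25519.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/types.h>

asmlinkage void curve25519_neon(u8 out[CURVE25519_KEY_SIZE],
				const u8 secret[CURVE25519_KEY_SIZE],
				const u8 basepoint[CURVE25519_KEY_SIZE]);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

static void curve25519_dispatch_sketch(u8 out[CURVE25519_KEY_SIZE],
				       const u8 scalar[CURVE25519_KEY_SIZE],
				       const u8 point[CURVE25519_KEY_SIZE])
{
	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
		kernel_neon_begin();		/* claim the NEON unit */
		curve25519_neon(out, scalar, point);
		kernel_neon_end();
	} else {
		curve25519_generic(out, scalar, point);	/* C fallback */
	}
}
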
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include <asm/octeon/crypto.h> -#include <asm/octeon/octeon.h> -#include <crypto/internal/hash.h> -#include <crypto/md5.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/unaligned.h> - -struct octeon_md5_state { - __le32 hash[MD5_HASH_WORDS]; - u64 byte_count; -}; - -/* - * We pass everything as 64-bit. OCTEON can handle misaligned data. - */ - -static void octeon_md5_store_hash(struct octeon_md5_state *ctx) -{ - u64 *hash = (u64 *)ctx->hash; - - write_octeon_64bit_hash_dword(hash[0], 0); - write_octeon_64bit_hash_dword(hash[1], 1); -} - -static void octeon_md5_read_hash(struct octeon_md5_state *ctx) -{ - u64 *hash = (u64 *)ctx->hash; - - hash[0] = read_octeon_64bit_hash_dword(0); - hash[1] = read_octeon_64bit_hash_dword(1); -} - -static void octeon_md5_transform(const void *_block) -{ - const u64 *block = _block; - - write_octeon_64bit_block_dword(block[0], 0); - write_octeon_64bit_block_dword(block[1], 1); - write_octeon_64bit_block_dword(block[2], 2); - write_octeon_64bit_block_dword(block[3], 3); - write_octeon_64bit_block_dword(block[4], 4); - write_octeon_64bit_block_dword(block[5], 5); - write_octeon_64bit_block_dword(block[6], 6); - octeon_md5_start(block[7]); -} - -static int octeon_md5_init(struct shash_desc *desc) -{ - struct octeon_md5_state *mctx = shash_desc_ctx(desc); - - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); - mctx->byte_count = 0; - - return 0; -} - -static int octeon_md5_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct octeon_md5_state *mctx = shash_desc_ctx(desc); - struct octeon_cop2_state state; - unsigned long flags; - - mctx->byte_count += len; - flags = octeon_crypto_enable(&state); - octeon_md5_store_hash(mctx); - - do { - octeon_md5_transform(data); - data += MD5_HMAC_BLOCK_SIZE; - len -= MD5_HMAC_BLOCK_SIZE; - } while (len >= MD5_HMAC_BLOCK_SIZE); - - octeon_md5_read_hash(mctx); - octeon_crypto_disable(&state, flags); - mctx->byte_count -= len; - return len; -} - -static int octeon_md5_finup(struct shash_desc *desc, const u8 *src, - unsigned int offset, u8 *out) -{ - struct octeon_md5_state *mctx = shash_desc_ctx(desc); - int padding = 56 - (offset + 1); - struct octeon_cop2_state state; - u32 block[MD5_BLOCK_WORDS]; - unsigned long flags; - char *p; - - p = memcpy(block, src, offset); - p += offset; - *p++ = 0x80; - - flags = octeon_crypto_enable(&state); - octeon_md5_store_hash(mctx); - - if (padding < 0) { - memset(p, 0x00, padding + sizeof(u64)); - octeon_md5_transform(block); - p = (char *)block; - padding = 56; - } - - memset(p, 0, padding); - mctx->byte_count += offset; - block[14] = mctx->byte_count << 3; - block[15] = mctx->byte_count >> 29; - cpu_to_le32_array(block + 14, 2); - octeon_md5_transform(block); - - octeon_md5_read_hash(mctx); - octeon_crypto_disable(&state, flags); - - memzero_explicit(block, sizeof(block)); - memcpy(out, mctx->hash, sizeof(mctx->hash)); - - return 0; -} - -static int octeon_md5_export(struct shash_desc *desc, void *out) -{ - struct octeon_md5_state *ctx = shash_desc_ctx(desc); - union { - u8 *u8; - u32 
*u32; - u64 *u64; - } p = { .u8 = out }; - int i; - - for (i = 0; i < MD5_HASH_WORDS; i++) - put_unaligned(le32_to_cpu(ctx->hash[i]), p.u32++); - put_unaligned(ctx->byte_count, p.u64); - return 0; -} - -static int octeon_md5_import(struct shash_desc *desc, const void *in) -{ - struct octeon_md5_state *ctx = shash_desc_ctx(desc); - union { - const u8 *u8; - const u32 *u32; - const u64 *u64; - } p = { .u8 = in }; - int i; - - for (i = 0; i < MD5_HASH_WORDS; i++) - ctx->hash[i] = cpu_to_le32(get_unaligned(p.u32++)); - ctx->byte_count = get_unaligned(p.u64); - return 0; -} - -static struct shash_alg alg = { - .digestsize = MD5_DIGEST_SIZE, - .init = octeon_md5_init, - .update = octeon_md5_update, - .finup = octeon_md5_finup, - .export = octeon_md5_export, - .import = octeon_md5_import, - .statesize = MD5_STATE_SIZE, - .descsize = sizeof(struct octeon_md5_state), - .base = { - .cra_name = "md5", - .cra_driver_name= "octeon-md5", - .cra_priority = OCTEON_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = MD5_HMAC_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init md5_mod_init(void) -{ - if (!octeon_has_crypto()) - return -ENOTSUPP; - return crypto_register_shash(&alg); -} - -static void __exit md5_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(md5_mod_init); -module_exit(md5_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("MD5 Message Digest Algorithm (OCTEON)"); -MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/octeon-crypto.c index 0ff8559391f5b7..0ff8559391f5b7 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/octeon-crypto.c diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 3f50e1d78894a1..68c363366bceb8 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -155,7 +155,6 @@ CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD5_OCTEON=y CONFIG_CRYPTO_DES=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig index 7b91f4ec65bffb..6a5bd5074867e0 100644 --- a/arch/mips/crypto/Kconfig +++ b/arch/mips/crypto/Kconfig @@ -2,14 +2,4 @@ menu "Accelerated Cryptographic Algorithms for CPU (mips)" -config CRYPTO_MD5_OCTEON - tristate "Digests: MD5 (OCTEON)" - depends on CPU_CAVIUM_OCTEON - select CRYPTO_MD5 - select CRYPTO_HASH - help - MD5 message digest algorithm (RFC1321) - - Architecture: mips OCTEON using crypto instructions, when available - endmenu diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index d06388b0f66e31..bd4685612de6dd 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -320,7 +320,6 @@ CONFIG_XMON=y CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_WP512=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index ce34597e9f3e14..2d92c11eea7e47 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -387,7 +387,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_LZO=m -CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_AES_GCM_P10=m 
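
The removed octeon_md5_update() above illustrates the CRYPTO_AHASH_ALG_BLOCK_ONLY contract: ->update() is handed at least one full block, hashes only whole 64-byte blocks, and returns the leftover byte count for the crypto core to buffer (hence the final byte_count -= len; return len;). A reduced sketch of that shape, with hash_full_blocks() a hypothetical stand-in for the COP2 transform loop:

static int blocks_only_update_sketch(const u8 *data, unsigned int len)
{
	unsigned int remain = len % MD5_HMAC_BLOCK_SIZE;

	hash_full_blocks(data, len - remain);	/* hypothetical helper */
	return remain;		/* tail is buffered by the core */
}
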
CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index cfe39fc221cf81..662aed46f9c795 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -2,27 +2,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (powerpc)" -config CRYPTO_CURVE25519_PPC64 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN - select CRYPTO_KPP - select CRYPTO_LIB_CURVE25519_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CURVE25519 - default CRYPTO_LIB_CURVE25519_INTERNAL - help - Curve25519 algorithm - - Architecture: PowerPC64 - - Little-endian - -config CRYPTO_MD5_PPC - tristate "Digests: MD5" - select CRYPTO_HASH - help - MD5 message digest algorithm (RFC1321) - - Architecture: powerpc - config CRYPTO_AES_PPC_SPE tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)" depends on SPE diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index bc8fd27344b8bb..5960e5300db71e 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -6,16 +6,12 @@ # obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o -obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o -md5-ppc-y := md5-asm.o md5-glue.o aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o -curve25519-ppc64le-y := curve25519-ppc64le-core.o curve25519-ppc64le_asm.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) override flavour := linux-ppc64le diff --git a/arch/powerpc/crypto/curve25519-ppc64le-core.c b/arch/powerpc/crypto/curve25519-ppc64le-core.c deleted file mode 100644 index f7810be0b292b7..00000000000000 --- a/arch/powerpc/crypto/curve25519-ppc64le-core.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2024- IBM Corp. - * - * X25519 scalar multiplication with 51 bits limbs for PPC64le. 
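
Background for the fe51 code that follows: an element of GF(2^255 - 19) is kept in five 51-bit limbs (5 * 51 = 255), leaving 13 bits of headroom per 64-bit word so that limb-wise additions can defer carry propagation. A portable sketch of the 32-byte unpacking that the x25519_fe51_frombytes assembly performs with shifts and masks (little-endian host assumed; names are illustrative):

#include <stdint.h>
#include <string.h>

typedef uint64_t fe51[5];

static void fe51_frombytes_sketch(fe51 h, const uint8_t s[32])
{
	const uint64_t mask = (1ULL << 51) - 1;
	uint64_t w[4];

	memcpy(w, s, 32);			/* four 64-bit words */
	h[0] =  w[0]                     & mask;
	h[1] = (w[0] >> 51 | w[1] << 13) & mask;
	h[2] = (w[1] >> 38 | w[2] << 26) & mask;
	h[3] = (w[2] >> 25 | w[3] << 39) & mask;
	h[4] = (w[3] >> 12)              & mask;
}
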
- * Based on RFC7748 and AArch64 optimized implementation for X25519 - * - Algorithm 1 Scalar multiplication of a variable point - */ - -#include <crypto/curve25519.h> -#include <crypto/internal/kpp.h> - -#include <linux/types.h> -#include <linux/jump_label.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/scatterlist.h> - -#include <linux/cpufeature.h> -#include <linux/processor.h> - -typedef uint64_t fe51[5]; - -asmlinkage void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g); -asmlinkage void x25519_fe51_sqr(fe51 h, const fe51 f); -asmlinkage void x25519_fe51_mul121666(fe51 h, fe51 f); -asmlinkage void x25519_fe51_sqr_times(fe51 h, const fe51 f, int n); -asmlinkage void x25519_fe51_frombytes(fe51 h, const uint8_t *s); -asmlinkage void x25519_fe51_tobytes(uint8_t *s, const fe51 h); -asmlinkage void x25519_cswap(fe51 p, fe51 q, unsigned int bit); - -#define fmul x25519_fe51_mul -#define fsqr x25519_fe51_sqr -#define fmul121666 x25519_fe51_mul121666 -#define fe51_tobytes x25519_fe51_tobytes - -static void fadd(fe51 h, const fe51 f, const fe51 g) -{ - h[0] = f[0] + g[0]; - h[1] = f[1] + g[1]; - h[2] = f[2] + g[2]; - h[3] = f[3] + g[3]; - h[4] = f[4] + g[4]; -} - -/* - * Prime = 2 ** 255 - 19, 255 bits - * (0x7fffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffed) - * - * Prime in 5 51-bit limbs - */ -static fe51 prime51 = { 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff}; - -static void fsub(fe51 h, const fe51 f, const fe51 g) -{ - h[0] = (f[0] + ((prime51[0] * 2))) - g[0]; - h[1] = (f[1] + ((prime51[1] * 2))) - g[1]; - h[2] = (f[2] + ((prime51[2] * 2))) - g[2]; - h[3] = (f[3] + ((prime51[3] * 2))) - g[3]; - h[4] = (f[4] + ((prime51[4] * 2))) - g[4]; -} - -static void fe51_frombytes(fe51 h, const uint8_t *s) -{ - /* - * Make sure 64-bit aligned. 
- */ - unsigned char sbuf[32+8]; - unsigned char *sb = PTR_ALIGN((void *)sbuf, 8); - - memcpy(sb, s, 32); - x25519_fe51_frombytes(h, sb); -} - -static void finv(fe51 o, const fe51 i) -{ - fe51 a0, b, c, t00; - - fsqr(a0, i); - x25519_fe51_sqr_times(t00, a0, 2); - - fmul(b, t00, i); - fmul(a0, b, a0); - - fsqr(t00, a0); - - fmul(b, t00, b); - x25519_fe51_sqr_times(t00, b, 5); - - fmul(b, t00, b); - x25519_fe51_sqr_times(t00, b, 10); - - fmul(c, t00, b); - x25519_fe51_sqr_times(t00, c, 20); - - fmul(t00, t00, c); - x25519_fe51_sqr_times(t00, t00, 10); - - fmul(b, t00, b); - x25519_fe51_sqr_times(t00, b, 50); - - fmul(c, t00, b); - x25519_fe51_sqr_times(t00, c, 100); - - fmul(t00, t00, c); - x25519_fe51_sqr_times(t00, t00, 50); - - fmul(t00, t00, b); - x25519_fe51_sqr_times(t00, t00, 5); - - fmul(o, t00, a0); -} - -static void curve25519_fe51(uint8_t out[32], const uint8_t scalar[32], - const uint8_t point[32]) -{ - fe51 x1, x2, z2, x3, z3; - uint8_t s[32]; - unsigned int swap = 0; - int i; - - memcpy(s, scalar, 32); - s[0] &= 0xf8; - s[31] &= 0x7f; - s[31] |= 0x40; - fe51_frombytes(x1, point); - - z2[0] = z2[1] = z2[2] = z2[3] = z2[4] = 0; - x3[0] = x1[0]; - x3[1] = x1[1]; - x3[2] = x1[2]; - x3[3] = x1[3]; - x3[4] = x1[4]; - - x2[0] = z3[0] = 1; - x2[1] = z3[1] = 0; - x2[2] = z3[2] = 0; - x2[3] = z3[3] = 0; - x2[4] = z3[4] = 0; - - for (i = 254; i >= 0; --i) { - unsigned int k_t = 1 & (s[i / 8] >> (i & 7)); - fe51 a, b, c, d, e; - fe51 da, cb, aa, bb; - fe51 dacb_p, dacb_m; - - swap ^= k_t; - x25519_cswap(x2, x3, swap); - x25519_cswap(z2, z3, swap); - swap = k_t; - - fsub(b, x2, z2); // B = x_2 - z_2 - fadd(a, x2, z2); // A = x_2 + z_2 - fsub(d, x3, z3); // D = x_3 - z_3 - fadd(c, x3, z3); // C = x_3 + z_3 - - fsqr(bb, b); // BB = B^2 - fsqr(aa, a); // AA = A^2 - fmul(da, d, a); // DA = D * A - fmul(cb, c, b); // CB = C * B - - fsub(e, aa, bb); // E = AA - BB - fmul(x2, aa, bb); // x2 = AA * BB - fadd(dacb_p, da, cb); // DA + CB - fsub(dacb_m, da, cb); // DA - CB - - fmul121666(z3, e); // 121666 * E - fsqr(z2, dacb_m); // (DA - CB)^2 - fsqr(x3, dacb_p); // x3 = (DA + CB)^2 - fadd(b, bb, z3); // BB + 121666 * E - fmul(z3, x1, z2); // z3 = x1 * (DA - CB)^2 - fmul(z2, e, b); // z2 = e * (BB + (DA + CB)^2) - } - - finv(z2, z2); - fmul(x2, x2, z2); - fe51_tobytes(out, x2); -} - -void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE], - const u8 basepoint[CURVE25519_KEY_SIZE]) -{ - curve25519_fe51(mypublic, secret, basepoint); -} -EXPORT_SYMBOL(curve25519_arch); - -void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE]) -{ - curve25519_fe51(pub, secret, curve25519_base_point); -} -EXPORT_SYMBOL(curve25519_base_arch); - -static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, - unsigned int len) -{ - u8 *secret = kpp_tfm_ctx(tfm); - - if (!len) - curve25519_generate_secret(secret); - else if (len == CURVE25519_KEY_SIZE && - crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) - memcpy(secret, buf, CURVE25519_KEY_SIZE); - else - return -EINVAL; - return 0; -} - -static int curve25519_generate_public_key(struct kpp_request *req) -{ - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - const u8 *secret = kpp_tfm_ctx(tfm); - u8 buf[CURVE25519_KEY_SIZE]; - int copied, nbytes; - - if (req->src) - return -EINVAL; - - curve25519_base_arch(buf, secret); - - /* might want less than we've got */ - nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); - copied = sg_copy_from_buffer(req->dst, 
sg_nents_for_len(req->dst, - nbytes), - buf, nbytes); - if (copied != nbytes) - return -EINVAL; - return 0; -} - -static int curve25519_compute_shared_secret(struct kpp_request *req) -{ - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - const u8 *secret = kpp_tfm_ctx(tfm); - u8 public_key[CURVE25519_KEY_SIZE]; - u8 buf[CURVE25519_KEY_SIZE]; - int copied, nbytes; - - if (!req->src) - return -EINVAL; - - copied = sg_copy_to_buffer(req->src, - sg_nents_for_len(req->src, - CURVE25519_KEY_SIZE), - public_key, CURVE25519_KEY_SIZE); - if (copied != CURVE25519_KEY_SIZE) - return -EINVAL; - - curve25519_arch(buf, secret, public_key); - - /* might want less than we've got */ - nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); - copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, - nbytes), - buf, nbytes); - if (copied != nbytes) - return -EINVAL; - return 0; -} - -static unsigned int curve25519_max_size(struct crypto_kpp *tfm) -{ - return CURVE25519_KEY_SIZE; -} - -static struct kpp_alg curve25519_alg = { - .base.cra_name = "curve25519", - .base.cra_driver_name = "curve25519-ppc64le", - .base.cra_priority = 200, - .base.cra_module = THIS_MODULE, - .base.cra_ctxsize = CURVE25519_KEY_SIZE, - - .set_secret = curve25519_set_secret, - .generate_public_key = curve25519_generate_public_key, - .compute_shared_secret = curve25519_compute_shared_secret, - .max_size = curve25519_max_size, -}; - - -static int __init curve25519_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? - crypto_register_kpp(&curve25519_alg) : 0; -} - -static void __exit curve25519_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_KPP)) - crypto_unregister_kpp(&curve25519_alg); -} - -module_init(curve25519_mod_init); -module_exit(curve25519_mod_exit); - -MODULE_ALIAS_CRYPTO("curve25519"); -MODULE_ALIAS_CRYPTO("curve25519-ppc64le"); -MODULE_DESCRIPTION("PPC64le Curve25519 scalar multiplication with 51 bits limbs"); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Danny Tsen <dtsen@us.ibm.com>"); diff --git a/arch/powerpc/crypto/curve25519-ppc64le_asm.S b/arch/powerpc/crypto/curve25519-ppc64le_asm.S deleted file mode 100644 index 06c1febe24b91a..00000000000000 --- a/arch/powerpc/crypto/curve25519-ppc64le_asm.S +++ /dev/null @@ -1,671 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -# -# This code is taken from CRYPTOGAMs[1] and is included here using the option -# in the license to distribute the code under the GPL. Therefore this program -# is free software; you can redistribute it and/or modify it under the terms of -# the GNU General Public License version 2 as published by the Free Software -# Foundation. -# -# [1] https://github.com/dot-asm/cryptogams/ - -# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain copyright notices, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# * Neither the name of the CRYPTOGAMS nor the names of its -# copyright holder and contributors may be used to endorse or -# promote products derived from this software without specific -# prior written permission. 
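
The ladder loop in the removed curve25519_fe51() above must swap (x2, z2) with (x3, z3) according to the current scalar bit without branching on secret data. A portable sketch of the constant-time swap that the x25519_cswap routine below implements with neg/xor/and:

#include <stdint.h>

static void cswap_sketch(uint64_t p[5], uint64_t q[5], unsigned int bit)
{
	uint64_t mask = 0 - (uint64_t)bit;	/* all-ones iff bit == 1 */
	int i;

	for (i = 0; i < 5; i++) {
		uint64_t t = (p[i] ^ q[i]) & mask;

		p[i] ^= t;			/* p' = bit ? q : p */
		q[i] ^= t;			/* q' = bit ? p : q */
	}
}
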
-# -# ALTERNATIVELY, provided that this notice is retained in full, this -# product may be distributed under the terms of the GNU General Public -# License (GPL), in which case the provisions of the GPL apply INSTEAD OF -# those given above. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see https://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# -# ==================================================================== -# Written and Modified by Danny Tsen <dtsen@us.ibm.com> -# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes -# and x25519_cswap -# -# Copyright 2024- IBM Corp. -# -# X25519 lower-level primitives for PPC64. -# - -#include <linux/linkage.h> - -.text - -.align 5 -SYM_FUNC_START(x25519_fe51_mul) - - stdu 1,-144(1) - std 21,56(1) - std 22,64(1) - std 23,72(1) - std 24,80(1) - std 25,88(1) - std 26,96(1) - std 27,104(1) - std 28,112(1) - std 29,120(1) - std 30,128(1) - std 31,136(1) - - ld 6,0(5) - ld 7,0(4) - ld 8,8(4) - ld 9,16(4) - ld 10,24(4) - ld 11,32(4) - - mulld 22,7,6 - mulhdu 23,7,6 - - mulld 24,8,6 - mulhdu 25,8,6 - - mulld 30,11,6 - mulhdu 31,11,6 - ld 4,8(5) - mulli 11,11,19 - - mulld 26,9,6 - mulhdu 27,9,6 - - mulld 28,10,6 - mulhdu 29,10,6 - mulld 12,11,4 - mulhdu 21,11,4 - addc 22,22,12 - adde 23,23,21 - - mulld 12,7,4 - mulhdu 21,7,4 - addc 24,24,12 - adde 25,25,21 - - mulld 12,10,4 - mulhdu 21,10,4 - ld 6,16(5) - mulli 10,10,19 - addc 30,30,12 - adde 31,31,21 - - mulld 12,8,4 - mulhdu 21,8,4 - addc 26,26,12 - adde 27,27,21 - - mulld 12,9,4 - mulhdu 21,9,4 - addc 28,28,12 - adde 29,29,21 - mulld 12,10,6 - mulhdu 21,10,6 - addc 22,22,12 - adde 23,23,21 - - mulld 12,11,6 - mulhdu 21,11,6 - addc 24,24,12 - adde 25,25,21 - - mulld 12,9,6 - mulhdu 21,9,6 - ld 4,24(5) - mulli 9,9,19 - addc 30,30,12 - adde 31,31,21 - - mulld 12,7,6 - mulhdu 21,7,6 - addc 26,26,12 - adde 27,27,21 - - mulld 12,8,6 - mulhdu 21,8,6 - addc 28,28,12 - adde 29,29,21 - mulld 12,9,4 - mulhdu 21,9,4 - addc 22,22,12 - adde 23,23,21 - - mulld 12,10,4 - mulhdu 21,10,4 - addc 24,24,12 - adde 25,25,21 - - mulld 12,8,4 - mulhdu 21,8,4 - ld 6,32(5) - mulli 8,8,19 - addc 30,30,12 - adde 31,31,21 - - mulld 12,11,4 - mulhdu 21,11,4 - addc 26,26,12 - adde 27,27,21 - - mulld 12,7,4 - mulhdu 21,7,4 - addc 28,28,12 - adde 29,29,21 - mulld 12,8,6 - mulhdu 21,8,6 - addc 22,22,12 - adde 23,23,21 - - mulld 12,9,6 - mulhdu 21,9,6 - addc 24,24,12 - adde 25,25,21 - - mulld 12,10,6 - mulhdu 21,10,6 - addc 26,26,12 - adde 
27,27,21 - - mulld 12,11,6 - mulhdu 21,11,6 - addc 28,28,12 - adde 29,29,21 - - mulld 12,7,6 - mulhdu 21,7,6 - addc 30,30,12 - adde 31,31,21 - -.Lfe51_reduce: - li 0,-1 - srdi 0,0,13 - - srdi 12,26,51 - and 9,26,0 - insrdi 12,27,51,0 - srdi 21,22,51 - and 7,22,0 - insrdi 21,23,51,0 - addc 28,28,12 - addze 29,29 - addc 24,24,21 - addze 25,25 - - srdi 12,28,51 - and 10,28,0 - insrdi 12,29,51,0 - srdi 21,24,51 - and 8,24,0 - insrdi 21,25,51,0 - addc 30,30,12 - addze 31,31 - add 9,9,21 - - srdi 12,30,51 - and 11,30,0 - insrdi 12,31,51,0 - mulli 12,12,19 - - add 7,7,12 - - srdi 21,9,51 - and 9,9,0 - add 10,10,21 - - srdi 12,7,51 - and 7,7,0 - add 8,8,12 - - std 9,16(3) - std 10,24(3) - std 11,32(3) - std 7,0(3) - std 8,8(3) - - ld 21,56(1) - ld 22,64(1) - ld 23,72(1) - ld 24,80(1) - ld 25,88(1) - ld 26,96(1) - ld 27,104(1) - ld 28,112(1) - ld 29,120(1) - ld 30,128(1) - ld 31,136(1) - addi 1,1,144 - blr -SYM_FUNC_END(x25519_fe51_mul) - -.align 5 -SYM_FUNC_START(x25519_fe51_sqr) - - stdu 1,-144(1) - std 21,56(1) - std 22,64(1) - std 23,72(1) - std 24,80(1) - std 25,88(1) - std 26,96(1) - std 27,104(1) - std 28,112(1) - std 29,120(1) - std 30,128(1) - std 31,136(1) - - ld 7,0(4) - ld 8,8(4) - ld 9,16(4) - ld 10,24(4) - ld 11,32(4) - - add 6,7,7 - mulli 21,11,19 - - mulld 22,7,7 - mulhdu 23,7,7 - mulld 24,8,6 - mulhdu 25,8,6 - mulld 26,9,6 - mulhdu 27,9,6 - mulld 28,10,6 - mulhdu 29,10,6 - mulld 30,11,6 - mulhdu 31,11,6 - add 6,8,8 - mulld 12,11,21 - mulhdu 11,11,21 - addc 28,28,12 - adde 29,29,11 - - mulli 5,10,19 - - mulld 12,8,8 - mulhdu 11,8,8 - addc 26,26,12 - adde 27,27,11 - mulld 12,9,6 - mulhdu 11,9,6 - addc 28,28,12 - adde 29,29,11 - mulld 12,10,6 - mulhdu 11,10,6 - addc 30,30,12 - adde 31,31,11 - mulld 12,21,6 - mulhdu 11,21,6 - add 6,10,10 - addc 22,22,12 - adde 23,23,11 - mulld 12,10,5 - mulhdu 10,10,5 - addc 24,24,12 - adde 25,25,10 - mulld 12,6,21 - mulhdu 10,6,21 - add 6,9,9 - addc 26,26,12 - adde 27,27,10 - - mulld 12,9,9 - mulhdu 10,9,9 - addc 30,30,12 - adde 31,31,10 - mulld 12,5,6 - mulhdu 10,5,6 - addc 22,22,12 - adde 23,23,10 - mulld 12,21,6 - mulhdu 10,21,6 - addc 24,24,12 - adde 25,25,10 - - b .Lfe51_reduce -SYM_FUNC_END(x25519_fe51_sqr) - -.align 5 -SYM_FUNC_START(x25519_fe51_mul121666) - - stdu 1,-144(1) - std 21,56(1) - std 22,64(1) - std 23,72(1) - std 24,80(1) - std 25,88(1) - std 26,96(1) - std 27,104(1) - std 28,112(1) - std 29,120(1) - std 30,128(1) - std 31,136(1) - - lis 6,1 - ori 6,6,56130 - ld 7,0(4) - ld 8,8(4) - ld 9,16(4) - ld 10,24(4) - ld 11,32(4) - - mulld 22,7,6 - mulhdu 23,7,6 - mulld 24,8,6 - mulhdu 25,8,6 - mulld 26,9,6 - mulhdu 27,9,6 - mulld 28,10,6 - mulhdu 29,10,6 - mulld 30,11,6 - mulhdu 31,11,6 - - b .Lfe51_reduce -SYM_FUNC_END(x25519_fe51_mul121666) - -.align 5 -SYM_FUNC_START(x25519_fe51_sqr_times) - - stdu 1,-144(1) - std 21,56(1) - std 22,64(1) - std 23,72(1) - std 24,80(1) - std 25,88(1) - std 26,96(1) - std 27,104(1) - std 28,112(1) - std 29,120(1) - std 30,128(1) - std 31,136(1) - - ld 7,0(4) - ld 8,8(4) - ld 9,16(4) - ld 10,24(4) - ld 11,32(4) - - mtctr 5 - -.Lsqr_times_loop: - add 6,7,7 - mulli 21,11,19 - - mulld 22,7,7 - mulhdu 23,7,7 - mulld 24,8,6 - mulhdu 25,8,6 - mulld 26,9,6 - mulhdu 27,9,6 - mulld 28,10,6 - mulhdu 29,10,6 - mulld 30,11,6 - mulhdu 31,11,6 - add 6,8,8 - mulld 12,11,21 - mulhdu 11,11,21 - addc 28,28,12 - adde 29,29,11 - - mulli 5,10,19 - - mulld 12,8,8 - mulhdu 11,8,8 - addc 26,26,12 - adde 27,27,11 - mulld 12,9,6 - mulhdu 11,9,6 - addc 28,28,12 - adde 29,29,11 - mulld 12,10,6 - mulhdu 11,10,6 - addc 30,30,12 - adde 
31,31,11 - mulld 12,21,6 - mulhdu 11,21,6 - add 6,10,10 - addc 22,22,12 - adde 23,23,11 - mulld 12,10,5 - mulhdu 10,10,5 - addc 24,24,12 - adde 25,25,10 - mulld 12,6,21 - mulhdu 10,6,21 - add 6,9,9 - addc 26,26,12 - adde 27,27,10 - - mulld 12,9,9 - mulhdu 10,9,9 - addc 30,30,12 - adde 31,31,10 - mulld 12,5,6 - mulhdu 10,5,6 - addc 22,22,12 - adde 23,23,10 - mulld 12,21,6 - mulhdu 10,21,6 - addc 24,24,12 - adde 25,25,10 - - # fe51_reduce - li 0,-1 - srdi 0,0,13 - - srdi 12,26,51 - and 9,26,0 - insrdi 12,27,51,0 - srdi 21,22,51 - and 7,22,0 - insrdi 21,23,51,0 - addc 28,28,12 - addze 29,29 - addc 24,24,21 - addze 25,25 - - srdi 12,28,51 - and 10,28,0 - insrdi 12,29,51,0 - srdi 21,24,51 - and 8,24,0 - insrdi 21,25,51,0 - addc 30,30,12 - addze 31,31 - add 9,9,21 - - srdi 12,30,51 - and 11,30,0 - insrdi 12,31,51,0 - mulli 12,12,19 - - add 7,7,12 - - srdi 21,9,51 - and 9,9,0 - add 10,10,21 - - srdi 12,7,51 - and 7,7,0 - add 8,8,12 - - bdnz .Lsqr_times_loop - - std 9,16(3) - std 10,24(3) - std 11,32(3) - std 7,0(3) - std 8,8(3) - - ld 21,56(1) - ld 22,64(1) - ld 23,72(1) - ld 24,80(1) - ld 25,88(1) - ld 26,96(1) - ld 27,104(1) - ld 28,112(1) - ld 29,120(1) - ld 30,128(1) - ld 31,136(1) - addi 1,1,144 - blr -SYM_FUNC_END(x25519_fe51_sqr_times) - -.align 5 -SYM_FUNC_START(x25519_fe51_frombytes) - - li 12, -1 - srdi 12, 12, 13 # 0x7ffffffffffff - - ld 5, 0(4) - ld 6, 8(4) - ld 7, 16(4) - ld 8, 24(4) - - srdi 10, 5, 51 - and 5, 5, 12 # h0 - - sldi 11, 6, 13 - or 11, 10, 11 # h1t - srdi 10, 6, 38 - and 6, 11, 12 # h1 - - sldi 11, 7, 26 - or 10, 10, 11 # h2t - - srdi 11, 7, 25 - and 7, 10, 12 # h2 - sldi 10, 8, 39 - or 11, 11, 10 # h3t - - srdi 9, 8, 12 - and 8, 11, 12 # h3 - and 9, 9, 12 # h4 - - std 5, 0(3) - std 6, 8(3) - std 7, 16(3) - std 8, 24(3) - std 9, 32(3) - - blr -SYM_FUNC_END(x25519_fe51_frombytes) - -.align 5 -SYM_FUNC_START(x25519_fe51_tobytes) - - ld 5, 0(4) - ld 6, 8(4) - ld 7, 16(4) - ld 8, 24(4) - ld 9, 32(4) - - li 12, -1 - srdi 12, 12, 13 # 0x7ffffffffffff - - # Full reducuction - addi 10, 5, 19 - srdi 10, 10, 51 - add 10, 10, 6 - srdi 10, 10, 51 - add 10, 10, 7 - srdi 10, 10, 51 - add 10, 10, 8 - srdi 10, 10, 51 - add 10, 10, 9 - srdi 10, 10, 51 - - mulli 10, 10, 19 - add 5, 5, 10 - srdi 11, 5, 51 - add 6, 6, 11 - srdi 11, 6, 51 - add 7, 7, 11 - srdi 11, 7, 51 - add 8, 8, 11 - srdi 11, 8, 51 - add 9, 9, 11 - - and 5, 5, 12 - and 6, 6, 12 - and 7, 7, 12 - and 8, 8, 12 - and 9, 9, 12 - - sldi 10, 6, 51 - or 5, 5, 10 # s0 - - srdi 11, 6, 13 - sldi 10, 7, 38 - or 6, 11, 10 # s1 - - srdi 11, 7, 26 - sldi 10, 8, 25 - or 7, 11, 10 # s2 - - srdi 11, 8, 39 - sldi 10, 9, 12 - or 8, 11, 10 # s4 - - std 5, 0(3) - std 6, 8(3) - std 7, 16(3) - std 8, 24(3) - - blr -SYM_FUNC_END(x25519_fe51_tobytes) - -.align 5 -SYM_FUNC_START(x25519_cswap) - - li 7, 5 - neg 6, 5 - mtctr 7 - -.Lswap_loop: - ld 8, 0(3) - ld 9, 0(4) - xor 10, 8, 9 - and 10, 10, 6 - xor 11, 8, 10 - xor 12, 9, 10 - std 11, 0(3) - addi 3, 3, 8 - std 12, 0(4) - addi 4, 4, 8 - bdnz .Lswap_loop - - blr -SYM_FUNC_END(x25519_cswap) diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S deleted file mode 100644 index fa6bc440cf4acf..00000000000000 --- a/arch/powerpc/crypto/md5-asm.S +++ /dev/null @@ -1,235 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Fast MD5 implementation for PPC - * - * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> - */ -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/asm-compat.h> - -#define rHP r3 -#define rWP r4 - -#define rH0 r0 -#define 
rH1 r6 -#define rH2 r7 -#define rH3 r5 - -#define rW00 r8 -#define rW01 r9 -#define rW02 r10 -#define rW03 r11 -#define rW04 r12 -#define rW05 r14 -#define rW06 r15 -#define rW07 r16 -#define rW08 r17 -#define rW09 r18 -#define rW10 r19 -#define rW11 r20 -#define rW12 r21 -#define rW13 r22 -#define rW14 r23 -#define rW15 r24 - -#define rT0 r25 -#define rT1 r26 - -#define INITIALIZE \ - PPC_STLU r1,-INT_FRAME_SIZE(r1); \ - SAVE_GPRS(14, 26, r1) /* push registers onto stack */ - -#define FINALIZE \ - REST_GPRS(14, 26, r1); /* pop registers from stack */ \ - addi r1,r1,INT_FRAME_SIZE - -#ifdef __BIG_ENDIAN__ -#define LOAD_DATA(reg, off) \ - lwbrx reg,0,rWP; /* load data */ -#define INC_PTR \ - addi rWP,rWP,4; /* increment per word */ -#define NEXT_BLOCK /* nothing to do */ -#else -#define LOAD_DATA(reg, off) \ - lwz reg,off(rWP); /* load data */ -#define INC_PTR /* nothing to do */ -#define NEXT_BLOCK \ - addi rWP,rWP,64; /* increment per block */ -#endif - -#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \ - LOAD_DATA(w0, off) /* W */ \ - and rT0,b,c; /* 1: f = b and c */ \ - INC_PTR /* ptr++ */ \ - andc rT1,d,b; /* 1: f' = ~b and d */ \ - LOAD_DATA(w1, off+4) /* W */ \ - or rT0,rT0,rT1; /* 1: f = f or f' */ \ - addi w0,w0,k0l; /* 1: wk = w + k */ \ - add a,a,rT0; /* 1: a = a + f */ \ - addis w0,w0,k0h; /* 1: wk = w + k' */ \ - addis w1,w1,k1h; /* 2: wk = w + k */ \ - add a,a,w0; /* 1: a = a + wk */ \ - addi w1,w1,k1l; /* 2: wk = w + k' */ \ - rotrwi a,a,p; /* 1: a = a rotl x */ \ - add d,d,w1; /* 2: a = a + wk */ \ - add a,a,b; /* 1: a = a + b */ \ - and rT0,a,b; /* 2: f = b and c */ \ - andc rT1,c,a; /* 2: f' = ~b and d */ \ - or rT0,rT0,rT1; /* 2: f = f or f' */ \ - add d,d,rT0; /* 2: a = a + f */ \ - INC_PTR /* ptr++ */ \ - rotrwi d,d,q; /* 2: a = a rotl x */ \ - add d,d,a; /* 2: a = a + b */ - -#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ - andc rT0,c,d; /* 1: f = c and ~d */ \ - and rT1,b,d; /* 1: f' = b and d */ \ - addi w0,w0,k0l; /* 1: wk = w + k */ \ - or rT0,rT0,rT1; /* 1: f = f or f' */ \ - addis w0,w0,k0h; /* 1: wk = w + k' */ \ - add a,a,rT0; /* 1: a = a + f */ \ - addi w1,w1,k1l; /* 2: wk = w + k */ \ - add a,a,w0; /* 1: a = a + wk */ \ - addis w1,w1,k1h; /* 2: wk = w + k' */ \ - andc rT0,b,c; /* 2: f = c and ~d */ \ - rotrwi a,a,p; /* 1: a = a rotl x */ \ - add a,a,b; /* 1: a = a + b */ \ - add d,d,w1; /* 2: a = a + wk */ \ - and rT1,a,c; /* 2: f' = b and d */ \ - or rT0,rT0,rT1; /* 2: f = f or f' */ \ - add d,d,rT0; /* 2: a = a + f */ \ - rotrwi d,d,q; /* 2: a = a rotl x */ \ - add d,d,a; /* 2: a = a +b */ - -#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ - xor rT0,b,c; /* 1: f' = b xor c */ \ - addi w0,w0,k0l; /* 1: wk = w + k */ \ - xor rT1,rT0,d; /* 1: f = f xor f' */ \ - addis w0,w0,k0h; /* 1: wk = w + k' */ \ - add a,a,rT1; /* 1: a = a + f */ \ - addi w1,w1,k1l; /* 2: wk = w + k */ \ - add a,a,w0; /* 1: a = a + wk */ \ - addis w1,w1,k1h; /* 2: wk = w + k' */ \ - rotrwi a,a,p; /* 1: a = a rotl x */ \ - add d,d,w1; /* 2: a = a + wk */ \ - add a,a,b; /* 1: a = a + b */ \ - xor rT1,rT0,a; /* 2: f = b xor f' */ \ - add d,d,rT1; /* 2: a = a + f */ \ - rotrwi d,d,q; /* 2: a = a rotl x */ \ - add d,d,a; /* 2: a = a + b */ - -#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ - addi w0,w0,k0l; /* 1: w = w + k */ \ - orc rT0,b,d; /* 1: f = b or ~d */ \ - addis w0,w0,k0h; /* 1: w = w + k' */ \ - xor rT0,rT0,c; /* 1: f = f xor c */ \ - add a,a,w0; /* 1: a = a + wk */ \ - addi w1,w1,k1l; /* 2: w = w + k */ \ - add 
a,a,rT0; /* 1: a = a + f */ \ - addis w1,w1,k1h; /* 2: w = w + k' */ \ - rotrwi a,a,p; /* 1: a = a rotl x */ \ - add a,a,b; /* 1: a = a + b */ \ - orc rT0,a,c; /* 2: f = b or ~d */ \ - add d,d,w1; /* 2: a = a + wk */ \ - xor rT0,rT0,b; /* 2: f = f xor c */ \ - add d,d,rT0; /* 2: a = a + f */ \ - rotrwi d,d,q; /* 2: a = a rotl x */ \ - add d,d,a; /* 2: a = a + b */ - -_GLOBAL(ppc_md5_transform) - INITIALIZE - - mtctr r5 - lwz rH0,0(rHP) - lwz rH1,4(rHP) - lwz rH2,8(rHP) - lwz rH3,12(rHP) - -ppc_md5_main: - R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0, - 0xd76b, -23432, 0xe8c8, -18602) - R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8, - 0x2420, 0x70db, 0xc1be, -12562) - R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16, - 0xf57c, 0x0faf, 0x4788, -14806) - R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24, - 0xa830, 0x4613, 0xfd47, -27391) - R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32, - 0x6981, -26408, 0x8b45, -2129) - R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40, - 0xffff, 0x5bb1, 0x895d, -10306) - R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48, - 0x6b90, 0x1122, 0xfd98, 0x7193) - R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56, - 0xa679, 0x438e, 0x49b4, 0x0821) - - R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23, - 0x0d56, 0x6e0c, 0x1810, 0x6d2d) - R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12, - 0x9d02, -32109, 0x124c, 0x2332) - R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23, - 0x8ea7, 0x4a33, 0x0245, -18270) - R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12, - 0x8eee, -8608, 0xf258, -5095) - R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23, - 0x969d, -10697, 0x1cbe, -15288) - R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12, - 0x3317, 0x3e99, 0xdbd9, 0x7c15) - R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23, - 0xac4b, 0x7772, 0xd8cf, 0x331d) - R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12, - 0x6a28, 0x6dd8, 0x219a, 0x3b68) - - R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21, - 0x29cb, 0x28e5, 0x4218, -7788) - R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9, - 0x473f, 0x06d1, 0x3aae, 0x3036) - R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21, - 0xaea1, -15134, 0x640b, -11295) - R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9, - 0x8f4c, 0x4887, 0xbc7c, -22499) - R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21, - 0x7eb8, -27199, 0x00ea, 0x6050) - R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9, - 0xe01a, 0x22fe, 0x4447, 0x69c5) - R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21, - 0xb7f3, 0x0253, 0x59b1, 0x4d5b) - R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9, - 0x4701, -27017, 0xc7bd, -19859) - - R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22, - 0x0988, -1462, 0x4c70, -19401) - R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11, - 0xadaf, -5221, 0xfc99, 0x66f7) - R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22, - 0x7e80, -16418, 0xba1e, -25587) - R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11, - 0x4130, 0x380d, 0xe0c5, 0x738d) - lwz rW00,0(rHP) - R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22, - 0xe837, -30770, 0xde8a, 0x69e8) - lwz rW14,4(rHP) - R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11, - 0x9e79, 0x260f, 0x256d, -27941) - lwz rW12,8(rHP) - R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22, - 0xab75, -20775, 0x4f9e, -28397) - lwz rW10,12(rHP) - R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11, - 0x662b, 0x7c56, 0x11b2, 0x0358) - - add rH0,rH0,rW00 - stw rH0,0(rHP) - add rH1,rH1,rW14 - stw rH1,4(rHP) - add rH2,rH2,rW12 - stw rH2,8(rHP) - add rH3,rH3,rW10 - stw rH3,12(rHP) - NEXT_BLOCK - - bdnz ppc_md5_main - - FINALIZE - blr diff --git a/arch/powerpc/crypto/md5-glue.c 
b/arch/powerpc/crypto/md5-glue.c deleted file mode 100644 index 204440a90cd84c..00000000000000 --- a/arch/powerpc/crypto/md5-glue.c +++ /dev/null @@ -1,99 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Glue code for MD5 implementation for PPC assembler - * - * Based on generic implementation. - * - * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> - */ - -#include <crypto/internal/hash.h> -#include <crypto/md5.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks); - -static int ppc_md5_init(struct shash_desc *desc) -{ - struct md5_state *sctx = shash_desc_ctx(desc); - - sctx->hash[0] = MD5_H0; - sctx->hash[1] = MD5_H1; - sctx->hash[2] = MD5_H2; - sctx->hash[3] = MD5_H3; - sctx->byte_count = 0; - - return 0; -} - -static int ppc_md5_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct md5_state *sctx = shash_desc_ctx(desc); - - sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE); - ppc_md5_transform(sctx->hash, data, len >> 6); - return len - round_down(len, MD5_HMAC_BLOCK_SIZE); -} - -static int ppc_md5_finup(struct shash_desc *desc, const u8 *src, - unsigned int offset, u8 *out) -{ - struct md5_state *sctx = shash_desc_ctx(desc); - __le64 block[MD5_BLOCK_WORDS] = {}; - u8 *p = memcpy(block, src, offset); - __le32 *dst = (__le32 *)out; - __le64 *pbits; - - src = p; - p += offset; - *p++ = 0x80; - sctx->byte_count += offset; - pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1]; - *pbits = cpu_to_le64(sctx->byte_count << 3); - ppc_md5_transform(sctx->hash, src, (pbits - block + 1) / 8); - memzero_explicit(block, sizeof(block)); - - dst[0] = cpu_to_le32(sctx->hash[0]); - dst[1] = cpu_to_le32(sctx->hash[1]); - dst[2] = cpu_to_le32(sctx->hash[2]); - dst[3] = cpu_to_le32(sctx->hash[3]); - return 0; -} - -static struct shash_alg alg = { - .digestsize = MD5_DIGEST_SIZE, - .init = ppc_md5_init, - .update = ppc_md5_update, - .finup = ppc_md5_finup, - .descsize = MD5_STATE_SIZE, - .base = { - .cra_name = "md5", - .cra_driver_name= "md5-ppc", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = MD5_HMAC_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init ppc_md5_mod_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit ppc_md5_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(ppc_md5_mod_init); -module_exit(ppc_md5_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler"); - -MODULE_ALIAS_CRYPTO("md5"); -MODULE_ALIAS_CRYPTO("md5-ppc"); diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 8ceef305f13875..3c942945729e02 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -758,7 +758,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 3fc12b0af55b25..c752cc114747fc 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -742,7 +742,6 @@ CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m CONFIG_CRYPTO_ECRDSA=m -CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig index 
f5b2e720fec3c1..f755da97953462 100644 --- a/arch/sparc/crypto/Kconfig +++ b/arch/sparc/crypto/Kconfig @@ -16,16 +16,6 @@ config CRYPTO_DES_SPARC64 Architecture: sparc64 -config CRYPTO_MD5_SPARC64 - tristate "Digests: MD5" - depends on SPARC64 - select CRYPTO_MD5 - select CRYPTO_HASH - help - MD5 message digest algorithm (RFC1321) - - Architecture: sparc64 using crypto instructions, when available - config CRYPTO_AES_SPARC64 tristate "Ciphers: AES, modes: ECB, CBC, CTR" depends on SPARC64 diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 0d05a17988c4cd..7b4796842ddd7c 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -3,14 +3,10 @@ # Arch-specific CryptoAPI modules. # -obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o - obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o -md5-sparc64-y := md5_asm.o md5_glue.o - aes-sparc64-y := aes_asm.o aes_glue.o des-sparc64-y := des_asm.o des_glue.o camellia-sparc64-y := camellia_asm.o camellia_glue.o diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S deleted file mode 100644 index 60b544e4d205b1..00000000000000 --- a/arch/sparc/crypto/md5_asm.S +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/linkage.h> -#include <asm/opcodes.h> -#include <asm/visasm.h> - -ENTRY(md5_sparc64_transform) - /* %o0 = digest, %o1 = data, %o2 = rounds */ - VISEntryHalf - ld [%o0 + 0x00], %f0 - ld [%o0 + 0x04], %f1 - andcc %o1, 0x7, %g0 - ld [%o0 + 0x08], %f2 - bne,pn %xcc, 10f - ld [%o0 + 0x0c], %f3 - -1: - ldd [%o1 + 0x00], %f8 - ldd [%o1 + 0x08], %f10 - ldd [%o1 + 0x10], %f12 - ldd [%o1 + 0x18], %f14 - ldd [%o1 + 0x20], %f16 - ldd [%o1 + 0x28], %f18 - ldd [%o1 + 0x30], %f20 - ldd [%o1 + 0x38], %f22 - - MD5 - - subcc %o2, 1, %o2 - bne,pt %xcc, 1b - add %o1, 0x40, %o1 - -5: - st %f0, [%o0 + 0x00] - st %f1, [%o0 + 0x04] - st %f2, [%o0 + 0x08] - st %f3, [%o0 + 0x0c] - retl - VISExitHalf -10: - alignaddr %o1, %g0, %o1 - - ldd [%o1 + 0x00], %f10 -1: - ldd [%o1 + 0x08], %f12 - ldd [%o1 + 0x10], %f14 - ldd [%o1 + 0x18], %f16 - ldd [%o1 + 0x20], %f18 - ldd [%o1 + 0x28], %f20 - ldd [%o1 + 0x30], %f22 - ldd [%o1 + 0x38], %f24 - ldd [%o1 + 0x40], %f26 - - faligndata %f10, %f12, %f8 - faligndata %f12, %f14, %f10 - faligndata %f14, %f16, %f12 - faligndata %f16, %f18, %f14 - faligndata %f18, %f20, %f16 - faligndata %f20, %f22, %f18 - faligndata %f22, %f24, %f20 - faligndata %f24, %f26, %f22 - - MD5 - - subcc %o2, 1, %o2 - fsrc2 %f26, %f10 - bne,pt %xcc, 1b - add %o1, 0x40, %o1 - - ba,a,pt %xcc, 5b -ENDPROC(md5_sparc64_transform) diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c deleted file mode 100644 index b3615f0cdf6262..00000000000000 --- a/arch/sparc/crypto/md5_glue.c +++ /dev/null @@ -1,174 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes. - * - * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c - * and crypto/md5.c which are: - * - * Copyright (c) Alan Smithee. - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> - * Copyright (c) Mathias Krause <minipli@googlemail.com> - * Copyright (c) Cryptoapi developers. 
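
The ppc finup above, and the sparc64 one below, place the 64-bit bit count with the same expression, pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1]. The reasoning: MD5 padding needs the 0x80 marker plus an 8-byte length, so a tail of at most 55 bytes (55 + 1 + 8 = 64) still fits in one block, while a longer tail forces a second block. A sketch of that decision in isolation (hypothetical helper name):

static unsigned int md5_pad_blocks_sketch(unsigned int tail_len)
{
	/* 64-byte block = tail + 0x80 byte + zero pad + 8-byte length */
	return tail_len + 1 + 8 <= 64 ? 1 : 2;	/* i.e. tail_len <= 55 */
}
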
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <asm/elf.h> -#include <asm/opcodes.h> -#include <asm/pstate.h> -#include <crypto/internal/hash.h> -#include <crypto/md5.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/unaligned.h> - -struct sparc_md5_state { - __le32 hash[MD5_HASH_WORDS]; - u64 byte_count; -}; - -asmlinkage void md5_sparc64_transform(__le32 *digest, const char *data, - unsigned int rounds); - -static int md5_sparc64_init(struct shash_desc *desc) -{ - struct sparc_md5_state *mctx = shash_desc_ctx(desc); - - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); - mctx->byte_count = 0; - - return 0; -} - -static int md5_sparc64_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sparc_md5_state *sctx = shash_desc_ctx(desc); - - sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE); - md5_sparc64_transform(sctx->hash, data, len / MD5_HMAC_BLOCK_SIZE); - return len - round_down(len, MD5_HMAC_BLOCK_SIZE); -} - -/* Add padding and return the message digest. */ -static int md5_sparc64_finup(struct shash_desc *desc, const u8 *src, - unsigned int offset, u8 *out) -{ - struct sparc_md5_state *sctx = shash_desc_ctx(desc); - __le64 block[MD5_BLOCK_WORDS] = {}; - u8 *p = memcpy(block, src, offset); - __le32 *dst = (__le32 *)out; - __le64 *pbits; - int i; - - src = p; - p += offset; - *p++ = 0x80; - sctx->byte_count += offset; - pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1]; - *pbits = cpu_to_le64(sctx->byte_count << 3); - md5_sparc64_transform(sctx->hash, src, (pbits - block + 1) / 8); - memzero_explicit(block, sizeof(block)); - - /* Store state in digest */ - for (i = 0; i < MD5_HASH_WORDS; i++) - dst[i] = sctx->hash[i]; - - return 0; -} - -static int md5_sparc64_export(struct shash_desc *desc, void *out) -{ - struct sparc_md5_state *sctx = shash_desc_ctx(desc); - union { - u8 *u8; - u32 *u32; - u64 *u64; - } p = { .u8 = out }; - int i; - - for (i = 0; i < MD5_HASH_WORDS; i++) - put_unaligned(le32_to_cpu(sctx->hash[i]), p.u32++); - put_unaligned(sctx->byte_count, p.u64); - return 0; -} - -static int md5_sparc64_import(struct shash_desc *desc, const void *in) -{ - struct sparc_md5_state *sctx = shash_desc_ctx(desc); - union { - const u8 *u8; - const u32 *u32; - const u64 *u64; - } p = { .u8 = in }; - int i; - - for (i = 0; i < MD5_HASH_WORDS; i++) - sctx->hash[i] = cpu_to_le32(get_unaligned(p.u32++)); - sctx->byte_count = get_unaligned(p.u64); - return 0; -} - -static struct shash_alg alg = { - .digestsize = MD5_DIGEST_SIZE, - .init = md5_sparc64_init, - .update = md5_sparc64_update, - .finup = md5_sparc64_finup, - .export = md5_sparc64_export, - .import = md5_sparc64_import, - .descsize = sizeof(struct sparc_md5_state), - .statesize = sizeof(struct sparc_md5_state), - .base = { - .cra_name = "md5", - .cra_driver_name= "md5-sparc64", - .cra_priority = SPARC_CR_OPCODE_PRIORITY, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = MD5_HMAC_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static bool __init sparc64_has_md5_opcode(void) -{ - unsigned long cfr; - - if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) - return false; - - __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); - if (!(cfr & CFR_MD5)) - return false; - - return true; -} - -static int __init 
md5_sparc64_mod_init(void) -{ - if (sparc64_has_md5_opcode()) { - pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); - return crypto_register_shash(&alg); - } - pr_info("sparc64 md5 opcode not available.\n"); - return -ENODEV; -} - -static void __exit md5_sparc64_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(md5_sparc64_mod_init); -module_exit(md5_sparc64_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("MD5 Message Digest Algorithm, sparc64 md5 opcode accelerated"); - -MODULE_ALIAS_CRYPTO("md5"); - -#include "crop_devid.c" diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 94016c60561e28..d9c6fc78cf3324 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -2,19 +2,6 @@ menu "Accelerated Cryptographic Algorithms for CPU (x86)" -config CRYPTO_CURVE25519_X86 - tristate - depends on 64BIT - select CRYPTO_KPP - select CRYPTO_LIB_CURVE25519_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CURVE25519 - default CRYPTO_LIB_CURVE25519_INTERNAL - help - Curve25519 algorithm - - Architecture: x86_64 using: - - ADX (large integer arithmetic) - config CRYPTO_AES_NI_INTEL tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XCTR, XTS, GCM (AES-NI/VAES)" select CRYPTO_AEAD diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index d402963d6b579a..dfba7e5e88ea69 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -62,8 +62,6 @@ nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o -obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o - obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o @@ -81,6 +79,3 @@ aria-aesni-avx2-x86_64-y := aria-aesni-avx2-asm_64.o aria_aesni_avx2_glue.o obj-$(CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64) += aria-gfni-avx512-x86_64.o aria-gfni-avx512-x86_64-y := aria-gfni-avx512-asm_64.o aria_gfni_avx512_glue.o - -# Disable GCOV in odd or sensitive code -GCOV_PROFILE_curve25519-x86_64.o := n diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c deleted file mode 100644 index d587f05c3c8c36..00000000000000 --- a/arch/x86/crypto/curve25519-x86_64.c +++ /dev/null @@ -1,1726 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
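
The sparc64 glue removed above gates registration on an actual opcode probe rather than on hwcap alone: HWCAP_SPARC_CRYPTO only says a crypto unit exists, and the per-algorithm bit is read from the Crypto Function Register (%asr26). A sketch of that probe, lifted from the deleted sparc64_has_md5_opcode():

static bool has_md5_opcode_sketch(void)
{
	unsigned long cfr;

	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
		return false;			/* no crypto unit at all */

	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
	return cfr & CFR_MD5;			/* per-opcode feature bit */
}
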
- * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - */ - -#include <crypto/curve25519.h> -#include <crypto/internal/kpp.h> - -#include <linux/export.h> -#include <linux/types.h> -#include <linux/jump_label.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/scatterlist.h> - -#include <asm/cpufeature.h> -#include <asm/processor.h> - -static __always_inline u64 eq_mask(u64 a, u64 b) -{ - u64 x = a ^ b; - u64 minus_x = ~x + (u64)1U; - u64 x_or_minus_x = x | minus_x; - u64 xnx = x_or_minus_x >> (u32)63U; - return xnx - (u64)1U; -} - -static __always_inline u64 gte_mask(u64 a, u64 b) -{ - u64 x = a; - u64 y = b; - u64 x_xor_y = x ^ y; - u64 x_sub_y = x - y; - u64 x_sub_y_xor_y = x_sub_y ^ y; - u64 q = x_xor_y | x_sub_y_xor_y; - u64 x_xor_q = x ^ q; - u64 x_xor_q_ = x_xor_q >> (u32)63U; - return x_xor_q_ - (u64)1U; -} - -/* Computes the addition of four-element f1 with value in f2 - * and returns the carry (if any) */ -static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) -{ - u64 carry_r; - - asm volatile( - /* Clear registers to propagate the carry bit */ - " xor %%r8d, %%r8d;" - " xor %%r9d, %%r9d;" - " xor %%r10d, %%r10d;" - " xor %%r11d, %%r11d;" - " xor %k1, %k1;" - - /* Begin addition chain */ - " addq 0(%3), %0;" - " movq %0, 0(%2);" - " adcxq 8(%3), %%r8;" - " movq %%r8, 8(%2);" - " adcxq 16(%3), %%r9;" - " movq %%r9, 16(%2);" - " adcxq 24(%3), %%r10;" - " movq %%r10, 24(%2);" - - /* Return the carry bit in a register */ - " adcx %%r11, %1;" - : "+&r"(f2), "=&r"(carry_r) - : "r"(out), "r"(f1) - : "%r8", "%r9", "%r10", "%r11", "memory", "cc"); - - return carry_r; -} - -/* Computes the field addition of two field elements */ -static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) -{ - asm volatile( - /* Compute the raw addition of f1 + f2 */ - " movq 0(%0), %%r8;" - " addq 0(%2), %%r8;" - " movq 8(%0), %%r9;" - " adcxq 8(%2), %%r9;" - " movq 16(%0), %%r10;" - " adcxq 16(%2), %%r10;" - " movq 24(%0), %%r11;" - " adcxq 24(%2), %%r11;" - - /* Wrap the result back into the field */ - - /* Step 1: Compute carry*38 */ - " mov $0, %%rax;" - " mov $38, %0;" - " cmovc %0, %%rax;" - - /* Step 2: Add carry*38 to the original sum */ - " xor %%ecx, %%ecx;" - " add %%rax, %%r8;" - " adcx %%rcx, %%r9;" - " movq %%r9, 8(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 16(%1);" - " adcx %%rcx, %%r11;" - " movq %%r11, 24(%1);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %0, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 0(%1);" - : "+&r"(f2) - : "r"(out), "r"(f1) - : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); -} - -/* Computes the field subtraction of two field elements */ -static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) -{ - asm volatile( - /* Compute the raw subtraction of f1-f2 */ - " movq 0(%1), %%r8;" - " subq 0(%2), %%r8;" - " movq 8(%1), %%r9;" - " sbbq 8(%2), %%r9;" - " movq 16(%1), %%r10;" - " sbbq 16(%2), %%r10;" - " movq 24(%1), %%r11;" - " sbbq 24(%2), %%r11;" - - /* Wrap the result back into the field */ - - /* Step 1: Compute carry*38 */ - " mov $0, %%rax;" - " mov $38, %%rcx;" - " cmovc %%rcx, %%rax;" - - /* Step 2: Subtract carry*38 from the original difference */ - " sub %%rax, %%r8;" - " sbb $0, %%r9;" - " sbb $0, %%r10;" - " sbb $0, %%r11;" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rcx, %%rax;" - " sub %%rax, %%r8;" - - /* Store the result */ - " movq %%r8, 
0(%0);" - " movq %%r9, 8(%0);" - " movq %%r10, 16(%0);" - " movq %%r11, 24(%0);" - : - : "r"(out), "r"(f1), "r"(f2) - : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); -} - -/* Computes a field multiplication: out <- f1 * f2 - * Uses the 8-element buffer tmp for intermediate results */ -static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) -{ - asm volatile( - - /* Compute the raw multiplication: tmp <- src1 * src2 */ - - /* Compute src1[0] * src2 */ - " movq 0(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " movq %%r8, 0(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " movq %%r10, 8(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - - /* Compute src1[1] * src2 */ - " movq 8(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 8(%2), %%r8;" - " movq %%r8, 8(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 16(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " mov $0, %%r8;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - - /* Compute src1[2] * src2 */ - " movq 16(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 16(%2), %%r8;" - " movq %%r8, 16(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 24(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " mov $0, %%r8;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - - /* Compute src1[3] * src2 */ - " movq 24(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 24(%2), %%r8;" - " movq %%r8, 24(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 32(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " movq %%rbx, 40(%2);" - " mov $0, %%r8;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " movq %%r14, 48(%2);" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - " movq %%rax, 56(%2);" - - /* Line up pointers */ - " mov %2, %0;" - " mov %3, %2;" - - /* Wrap the result back into the field */ - - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%0), %%r8, %%r13;" - " xor %k1, %k1;" - " adoxq 0(%0), %%r8;" - " mulxq 40(%0), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%0), %%r9;" - " mulxq 48(%0), %%r10, %%r13;" - " adcx %%rbx, %%r10;" - " adoxq 16(%0), %%r10;" - " mulxq 56(%0), %%r11, %%rax;" - " adcx %%r13, %%r11;" - " adoxq 24(%0), %%r11;" - " adcx %1, %%rax;" - " adox %1, %%rax;" - " imul %%rdx, %%rax;" - - /* Step 2: Fold the carry back into dst */ - " add %%rax, %%r8;" - " adcx %1, %%r9;" - " movq %%r9, 8(%2);" - " adcx %1, %%r10;" - " movq %%r10, 16(%2);" - " adcx %1, %%r11;" - " movq %%r11, 24(%2);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rdx, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 0(%2);" - : "+&r"(f1), "+&r"(f2), "+&r"(tmp) - : "r"(out) - : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", - "%r14", "memory", "cc"); -} - -/* 
Computes two field multiplications: - * out[0] <- f1[0] * f2[0] - * out[1] <- f1[1] * f2[1] - * Uses the 16-element buffer tmp for intermediate results: */ -static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) -{ - asm volatile( - - /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ - - /* Compute src1[0] * src2 */ - " movq 0(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " movq %%r8, 0(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " movq %%r10, 8(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - - /* Compute src1[1] * src2 */ - " movq 8(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 8(%2), %%r8;" - " movq %%r8, 8(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 16(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " mov $0, %%r8;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - - /* Compute src1[2] * src2 */ - " movq 16(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 16(%2), %%r8;" - " movq %%r8, 16(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 24(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " mov $0, %%r8;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - - /* Compute src1[3] * src2 */ - " movq 24(%0), %%rdx;" - " mulxq 0(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 24(%2), %%r8;" - " movq %%r8, 24(%2);" - " mulxq 8(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 32(%2);" - " mulxq 16(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " movq %%rbx, 40(%2);" - " mov $0, %%r8;" - " mulxq 24(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " movq %%r14, 48(%2);" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - " movq %%rax, 56(%2);" - - /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ - - /* Compute src1[0] * src2 */ - " movq 32(%0), %%rdx;" - " mulxq 32(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " movq %%r8, 64(%2);" - " mulxq 40(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " movq %%r10, 72(%2);" - " mulxq 48(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " mulxq 56(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - - /* Compute src1[1] * src2 */ - " movq 40(%0), %%rdx;" - " mulxq 32(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 72(%2), %%r8;" - " movq %%r8, 72(%2);" - " mulxq 40(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 80(%2);" - " mulxq 48(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " mov $0, %%r8;" - " mulxq 56(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - - /* Compute src1[2] * src2 */ - " movq 48(%0), %%rdx;" - " mulxq 32(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 80(%2), %%r8;" - " movq %%r8, 80(%2);" - " mulxq 40(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 88(%2);" - " mulxq 48(%1), %%rbx, 
%%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " mov $0, %%r8;" - " mulxq 56(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - - /* Compute src1[3] * src2 */ - " movq 56(%0), %%rdx;" - " mulxq 32(%1), %%r8, %%r9;" - " xor %%r10d, %%r10d;" - " adcxq 88(%2), %%r8;" - " movq %%r8, 88(%2);" - " mulxq 40(%1), %%r10, %%r11;" - " adox %%r9, %%r10;" - " adcx %%rbx, %%r10;" - " movq %%r10, 96(%2);" - " mulxq 48(%1), %%rbx, %%r13;" - " adox %%r11, %%rbx;" - " adcx %%r14, %%rbx;" - " movq %%rbx, 104(%2);" - " mov $0, %%r8;" - " mulxq 56(%1), %%r14, %%rdx;" - " adox %%r13, %%r14;" - " adcx %%rax, %%r14;" - " movq %%r14, 112(%2);" - " mov $0, %%rax;" - " adox %%rdx, %%rax;" - " adcx %%r8, %%rax;" - " movq %%rax, 120(%2);" - - /* Line up pointers */ - " mov %2, %0;" - " mov %3, %2;" - - /* Wrap the results back into the field */ - - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%0), %%r8, %%r13;" - " xor %k1, %k1;" - " adoxq 0(%0), %%r8;" - " mulxq 40(%0), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%0), %%r9;" - " mulxq 48(%0), %%r10, %%r13;" - " adcx %%rbx, %%r10;" - " adoxq 16(%0), %%r10;" - " mulxq 56(%0), %%r11, %%rax;" - " adcx %%r13, %%r11;" - " adoxq 24(%0), %%r11;" - " adcx %1, %%rax;" - " adox %1, %%rax;" - " imul %%rdx, %%rax;" - - /* Step 2: Fold the carry back into dst */ - " add %%rax, %%r8;" - " adcx %1, %%r9;" - " movq %%r9, 8(%2);" - " adcx %1, %%r10;" - " movq %%r10, 16(%2);" - " adcx %1, %%r11;" - " movq %%r11, 24(%2);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rdx, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 0(%2);" - - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 96(%0), %%r8, %%r13;" - " xor %k1, %k1;" - " adoxq 64(%0), %%r8;" - " mulxq 104(%0), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 72(%0), %%r9;" - " mulxq 112(%0), %%r10, %%r13;" - " adcx %%rbx, %%r10;" - " adoxq 80(%0), %%r10;" - " mulxq 120(%0), %%r11, %%rax;" - " adcx %%r13, %%r11;" - " adoxq 88(%0), %%r11;" - " adcx %1, %%rax;" - " adox %1, %%rax;" - " imul %%rdx, %%rax;" - - /* Step 2: Fold the carry back into dst */ - " add %%rax, %%r8;" - " adcx %1, %%r9;" - " movq %%r9, 40(%2);" - " adcx %1, %%r10;" - " movq %%r10, 48(%2);" - " adcx %1, %%r11;" - " movq %%r11, 56(%2);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rdx, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 32(%2);" - : "+&r"(f1), "+&r"(f2), "+&r"(tmp) - : "r"(out) - : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", - "%r14", "memory", "cc"); -} - -/* Computes the field multiplication of four-element f1 with value in f2 - * Requires f2 to be smaller than 2^17 */ -static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) -{ - register u64 f2_r asm("rdx") = f2; - - asm volatile( - /* Compute the raw multiplication of f1*f2 */ - " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ - " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ - " add %%rcx, %%r9;" - " mov $0, %%rcx;" - " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ - " adcx %%rbx, %%r10;" - " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ - " adcx %%r13, %%r11;" - " adcx %%rcx, %%rax;" - - /* Wrap the result back into the field */ - - /* Step 1: Compute carry*38 */ - " mov $38, %%rdx;" - " imul %%rdx, %%rax;" - - /* Step 2: Fold the carry back into dst */ - " add %%rax, 
%%r8;" - " adcx %%rcx, %%r9;" - " movq %%r9, 8(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 16(%1);" - " adcx %%rcx, %%r11;" - " movq %%r11, 24(%1);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rdx, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 0(%1);" - : "+&r"(f2_r) - : "r"(out), "r"(f1) - : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", - "memory", "cc"); -} - -/* Computes p1 <- bit ? p2 : p1 in constant time */ -static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) -{ - asm volatile( - /* Transfer bit into CF flag */ - " add $18446744073709551615, %0;" - - /* cswap p1[0], p2[0] */ - " movq 0(%1), %%r8;" - " movq 0(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 0(%1);" - " movq %%r9, 0(%2);" - - /* cswap p1[1], p2[1] */ - " movq 8(%1), %%r8;" - " movq 8(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 8(%1);" - " movq %%r9, 8(%2);" - - /* cswap p1[2], p2[2] */ - " movq 16(%1), %%r8;" - " movq 16(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 16(%1);" - " movq %%r9, 16(%2);" - - /* cswap p1[3], p2[3] */ - " movq 24(%1), %%r8;" - " movq 24(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 24(%1);" - " movq %%r9, 24(%2);" - - /* cswap p1[4], p2[4] */ - " movq 32(%1), %%r8;" - " movq 32(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 32(%1);" - " movq %%r9, 32(%2);" - - /* cswap p1[5], p2[5] */ - " movq 40(%1), %%r8;" - " movq 40(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 40(%1);" - " movq %%r9, 40(%2);" - - /* cswap p1[6], p2[6] */ - " movq 48(%1), %%r8;" - " movq 48(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 48(%1);" - " movq %%r9, 48(%2);" - - /* cswap p1[7], p2[7] */ - " movq 56(%1), %%r8;" - " movq 56(%2), %%r9;" - " mov %%r8, %%r10;" - " cmovc %%r9, %%r8;" - " cmovc %%r10, %%r9;" - " movq %%r8, 56(%1);" - " movq %%r9, 56(%2);" - : "+&r"(bit) - : "r"(p1), "r"(p2) - : "%r8", "%r9", "%r10", "memory", "cc"); -} - -/* Computes the square of a field element: out <- f * f - * Uses the 8-element buffer tmp for intermediate results */ -static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) -{ - asm volatile( - /* Compute the raw multiplication: tmp <- f * f */ - - /* Step 1: Compute all partial products */ - " movq 0(%0), %%rdx;" /* f[0] */ - " mulxq 8(%0), %%r8, %%r14;" - " xor %%r15d, %%r15d;" /* f[1]*f[0] */ - " mulxq 16(%0), %%r9, %%r10;" - " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%0), %%rax, %%rcx;" - " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%0), %%rdx;" /* f[3] */ - " mulxq 8(%0), %%r11, %%rbx;" - " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 16(%0), %%rax, %%r13;" - " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 8(%0), %%rdx;" - " adcx %%r15, %%r13;" /* f1 */ - " mulxq 16(%0), %%rax, %%rcx;" - " mov $0, %%r14;" /* f[2]*f[1] */ - - /* Step 2: Compute two parallel carry chains */ - " xor %%r15d, %%r15d;" - " adox %%rax, %%r10;" - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" - " adcx %%r9, %%r9;" - " adox %%r15, %%rbx;" - " adcx %%r10, %%r10;" - " adox %%r15, %%r13;" - " adcx %%r11, %%r11;" - " adox %%r15, %%r14;" - " adcx %%rbx, %%rbx;" - " adcx %%r13, %%r13;" - " adcx %%r14, %%r14;" - - /* Step 3: Compute intermediate squares */ - 
" movq 0(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ - " movq %%rax, 0(%1);" - " add %%rcx, %%r8;" - " movq %%r8, 8(%1);" - " movq 8(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ - " adcx %%rax, %%r9;" - " movq %%r9, 16(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 24(%1);" - " movq 16(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" - " movq %%r11, 32(%1);" - " adcx %%rcx, %%rbx;" - " movq %%rbx, 40(%1);" - " movq 24(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, %%r13;" - " movq %%r13, 48(%1);" - " adcx %%rcx, %%r14;" - " movq %%r14, 56(%1);" - - /* Line up pointers */ - " mov %1, %0;" - " mov %2, %1;" - - /* Wrap the result back into the field */ - - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%0), %%r8, %%r13;" - " xor %%ecx, %%ecx;" - " adoxq 0(%0), %%r8;" - " mulxq 40(%0), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%0), %%r9;" - " mulxq 48(%0), %%r10, %%r13;" - " adcx %%rbx, %%r10;" - " adoxq 16(%0), %%r10;" - " mulxq 56(%0), %%r11, %%rax;" - " adcx %%r13, %%r11;" - " adoxq 24(%0), %%r11;" - " adcx %%rcx, %%rax;" - " adox %%rcx, %%rax;" - " imul %%rdx, %%rax;" - - /* Step 2: Fold the carry back into dst */ - " add %%rax, %%r8;" - " adcx %%rcx, %%r9;" - " movq %%r9, 8(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 16(%1);" - " adcx %%rcx, %%r11;" - " movq %%r11, 24(%1);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rdx, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 0(%1);" - : "+&r"(f), "+&r"(tmp) - : "r"(out) - : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", - "%r13", "%r14", "%r15", "memory", "cc"); -} - -/* Computes two field squarings: - * out[0] <- f[0] * f[0] - * out[1] <- f[1] * f[1] - * Uses the 16-element buffer tmp for intermediate results */ -static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) -{ - asm volatile( - /* Step 1: Compute all partial products */ - " movq 0(%0), %%rdx;" /* f[0] */ - " mulxq 8(%0), %%r8, %%r14;" - " xor %%r15d, %%r15d;" /* f[1]*f[0] */ - " mulxq 16(%0), %%r9, %%r10;" - " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%0), %%rax, %%rcx;" - " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%0), %%rdx;" /* f[3] */ - " mulxq 8(%0), %%r11, %%rbx;" - " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 16(%0), %%rax, %%r13;" - " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 8(%0), %%rdx;" - " adcx %%r15, %%r13;" /* f1 */ - " mulxq 16(%0), %%rax, %%rcx;" - " mov $0, %%r14;" /* f[2]*f[1] */ - - /* Step 2: Compute two parallel carry chains */ - " xor %%r15d, %%r15d;" - " adox %%rax, %%r10;" - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" - " adcx %%r9, %%r9;" - " adox %%r15, %%rbx;" - " adcx %%r10, %%r10;" - " adox %%r15, %%r13;" - " adcx %%r11, %%r11;" - " adox %%r15, %%r14;" - " adcx %%rbx, %%rbx;" - " adcx %%r13, %%r13;" - " adcx %%r14, %%r14;" - - /* Step 3: Compute intermediate squares */ - " movq 0(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ - " movq %%rax, 0(%1);" - " add %%rcx, %%r8;" - " movq %%r8, 8(%1);" - " movq 8(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ - " adcx %%rax, %%r9;" - " movq %%r9, 16(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 24(%1);" - " movq 16(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" - " movq %%r11, 32(%1);" - " adcx %%rcx, %%rbx;" - " movq %%rbx, 40(%1);" - " movq 24(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, 
%%r13;" - " movq %%r13, 48(%1);" - " adcx %%rcx, %%r14;" - " movq %%r14, 56(%1);" - - /* Step 1: Compute all partial products */ - " movq 32(%0), %%rdx;" /* f[0] */ - " mulxq 40(%0), %%r8, %%r14;" - " xor %%r15d, %%r15d;" /* f[1]*f[0] */ - " mulxq 48(%0), %%r9, %%r10;" - " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 56(%0), %%rax, %%rcx;" - " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 56(%0), %%rdx;" /* f[3] */ - " mulxq 40(%0), %%r11, %%rbx;" - " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 48(%0), %%rax, %%r13;" - " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 40(%0), %%rdx;" - " adcx %%r15, %%r13;" /* f1 */ - " mulxq 48(%0), %%rax, %%rcx;" - " mov $0, %%r14;" /* f[2]*f[1] */ - - /* Step 2: Compute two parallel carry chains */ - " xor %%r15d, %%r15d;" - " adox %%rax, %%r10;" - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" - " adcx %%r9, %%r9;" - " adox %%r15, %%rbx;" - " adcx %%r10, %%r10;" - " adox %%r15, %%r13;" - " adcx %%r11, %%r11;" - " adox %%r15, %%r14;" - " adcx %%rbx, %%rbx;" - " adcx %%r13, %%r13;" - " adcx %%r14, %%r14;" - - /* Step 3: Compute intermediate squares */ - " movq 32(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ - " movq %%rax, 64(%1);" - " add %%rcx, %%r8;" - " movq %%r8, 72(%1);" - " movq 40(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ - " adcx %%rax, %%r9;" - " movq %%r9, 80(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 88(%1);" - " movq 48(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" - " movq %%r11, 96(%1);" - " adcx %%rcx, %%rbx;" - " movq %%rbx, 104(%1);" - " movq 56(%0), %%rdx;" - " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, %%r13;" - " movq %%r13, 112(%1);" - " adcx %%rcx, %%r14;" - " movq %%r14, 120(%1);" - - /* Line up pointers */ - " mov %1, %0;" - " mov %2, %1;" - - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%0), %%r8, %%r13;" - " xor %%ecx, %%ecx;" - " adoxq 0(%0), %%r8;" - " mulxq 40(%0), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%0), %%r9;" - " mulxq 48(%0), %%r10, %%r13;" - " adcx %%rbx, %%r10;" - " adoxq 16(%0), %%r10;" - " mulxq 56(%0), %%r11, %%rax;" - " adcx %%r13, %%r11;" - " adoxq 24(%0), %%r11;" - " adcx %%rcx, %%rax;" - " adox %%rcx, %%rax;" - " imul %%rdx, %%rax;" - - /* Step 2: Fold the carry back into dst */ - " add %%rax, %%r8;" - " adcx %%rcx, %%r9;" - " movq %%r9, 8(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 16(%1);" - " adcx %%rcx, %%r11;" - " movq %%r11, 24(%1);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rdx, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 0(%1);" - - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 96(%0), %%r8, %%r13;" - " xor %%ecx, %%ecx;" - " adoxq 64(%0), %%r8;" - " mulxq 104(%0), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 72(%0), %%r9;" - " mulxq 112(%0), %%r10, %%r13;" - " adcx %%rbx, %%r10;" - " adoxq 80(%0), %%r10;" - " mulxq 120(%0), %%r11, %%rax;" - " adcx %%r13, %%r11;" - " adoxq 88(%0), %%r11;" - " adcx %%rcx, %%rax;" - " adox %%rcx, %%rax;" - " imul %%rdx, %%rax;" - - /* Step 2: Fold the carry back into dst */ - " add %%rax, %%r8;" - " adcx %%rcx, %%r9;" - " movq %%r9, 40(%1);" - " adcx %%rcx, %%r10;" - " movq %%r10, 48(%1);" - " adcx %%rcx, %%r11;" - " movq %%r11, 56(%1);" - - /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ - " mov $0, %%rax;" - " cmovc %%rdx, %%rax;" - " add %%rax, %%r8;" - " movq %%r8, 32(%1);" - : "+&r"(f), 
"+&r"(tmp) - : "r"(out) - : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", - "%r13", "%r14", "%r15", "memory", "cc"); -} - -static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) -{ - u64 *nq = p01_tmp1; - u64 *nq_p1 = p01_tmp1 + (u32)8U; - u64 *tmp1 = p01_tmp1 + (u32)16U; - u64 *x1 = q; - u64 *x2 = nq; - u64 *z2 = nq + (u32)4U; - u64 *z3 = nq_p1 + (u32)4U; - u64 *a = tmp1; - u64 *b = tmp1 + (u32)4U; - u64 *ab = tmp1; - u64 *dc = tmp1 + (u32)8U; - u64 *x3; - u64 *z31; - u64 *d0; - u64 *c0; - u64 *a1; - u64 *b1; - u64 *d; - u64 *c; - u64 *ab1; - u64 *dc1; - fadd(a, x2, z2); - fsub(b, x2, z2); - x3 = nq_p1; - z31 = nq_p1 + (u32)4U; - d0 = dc; - c0 = dc + (u32)4U; - fadd(c0, x3, z31); - fsub(d0, x3, z31); - fmul2(dc, dc, ab, tmp2); - fadd(x3, d0, c0); - fsub(z31, d0, c0); - a1 = tmp1; - b1 = tmp1 + (u32)4U; - d = tmp1 + (u32)8U; - c = tmp1 + (u32)12U; - ab1 = tmp1; - dc1 = tmp1 + (u32)8U; - fsqr2(dc1, ab1, tmp2); - fsqr2(nq_p1, nq_p1, tmp2); - a1[0U] = c[0U]; - a1[1U] = c[1U]; - a1[2U] = c[2U]; - a1[3U] = c[3U]; - fsub(c, d, c); - fmul_scalar(b1, c, (u64)121665U); - fadd(b1, b1, d); - fmul2(nq, dc1, ab1, tmp2); - fmul(z3, z3, x1, tmp2); -} - -static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) -{ - u64 *x2 = nq; - u64 *z2 = nq + (u32)4U; - u64 *a = tmp1; - u64 *b = tmp1 + (u32)4U; - u64 *d = tmp1 + (u32)8U; - u64 *c = tmp1 + (u32)12U; - u64 *ab = tmp1; - u64 *dc = tmp1 + (u32)8U; - fadd(a, x2, z2); - fsub(b, x2, z2); - fsqr2(dc, ab, tmp2); - a[0U] = c[0U]; - a[1U] = c[1U]; - a[2U] = c[2U]; - a[3U] = c[3U]; - fsub(c, d, c); - fmul_scalar(b, c, (u64)121665U); - fadd(b, b, d); - fmul2(nq, dc, ab, tmp2); -} - -static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) -{ - u64 tmp2[16U] = { 0U }; - u64 p01_tmp1_swap[33U] = { 0U }; - u64 *p0 = p01_tmp1_swap; - u64 *p01 = p01_tmp1_swap; - u64 *p03 = p01; - u64 *p11 = p01 + (u32)8U; - u64 *x0; - u64 *z0; - u64 *p01_tmp1; - u64 *p01_tmp11; - u64 *nq10; - u64 *nq_p11; - u64 *swap1; - u64 sw0; - u64 *nq1; - u64 *tmp1; - memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); - x0 = p03; - z0 = p03 + (u32)4U; - x0[0U] = (u64)1U; - x0[1U] = (u64)0U; - x0[2U] = (u64)0U; - x0[3U] = (u64)0U; - z0[0U] = (u64)0U; - z0[1U] = (u64)0U; - z0[2U] = (u64)0U; - z0[3U] = (u64)0U; - p01_tmp1 = p01_tmp1_swap; - p01_tmp11 = p01_tmp1_swap; - nq10 = p01_tmp1_swap; - nq_p11 = p01_tmp1_swap + (u32)8U; - swap1 = p01_tmp1_swap + (u32)32U; - cswap2((u64)1U, nq10, nq_p11); - point_add_and_double(init1, p01_tmp11, tmp2); - swap1[0U] = (u64)1U; - { - u32 i; - for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { - u64 *p01_tmp12 = p01_tmp1_swap; - u64 *swap2 = p01_tmp1_swap + (u32)32U; - u64 *nq2 = p01_tmp12; - u64 *nq_p12 = p01_tmp12 + (u32)8U; - u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); - u64 sw = swap2[0U] ^ bit; - cswap2(sw, nq2, nq_p12); - point_add_and_double(init1, p01_tmp12, tmp2); - swap2[0U] = bit; - } - } - sw0 = swap1[0U]; - cswap2(sw0, nq10, nq_p11); - nq1 = p01_tmp1; - tmp1 = p01_tmp1 + (u32)16U; - point_double(nq1, tmp1, tmp2); - point_double(nq1, tmp1, tmp2); - point_double(nq1, tmp1, tmp2); - memcpy(out, p0, (u32)8U * sizeof(p0[0U])); - - memzero_explicit(tmp2, sizeof(tmp2)); - memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); -} - -static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) -{ - u32 i; - fsqr(o, inp, tmp); - for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) - fsqr(o, o, tmp); -} - -static void finv(u64 *o, const u64 *i, u64 *tmp) -{ - u64 t1[16U] = { 0U 
}; - u64 *a0 = t1; - u64 *b = t1 + (u32)4U; - u64 *c = t1 + (u32)8U; - u64 *t00 = t1 + (u32)12U; - u64 *tmp1 = tmp; - u64 *a; - u64 *t0; - fsquare_times(a0, i, tmp1, (u32)1U); - fsquare_times(t00, a0, tmp1, (u32)2U); - fmul(b, t00, i, tmp); - fmul(a0, b, a0, tmp); - fsquare_times(t00, a0, tmp1, (u32)1U); - fmul(b, t00, b, tmp); - fsquare_times(t00, b, tmp1, (u32)5U); - fmul(b, t00, b, tmp); - fsquare_times(t00, b, tmp1, (u32)10U); - fmul(c, t00, b, tmp); - fsquare_times(t00, c, tmp1, (u32)20U); - fmul(t00, t00, c, tmp); - fsquare_times(t00, t00, tmp1, (u32)10U); - fmul(b, t00, b, tmp); - fsquare_times(t00, b, tmp1, (u32)50U); - fmul(c, t00, b, tmp); - fsquare_times(t00, c, tmp1, (u32)100U); - fmul(t00, t00, c, tmp); - fsquare_times(t00, t00, tmp1, (u32)50U); - fmul(t00, t00, b, tmp); - fsquare_times(t00, t00, tmp1, (u32)5U); - a = t1; - t0 = t1 + (u32)12U; - fmul(o, t0, a, tmp); -} - -static void store_felem(u64 *b, u64 *f) -{ - u64 f30 = f[3U]; - u64 top_bit0 = f30 >> (u32)63U; - u64 f31; - u64 top_bit; - u64 f0; - u64 f1; - u64 f2; - u64 f3; - u64 m0; - u64 m1; - u64 m2; - u64 m3; - u64 mask; - u64 f0_; - u64 f1_; - u64 f2_; - u64 f3_; - u64 o0; - u64 o1; - u64 o2; - u64 o3; - f[3U] = f30 & (u64)0x7fffffffffffffffU; - add_scalar(f, f, (u64)19U * top_bit0); - f31 = f[3U]; - top_bit = f31 >> (u32)63U; - f[3U] = f31 & (u64)0x7fffffffffffffffU; - add_scalar(f, f, (u64)19U * top_bit); - f0 = f[0U]; - f1 = f[1U]; - f2 = f[2U]; - f3 = f[3U]; - m0 = gte_mask(f0, (u64)0xffffffffffffffedU); - m1 = eq_mask(f1, (u64)0xffffffffffffffffU); - m2 = eq_mask(f2, (u64)0xffffffffffffffffU); - m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); - mask = ((m0 & m1) & m2) & m3; - f0_ = f0 - (mask & (u64)0xffffffffffffffedU); - f1_ = f1 - (mask & (u64)0xffffffffffffffffU); - f2_ = f2 - (mask & (u64)0xffffffffffffffffU); - f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); - o0 = f0_; - o1 = f1_; - o2 = f2_; - o3 = f3_; - b[0U] = o0; - b[1U] = o1; - b[2U] = o2; - b[3U] = o3; -} - -static void encode_point(u8 *o, const u64 *i) -{ - const u64 *x = i; - const u64 *z = i + (u32)4U; - u64 tmp[4U] = { 0U }; - u64 tmp_w[16U] = { 0U }; - finv(tmp, z, tmp_w); - fmul(tmp, tmp, x, tmp_w); - store_felem((u64 *)o, tmp); -} - -static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) -{ - u64 init1[8U] = { 0U }; - u64 tmp[4U] = { 0U }; - u64 tmp3; - u64 *x; - u64 *z; - { - u32 i; - for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { - u64 *os = tmp; - const u8 *bj = pub + i * (u32)8U; - u64 u = *(u64 *)bj; - u64 r = u; - u64 x0 = r; - os[i] = x0; - } - } - tmp3 = tmp[3U]; - tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; - x = init1; - z = init1 + (u32)4U; - z[0U] = (u64)1U; - z[1U] = (u64)0U; - z[2U] = (u64)0U; - z[3U] = (u64)0U; - x[0U] = tmp[0U]; - x[1U] = tmp[1U]; - x[2U] = tmp[2U]; - x[3U] = tmp[3U]; - montgomery_ladder(init1, priv, init1); - encode_point(out, init1); -} - -/* The below constants were generated using this sage script: - * - * #!/usr/bin/env sage - * import sys - * from sage.all import * - * def limbs(n): - * n = int(n) - * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) - * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l - * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) - * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] - * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) - * print("static const u64 table_ladder[] = {") - * p = ec.lift_x(9) - * for i in range(252): - * l = (p[0] + p[2]) / (p[0] - p[2]) - * print(("\t%s" + ("," if i != 251 
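
finv is Fermat inversion: it raises i to p - 2 = 2^255 - 21 using 254 squarings and 11 multiplications, so it works for any nonzero element with no secret-dependent control flow. The addition chain can be audited by tracking exponents alone -- fsquare_times(_, _, n) shifts the exponent left by n, fmul adds exponents (illustrative check with a hypothetical 256-bit exponent type, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <string.h>

typedef struct { uint64_t w[4]; } e256;	/* little-endian exponent */

static e256 shl(e256 a, unsigned n)	/* exponent effect of fsquare_times */
{
	while (n--) {
		uint64_t carry = 0;
		int i;

		for (i = 0; i < 4; i++) {
			uint64_t hi = a.w[i] >> 63;

			a.w[i] = (a.w[i] << 1) | carry;
			carry = hi;
		}
	}
	return a;
}

static e256 add(e256 a, e256 b)		/* exponent effect of fmul */
{
	unsigned __int128 acc = 0;
	int i;

	for (i = 0; i < 4; i++) {
		acc += (unsigned __int128)a.w[i] + b.w[i];
		a.w[i] = (uint64_t)acc;
		acc >>= 64;
	}
	return a;
}

int main(void)
{
	const e256 one = { { 1, 0, 0, 0 } };
	const e256 want =	/* p - 2 = 2^255 - 21 */
		{ { (uint64_t)-21, ~0ULL, ~0ULL, 0x7fffffffffffffffULL } };
	e256 a0, b, c, t00;

	a0 = shl(one, 1);	/* i^2 */
	t00 = shl(a0, 2);
	b = add(t00, one);	/* i^9 */
	a0 = add(b, a0);	/* i^11 */
	t00 = shl(a0, 1);
	b = add(t00, b);	/* i^(2^5 - 1) */
	t00 = shl(b, 5);
	b = add(t00, b);	/* i^(2^10 - 1) */
	t00 = shl(b, 10);
	c = add(t00, b);	/* i^(2^20 - 1) */
	t00 = shl(c, 20);
	t00 = add(t00, c);	/* i^(2^40 - 1) */
	t00 = shl(t00, 10);
	b = add(t00, b);	/* i^(2^50 - 1) */
	t00 = shl(b, 50);
	c = add(t00, b);	/* i^(2^100 - 1) */
	t00 = shl(c, 100);
	t00 = add(t00, c);	/* i^(2^200 - 1) */
	t00 = shl(t00, 50);
	t00 = add(t00, b);	/* i^(2^250 - 1) */
	t00 = shl(t00, 5);
	t00 = add(t00, a0);	/* i^(2^255 - 21) = i^(p - 2) */

	assert(!memcmp(&t00, &want, sizeof(t00)));
	return 0;
}

store_felem then brings the single remaining representative into canonical form: two rounds of folding the top bit back as +19, followed by a masked subtraction of p built from eq_mask and gte_mask.
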
else "")) % limbs(l)) - * p = p * 2 - * print("};") - * - */ - -static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; - -static const u64 table_ladder[] = { - 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, - 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, - 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, - 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, - 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, - 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, - 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, - 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, - 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, - 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, - 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, - 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, - 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, - 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, - 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, - 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, - 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, - 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, - 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, - 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, - 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, - 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, - 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, - 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, - 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, - 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, - 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, - 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, - 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, - 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, - 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, - 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, - 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, - 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, - 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, - 0x343edd46bbaf738fULL, 
0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, - 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, - 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, - 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, - 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, - 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, - 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, - 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, - 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, - 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, - 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, - 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, - 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, - 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, - 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, - 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, - 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, - 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, - 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, - 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, - 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, - 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, - 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, - 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, - 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, - 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, - 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, - 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, - 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, - 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, - 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, - 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, - 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, - 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, - 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, - 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, - 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, - 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, - 
0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, - 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, - 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, - 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, - 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, - 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, - 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, - 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, - 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, - 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, - 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, - 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, - 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, - 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, - 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, - 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, - 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, - 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, - 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, - 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, - 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, - 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, - 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, - 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, - 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, - 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, - 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, - 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, - 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, - 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, - 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, - 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, - 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, - 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, - 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, - 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, - 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, - 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 
0x470e9dbc88d5164aULL, - 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, - 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, - 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, - 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, - 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, - 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, - 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, - 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, - 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, - 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, - 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, - 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, - 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, - 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, - 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, - 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, - 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, - 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, - 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, - 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, - 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, - 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, - 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, - 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, - 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, - 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, - 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, - 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, - 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, - 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, - 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, - 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, - 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, - 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, - 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, - 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, - 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, - 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 
0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, - 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, - 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, - 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, - 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, - 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, - 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, - 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, - 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, - 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, - 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, - 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, - 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, - 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, - 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, - 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, - 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, - 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, - 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, - 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, - 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, - 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, - 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, - 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, - 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, - 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, - 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, - 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, - 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, - 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, - 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, - 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, - 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, - 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, - 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, - 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, - 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, - 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, - 0x7fb63e418f06dc99ULL, 
0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, - 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, - 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, - 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, - 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, - 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, - 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, - 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, - 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, - 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, - 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, - 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, - 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, - 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, - 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, - 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, - 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, - 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, - 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, - 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, - 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, - 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, - 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, - 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, - 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, - 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, - 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, - 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, - 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, - 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, - 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, - 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, - 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, - 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, - 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, - 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, - 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, - 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, - 
0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, - 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, - 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, - 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, - 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, - 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, - 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, - 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, - 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, - 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, - 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, - 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, - 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, - 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, - 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, - 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, - 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, - 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, - 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, - 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, - 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, - 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, - 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, - 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, - 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, - 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, - 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, - 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL -}; - -static void curve25519_ever64_base(u8 *out, const u8 *priv) -{ - u64 swap = 1; - int i, j, k; - u64 tmp[16 + 32 + 4]; - u64 *x1 = &tmp[0]; - u64 *z1 = &tmp[4]; - u64 *x2 = &tmp[8]; - u64 *z2 = &tmp[12]; - u64 *xz1 = &tmp[0]; - u64 *xz2 = &tmp[8]; - u64 *a = &tmp[0 + 16]; - u64 *b = &tmp[4 + 16]; - u64 *c = &tmp[8 + 16]; - u64 *ab = &tmp[0 + 16]; - u64 *abcd = &tmp[0 + 16]; - u64 *ef = &tmp[16 + 16]; - u64 *efgh = &tmp[16 + 16]; - u64 *key = &tmp[0 + 16 + 32]; - - memcpy(key, priv, 32); - ((u8 *)key)[0] &= 248; - ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; - - x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; - z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; - z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; - memcpy(x2, p_minus_s, sizeof(p_minus_s)); - - j = 3; - for (i = 0; i < 4; ++i) { - while (j < (const int[]){ 64, 64, 64, 63 }[i]) { - u64 bit = (key[i] >> j) & 1; - k = (64 * i + j - 3); - swap = swap ^ bit; - cswap2(swap, xz1, 
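
After the table, curve25519_ever64_base implements the fixed-base multiply: it copies the private key, clamps it, and then consumes one precomputed table_ladder entry per key bit. The two clamping lines are exactly RFC 7748's decodeScalar25519; as a standalone helper (restating the code above):

#include <stdint.h>
#include <stdio.h>

static void clamp(uint8_t key[32])
{
	key[0] &= 248;			/* clear bits 0-2: cofactor 8 */
	key[31] = (key[31] & 127) | 64;	/* clear bit 255, set bit 254 */
}

int main(void)
{
	uint8_t key[32] = { 0xff };

	key[31] = 0xff;
	clamp(key);
	printf("key[0]=%02x key[31]=%02x\n", key[0], key[31]);
	return 0;	/* prints key[0]=f8 key[31]=7f */
}
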
xz2); - swap = bit; - fsub(b, x1, z1); - fadd(a, x1, z1); - fmul(c, &table_ladder[4 * k], b, ef); - fsub(b, a, c); - fadd(a, a, c); - fsqr2(ab, ab, efgh); - fmul2(xz1, xz2, ab, efgh); - ++j; - } - j = 0; - } - - point_double(xz1, abcd, efgh); - point_double(xz1, abcd, efgh); - point_double(xz1, abcd, efgh); - encode_point(out, xz1); - - memzero_explicit(tmp, sizeof(tmp)); -} - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); - -void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE], - const u8 basepoint[CURVE25519_KEY_SIZE]) -{ - if (static_branch_likely(&curve25519_use_bmi2_adx)) - curve25519_ever64(mypublic, secret, basepoint); - else - curve25519_generic(mypublic, secret, basepoint); -} -EXPORT_SYMBOL(curve25519_arch); - -void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE]) -{ - if (static_branch_likely(&curve25519_use_bmi2_adx)) - curve25519_ever64_base(pub, secret); - else - curve25519_generic(pub, secret, curve25519_base_point); -} -EXPORT_SYMBOL(curve25519_base_arch); - -static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, - unsigned int len) -{ - u8 *secret = kpp_tfm_ctx(tfm); - - if (!len) - curve25519_generate_secret(secret); - else if (len == CURVE25519_KEY_SIZE && - crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) - memcpy(secret, buf, CURVE25519_KEY_SIZE); - else - return -EINVAL; - return 0; -} - -static int curve25519_generate_public_key(struct kpp_request *req) -{ - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - const u8 *secret = kpp_tfm_ctx(tfm); - u8 buf[CURVE25519_KEY_SIZE]; - int copied, nbytes; - - if (req->src) - return -EINVAL; - - curve25519_base_arch(buf, secret); - - /* might want less than we've got */ - nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); - copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, - nbytes), - buf, nbytes); - if (copied != nbytes) - return -EINVAL; - return 0; -} - -static int curve25519_compute_shared_secret(struct kpp_request *req) -{ - struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); - const u8 *secret = kpp_tfm_ctx(tfm); - u8 public_key[CURVE25519_KEY_SIZE]; - u8 buf[CURVE25519_KEY_SIZE]; - int copied, nbytes; - - if (!req->src) - return -EINVAL; - - copied = sg_copy_to_buffer(req->src, - sg_nents_for_len(req->src, - CURVE25519_KEY_SIZE), - public_key, CURVE25519_KEY_SIZE); - if (copied != CURVE25519_KEY_SIZE) - return -EINVAL; - - curve25519_arch(buf, secret, public_key); - - /* might want less than we've got */ - nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); - copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, - nbytes), - buf, nbytes); - if (copied != nbytes) - return -EINVAL; - return 0; -} - -static unsigned int curve25519_max_size(struct crypto_kpp *tfm) -{ - return CURVE25519_KEY_SIZE; -} - -static struct kpp_alg curve25519_alg = { - .base.cra_name = "curve25519", - .base.cra_driver_name = "curve25519-x86", - .base.cra_priority = 200, - .base.cra_module = THIS_MODULE, - .base.cra_ctxsize = CURVE25519_KEY_SIZE, - - .set_secret = curve25519_set_secret, - .generate_public_key = curve25519_generate_public_key, - .compute_shared_secret = curve25519_compute_shared_secret, - .max_size = curve25519_max_size, -}; - - -static int __init curve25519_mod_init(void) -{ - if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) - static_branch_enable(&curve25519_use_bmi2_adx); - else - return 0; - return IS_REACHABLE(CONFIG_CRYPTO_KPP) 
? - crypto_register_kpp(&curve25519_alg) : 0; -} - -static void __exit curve25519_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && - static_branch_likely(&curve25519_use_bmi2_adx)) - crypto_unregister_kpp(&curve25519_alg); -} - -module_init(curve25519_mod_init); -module_exit(curve25519_mod_exit); - -MODULE_ALIAS_CRYPTO("curve25519"); -MODULE_ALIAS_CRYPTO("curve25519-x86"); -MODULE_DESCRIPTION("Curve25519 algorithm, ADX optimized"); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
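
The removed glue gated everything on BMI2 and ADX because the field arithmetic above leans on mulx/adcx/adox; enabling curve25519_use_bmi2_adx patches a static branch, so each call costs a direct jump rather than a runtime feature test, and the IS_REACHABLE(CONFIG_CRYPTO_KPP) checks let the library interface exist even when the kpp algorithm type is not built. A userspace sketch of the same probe, with a plain flag standing in for the static key (assumes an x86 compiler providing <cpuid.h>):

#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool have_bmi2_adx;

static void curve25519_probe(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 7, subleaf 0: EBX bit 8 = BMI2, bit 19 = ADX */
	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		have_bmi2_adx = (ebx & (1u << 8)) && (ebx & (1u << 19));
}

int main(void)
{
	curve25519_probe();
	printf("mulx/adcx path available: %s\n",
	       have_bmi2_adx ? "yes" : "no");
	return 0;
}
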
