Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/configs/exynos_defconfig  1
-rw-r--r--  arch/arm/configs/milbeaut_m10v_defconfig  1
-rw-r--r--  arch/arm/configs/multi_v7_defconfig  1
-rw-r--r--  arch/arm/configs/omap2plus_defconfig  1
-rw-r--r--  arch/arm/crypto/Kconfig  13
-rw-r--r--  arch/arm/crypto/Makefile  2
-rw-r--r--  arch/arm/crypto/curve25519-core.S  2062
-rw-r--r--  arch/arm/crypto/curve25519-glue.c  137
-rw-r--r--  arch/m68k/configs/amiga_defconfig  1
-rw-r--r--  arch/m68k/configs/apollo_defconfig  1
-rw-r--r--  arch/m68k/configs/atari_defconfig  1
-rw-r--r--  arch/m68k/configs/bvme6000_defconfig  1
-rw-r--r--  arch/m68k/configs/hp300_defconfig  1
-rw-r--r--  arch/m68k/configs/mac_defconfig  1
-rw-r--r--  arch/m68k/configs/multi_defconfig  1
-rw-r--r--  arch/m68k/configs/mvme147_defconfig  1
-rw-r--r--  arch/m68k/configs/mvme16x_defconfig  1
-rw-r--r--  arch/m68k/configs/q40_defconfig  1
-rw-r--r--  arch/m68k/configs/sun3_defconfig  1
-rw-r--r--  arch/m68k/configs/sun3x_defconfig  1
-rw-r--r--  arch/mips/cavium-octeon/Makefile  2
-rw-r--r--  arch/mips/cavium-octeon/crypto/Makefile  8
-rw-r--r--  arch/mips/cavium-octeon/crypto/octeon-md5.c  214
-rw-r--r--  arch/mips/cavium-octeon/octeon-crypto.c (renamed from arch/mips/cavium-octeon/crypto/octeon-crypto.c)  0
-rw-r--r--  arch/mips/configs/cavium_octeon_defconfig  1
-rw-r--r--  arch/mips/crypto/Kconfig  10
-rw-r--r--  arch/powerpc/configs/powernv_defconfig  1
-rw-r--r--  arch/powerpc/configs/ppc64_defconfig  1
-rw-r--r--  arch/powerpc/crypto/Kconfig  21
-rw-r--r--  arch/powerpc/crypto/Makefile  4
-rw-r--r--  arch/powerpc/crypto/curve25519-ppc64le-core.c  300
-rw-r--r--  arch/powerpc/crypto/curve25519-ppc64le_asm.S  671
-rw-r--r--  arch/powerpc/crypto/md5-asm.S  235
-rw-r--r--  arch/powerpc/crypto/md5-glue.c  99
-rw-r--r--  arch/s390/configs/debug_defconfig  1
-rw-r--r--  arch/s390/configs/defconfig  1
-rw-r--r--  arch/sparc/crypto/Kconfig  10
-rw-r--r--  arch/sparc/crypto/Makefile  4
-rw-r--r--  arch/sparc/crypto/md5_asm.S  70
-rw-r--r--  arch/sparc/crypto/md5_glue.c  174
-rw-r--r--  arch/x86/crypto/Kconfig  13
-rw-r--r--  arch/x86/crypto/Makefile  5
-rw-r--r--  arch/x86/crypto/curve25519-x86_64.c  1726
43 files changed, 1 insertion, 5800 deletions
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 6915c766923a2f..84070e9698e8cc 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -364,7 +364,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_AES_ARM_BS=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRYPTO_DEV_EXYNOS_RNG=y
CONFIG_CRYPTO_DEV_S5P=y
CONFIG_DMA_CMA=y
diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index a3be0b2ede09c7..a2995eb390c603 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -101,7 +101,6 @@ CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_AES_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
# CONFIG_CRYPTO_HW is not set
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=64
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index f2822eeefb9577..cc0e0e4a879cb1 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1291,7 +1291,6 @@ CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_AES_ARM_CE=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRYPTO_DEV_SUN4I_SS=m
CONFIG_CRYPTO_DEV_FSL_CAAM=m
CONFIG_CRYPTO_DEV_EXYNOS_RNG=m
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 939913ed9a73bd..1d5f752417398c 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -708,7 +708,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=y
CONFIG_CRYPTO_GHASH_ARM_CE=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
-CONFIG_CRYPTO_CHACHA20_NEON=m
CONFIG_CRYPTO_DEV_OMAP=m
CONFIG_CRYPTO_DEV_OMAP_SHAM=m
CONFIG_CRYPTO_DEV_OMAP_AES=m
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 1e5f3cdf691c4f..c436eec22d86ca 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -2,19 +2,6 @@
menu "Accelerated Cryptographic Algorithms for CPU (arm)"
-config CRYPTO_CURVE25519_NEON
- tristate
- depends on KERNEL_MODE_NEON
- select CRYPTO_KPP
- select CRYPTO_LIB_CURVE25519_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CURVE25519
- default CRYPTO_LIB_CURVE25519_INTERNAL
- help
- Curve25519 algorithm
-
- Architecture: arm with
- - NEON (Advanced SIMD) extensions
-
config CRYPTO_GHASH_ARM_CE
tristate "Hash functions: GHASH (PMULL/NEON/ARMv8 Crypto Extensions)"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 4f23999ae17dfe..6346a73effc06a 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -7,7 +7,6 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
-obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
@@ -18,4 +17,3 @@ blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
-curve25519-neon-y := curve25519-core.o curve25519-glue.o
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
deleted file mode 100644
index b697fa5d059a23..00000000000000
--- a/arch/arm/crypto/curve25519-core.S
+++ /dev/null
@@ -1,2062 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- *
- * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
- * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
- * manually reworked for use in kernel space.
- */
-
-#include <linux/linkage.h>
-
-.text
-.arch armv7-a
-.fpu neon
-.align 4
-
-ENTRY(curve25519_neon)
- push {r4-r11, lr}
- mov ip, sp
- sub r3, sp, #704
- and r3, r3, #0xfffffff0
- mov sp, r3
- movw r4, #0
- movw r5, #254
- vmov.i32 q0, #1
- vshr.u64 q1, q0, #7
- vshr.u64 q0, q0, #8
- vmov.i32 d4, #19
- vmov.i32 d5, #38
- add r6, sp, #480
- vst1.8 {d2-d3}, [r6, : 128]!
- vst1.8 {d0-d1}, [r6, : 128]!
- vst1.8 {d4-d5}, [r6, : 128]
- add r6, r3, #0
- vmov.i32 q2, #0
- vst1.8 {d4-d5}, [r6, : 128]!
- vst1.8 {d4-d5}, [r6, : 128]!
- vst1.8 d4, [r6, : 64]
- add r6, r3, #0
- movw r7, #960
- sub r7, r7, #2
- neg r7, r7
- sub r7, r7, r7, LSL #7
- str r7, [r6]
- add r6, sp, #672
- vld1.8 {d4-d5}, [r1]!
- vld1.8 {d6-d7}, [r1]
- vst1.8 {d4-d5}, [r6, : 128]!
- vst1.8 {d6-d7}, [r6, : 128]
- sub r1, r6, #16
- ldrb r6, [r1]
- and r6, r6, #248
- strb r6, [r1]
- ldrb r6, [r1, #31]
- and r6, r6, #127
- orr r6, r6, #64
- strb r6, [r1, #31]
- vmov.i64 q2, #0xffffffff
- vshr.u64 q3, q2, #7
- vshr.u64 q2, q2, #6
- vld1.8 {d8}, [r2]
- vld1.8 {d10}, [r2]
- add r2, r2, #6
- vld1.8 {d12}, [r2]
- vld1.8 {d14}, [r2]
- add r2, r2, #6
- vld1.8 {d16}, [r2]
- add r2, r2, #4
- vld1.8 {d18}, [r2]
- vld1.8 {d20}, [r2]
- add r2, r2, #6
- vld1.8 {d22}, [r2]
- add r2, r2, #2
- vld1.8 {d24}, [r2]
- vld1.8 {d26}, [r2]
- vshr.u64 q5, q5, #26
- vshr.u64 q6, q6, #3
- vshr.u64 q7, q7, #29
- vshr.u64 q8, q8, #6
- vshr.u64 q10, q10, #25
- vshr.u64 q11, q11, #3
- vshr.u64 q12, q12, #12
- vshr.u64 q13, q13, #38
- vand q4, q4, q2
- vand q6, q6, q2
- vand q8, q8, q2
- vand q10, q10, q2
- vand q2, q12, q2
- vand q5, q5, q3
- vand q7, q7, q3
- vand q9, q9, q3
- vand q11, q11, q3
- vand q3, q13, q3
- add r2, r3, #48
- vadd.i64 q12, q4, q1
- vadd.i64 q13, q10, q1
- vshr.s64 q12, q12, #26
- vshr.s64 q13, q13, #26
- vadd.i64 q5, q5, q12
- vshl.i64 q12, q12, #26
- vadd.i64 q14, q5, q0
- vadd.i64 q11, q11, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q15, q11, q0
- vsub.i64 q4, q4, q12
- vshr.s64 q12, q14, #25
- vsub.i64 q10, q10, q13
- vshr.s64 q13, q15, #25
- vadd.i64 q6, q6, q12
- vshl.i64 q12, q12, #25
- vadd.i64 q14, q6, q1
- vadd.i64 q2, q2, q13
- vsub.i64 q5, q5, q12
- vshr.s64 q12, q14, #26
- vshl.i64 q13, q13, #25
- vadd.i64 q14, q2, q1
- vadd.i64 q7, q7, q12
- vshl.i64 q12, q12, #26
- vadd.i64 q15, q7, q0
- vsub.i64 q11, q11, q13
- vshr.s64 q13, q14, #26
- vsub.i64 q6, q6, q12
- vshr.s64 q12, q15, #25
- vadd.i64 q3, q3, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q14, q3, q0
- vadd.i64 q8, q8, q12
- vshl.i64 q12, q12, #25
- vadd.i64 q15, q8, q1
- add r2, r2, #8
- vsub.i64 q2, q2, q13
- vshr.s64 q13, q14, #25
- vsub.i64 q7, q7, q12
- vshr.s64 q12, q15, #26
- vadd.i64 q14, q13, q13
- vadd.i64 q9, q9, q12
- vtrn.32 d12, d14
- vshl.i64 q12, q12, #26
- vtrn.32 d13, d15
- vadd.i64 q0, q9, q0
- vadd.i64 q4, q4, q14
- vst1.8 d12, [r2, : 64]!
- vshl.i64 q6, q13, #4
- vsub.i64 q7, q8, q12
- vshr.s64 q0, q0, #25
- vadd.i64 q4, q4, q6
- vadd.i64 q6, q10, q0
- vshl.i64 q0, q0, #25
- vadd.i64 q8, q6, q1
- vadd.i64 q4, q4, q13
- vshl.i64 q10, q13, #25
- vadd.i64 q1, q4, q1
- vsub.i64 q0, q9, q0
- vshr.s64 q8, q8, #26
- vsub.i64 q3, q3, q10
- vtrn.32 d14, d0
- vshr.s64 q1, q1, #26
- vtrn.32 d15, d1
- vadd.i64 q0, q11, q8
- vst1.8 d14, [r2, : 64]
- vshl.i64 q7, q8, #26
- vadd.i64 q5, q5, q1
- vtrn.32 d4, d6
- vshl.i64 q1, q1, #26
- vtrn.32 d5, d7
- vsub.i64 q3, q6, q7
- add r2, r2, #16
- vsub.i64 q1, q4, q1
- vst1.8 d4, [r2, : 64]
- vtrn.32 d6, d0
- vtrn.32 d7, d1
- sub r2, r2, #8
- vtrn.32 d2, d10
- vtrn.32 d3, d11
- vst1.8 d6, [r2, : 64]
- sub r2, r2, #24
- vst1.8 d2, [r2, : 64]
- add r2, r3, #96
- vmov.i32 q0, #0
- vmov.i64 d2, #0xff
- vmov.i64 d3, #0
- vshr.u32 q1, q1, #7
- vst1.8 {d2-d3}, [r2, : 128]!
- vst1.8 {d0-d1}, [r2, : 128]!
- vst1.8 d0, [r2, : 64]
- add r2, r3, #144
- vmov.i32 q0, #0
- vst1.8 {d0-d1}, [r2, : 128]!
- vst1.8 {d0-d1}, [r2, : 128]!
- vst1.8 d0, [r2, : 64]
- add r2, r3, #240
- vmov.i32 q0, #0
- vmov.i64 d2, #0xff
- vmov.i64 d3, #0
- vshr.u32 q1, q1, #7
- vst1.8 {d2-d3}, [r2, : 128]!
- vst1.8 {d0-d1}, [r2, : 128]!
- vst1.8 d0, [r2, : 64]
- add r2, r3, #48
- add r6, r3, #192
- vld1.8 {d0-d1}, [r2, : 128]!
- vld1.8 {d2-d3}, [r2, : 128]!
- vld1.8 {d4}, [r2, : 64]
- vst1.8 {d0-d1}, [r6, : 128]!
- vst1.8 {d2-d3}, [r6, : 128]!
- vst1.8 d4, [r6, : 64]
-.Lmainloop:
- mov r2, r5, LSR #3
- and r6, r5, #7
- ldrb r2, [r1, r2]
- mov r2, r2, LSR r6
- and r2, r2, #1
- str r5, [sp, #456]
- eor r4, r4, r2
- str r2, [sp, #460]
- neg r2, r4
- add r4, r3, #96
- add r5, r3, #192
- add r6, r3, #144
- vld1.8 {d8-d9}, [r4, : 128]!
- add r7, r3, #240
- vld1.8 {d10-d11}, [r5, : 128]!
- veor q6, q4, q5
- vld1.8 {d14-d15}, [r6, : 128]!
- vdup.i32 q8, r2
- vld1.8 {d18-d19}, [r7, : 128]!
- veor q10, q7, q9
- vld1.8 {d22-d23}, [r4, : 128]!
- vand q6, q6, q8
- vld1.8 {d24-d25}, [r5, : 128]!
- vand q10, q10, q8
- vld1.8 {d26-d27}, [r6, : 128]!
- veor q4, q4, q6
- vld1.8 {d28-d29}, [r7, : 128]!
- veor q5, q5, q6
- vld1.8 {d0}, [r4, : 64]
- veor q6, q7, q10
- vld1.8 {d2}, [r5, : 64]
- veor q7, q9, q10
- vld1.8 {d4}, [r6, : 64]
- veor q9, q11, q12
- vld1.8 {d6}, [r7, : 64]
- veor q10, q0, q1
- sub r2, r4, #32
- vand q9, q9, q8
- sub r4, r5, #32
- vand q10, q10, q8
- sub r5, r6, #32
- veor q11, q11, q9
- sub r6, r7, #32
- veor q0, q0, q10
- veor q9, q12, q9
- veor q1, q1, q10
- veor q10, q13, q14
- veor q12, q2, q3
- vand q10, q10, q8
- vand q8, q12, q8
- veor q12, q13, q10
- veor q2, q2, q8
- veor q10, q14, q10
- veor q3, q3, q8
- vadd.i32 q8, q4, q6
- vsub.i32 q4, q4, q6
- vst1.8 {d16-d17}, [r2, : 128]!
- vadd.i32 q6, q11, q12
- vst1.8 {d8-d9}, [r5, : 128]!
- vsub.i32 q4, q11, q12
- vst1.8 {d12-d13}, [r2, : 128]!
- vadd.i32 q6, q0, q2
- vst1.8 {d8-d9}, [r5, : 128]!
- vsub.i32 q0, q0, q2
- vst1.8 d12, [r2, : 64]
- vadd.i32 q2, q5, q7
- vst1.8 d0, [r5, : 64]
- vsub.i32 q0, q5, q7
- vst1.8 {d4-d5}, [r4, : 128]!
- vadd.i32 q2, q9, q10
- vst1.8 {d0-d1}, [r6, : 128]!
- vsub.i32 q0, q9, q10
- vst1.8 {d4-d5}, [r4, : 128]!
- vadd.i32 q2, q1, q3
- vst1.8 {d0-d1}, [r6, : 128]!
- vsub.i32 q0, q1, q3
- vst1.8 d4, [r4, : 64]
- vst1.8 d0, [r6, : 64]
- add r2, sp, #512
- add r4, r3, #96
- add r5, r3, #144
- vld1.8 {d0-d1}, [r2, : 128]
- vld1.8 {d2-d3}, [r4, : 128]!
- vld1.8 {d4-d5}, [r5, : 128]!
- vzip.i32 q1, q2
- vld1.8 {d6-d7}, [r4, : 128]!
- vld1.8 {d8-d9}, [r5, : 128]!
- vshl.i32 q5, q1, #1
- vzip.i32 q3, q4
- vshl.i32 q6, q2, #1
- vld1.8 {d14}, [r4, : 64]
- vshl.i32 q8, q3, #1
- vld1.8 {d15}, [r5, : 64]
- vshl.i32 q9, q4, #1
- vmul.i32 d21, d7, d1
- vtrn.32 d14, d15
- vmul.i32 q11, q4, q0
- vmul.i32 q0, q7, q0
- vmull.s32 q12, d2, d2
- vmlal.s32 q12, d11, d1
- vmlal.s32 q12, d12, d0
- vmlal.s32 q12, d13, d23
- vmlal.s32 q12, d16, d22
- vmlal.s32 q12, d7, d21
- vmull.s32 q10, d2, d11
- vmlal.s32 q10, d4, d1
- vmlal.s32 q10, d13, d0
- vmlal.s32 q10, d6, d23
- vmlal.s32 q10, d17, d22
- vmull.s32 q13, d10, d4
- vmlal.s32 q13, d11, d3
- vmlal.s32 q13, d13, d1
- vmlal.s32 q13, d16, d0
- vmlal.s32 q13, d17, d23
- vmlal.s32 q13, d8, d22
- vmull.s32 q1, d10, d5
- vmlal.s32 q1, d11, d4
- vmlal.s32 q1, d6, d1
- vmlal.s32 q1, d17, d0
- vmlal.s32 q1, d8, d23
- vmull.s32 q14, d10, d6
- vmlal.s32 q14, d11, d13
- vmlal.s32 q14, d4, d4
- vmlal.s32 q14, d17, d1
- vmlal.s32 q14, d18, d0
- vmlal.s32 q14, d9, d23
- vmull.s32 q11, d10, d7
- vmlal.s32 q11, d11, d6
- vmlal.s32 q11, d12, d5
- vmlal.s32 q11, d8, d1
- vmlal.s32 q11, d19, d0
- vmull.s32 q15, d10, d8
- vmlal.s32 q15, d11, d17
- vmlal.s32 q15, d12, d6
- vmlal.s32 q15, d13, d5
- vmlal.s32 q15, d19, d1
- vmlal.s32 q15, d14, d0
- vmull.s32 q2, d10, d9
- vmlal.s32 q2, d11, d8
- vmlal.s32 q2, d12, d7
- vmlal.s32 q2, d13, d6
- vmlal.s32 q2, d14, d1
- vmull.s32 q0, d15, d1
- vmlal.s32 q0, d10, d14
- vmlal.s32 q0, d11, d19
- vmlal.s32 q0, d12, d8
- vmlal.s32 q0, d13, d17
- vmlal.s32 q0, d6, d6
- add r2, sp, #480
- vld1.8 {d18-d19}, [r2, : 128]!
- vmull.s32 q3, d16, d7
- vmlal.s32 q3, d10, d15
- vmlal.s32 q3, d11, d14
- vmlal.s32 q3, d12, d9
- vmlal.s32 q3, d13, d8
- vld1.8 {d8-d9}, [r2, : 128]
- vadd.i64 q5, q12, q9
- vadd.i64 q6, q15, q9
- vshr.s64 q5, q5, #26
- vshr.s64 q6, q6, #26
- vadd.i64 q7, q10, q5
- vshl.i64 q5, q5, #26
- vadd.i64 q8, q7, q4
- vadd.i64 q2, q2, q6
- vshl.i64 q6, q6, #26
- vadd.i64 q10, q2, q4
- vsub.i64 q5, q12, q5
- vshr.s64 q8, q8, #25
- vsub.i64 q6, q15, q6
- vshr.s64 q10, q10, #25
- vadd.i64 q12, q13, q8
- vshl.i64 q8, q8, #25
- vadd.i64 q13, q12, q9
- vadd.i64 q0, q0, q10
- vsub.i64 q7, q7, q8
- vshr.s64 q8, q13, #26
- vshl.i64 q10, q10, #25
- vadd.i64 q13, q0, q9
- vadd.i64 q1, q1, q8
- vshl.i64 q8, q8, #26
- vadd.i64 q15, q1, q4
- vsub.i64 q2, q2, q10
- vshr.s64 q10, q13, #26
- vsub.i64 q8, q12, q8
- vshr.s64 q12, q15, #25
- vadd.i64 q3, q3, q10
- vshl.i64 q10, q10, #26
- vadd.i64 q13, q3, q4
- vadd.i64 q14, q14, q12
- add r2, r3, #288
- vshl.i64 q12, q12, #25
- add r4, r3, #336
- vadd.i64 q15, q14, q9
- add r2, r2, #8
- vsub.i64 q0, q0, q10
- add r4, r4, #8
- vshr.s64 q10, q13, #25
- vsub.i64 q1, q1, q12
- vshr.s64 q12, q15, #26
- vadd.i64 q13, q10, q10
- vadd.i64 q11, q11, q12
- vtrn.32 d16, d2
- vshl.i64 q12, q12, #26
- vtrn.32 d17, d3
- vadd.i64 q1, q11, q4
- vadd.i64 q4, q5, q13
- vst1.8 d16, [r2, : 64]!
- vshl.i64 q5, q10, #4
- vst1.8 d17, [r4, : 64]!
- vsub.i64 q8, q14, q12
- vshr.s64 q1, q1, #25
- vadd.i64 q4, q4, q5
- vadd.i64 q5, q6, q1
- vshl.i64 q1, q1, #25
- vadd.i64 q6, q5, q9
- vadd.i64 q4, q4, q10
- vshl.i64 q10, q10, #25
- vadd.i64 q9, q4, q9
- vsub.i64 q1, q11, q1
- vshr.s64 q6, q6, #26
- vsub.i64 q3, q3, q10
- vtrn.32 d16, d2
- vshr.s64 q9, q9, #26
- vtrn.32 d17, d3
- vadd.i64 q1, q2, q6
- vst1.8 d16, [r2, : 64]
- vshl.i64 q2, q6, #26
- vst1.8 d17, [r4, : 64]
- vadd.i64 q6, q7, q9
- vtrn.32 d0, d6
- vshl.i64 q7, q9, #26
- vtrn.32 d1, d7
- vsub.i64 q2, q5, q2
- add r2, r2, #16
- vsub.i64 q3, q4, q7
- vst1.8 d0, [r2, : 64]
- add r4, r4, #16
- vst1.8 d1, [r4, : 64]
- vtrn.32 d4, d2
- vtrn.32 d5, d3
- sub r2, r2, #8
- sub r4, r4, #8
- vtrn.32 d6, d12
- vtrn.32 d7, d13
- vst1.8 d4, [r2, : 64]
- vst1.8 d5, [r4, : 64]
- sub r2, r2, #24
- sub r4, r4, #24
- vst1.8 d6, [r2, : 64]
- vst1.8 d7, [r4, : 64]
- add r2, r3, #240
- add r4, r3, #96
- vld1.8 {d0-d1}, [r4, : 128]!
- vld1.8 {d2-d3}, [r4, : 128]!
- vld1.8 {d4}, [r4, : 64]
- add r4, r3, #144
- vld1.8 {d6-d7}, [r4, : 128]!
- vtrn.32 q0, q3
- vld1.8 {d8-d9}, [r4, : 128]!
- vshl.i32 q5, q0, #4
- vtrn.32 q1, q4
- vshl.i32 q6, q3, #4
- vadd.i32 q5, q5, q0
- vadd.i32 q6, q6, q3
- vshl.i32 q7, q1, #4
- vld1.8 {d5}, [r4, : 64]
- vshl.i32 q8, q4, #4
- vtrn.32 d4, d5
- vadd.i32 q7, q7, q1
- vadd.i32 q8, q8, q4
- vld1.8 {d18-d19}, [r2, : 128]!
- vshl.i32 q10, q2, #4
- vld1.8 {d22-d23}, [r2, : 128]!
- vadd.i32 q10, q10, q2
- vld1.8 {d24}, [r2, : 64]
- vadd.i32 q5, q5, q0
- add r2, r3, #192
- vld1.8 {d26-d27}, [r2, : 128]!
- vadd.i32 q6, q6, q3
- vld1.8 {d28-d29}, [r2, : 128]!
- vadd.i32 q8, q8, q4
- vld1.8 {d25}, [r2, : 64]
- vadd.i32 q10, q10, q2
- vtrn.32 q9, q13
- vadd.i32 q7, q7, q1
- vadd.i32 q5, q5, q0
- vtrn.32 q11, q14
- vadd.i32 q6, q6, q3
- add r2, sp, #528
- vadd.i32 q10, q10, q2
- vtrn.32 d24, d25
- vst1.8 {d12-d13}, [r2, : 128]!
- vshl.i32 q6, q13, #1
- vst1.8 {d20-d21}, [r2, : 128]!
- vshl.i32 q10, q14, #1
- vst1.8 {d12-d13}, [r2, : 128]!
- vshl.i32 q15, q12, #1
- vadd.i32 q8, q8, q4
- vext.32 d10, d31, d30, #0
- vadd.i32 q7, q7, q1
- vst1.8 {d16-d17}, [r2, : 128]!
- vmull.s32 q8, d18, d5
- vmlal.s32 q8, d26, d4
- vmlal.s32 q8, d19, d9
- vmlal.s32 q8, d27, d3
- vmlal.s32 q8, d22, d8
- vmlal.s32 q8, d28, d2
- vmlal.s32 q8, d23, d7
- vmlal.s32 q8, d29, d1
- vmlal.s32 q8, d24, d6
- vmlal.s32 q8, d25, d0
- vst1.8 {d14-d15}, [r2, : 128]!
- vmull.s32 q2, d18, d4
- vmlal.s32 q2, d12, d9
- vmlal.s32 q2, d13, d8
- vmlal.s32 q2, d19, d3
- vmlal.s32 q2, d22, d2
- vmlal.s32 q2, d23, d1
- vmlal.s32 q2, d24, d0
- vst1.8 {d20-d21}, [r2, : 128]!
- vmull.s32 q7, d18, d9
- vmlal.s32 q7, d26, d3
- vmlal.s32 q7, d19, d8
- vmlal.s32 q7, d27, d2
- vmlal.s32 q7, d22, d7
- vmlal.s32 q7, d28, d1
- vmlal.s32 q7, d23, d6
- vmlal.s32 q7, d29, d0
- vst1.8 {d10-d11}, [r2, : 128]!
- vmull.s32 q5, d18, d3
- vmlal.s32 q5, d19, d2
- vmlal.s32 q5, d22, d1
- vmlal.s32 q5, d23, d0
- vmlal.s32 q5, d12, d8
- vst1.8 {d16-d17}, [r2, : 128]
- vmull.s32 q4, d18, d8
- vmlal.s32 q4, d26, d2
- vmlal.s32 q4, d19, d7
- vmlal.s32 q4, d27, d1
- vmlal.s32 q4, d22, d6
- vmlal.s32 q4, d28, d0
- vmull.s32 q8, d18, d7
- vmlal.s32 q8, d26, d1
- vmlal.s32 q8, d19, d6
- vmlal.s32 q8, d27, d0
- add r2, sp, #544
- vld1.8 {d20-d21}, [r2, : 128]
- vmlal.s32 q7, d24, d21
- vmlal.s32 q7, d25, d20
- vmlal.s32 q4, d23, d21
- vmlal.s32 q4, d29, d20
- vmlal.s32 q8, d22, d21
- vmlal.s32 q8, d28, d20
- vmlal.s32 q5, d24, d20
- vst1.8 {d14-d15}, [r2, : 128]
- vmull.s32 q7, d18, d6
- vmlal.s32 q7, d26, d0
- add r2, sp, #624
- vld1.8 {d30-d31}, [r2, : 128]
- vmlal.s32 q2, d30, d21
- vmlal.s32 q7, d19, d21
- vmlal.s32 q7, d27, d20
- add r2, sp, #592
- vld1.8 {d26-d27}, [r2, : 128]
- vmlal.s32 q4, d25, d27
- vmlal.s32 q8, d29, d27
- vmlal.s32 q8, d25, d26
- vmlal.s32 q7, d28, d27
- vmlal.s32 q7, d29, d26
- add r2, sp, #576
- vld1.8 {d28-d29}, [r2, : 128]
- vmlal.s32 q4, d24, d29
- vmlal.s32 q8, d23, d29
- vmlal.s32 q8, d24, d28
- vmlal.s32 q7, d22, d29
- vmlal.s32 q7, d23, d28
- vst1.8 {d8-d9}, [r2, : 128]
- add r2, sp, #528
- vld1.8 {d8-d9}, [r2, : 128]
- vmlal.s32 q7, d24, d9
- vmlal.s32 q7, d25, d31
- vmull.s32 q1, d18, d2
- vmlal.s32 q1, d19, d1
- vmlal.s32 q1, d22, d0
- vmlal.s32 q1, d24, d27
- vmlal.s32 q1, d23, d20
- vmlal.s32 q1, d12, d7
- vmlal.s32 q1, d13, d6
- vmull.s32 q6, d18, d1
- vmlal.s32 q6, d19, d0
- vmlal.s32 q6, d23, d27
- vmlal.s32 q6, d22, d20
- vmlal.s32 q6, d24, d26
- vmull.s32 q0, d18, d0
- vmlal.s32 q0, d22, d27
- vmlal.s32 q0, d23, d26
- vmlal.s32 q0, d24, d31
- vmlal.s32 q0, d19, d20
- add r2, sp, #608
- vld1.8 {d18-d19}, [r2, : 128]
- vmlal.s32 q2, d18, d7
- vmlal.s32 q5, d18, d6
- vmlal.s32 q1, d18, d21
- vmlal.s32 q0, d18, d28
- vmlal.s32 q6, d18, d29
- vmlal.s32 q2, d19, d6
- vmlal.s32 q5, d19, d21
- vmlal.s32 q1, d19, d29
- vmlal.s32 q0, d19, d9
- vmlal.s32 q6, d19, d28
- add r2, sp, #560
- vld1.8 {d18-d19}, [r2, : 128]
- add r2, sp, #480
- vld1.8 {d22-d23}, [r2, : 128]
- vmlal.s32 q5, d19, d7
- vmlal.s32 q0, d18, d21
- vmlal.s32 q0, d19, d29
- vmlal.s32 q6, d18, d6
- add r2, sp, #496
- vld1.8 {d6-d7}, [r2, : 128]
- vmlal.s32 q6, d19, d21
- add r2, sp, #544
- vld1.8 {d18-d19}, [r2, : 128]
- vmlal.s32 q0, d30, d8
- add r2, sp, #640
- vld1.8 {d20-d21}, [r2, : 128]
- vmlal.s32 q5, d30, d29
- add r2, sp, #576
- vld1.8 {d24-d25}, [r2, : 128]
- vmlal.s32 q1, d30, d28
- vadd.i64 q13, q0, q11
- vadd.i64 q14, q5, q11
- vmlal.s32 q6, d30, d9
- vshr.s64 q4, q13, #26
- vshr.s64 q13, q14, #26
- vadd.i64 q7, q7, q4
- vshl.i64 q4, q4, #26
- vadd.i64 q14, q7, q3
- vadd.i64 q9, q9, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q15, q9, q3
- vsub.i64 q0, q0, q4
- vshr.s64 q4, q14, #25
- vsub.i64 q5, q5, q13
- vshr.s64 q13, q15, #25
- vadd.i64 q6, q6, q4
- vshl.i64 q4, q4, #25
- vadd.i64 q14, q6, q11
- vadd.i64 q2, q2, q13
- vsub.i64 q4, q7, q4
- vshr.s64 q7, q14, #26
- vshl.i64 q13, q13, #25
- vadd.i64 q14, q2, q11
- vadd.i64 q8, q8, q7
- vshl.i64 q7, q7, #26
- vadd.i64 q15, q8, q3
- vsub.i64 q9, q9, q13
- vshr.s64 q13, q14, #26
- vsub.i64 q6, q6, q7
- vshr.s64 q7, q15, #25
- vadd.i64 q10, q10, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q14, q10, q3
- vadd.i64 q1, q1, q7
- add r2, r3, #144
- vshl.i64 q7, q7, #25
- add r4, r3, #96
- vadd.i64 q15, q1, q11
- add r2, r2, #8
- vsub.i64 q2, q2, q13
- add r4, r4, #8
- vshr.s64 q13, q14, #25
- vsub.i64 q7, q8, q7
- vshr.s64 q8, q15, #26
- vadd.i64 q14, q13, q13
- vadd.i64 q12, q12, q8
- vtrn.32 d12, d14
- vshl.i64 q8, q8, #26
- vtrn.32 d13, d15
- vadd.i64 q3, q12, q3
- vadd.i64 q0, q0, q14
- vst1.8 d12, [r2, : 64]!
- vshl.i64 q7, q13, #4
- vst1.8 d13, [r4, : 64]!
- vsub.i64 q1, q1, q8
- vshr.s64 q3, q3, #25
- vadd.i64 q0, q0, q7
- vadd.i64 q5, q5, q3
- vshl.i64 q3, q3, #25
- vadd.i64 q6, q5, q11
- vadd.i64 q0, q0, q13
- vshl.i64 q7, q13, #25
- vadd.i64 q8, q0, q11
- vsub.i64 q3, q12, q3
- vshr.s64 q6, q6, #26
- vsub.i64 q7, q10, q7
- vtrn.32 d2, d6
- vshr.s64 q8, q8, #26
- vtrn.32 d3, d7
- vadd.i64 q3, q9, q6
- vst1.8 d2, [r2, : 64]
- vshl.i64 q6, q6, #26
- vst1.8 d3, [r4, : 64]
- vadd.i64 q1, q4, q8
- vtrn.32 d4, d14
- vshl.i64 q4, q8, #26
- vtrn.32 d5, d15
- vsub.i64 q5, q5, q6
- add r2, r2, #16
- vsub.i64 q0, q0, q4
- vst1.8 d4, [r2, : 64]
- add r4, r4, #16
- vst1.8 d5, [r4, : 64]
- vtrn.32 d10, d6
- vtrn.32 d11, d7
- sub r2, r2, #8
- sub r4, r4, #8
- vtrn.32 d0, d2
- vtrn.32 d1, d3
- vst1.8 d10, [r2, : 64]
- vst1.8 d11, [r4, : 64]
- sub r2, r2, #24
- sub r4, r4, #24
- vst1.8 d0, [r2, : 64]
- vst1.8 d1, [r4, : 64]
- add r2, r3, #288
- add r4, r3, #336
- vld1.8 {d0-d1}, [r2, : 128]!
- vld1.8 {d2-d3}, [r4, : 128]!
- vsub.i32 q0, q0, q1
- vld1.8 {d2-d3}, [r2, : 128]!
- vld1.8 {d4-d5}, [r4, : 128]!
- vsub.i32 q1, q1, q2
- add r5, r3, #240
- vld1.8 {d4}, [r2, : 64]
- vld1.8 {d6}, [r4, : 64]
- vsub.i32 q2, q2, q3
- vst1.8 {d0-d1}, [r5, : 128]!
- vst1.8 {d2-d3}, [r5, : 128]!
- vst1.8 d4, [r5, : 64]
- add r2, r3, #144
- add r4, r3, #96
- add r5, r3, #144
- add r6, r3, #192
- vld1.8 {d0-d1}, [r2, : 128]!
- vld1.8 {d2-d3}, [r4, : 128]!
- vsub.i32 q2, q0, q1
- vadd.i32 q0, q0, q1
- vld1.8 {d2-d3}, [r2, : 128]!
- vld1.8 {d6-d7}, [r4, : 128]!
- vsub.i32 q4, q1, q3
- vadd.i32 q1, q1, q3
- vld1.8 {d6}, [r2, : 64]
- vld1.8 {d10}, [r4, : 64]
- vsub.i32 q6, q3, q5
- vadd.i32 q3, q3, q5
- vst1.8 {d4-d5}, [r5, : 128]!
- vst1.8 {d0-d1}, [r6, : 128]!
- vst1.8 {d8-d9}, [r5, : 128]!
- vst1.8 {d2-d3}, [r6, : 128]!
- vst1.8 d12, [r5, : 64]
- vst1.8 d6, [r6, : 64]
- add r2, r3, #0
- add r4, r3, #240
- vld1.8 {d0-d1}, [r4, : 128]!
- vld1.8 {d2-d3}, [r4, : 128]!
- vld1.8 {d4}, [r4, : 64]
- add r4, r3, #336
- vld1.8 {d6-d7}, [r4, : 128]!
- vtrn.32 q0, q3
- vld1.8 {d8-d9}, [r4, : 128]!
- vshl.i32 q5, q0, #4
- vtrn.32 q1, q4
- vshl.i32 q6, q3, #4
- vadd.i32 q5, q5, q0
- vadd.i32 q6, q6, q3
- vshl.i32 q7, q1, #4
- vld1.8 {d5}, [r4, : 64]
- vshl.i32 q8, q4, #4
- vtrn.32 d4, d5
- vadd.i32 q7, q7, q1
- vadd.i32 q8, q8, q4
- vld1.8 {d18-d19}, [r2, : 128]!
- vshl.i32 q10, q2, #4
- vld1.8 {d22-d23}, [r2, : 128]!
- vadd.i32 q10, q10, q2
- vld1.8 {d24}, [r2, : 64]
- vadd.i32 q5, q5, q0
- add r2, r3, #288
- vld1.8 {d26-d27}, [r2, : 128]!
- vadd.i32 q6, q6, q3
- vld1.8 {d28-d29}, [r2, : 128]!
- vadd.i32 q8, q8, q4
- vld1.8 {d25}, [r2, : 64]
- vadd.i32 q10, q10, q2
- vtrn.32 q9, q13
- vadd.i32 q7, q7, q1
- vadd.i32 q5, q5, q0
- vtrn.32 q11, q14
- vadd.i32 q6, q6, q3
- add r2, sp, #528
- vadd.i32 q10, q10, q2
- vtrn.32 d24, d25
- vst1.8 {d12-d13}, [r2, : 128]!
- vshl.i32 q6, q13, #1
- vst1.8 {d20-d21}, [r2, : 128]!
- vshl.i32 q10, q14, #1
- vst1.8 {d12-d13}, [r2, : 128]!
- vshl.i32 q15, q12, #1
- vadd.i32 q8, q8, q4
- vext.32 d10, d31, d30, #0
- vadd.i32 q7, q7, q1
- vst1.8 {d16-d17}, [r2, : 128]!
- vmull.s32 q8, d18, d5
- vmlal.s32 q8, d26, d4
- vmlal.s32 q8, d19, d9
- vmlal.s32 q8, d27, d3
- vmlal.s32 q8, d22, d8
- vmlal.s32 q8, d28, d2
- vmlal.s32 q8, d23, d7
- vmlal.s32 q8, d29, d1
- vmlal.s32 q8, d24, d6
- vmlal.s32 q8, d25, d0
- vst1.8 {d14-d15}, [r2, : 128]!
- vmull.s32 q2, d18, d4
- vmlal.s32 q2, d12, d9
- vmlal.s32 q2, d13, d8
- vmlal.s32 q2, d19, d3
- vmlal.s32 q2, d22, d2
- vmlal.s32 q2, d23, d1
- vmlal.s32 q2, d24, d0
- vst1.8 {d20-d21}, [r2, : 128]!
- vmull.s32 q7, d18, d9
- vmlal.s32 q7, d26, d3
- vmlal.s32 q7, d19, d8
- vmlal.s32 q7, d27, d2
- vmlal.s32 q7, d22, d7
- vmlal.s32 q7, d28, d1
- vmlal.s32 q7, d23, d6
- vmlal.s32 q7, d29, d0
- vst1.8 {d10-d11}, [r2, : 128]!
- vmull.s32 q5, d18, d3
- vmlal.s32 q5, d19, d2
- vmlal.s32 q5, d22, d1
- vmlal.s32 q5, d23, d0
- vmlal.s32 q5, d12, d8
- vst1.8 {d16-d17}, [r2, : 128]!
- vmull.s32 q4, d18, d8
- vmlal.s32 q4, d26, d2
- vmlal.s32 q4, d19, d7
- vmlal.s32 q4, d27, d1
- vmlal.s32 q4, d22, d6
- vmlal.s32 q4, d28, d0
- vmull.s32 q8, d18, d7
- vmlal.s32 q8, d26, d1
- vmlal.s32 q8, d19, d6
- vmlal.s32 q8, d27, d0
- add r2, sp, #544
- vld1.8 {d20-d21}, [r2, : 128]
- vmlal.s32 q7, d24, d21
- vmlal.s32 q7, d25, d20
- vmlal.s32 q4, d23, d21
- vmlal.s32 q4, d29, d20
- vmlal.s32 q8, d22, d21
- vmlal.s32 q8, d28, d20
- vmlal.s32 q5, d24, d20
- vst1.8 {d14-d15}, [r2, : 128]
- vmull.s32 q7, d18, d6
- vmlal.s32 q7, d26, d0
- add r2, sp, #624
- vld1.8 {d30-d31}, [r2, : 128]
- vmlal.s32 q2, d30, d21
- vmlal.s32 q7, d19, d21
- vmlal.s32 q7, d27, d20
- add r2, sp, #592
- vld1.8 {d26-d27}, [r2, : 128]
- vmlal.s32 q4, d25, d27
- vmlal.s32 q8, d29, d27
- vmlal.s32 q8, d25, d26
- vmlal.s32 q7, d28, d27
- vmlal.s32 q7, d29, d26
- add r2, sp, #576
- vld1.8 {d28-d29}, [r2, : 128]
- vmlal.s32 q4, d24, d29
- vmlal.s32 q8, d23, d29
- vmlal.s32 q8, d24, d28
- vmlal.s32 q7, d22, d29
- vmlal.s32 q7, d23, d28
- vst1.8 {d8-d9}, [r2, : 128]
- add r2, sp, #528
- vld1.8 {d8-d9}, [r2, : 128]
- vmlal.s32 q7, d24, d9
- vmlal.s32 q7, d25, d31
- vmull.s32 q1, d18, d2
- vmlal.s32 q1, d19, d1
- vmlal.s32 q1, d22, d0
- vmlal.s32 q1, d24, d27
- vmlal.s32 q1, d23, d20
- vmlal.s32 q1, d12, d7
- vmlal.s32 q1, d13, d6
- vmull.s32 q6, d18, d1
- vmlal.s32 q6, d19, d0
- vmlal.s32 q6, d23, d27
- vmlal.s32 q6, d22, d20
- vmlal.s32 q6, d24, d26
- vmull.s32 q0, d18, d0
- vmlal.s32 q0, d22, d27
- vmlal.s32 q0, d23, d26
- vmlal.s32 q0, d24, d31
- vmlal.s32 q0, d19, d20
- add r2, sp, #608
- vld1.8 {d18-d19}, [r2, : 128]
- vmlal.s32 q2, d18, d7
- vmlal.s32 q5, d18, d6
- vmlal.s32 q1, d18, d21
- vmlal.s32 q0, d18, d28
- vmlal.s32 q6, d18, d29
- vmlal.s32 q2, d19, d6
- vmlal.s32 q5, d19, d21
- vmlal.s32 q1, d19, d29
- vmlal.s32 q0, d19, d9
- vmlal.s32 q6, d19, d28
- add r2, sp, #560
- vld1.8 {d18-d19}, [r2, : 128]
- add r2, sp, #480
- vld1.8 {d22-d23}, [r2, : 128]
- vmlal.s32 q5, d19, d7
- vmlal.s32 q0, d18, d21
- vmlal.s32 q0, d19, d29
- vmlal.s32 q6, d18, d6
- add r2, sp, #496
- vld1.8 {d6-d7}, [r2, : 128]
- vmlal.s32 q6, d19, d21
- add r2, sp, #544
- vld1.8 {d18-d19}, [r2, : 128]
- vmlal.s32 q0, d30, d8
- add r2, sp, #640
- vld1.8 {d20-d21}, [r2, : 128]
- vmlal.s32 q5, d30, d29
- add r2, sp, #576
- vld1.8 {d24-d25}, [r2, : 128]
- vmlal.s32 q1, d30, d28
- vadd.i64 q13, q0, q11
- vadd.i64 q14, q5, q11
- vmlal.s32 q6, d30, d9
- vshr.s64 q4, q13, #26
- vshr.s64 q13, q14, #26
- vadd.i64 q7, q7, q4
- vshl.i64 q4, q4, #26
- vadd.i64 q14, q7, q3
- vadd.i64 q9, q9, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q15, q9, q3
- vsub.i64 q0, q0, q4
- vshr.s64 q4, q14, #25
- vsub.i64 q5, q5, q13
- vshr.s64 q13, q15, #25
- vadd.i64 q6, q6, q4
- vshl.i64 q4, q4, #25
- vadd.i64 q14, q6, q11
- vadd.i64 q2, q2, q13
- vsub.i64 q4, q7, q4
- vshr.s64 q7, q14, #26
- vshl.i64 q13, q13, #25
- vadd.i64 q14, q2, q11
- vadd.i64 q8, q8, q7
- vshl.i64 q7, q7, #26
- vadd.i64 q15, q8, q3
- vsub.i64 q9, q9, q13
- vshr.s64 q13, q14, #26
- vsub.i64 q6, q6, q7
- vshr.s64 q7, q15, #25
- vadd.i64 q10, q10, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q14, q10, q3
- vadd.i64 q1, q1, q7
- add r2, r3, #288
- vshl.i64 q7, q7, #25
- add r4, r3, #96
- vadd.i64 q15, q1, q11
- add r2, r2, #8
- vsub.i64 q2, q2, q13
- add r4, r4, #8
- vshr.s64 q13, q14, #25
- vsub.i64 q7, q8, q7
- vshr.s64 q8, q15, #26
- vadd.i64 q14, q13, q13
- vadd.i64 q12, q12, q8
- vtrn.32 d12, d14
- vshl.i64 q8, q8, #26
- vtrn.32 d13, d15
- vadd.i64 q3, q12, q3
- vadd.i64 q0, q0, q14
- vst1.8 d12, [r2, : 64]!
- vshl.i64 q7, q13, #4
- vst1.8 d13, [r4, : 64]!
- vsub.i64 q1, q1, q8
- vshr.s64 q3, q3, #25
- vadd.i64 q0, q0, q7
- vadd.i64 q5, q5, q3
- vshl.i64 q3, q3, #25
- vadd.i64 q6, q5, q11
- vadd.i64 q0, q0, q13
- vshl.i64 q7, q13, #25
- vadd.i64 q8, q0, q11
- vsub.i64 q3, q12, q3
- vshr.s64 q6, q6, #26
- vsub.i64 q7, q10, q7
- vtrn.32 d2, d6
- vshr.s64 q8, q8, #26
- vtrn.32 d3, d7
- vadd.i64 q3, q9, q6
- vst1.8 d2, [r2, : 64]
- vshl.i64 q6, q6, #26
- vst1.8 d3, [r4, : 64]
- vadd.i64 q1, q4, q8
- vtrn.32 d4, d14
- vshl.i64 q4, q8, #26
- vtrn.32 d5, d15
- vsub.i64 q5, q5, q6
- add r2, r2, #16
- vsub.i64 q0, q0, q4
- vst1.8 d4, [r2, : 64]
- add r4, r4, #16
- vst1.8 d5, [r4, : 64]
- vtrn.32 d10, d6
- vtrn.32 d11, d7
- sub r2, r2, #8
- sub r4, r4, #8
- vtrn.32 d0, d2
- vtrn.32 d1, d3
- vst1.8 d10, [r2, : 64]
- vst1.8 d11, [r4, : 64]
- sub r2, r2, #24
- sub r4, r4, #24
- vst1.8 d0, [r2, : 64]
- vst1.8 d1, [r4, : 64]
- add r2, sp, #512
- add r4, r3, #144
- add r5, r3, #192
- vld1.8 {d0-d1}, [r2, : 128]
- vld1.8 {d2-d3}, [r4, : 128]!
- vld1.8 {d4-d5}, [r5, : 128]!
- vzip.i32 q1, q2
- vld1.8 {d6-d7}, [r4, : 128]!
- vld1.8 {d8-d9}, [r5, : 128]!
- vshl.i32 q5, q1, #1
- vzip.i32 q3, q4
- vshl.i32 q6, q2, #1
- vld1.8 {d14}, [r4, : 64]
- vshl.i32 q8, q3, #1
- vld1.8 {d15}, [r5, : 64]
- vshl.i32 q9, q4, #1
- vmul.i32 d21, d7, d1
- vtrn.32 d14, d15
- vmul.i32 q11, q4, q0
- vmul.i32 q0, q7, q0
- vmull.s32 q12, d2, d2
- vmlal.s32 q12, d11, d1
- vmlal.s32 q12, d12, d0
- vmlal.s32 q12, d13, d23
- vmlal.s32 q12, d16, d22
- vmlal.s32 q12, d7, d21
- vmull.s32 q10, d2, d11
- vmlal.s32 q10, d4, d1
- vmlal.s32 q10, d13, d0
- vmlal.s32 q10, d6, d23
- vmlal.s32 q10, d17, d22
- vmull.s32 q13, d10, d4
- vmlal.s32 q13, d11, d3
- vmlal.s32 q13, d13, d1
- vmlal.s32 q13, d16, d0
- vmlal.s32 q13, d17, d23
- vmlal.s32 q13, d8, d22
- vmull.s32 q1, d10, d5
- vmlal.s32 q1, d11, d4
- vmlal.s32 q1, d6, d1
- vmlal.s32 q1, d17, d0
- vmlal.s32 q1, d8, d23
- vmull.s32 q14, d10, d6
- vmlal.s32 q14, d11, d13
- vmlal.s32 q14, d4, d4
- vmlal.s32 q14, d17, d1
- vmlal.s32 q14, d18, d0
- vmlal.s32 q14, d9, d23
- vmull.s32 q11, d10, d7
- vmlal.s32 q11, d11, d6
- vmlal.s32 q11, d12, d5
- vmlal.s32 q11, d8, d1
- vmlal.s32 q11, d19, d0
- vmull.s32 q15, d10, d8
- vmlal.s32 q15, d11, d17
- vmlal.s32 q15, d12, d6
- vmlal.s32 q15, d13, d5
- vmlal.s32 q15, d19, d1
- vmlal.s32 q15, d14, d0
- vmull.s32 q2, d10, d9
- vmlal.s32 q2, d11, d8
- vmlal.s32 q2, d12, d7
- vmlal.s32 q2, d13, d6
- vmlal.s32 q2, d14, d1
- vmull.s32 q0, d15, d1
- vmlal.s32 q0, d10, d14
- vmlal.s32 q0, d11, d19
- vmlal.s32 q0, d12, d8
- vmlal.s32 q0, d13, d17
- vmlal.s32 q0, d6, d6
- add r2, sp, #480
- vld1.8 {d18-d19}, [r2, : 128]!
- vmull.s32 q3, d16, d7
- vmlal.s32 q3, d10, d15
- vmlal.s32 q3, d11, d14
- vmlal.s32 q3, d12, d9
- vmlal.s32 q3, d13, d8
- vld1.8 {d8-d9}, [r2, : 128]
- vadd.i64 q5, q12, q9
- vadd.i64 q6, q15, q9
- vshr.s64 q5, q5, #26
- vshr.s64 q6, q6, #26
- vadd.i64 q7, q10, q5
- vshl.i64 q5, q5, #26
- vadd.i64 q8, q7, q4
- vadd.i64 q2, q2, q6
- vshl.i64 q6, q6, #26
- vadd.i64 q10, q2, q4
- vsub.i64 q5, q12, q5
- vshr.s64 q8, q8, #25
- vsub.i64 q6, q15, q6
- vshr.s64 q10, q10, #25
- vadd.i64 q12, q13, q8
- vshl.i64 q8, q8, #25
- vadd.i64 q13, q12, q9
- vadd.i64 q0, q0, q10
- vsub.i64 q7, q7, q8
- vshr.s64 q8, q13, #26
- vshl.i64 q10, q10, #25
- vadd.i64 q13, q0, q9
- vadd.i64 q1, q1, q8
- vshl.i64 q8, q8, #26
- vadd.i64 q15, q1, q4
- vsub.i64 q2, q2, q10
- vshr.s64 q10, q13, #26
- vsub.i64 q8, q12, q8
- vshr.s64 q12, q15, #25
- vadd.i64 q3, q3, q10
- vshl.i64 q10, q10, #26
- vadd.i64 q13, q3, q4
- vadd.i64 q14, q14, q12
- add r2, r3, #144
- vshl.i64 q12, q12, #25
- add r4, r3, #192
- vadd.i64 q15, q14, q9
- add r2, r2, #8
- vsub.i64 q0, q0, q10
- add r4, r4, #8
- vshr.s64 q10, q13, #25
- vsub.i64 q1, q1, q12
- vshr.s64 q12, q15, #26
- vadd.i64 q13, q10, q10
- vadd.i64 q11, q11, q12
- vtrn.32 d16, d2
- vshl.i64 q12, q12, #26
- vtrn.32 d17, d3
- vadd.i64 q1, q11, q4
- vadd.i64 q4, q5, q13
- vst1.8 d16, [r2, : 64]!
- vshl.i64 q5, q10, #4
- vst1.8 d17, [r4, : 64]!
- vsub.i64 q8, q14, q12
- vshr.s64 q1, q1, #25
- vadd.i64 q4, q4, q5
- vadd.i64 q5, q6, q1
- vshl.i64 q1, q1, #25
- vadd.i64 q6, q5, q9
- vadd.i64 q4, q4, q10
- vshl.i64 q10, q10, #25
- vadd.i64 q9, q4, q9
- vsub.i64 q1, q11, q1
- vshr.s64 q6, q6, #26
- vsub.i64 q3, q3, q10
- vtrn.32 d16, d2
- vshr.s64 q9, q9, #26
- vtrn.32 d17, d3
- vadd.i64 q1, q2, q6
- vst1.8 d16, [r2, : 64]
- vshl.i64 q2, q6, #26
- vst1.8 d17, [r4, : 64]
- vadd.i64 q6, q7, q9
- vtrn.32 d0, d6
- vshl.i64 q7, q9, #26
- vtrn.32 d1, d7
- vsub.i64 q2, q5, q2
- add r2, r2, #16
- vsub.i64 q3, q4, q7
- vst1.8 d0, [r2, : 64]
- add r4, r4, #16
- vst1.8 d1, [r4, : 64]
- vtrn.32 d4, d2
- vtrn.32 d5, d3
- sub r2, r2, #8
- sub r4, r4, #8
- vtrn.32 d6, d12
- vtrn.32 d7, d13
- vst1.8 d4, [r2, : 64]
- vst1.8 d5, [r4, : 64]
- sub r2, r2, #24
- sub r4, r4, #24
- vst1.8 d6, [r2, : 64]
- vst1.8 d7, [r4, : 64]
- add r2, r3, #336
- add r4, r3, #288
- vld1.8 {d0-d1}, [r2, : 128]!
- vld1.8 {d2-d3}, [r4, : 128]!
- vadd.i32 q0, q0, q1
- vld1.8 {d2-d3}, [r2, : 128]!
- vld1.8 {d4-d5}, [r4, : 128]!
- vadd.i32 q1, q1, q2
- add r5, r3, #288
- vld1.8 {d4}, [r2, : 64]
- vld1.8 {d6}, [r4, : 64]
- vadd.i32 q2, q2, q3
- vst1.8 {d0-d1}, [r5, : 128]!
- vst1.8 {d2-d3}, [r5, : 128]!
- vst1.8 d4, [r5, : 64]
- add r2, r3, #48
- add r4, r3, #144
- vld1.8 {d0-d1}, [r4, : 128]!
- vld1.8 {d2-d3}, [r4, : 128]!
- vld1.8 {d4}, [r4, : 64]
- add r4, r3, #288
- vld1.8 {d6-d7}, [r4, : 128]!
- vtrn.32 q0, q3
- vld1.8 {d8-d9}, [r4, : 128]!
- vshl.i32 q5, q0, #4
- vtrn.32 q1, q4
- vshl.i32 q6, q3, #4
- vadd.i32 q5, q5, q0
- vadd.i32 q6, q6, q3
- vshl.i32 q7, q1, #4
- vld1.8 {d5}, [r4, : 64]
- vshl.i32 q8, q4, #4
- vtrn.32 d4, d5
- vadd.i32 q7, q7, q1
- vadd.i32 q8, q8, q4
- vld1.8 {d18-d19}, [r2, : 128]!
- vshl.i32 q10, q2, #4
- vld1.8 {d22-d23}, [r2, : 128]!
- vadd.i32 q10, q10, q2
- vld1.8 {d24}, [r2, : 64]
- vadd.i32 q5, q5, q0
- add r2, r3, #240
- vld1.8 {d26-d27}, [r2, : 128]!
- vadd.i32 q6, q6, q3
- vld1.8 {d28-d29}, [r2, : 128]!
- vadd.i32 q8, q8, q4
- vld1.8 {d25}, [r2, : 64]
- vadd.i32 q10, q10, q2
- vtrn.32 q9, q13
- vadd.i32 q7, q7, q1
- vadd.i32 q5, q5, q0
- vtrn.32 q11, q14
- vadd.i32 q6, q6, q3
- add r2, sp, #528
- vadd.i32 q10, q10, q2
- vtrn.32 d24, d25
- vst1.8 {d12-d13}, [r2, : 128]!
- vshl.i32 q6, q13, #1
- vst1.8 {d20-d21}, [r2, : 128]!
- vshl.i32 q10, q14, #1
- vst1.8 {d12-d13}, [r2, : 128]!
- vshl.i32 q15, q12, #1
- vadd.i32 q8, q8, q4
- vext.32 d10, d31, d30, #0
- vadd.i32 q7, q7, q1
- vst1.8 {d16-d17}, [r2, : 128]!
- vmull.s32 q8, d18, d5
- vmlal.s32 q8, d26, d4
- vmlal.s32 q8, d19, d9
- vmlal.s32 q8, d27, d3
- vmlal.s32 q8, d22, d8
- vmlal.s32 q8, d28, d2
- vmlal.s32 q8, d23, d7
- vmlal.s32 q8, d29, d1
- vmlal.s32 q8, d24, d6
- vmlal.s32 q8, d25, d0
- vst1.8 {d14-d15}, [r2, : 128]!
- vmull.s32 q2, d18, d4
- vmlal.s32 q2, d12, d9
- vmlal.s32 q2, d13, d8
- vmlal.s32 q2, d19, d3
- vmlal.s32 q2, d22, d2
- vmlal.s32 q2, d23, d1
- vmlal.s32 q2, d24, d0
- vst1.8 {d20-d21}, [r2, : 128]!
- vmull.s32 q7, d18, d9
- vmlal.s32 q7, d26, d3
- vmlal.s32 q7, d19, d8
- vmlal.s32 q7, d27, d2
- vmlal.s32 q7, d22, d7
- vmlal.s32 q7, d28, d1
- vmlal.s32 q7, d23, d6
- vmlal.s32 q7, d29, d0
- vst1.8 {d10-d11}, [r2, : 128]!
- vmull.s32 q5, d18, d3
- vmlal.s32 q5, d19, d2
- vmlal.s32 q5, d22, d1
- vmlal.s32 q5, d23, d0
- vmlal.s32 q5, d12, d8
- vst1.8 {d16-d17}, [r2, : 128]!
- vmull.s32 q4, d18, d8
- vmlal.s32 q4, d26, d2
- vmlal.s32 q4, d19, d7
- vmlal.s32 q4, d27, d1
- vmlal.s32 q4, d22, d6
- vmlal.s32 q4, d28, d0
- vmull.s32 q8, d18, d7
- vmlal.s32 q8, d26, d1
- vmlal.s32 q8, d19, d6
- vmlal.s32 q8, d27, d0
- add r2, sp, #544
- vld1.8 {d20-d21}, [r2, : 128]
- vmlal.s32 q7, d24, d21
- vmlal.s32 q7, d25, d20
- vmlal.s32 q4, d23, d21
- vmlal.s32 q4, d29, d20
- vmlal.s32 q8, d22, d21
- vmlal.s32 q8, d28, d20
- vmlal.s32 q5, d24, d20
- vst1.8 {d14-d15}, [r2, : 128]
- vmull.s32 q7, d18, d6
- vmlal.s32 q7, d26, d0
- add r2, sp, #624
- vld1.8 {d30-d31}, [r2, : 128]
- vmlal.s32 q2, d30, d21
- vmlal.s32 q7, d19, d21
- vmlal.s32 q7, d27, d20
- add r2, sp, #592
- vld1.8 {d26-d27}, [r2, : 128]
- vmlal.s32 q4, d25, d27
- vmlal.s32 q8, d29, d27
- vmlal.s32 q8, d25, d26
- vmlal.s32 q7, d28, d27
- vmlal.s32 q7, d29, d26
- add r2, sp, #576
- vld1.8 {d28-d29}, [r2, : 128]
- vmlal.s32 q4, d24, d29
- vmlal.s32 q8, d23, d29
- vmlal.s32 q8, d24, d28
- vmlal.s32 q7, d22, d29
- vmlal.s32 q7, d23, d28
- vst1.8 {d8-d9}, [r2, : 128]
- add r2, sp, #528
- vld1.8 {d8-d9}, [r2, : 128]
- vmlal.s32 q7, d24, d9
- vmlal.s32 q7, d25, d31
- vmull.s32 q1, d18, d2
- vmlal.s32 q1, d19, d1
- vmlal.s32 q1, d22, d0
- vmlal.s32 q1, d24, d27
- vmlal.s32 q1, d23, d20
- vmlal.s32 q1, d12, d7
- vmlal.s32 q1, d13, d6
- vmull.s32 q6, d18, d1
- vmlal.s32 q6, d19, d0
- vmlal.s32 q6, d23, d27
- vmlal.s32 q6, d22, d20
- vmlal.s32 q6, d24, d26
- vmull.s32 q0, d18, d0
- vmlal.s32 q0, d22, d27
- vmlal.s32 q0, d23, d26
- vmlal.s32 q0, d24, d31
- vmlal.s32 q0, d19, d20
- add r2, sp, #608
- vld1.8 {d18-d19}, [r2, : 128]
- vmlal.s32 q2, d18, d7
- vmlal.s32 q5, d18, d6
- vmlal.s32 q1, d18, d21
- vmlal.s32 q0, d18, d28
- vmlal.s32 q6, d18, d29
- vmlal.s32 q2, d19, d6
- vmlal.s32 q5, d19, d21
- vmlal.s32 q1, d19, d29
- vmlal.s32 q0, d19, d9
- vmlal.s32 q6, d19, d28
- add r2, sp, #560
- vld1.8 {d18-d19}, [r2, : 128]
- add r2, sp, #480
- vld1.8 {d22-d23}, [r2, : 128]
- vmlal.s32 q5, d19, d7
- vmlal.s32 q0, d18, d21
- vmlal.s32 q0, d19, d29
- vmlal.s32 q6, d18, d6
- add r2, sp, #496
- vld1.8 {d6-d7}, [r2, : 128]
- vmlal.s32 q6, d19, d21
- add r2, sp, #544
- vld1.8 {d18-d19}, [r2, : 128]
- vmlal.s32 q0, d30, d8
- add r2, sp, #640
- vld1.8 {d20-d21}, [r2, : 128]
- vmlal.s32 q5, d30, d29
- add r2, sp, #576
- vld1.8 {d24-d25}, [r2, : 128]
- vmlal.s32 q1, d30, d28
- vadd.i64 q13, q0, q11
- vadd.i64 q14, q5, q11
- vmlal.s32 q6, d30, d9
- vshr.s64 q4, q13, #26
- vshr.s64 q13, q14, #26
- vadd.i64 q7, q7, q4
- vshl.i64 q4, q4, #26
- vadd.i64 q14, q7, q3
- vadd.i64 q9, q9, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q15, q9, q3
- vsub.i64 q0, q0, q4
- vshr.s64 q4, q14, #25
- vsub.i64 q5, q5, q13
- vshr.s64 q13, q15, #25
- vadd.i64 q6, q6, q4
- vshl.i64 q4, q4, #25
- vadd.i64 q14, q6, q11
- vadd.i64 q2, q2, q13
- vsub.i64 q4, q7, q4
- vshr.s64 q7, q14, #26
- vshl.i64 q13, q13, #25
- vadd.i64 q14, q2, q11
- vadd.i64 q8, q8, q7
- vshl.i64 q7, q7, #26
- vadd.i64 q15, q8, q3
- vsub.i64 q9, q9, q13
- vshr.s64 q13, q14, #26
- vsub.i64 q6, q6, q7
- vshr.s64 q7, q15, #25
- vadd.i64 q10, q10, q13
- vshl.i64 q13, q13, #26
- vadd.i64 q14, q10, q3
- vadd.i64 q1, q1, q7
- add r2, r3, #240
- vshl.i64 q7, q7, #25
- add r4, r3, #144
- vadd.i64 q15, q1, q11
- add r2, r2, #8
- vsub.i64 q2, q2, q13
- add r4, r4, #8
- vshr.s64 q13, q14, #25
- vsub.i64 q7, q8, q7
- vshr.s64 q8, q15, #26
- vadd.i64 q14, q13, q13
- vadd.i64 q12, q12, q8
- vtrn.32 d12, d14
- vshl.i64 q8, q8, #26
- vtrn.32 d13, d15
- vadd.i64 q3, q12, q3
- vadd.i64 q0, q0, q14
- vst1.8 d12, [r2, : 64]!
- vshl.i64 q7, q13, #4
- vst1.8 d13, [r4, : 64]!
- vsub.i64 q1, q1, q8
- vshr.s64 q3, q3, #25
- vadd.i64 q0, q0, q7
- vadd.i64 q5, q5, q3
- vshl.i64 q3, q3, #25
- vadd.i64 q6, q5, q11
- vadd.i64 q0, q0, q13
- vshl.i64 q7, q13, #25
- vadd.i64 q8, q0, q11
- vsub.i64 q3, q12, q3
- vshr.s64 q6, q6, #26
- vsub.i64 q7, q10, q7
- vtrn.32 d2, d6
- vshr.s64 q8, q8, #26
- vtrn.32 d3, d7
- vadd.i64 q3, q9, q6
- vst1.8 d2, [r2, : 64]
- vshl.i64 q6, q6, #26
- vst1.8 d3, [r4, : 64]
- vadd.i64 q1, q4, q8
- vtrn.32 d4, d14
- vshl.i64 q4, q8, #26
- vtrn.32 d5, d15
- vsub.i64 q5, q5, q6
- add r2, r2, #16
- vsub.i64 q0, q0, q4
- vst1.8 d4, [r2, : 64]
- add r4, r4, #16
- vst1.8 d5, [r4, : 64]
- vtrn.32 d10, d6
- vtrn.32 d11, d7
- sub r2, r2, #8
- sub r4, r4, #8
- vtrn.32 d0, d2
- vtrn.32 d1, d3
- vst1.8 d10, [r2, : 64]
- vst1.8 d11, [r4, : 64]
- sub r2, r2, #24
- sub r4, r4, #24
- vst1.8 d0, [r2, : 64]
- vst1.8 d1, [r4, : 64]
- ldr r2, [sp, #456]
- ldr r4, [sp, #460]
- subs r5, r2, #1
- bge .Lmainloop
- add r1, r3, #144
- add r2, r3, #336
- vld1.8 {d0-d1}, [r1, : 128]!
- vld1.8 {d2-d3}, [r1, : 128]!
- vld1.8 {d4}, [r1, : 64]
- vst1.8 {d0-d1}, [r2, : 128]!
- vst1.8 {d2-d3}, [r2, : 128]!
- vst1.8 d4, [r2, : 64]
- movw r1, #0
-.Linvertloop:
- add r2, r3, #144
- movw r4, #0
- movw r5, #2
- cmp r1, #1
- moveq r5, #1
- addeq r2, r3, #336
- addeq r4, r3, #48
- cmp r1, #2
- moveq r5, #1
- addeq r2, r3, #48
- cmp r1, #3
- moveq r5, #5
- addeq r4, r3, #336
- cmp r1, #4
- moveq r5, #10
- cmp r1, #5
- moveq r5, #20
- cmp r1, #6
- moveq r5, #10
- addeq r2, r3, #336
- addeq r4, r3, #336
- cmp r1, #7
- moveq r5, #50
- cmp r1, #8
- moveq r5, #100
- cmp r1, #9
- moveq r5, #50
- addeq r2, r3, #336
- cmp r1, #10
- moveq r5, #5
- addeq r2, r3, #48
- cmp r1, #11
- moveq r5, #0
- addeq r2, r3, #96
- add r6, r3, #144
- add r7, r3, #288
- vld1.8 {d0-d1}, [r6, : 128]!
- vld1.8 {d2-d3}, [r6, : 128]!
- vld1.8 {d4}, [r6, : 64]
- vst1.8 {d0-d1}, [r7, : 128]!
- vst1.8 {d2-d3}, [r7, : 128]!
- vst1.8 d4, [r7, : 64]
- cmp r5, #0
- beq .Lskipsquaringloop
-.Lsquaringloop:
- add r6, r3, #288
- add r7, r3, #288
- add r8, r3, #288
- vmov.i32 q0, #19
- vmov.i32 q1, #0
- vmov.i32 q2, #1
- vzip.i32 q1, q2
- vld1.8 {d4-d5}, [r7, : 128]!
- vld1.8 {d6-d7}, [r7, : 128]!
- vld1.8 {d9}, [r7, : 64]
- vld1.8 {d10-d11}, [r6, : 128]!
- add r7, sp, #384
- vld1.8 {d12-d13}, [r6, : 128]!
- vmul.i32 q7, q2, q0
- vld1.8 {d8}, [r6, : 64]
- vext.32 d17, d11, d10, #1
- vmul.i32 q9, q3, q0
- vext.32 d16, d10, d8, #1
- vshl.u32 q10, q5, q1
- vext.32 d22, d14, d4, #1
- vext.32 d24, d18, d6, #1
- vshl.u32 q13, q6, q1
- vshl.u32 d28, d8, d2
- vrev64.i32 d22, d22
- vmul.i32 d1, d9, d1
- vrev64.i32 d24, d24
- vext.32 d29, d8, d13, #1
- vext.32 d0, d1, d9, #1
- vrev64.i32 d0, d0
- vext.32 d2, d9, d1, #1
- vext.32 d23, d15, d5, #1
- vmull.s32 q4, d20, d4
- vrev64.i32 d23, d23
- vmlal.s32 q4, d21, d1
- vrev64.i32 d2, d2
- vmlal.s32 q4, d26, d19
- vext.32 d3, d5, d15, #1
- vmlal.s32 q4, d27, d18
- vrev64.i32 d3, d3
- vmlal.s32 q4, d28, d15
- vext.32 d14, d12, d11, #1
- vmull.s32 q5, d16, d23
- vext.32 d15, d13, d12, #1
- vmlal.s32 q5, d17, d4
- vst1.8 d8, [r7, : 64]!
- vmlal.s32 q5, d14, d1
- vext.32 d12, d9, d8, #0
- vmlal.s32 q5, d15, d19
- vmov.i64 d13, #0
- vmlal.s32 q5, d29, d18
- vext.32 d25, d19, d7, #1
- vmlal.s32 q6, d20, d5
- vrev64.i32 d25, d25
- vmlal.s32 q6, d21, d4
- vst1.8 d11, [r7, : 64]!
- vmlal.s32 q6, d26, d1
- vext.32 d9, d10, d10, #0
- vmlal.s32 q6, d27, d19
- vmov.i64 d8, #0
- vmlal.s32 q6, d28, d18
- vmlal.s32 q4, d16, d24
- vmlal.s32 q4, d17, d5
- vmlal.s32 q4, d14, d4
- vst1.8 d12, [r7, : 64]!
- vmlal.s32 q4, d15, d1
- vext.32 d10, d13, d12, #0
- vmlal.s32 q4, d29, d19
- vmov.i64 d11, #0
- vmlal.s32 q5, d20, d6
- vmlal.s32 q5, d21, d5
- vmlal.s32 q5, d26, d4
- vext.32 d13, d8, d8, #0
- vmlal.s32 q5, d27, d1
- vmov.i64 d12, #0
- vmlal.s32 q5, d28, d19
- vst1.8 d9, [r7, : 64]!
- vmlal.s32 q6, d16, d25
- vmlal.s32 q6, d17, d6
- vst1.8 d10, [r7, : 64]
- vmlal.s32 q6, d14, d5
- vext.32 d8, d11, d10, #0
- vmlal.s32 q6, d15, d4
- vmov.i64 d9, #0
- vmlal.s32 q6, d29, d1
- vmlal.s32 q4, d20, d7
- vmlal.s32 q4, d21, d6
- vmlal.s32 q4, d26, d5
- vext.32 d11, d12, d12, #0
- vmlal.s32 q4, d27, d4
- vmov.i64 d10, #0
- vmlal.s32 q4, d28, d1
- vmlal.s32 q5, d16, d0
- sub r6, r7, #32
- vmlal.s32 q5, d17, d7
- vmlal.s32 q5, d14, d6
- vext.32 d30, d9, d8, #0
- vmlal.s32 q5, d15, d5
- vld1.8 {d31}, [r6, : 64]!
- vmlal.s32 q5, d29, d4
- vmlal.s32 q15, d20, d0
- vext.32 d0, d6, d18, #1
- vmlal.s32 q15, d21, d25
- vrev64.i32 d0, d0
- vmlal.s32 q15, d26, d24
- vext.32 d1, d7, d19, #1
- vext.32 d7, d10, d10, #0
- vmlal.s32 q15, d27, d23
- vrev64.i32 d1, d1
- vld1.8 {d6}, [r6, : 64]
- vmlal.s32 q15, d28, d22
- vmlal.s32 q3, d16, d4
- add r6, r6, #24
- vmlal.s32 q3, d17, d2
- vext.32 d4, d31, d30, #0
- vmov d17, d11
- vmlal.s32 q3, d14, d1
- vext.32 d11, d13, d13, #0
- vext.32 d13, d30, d30, #0
- vmlal.s32 q3, d15, d0
- vext.32 d1, d8, d8, #0
- vmlal.s32 q3, d29, d3
- vld1.8 {d5}, [r6, : 64]
- sub r6, r6, #16
- vext.32 d10, d6, d6, #0
- vmov.i32 q1, #0xffffffff
- vshl.i64 q4, q1, #25
- add r7, sp, #480
- vld1.8 {d14-d15}, [r7, : 128]
- vadd.i64 q9, q2, q7
- vshl.i64 q1, q1, #26
- vshr.s64 q10, q9, #26
- vld1.8 {d0}, [r6, : 64]!
- vadd.i64 q5, q5, q10
- vand q9, q9, q1
- vld1.8 {d16}, [r6, : 64]!
- add r6, sp, #496
- vld1.8 {d20-d21}, [r6, : 128]
- vadd.i64 q11, q5, q10
- vsub.i64 q2, q2, q9
- vshr.s64 q9, q11, #25
- vext.32 d12, d5, d4, #0
- vand q11, q11, q4
- vadd.i64 q0, q0, q9
- vmov d19, d7
- vadd.i64 q3, q0, q7
- vsub.i64 q5, q5, q11
- vshr.s64 q11, q3, #26
- vext.32 d18, d11, d10, #0
- vand q3, q3, q1
- vadd.i64 q8, q8, q11
- vadd.i64 q11, q8, q10
- vsub.i64 q0, q0, q3
- vshr.s64 q3, q11, #25
- vand q11, q11, q4
- vadd.i64 q3, q6, q3
- vadd.i64 q6, q3, q7
- vsub.i64 q8, q8, q11
- vshr.s64 q11, q6, #26
- vand q6, q6, q1
- vadd.i64 q9, q9, q11
- vadd.i64 d25, d19, d21
- vsub.i64 q3, q3, q6
- vshr.s64 d23, d25, #25
- vand q4, q12, q4
- vadd.i64 d21, d23, d23
- vshl.i64 d25, d23, #4
- vadd.i64 d21, d21, d23
- vadd.i64 d25, d25, d21
- vadd.i64 d4, d4, d25
- vzip.i32 q0, q8
- vadd.i64 d12, d4, d14
- add r6, r8, #8
- vst1.8 d0, [r6, : 64]
- vsub.i64 d19, d19, d9
- add r6, r6, #16
- vst1.8 d16, [r6, : 64]
- vshr.s64 d22, d12, #26
- vand q0, q6, q1
- vadd.i64 d10, d10, d22
- vzip.i32 q3, q9
- vsub.i64 d4, d4, d0
- sub r6, r6, #8
- vst1.8 d6, [r6, : 64]
- add r6, r6, #16
- vst1.8 d18, [r6, : 64]
- vzip.i32 q2, q5
- sub r6, r6, #32
- vst1.8 d4, [r6, : 64]
- subs r5, r5, #1
- bhi .Lsquaringloop
-.Lskipsquaringloop:
- mov r2, r2
- add r5, r3, #288
- add r6, r3, #144
- vmov.i32 q0, #19
- vmov.i32 q1, #0
- vmov.i32 q2, #1
- vzip.i32 q1, q2
- vld1.8 {d4-d5}, [r5, : 128]!
- vld1.8 {d6-d7}, [r5, : 128]!
- vld1.8 {d9}, [r5, : 64]
- vld1.8 {d10-d11}, [r2, : 128]!
- add r5, sp, #384
- vld1.8 {d12-d13}, [r2, : 128]!
- vmul.i32 q7, q2, q0
- vld1.8 {d8}, [r2, : 64]
- vext.32 d17, d11, d10, #1
- vmul.i32 q9, q3, q0
- vext.32 d16, d10, d8, #1
- vshl.u32 q10, q5, q1
- vext.32 d22, d14, d4, #1
- vext.32 d24, d18, d6, #1
- vshl.u32 q13, q6, q1
- vshl.u32 d28, d8, d2
- vrev64.i32 d22, d22
- vmul.i32 d1, d9, d1
- vrev64.i32 d24, d24
- vext.32 d29, d8, d13, #1
- vext.32 d0, d1, d9, #1
- vrev64.i32 d0, d0
- vext.32 d2, d9, d1, #1
- vext.32 d23, d15, d5, #1
- vmull.s32 q4, d20, d4
- vrev64.i32 d23, d23
- vmlal.s32 q4, d21, d1
- vrev64.i32 d2, d2
- vmlal.s32 q4, d26, d19
- vext.32 d3, d5, d15, #1
- vmlal.s32 q4, d27, d18
- vrev64.i32 d3, d3
- vmlal.s32 q4, d28, d15
- vext.32 d14, d12, d11, #1
- vmull.s32 q5, d16, d23
- vext.32 d15, d13, d12, #1
- vmlal.s32 q5, d17, d4
- vst1.8 d8, [r5, : 64]!
- vmlal.s32 q5, d14, d1
- vext.32 d12, d9, d8, #0
- vmlal.s32 q5, d15, d19
- vmov.i64 d13, #0
- vmlal.s32 q5, d29, d18
- vext.32 d25, d19, d7, #1
- vmlal.s32 q6, d20, d5
- vrev64.i32 d25, d25
- vmlal.s32 q6, d21, d4
- vst1.8 d11, [r5, : 64]!
- vmlal.s32 q6, d26, d1
- vext.32 d9, d10, d10, #0
- vmlal.s32 q6, d27, d19
- vmov.i64 d8, #0
- vmlal.s32 q6, d28, d18
- vmlal.s32 q4, d16, d24
- vmlal.s32 q4, d17, d5
- vmlal.s32 q4, d14, d4
- vst1.8 d12, [r5, : 64]!
- vmlal.s32 q4, d15, d1
- vext.32 d10, d13, d12, #0
- vmlal.s32 q4, d29, d19
- vmov.i64 d11, #0
- vmlal.s32 q5, d20, d6
- vmlal.s32 q5, d21, d5
- vmlal.s32 q5, d26, d4
- vext.32 d13, d8, d8, #0
- vmlal.s32 q5, d27, d1
- vmov.i64 d12, #0
- vmlal.s32 q5, d28, d19
- vst1.8 d9, [r5, : 64]!
- vmlal.s32 q6, d16, d25
- vmlal.s32 q6, d17, d6
- vst1.8 d10, [r5, : 64]
- vmlal.s32 q6, d14, d5
- vext.32 d8, d11, d10, #0
- vmlal.s32 q6, d15, d4
- vmov.i64 d9, #0
- vmlal.s32 q6, d29, d1
- vmlal.s32 q4, d20, d7
- vmlal.s32 q4, d21, d6
- vmlal.s32 q4, d26, d5
- vext.32 d11, d12, d12, #0
- vmlal.s32 q4, d27, d4
- vmov.i64 d10, #0
- vmlal.s32 q4, d28, d1
- vmlal.s32 q5, d16, d0
- sub r2, r5, #32
- vmlal.s32 q5, d17, d7
- vmlal.s32 q5, d14, d6
- vext.32 d30, d9, d8, #0
- vmlal.s32 q5, d15, d5
- vld1.8 {d31}, [r2, : 64]!
- vmlal.s32 q5, d29, d4
- vmlal.s32 q15, d20, d0
- vext.32 d0, d6, d18, #1
- vmlal.s32 q15, d21, d25
- vrev64.i32 d0, d0
- vmlal.s32 q15, d26, d24
- vext.32 d1, d7, d19, #1
- vext.32 d7, d10, d10, #0
- vmlal.s32 q15, d27, d23
- vrev64.i32 d1, d1
- vld1.8 {d6}, [r2, : 64]
- vmlal.s32 q15, d28, d22
- vmlal.s32 q3, d16, d4
- add r2, r2, #24
- vmlal.s32 q3, d17, d2
- vext.32 d4, d31, d30, #0
- vmov d17, d11
- vmlal.s32 q3, d14, d1
- vext.32 d11, d13, d13, #0
- vext.32 d13, d30, d30, #0
- vmlal.s32 q3, d15, d0
- vext.32 d1, d8, d8, #0
- vmlal.s32 q3, d29, d3
- vld1.8 {d5}, [r2, : 64]
- sub r2, r2, #16
- vext.32 d10, d6, d6, #0
- vmov.i32 q1, #0xffffffff
- vshl.i64 q4, q1, #25
- add r5, sp, #480
- vld1.8 {d14-d15}, [r5, : 128]
- vadd.i64 q9, q2, q7
- vshl.i64 q1, q1, #26
- vshr.s64 q10, q9, #26
- vld1.8 {d0}, [r2, : 64]!
- vadd.i64 q5, q5, q10
- vand q9, q9, q1
- vld1.8 {d16}, [r2, : 64]!
- add r2, sp, #496
- vld1.8 {d20-d21}, [r2, : 128]
- vadd.i64 q11, q5, q10
- vsub.i64 q2, q2, q9
- vshr.s64 q9, q11, #25
- vext.32 d12, d5, d4, #0
- vand q11, q11, q4
- vadd.i64 q0, q0, q9
- vmov d19, d7
- vadd.i64 q3, q0, q7
- vsub.i64 q5, q5, q11
- vshr.s64 q11, q3, #26
- vext.32 d18, d11, d10, #0
- vand q3, q3, q1
- vadd.i64 q8, q8, q11
- vadd.i64 q11, q8, q10
- vsub.i64 q0, q0, q3
- vshr.s64 q3, q11, #25
- vand q11, q11, q4
- vadd.i64 q3, q6, q3
- vadd.i64 q6, q3, q7
- vsub.i64 q8, q8, q11
- vshr.s64 q11, q6, #26
- vand q6, q6, q1
- vadd.i64 q9, q9, q11
- vadd.i64 d25, d19, d21
- vsub.i64 q3, q3, q6
- vshr.s64 d23, d25, #25
- vand q4, q12, q4
- vadd.i64 d21, d23, d23
- vshl.i64 d25, d23, #4
- vadd.i64 d21, d21, d23
- vadd.i64 d25, d25, d21
- vadd.i64 d4, d4, d25
- vzip.i32 q0, q8
- vadd.i64 d12, d4, d14
- add r2, r6, #8
- vst1.8 d0, [r2, : 64]
- vsub.i64 d19, d19, d9
- add r2, r2, #16
- vst1.8 d16, [r2, : 64]
- vshr.s64 d22, d12, #26
- vand q0, q6, q1
- vadd.i64 d10, d10, d22
- vzip.i32 q3, q9
- vsub.i64 d4, d4, d0
- sub r2, r2, #8
- vst1.8 d6, [r2, : 64]
- add r2, r2, #16
- vst1.8 d18, [r2, : 64]
- vzip.i32 q2, q5
- sub r2, r2, #32
- vst1.8 d4, [r2, : 64]
- cmp r4, #0
- beq .Lskippostcopy
- add r2, r3, #144
- mov r4, r4
- vld1.8 {d0-d1}, [r2, : 128]!
- vld1.8 {d2-d3}, [r2, : 128]!
- vld1.8 {d4}, [r2, : 64]
- vst1.8 {d0-d1}, [r4, : 128]!
- vst1.8 {d2-d3}, [r4, : 128]!
- vst1.8 d4, [r4, : 64]
-.Lskippostcopy:
- cmp r1, #1
- bne .Lskipfinalcopy
- add r2, r3, #288
- add r4, r3, #144
- vld1.8 {d0-d1}, [r2, : 128]!
- vld1.8 {d2-d3}, [r2, : 128]!
- vld1.8 {d4}, [r2, : 64]
- vst1.8 {d0-d1}, [r4, : 128]!
- vst1.8 {d2-d3}, [r4, : 128]!
- vst1.8 d4, [r4, : 64]
-.Lskipfinalcopy:
- add r1, r1, #1
- cmp r1, #12
- blo .Linvertloop
- add r1, r3, #144
- ldr r2, [r1], #4
- ldr r3, [r1], #4
- ldr r4, [r1], #4
- ldr r5, [r1], #4
- ldr r6, [r1], #4
- ldr r7, [r1], #4
- ldr r8, [r1], #4
- ldr r9, [r1], #4
- ldr r10, [r1], #4
- ldr r1, [r1]
- add r11, r1, r1, LSL #4
- add r11, r11, r1, LSL #1
- add r11, r11, #16777216
- mov r11, r11, ASR #25
- add r11, r11, r2
- mov r11, r11, ASR #26
- add r11, r11, r3
- mov r11, r11, ASR #25
- add r11, r11, r4
- mov r11, r11, ASR #26
- add r11, r11, r5
- mov r11, r11, ASR #25
- add r11, r11, r6
- mov r11, r11, ASR #26
- add r11, r11, r7
- mov r11, r11, ASR #25
- add r11, r11, r8
- mov r11, r11, ASR #26
- add r11, r11, r9
- mov r11, r11, ASR #25
- add r11, r11, r10
- mov r11, r11, ASR #26
- add r11, r11, r1
- mov r11, r11, ASR #25
- add r2, r2, r11
- add r2, r2, r11, LSL #1
- add r2, r2, r11, LSL #4
- mov r11, r2, ASR #26
- add r3, r3, r11
- sub r2, r2, r11, LSL #26
- mov r11, r3, ASR #25
- add r4, r4, r11
- sub r3, r3, r11, LSL #25
- mov r11, r4, ASR #26
- add r5, r5, r11
- sub r4, r4, r11, LSL #26
- mov r11, r5, ASR #25
- add r6, r6, r11
- sub r5, r5, r11, LSL #25
- mov r11, r6, ASR #26
- add r7, r7, r11
- sub r6, r6, r11, LSL #26
- mov r11, r7, ASR #25
- add r8, r8, r11
- sub r7, r7, r11, LSL #25
- mov r11, r8, ASR #26
- add r9, r9, r11
- sub r8, r8, r11, LSL #26
- mov r11, r9, ASR #25
- add r10, r10, r11
- sub r9, r9, r11, LSL #25
- mov r11, r10, ASR #26
- add r1, r1, r11
- sub r10, r10, r11, LSL #26
- mov r11, r1, ASR #25
- sub r1, r1, r11, LSL #25
- add r2, r2, r3, LSL #26
- mov r3, r3, LSR #6
- add r3, r3, r4, LSL #19
- mov r4, r4, LSR #13
- add r4, r4, r5, LSL #13
- mov r5, r5, LSR #19
- add r5, r5, r6, LSL #6
- add r6, r7, r8, LSL #25
- mov r7, r8, LSR #7
- add r7, r7, r9, LSL #19
- mov r8, r9, LSR #13
- add r8, r8, r10, LSL #12
- mov r9, r10, LSR #20
- add r1, r9, r1, LSL #6
- str r2, [r0]
- str r3, [r0, #4]
- str r4, [r0, #8]
- str r5, [r0, #12]
- str r6, [r0, #16]
- str r7, [r0, #20]
- str r8, [r0, #24]
- str r1, [r0, #28]
- movw r0, #0
- mov sp, ip
- pop {r4-r11, pc}
-ENDPROC(curve25519_neon)
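The NEON assembly deleted above implements the X25519 Montgomery ladder over GF(2^255 - 19), holding field elements in ten limbs of alternating 26 and 25 bits (the radix-2^25.5 representation inherited from SUPERCOP's neon2 code, visible in the alternating #26/#25 shift counts of every carry chain). The .Lmainloop prologue reads one scalar bit per iteration: LSR #3 selects the byte, AND #7 the bit within it, and the bit then drives a branch-free conditional swap of the ladder's working values through an XOR mask broadcast with vdup.i32. A minimal C sketch of those two steps, with hypothetical helper names and a bare 10-limb element rather than the kernel's actual interface:

#include <stdint.h>

/* Bit 'pos' of the clamped 32-byte scalar: byte index via pos >> 3,
 * bit index within that byte via pos & 7, as the LSR/AND pair does. */
static uint32_t scalar_bit(const uint8_t scalar[32], unsigned int pos)
{
	return (scalar[pos >> 3] >> (pos & 7)) & 1;
}

/* Constant-time conditional swap: swap == 1 yields an all-ones mask and
 * exchanges a and b; swap == 0 leaves both untouched.  No branch ever
 * depends on the secret bit, mirroring the veor/vand/veor sequence. */
static void cond_swap(uint32_t a[10], uint32_t b[10], uint32_t swap)
{
	uint32_t mask = 0u - swap;

	for (int i = 0; i < 10; i++) {
		uint32_t t = mask & (a[i] ^ b[i]);

		a[i] ^= t;
		b[i] ^= t;
	}
}

The assembly additionally keeps a running swap state (eor r4, r4, r2), so it only performs the masked exchange when the current bit differs from the previous one; the constant-time property is the same.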
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
deleted file mode 100644
index e7b87e09dd99f4..00000000000000
--- a/arch/arm/crypto/curve25519-glue.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- *
- * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
- * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
- * manually reworked for use in kernel space.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/internal/kpp.h>
-#include <crypto/internal/simd.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/jump_label.h>
-#include <linux/scatterlist.h>
-#include <crypto/curve25519.h>
-
-asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE],
- const u8 basepoint[CURVE25519_KEY_SIZE]);
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-
-void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
- const u8 scalar[CURVE25519_KEY_SIZE],
- const u8 point[CURVE25519_KEY_SIZE])
-{
- if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
- kernel_neon_begin();
- curve25519_neon(out, scalar, point);
- kernel_neon_end();
- } else {
- curve25519_generic(out, scalar, point);
- }
-}
-EXPORT_SYMBOL(curve25519_arch);
-
-void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE])
-{
- return curve25519_arch(pub, secret, curve25519_base_point);
-}
-EXPORT_SYMBOL(curve25519_base_arch);
-
-static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
- unsigned int len)
-{
- u8 *secret = kpp_tfm_ctx(tfm);
-
- if (!len)
- curve25519_generate_secret(secret);
- else if (len == CURVE25519_KEY_SIZE &&
- crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
- memcpy(secret, buf, CURVE25519_KEY_SIZE);
- else
- return -EINVAL;
- return 0;
-}
-
-static int curve25519_compute_value(struct kpp_request *req)
-{
- struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
- const u8 *secret = kpp_tfm_ctx(tfm);
- u8 public_key[CURVE25519_KEY_SIZE];
- u8 buf[CURVE25519_KEY_SIZE];
- int copied, nbytes;
- u8 const *bp;
-
- if (req->src) {
- copied = sg_copy_to_buffer(req->src,
- sg_nents_for_len(req->src,
- CURVE25519_KEY_SIZE),
- public_key, CURVE25519_KEY_SIZE);
- if (copied != CURVE25519_KEY_SIZE)
- return -EINVAL;
- bp = public_key;
- } else {
- bp = curve25519_base_point;
- }
-
- curve25519_arch(buf, secret, bp);
-
- /* might want less than we've got */
- nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
- copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
- nbytes),
- buf, nbytes);
- if (copied != nbytes)
- return -EINVAL;
- return 0;
-}
-
-static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
-{
- return CURVE25519_KEY_SIZE;
-}
-
-static struct kpp_alg curve25519_alg = {
- .base.cra_name = "curve25519",
- .base.cra_driver_name = "curve25519-neon",
- .base.cra_priority = 200,
- .base.cra_module = THIS_MODULE,
- .base.cra_ctxsize = CURVE25519_KEY_SIZE,
-
- .set_secret = curve25519_set_secret,
- .generate_public_key = curve25519_compute_value,
- .compute_shared_secret = curve25519_compute_value,
- .max_size = curve25519_max_size,
-};
-
-static int __init arm_curve25519_init(void)
-{
- if (elf_hwcap & HWCAP_NEON) {
- static_branch_enable(&have_neon);
- return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
- crypto_register_kpp(&curve25519_alg) : 0;
- }
- return 0;
-}
-
-static void __exit arm_curve25519_exit(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
- crypto_unregister_kpp(&curve25519_alg);
-}
-
-module_init(arm_curve25519_init);
-module_exit(arm_curve25519_exit);
-
-MODULE_ALIAS_CRYPTO("curve25519");
-MODULE_ALIAS_CRYPTO("curve25519-neon");
-MODULE_DESCRIPTION("Public key crypto: Curve25519 (NEON-accelerated)");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index b5546a3ac9c5bd..5bc610c1e13a5b 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -559,7 +559,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 4ea0d686e28eaa..0f239c56bfce26 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -516,7 +516,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 0698d9d4b04e8c..917d222d02c40b 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -536,7 +536,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 45d1ee0860e531..ba58a395fa038c 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -508,7 +508,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e5794b906b655f..81273266ad83f9 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -518,7 +518,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index fb84ba4c135006..81ec471d7c84d1 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -535,7 +535,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 9a05e05523fc65..76166baed5bf5f 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -622,7 +622,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 0e30aa574a2b49..8ebe527e7777a8 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -508,7 +508,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index d6f5600d941065..82e59dc8f4f3fe 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -509,7 +509,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 16f0adff4ada7f..135848c83a7dab 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -525,7 +525,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 5b0e273be74bdb..2194b5a059d490 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -506,7 +506,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 3851d6720fac45..208e1a19621ac6 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -506,7 +506,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile
index 2a59265788413e..ab84ede0cbe0e8 100644
--- a/arch/mips/cavium-octeon/Makefile
+++ b/arch/mips/cavium-octeon/Makefile
@@ -11,9 +11,9 @@
obj-y := cpu.o setup.o octeon-platform.o octeon-irq.o csrc-octeon.o
obj-y += dma-octeon.o
+obj-y += octeon-crypto.o
obj-y += octeon-memcpy.o
obj-y += executive/
-obj-y += crypto/
obj-$(CONFIG_MTD) += flash_setup.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile
deleted file mode 100644
index 83f2f5dd93cccd..00000000000000
--- a/arch/mips/cavium-octeon/crypto/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# OCTEON-specific crypto modules.
-#
-
-obj-y += octeon-crypto.o
-
-obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
deleted file mode 100644
index a8ce831e2cebd9..00000000000000
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Cryptographic API.
- *
- * MD5 Message Digest Algorithm (RFC1321).
- *
- * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
- *
- * Based on crypto/md5.c, which is:
- *
- * Derived from cryptoapi implementation, originally based on the
- * public domain implementation written by Colin Plumb in 1993.
- *
- * Copyright (c) Cryptoapi developers.
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <asm/octeon/crypto.h>
-#include <asm/octeon/octeon.h>
-#include <crypto/internal/hash.h>
-#include <crypto/md5.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/unaligned.h>
-
-struct octeon_md5_state {
- __le32 hash[MD5_HASH_WORDS];
- u64 byte_count;
-};
-
-/*
- * We pass everything as 64-bit. OCTEON can handle misaligned data.
- */
-
-static void octeon_md5_store_hash(struct octeon_md5_state *ctx)
-{
- u64 *hash = (u64 *)ctx->hash;
-
- write_octeon_64bit_hash_dword(hash[0], 0);
- write_octeon_64bit_hash_dword(hash[1], 1);
-}
-
-static void octeon_md5_read_hash(struct octeon_md5_state *ctx)
-{
- u64 *hash = (u64 *)ctx->hash;
-
- hash[0] = read_octeon_64bit_hash_dword(0);
- hash[1] = read_octeon_64bit_hash_dword(1);
-}
-
-static void octeon_md5_transform(const void *_block)
-{
- const u64 *block = _block;
-
- write_octeon_64bit_block_dword(block[0], 0);
- write_octeon_64bit_block_dword(block[1], 1);
- write_octeon_64bit_block_dword(block[2], 2);
- write_octeon_64bit_block_dword(block[3], 3);
- write_octeon_64bit_block_dword(block[4], 4);
- write_octeon_64bit_block_dword(block[5], 5);
- write_octeon_64bit_block_dword(block[6], 6);
- octeon_md5_start(block[7]);
-}
-
-static int octeon_md5_init(struct shash_desc *desc)
-{
- struct octeon_md5_state *mctx = shash_desc_ctx(desc);
-
- mctx->hash[0] = cpu_to_le32(MD5_H0);
- mctx->hash[1] = cpu_to_le32(MD5_H1);
- mctx->hash[2] = cpu_to_le32(MD5_H2);
- mctx->hash[3] = cpu_to_le32(MD5_H3);
- mctx->byte_count = 0;
-
- return 0;
-}
-
-static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct octeon_md5_state *mctx = shash_desc_ctx(desc);
- struct octeon_cop2_state state;
- unsigned long flags;
-
- mctx->byte_count += len;
- flags = octeon_crypto_enable(&state);
- octeon_md5_store_hash(mctx);
-
- do {
- octeon_md5_transform(data);
- data += MD5_HMAC_BLOCK_SIZE;
- len -= MD5_HMAC_BLOCK_SIZE;
- } while (len >= MD5_HMAC_BLOCK_SIZE);
-
- octeon_md5_read_hash(mctx);
- octeon_crypto_disable(&state, flags);
- mctx->byte_count -= len;
- return len;
-}
-
-static int octeon_md5_finup(struct shash_desc *desc, const u8 *src,
- unsigned int offset, u8 *out)
-{
- struct octeon_md5_state *mctx = shash_desc_ctx(desc);
- int padding = 56 - (offset + 1);
- struct octeon_cop2_state state;
- u32 block[MD5_BLOCK_WORDS];
- unsigned long flags;
- char *p;
-
- p = memcpy(block, src, offset);
- p += offset;
- *p++ = 0x80;
-
- flags = octeon_crypto_enable(&state);
- octeon_md5_store_hash(mctx);
-
- if (padding < 0) {
- memset(p, 0x00, padding + sizeof(u64));
- octeon_md5_transform(block);
- p = (char *)block;
- padding = 56;
- }
-
- memset(p, 0, padding);
- mctx->byte_count += offset;
- block[14] = mctx->byte_count << 3;
- block[15] = mctx->byte_count >> 29;
- cpu_to_le32_array(block + 14, 2);
- octeon_md5_transform(block);
-
- octeon_md5_read_hash(mctx);
- octeon_crypto_disable(&state, flags);
-
- memzero_explicit(block, sizeof(block));
- memcpy(out, mctx->hash, sizeof(mctx->hash));
-
- return 0;
-}
-
-static int octeon_md5_export(struct shash_desc *desc, void *out)
-{
- struct octeon_md5_state *ctx = shash_desc_ctx(desc);
- union {
- u8 *u8;
- u32 *u32;
- u64 *u64;
- } p = { .u8 = out };
- int i;
-
- for (i = 0; i < MD5_HASH_WORDS; i++)
- put_unaligned(le32_to_cpu(ctx->hash[i]), p.u32++);
- put_unaligned(ctx->byte_count, p.u64);
- return 0;
-}
-
-static int octeon_md5_import(struct shash_desc *desc, const void *in)
-{
- struct octeon_md5_state *ctx = shash_desc_ctx(desc);
- union {
- const u8 *u8;
- const u32 *u32;
- const u64 *u64;
- } p = { .u8 = in };
- int i;
-
- for (i = 0; i < MD5_HASH_WORDS; i++)
- ctx->hash[i] = cpu_to_le32(get_unaligned(p.u32++));
- ctx->byte_count = get_unaligned(p.u64);
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = MD5_DIGEST_SIZE,
- .init = octeon_md5_init,
- .update = octeon_md5_update,
- .finup = octeon_md5_finup,
- .export = octeon_md5_export,
- .import = octeon_md5_import,
- .statesize = MD5_STATE_SIZE,
- .descsize = sizeof(struct octeon_md5_state),
- .base = {
- .cra_name = "md5",
- .cra_driver_name= "octeon-md5",
- .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init md5_mod_init(void)
-{
- if (!octeon_has_crypto())
- return -ENOTSUPP;
- return crypto_register_shash(&alg);
-}
-
-static void __exit md5_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(md5_mod_init);
-module_exit(md5_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MD5 Message Digest Algorithm (OCTEON)");
-MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/octeon-crypto.c
index 0ff8559391f5b7..0ff8559391f5b7 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/octeon-crypto.c
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index 3f50e1d78894a1..68c363366bceb8 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -155,7 +155,6 @@ CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5_OCTEON=y
CONFIG_CRYPTO_DES=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_FS=y
diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig
index 7b91f4ec65bffb..6a5bd5074867e0 100644
--- a/arch/mips/crypto/Kconfig
+++ b/arch/mips/crypto/Kconfig
@@ -2,14 +2,4 @@
menu "Accelerated Cryptographic Algorithms for CPU (mips)"
-config CRYPTO_MD5_OCTEON
- tristate "Digests: MD5 (OCTEON)"
- depends on CPU_CAVIUM_OCTEON
- select CRYPTO_MD5
- select CRYPTO_HASH
- help
- MD5 message digest algorithm (RFC1321)
-
- Architecture: mips OCTEON using crypto instructions, when available
-
endmenu
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index d06388b0f66e31..bd4685612de6dd 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -320,7 +320,6 @@ CONFIG_XMON=y
CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_WP512=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index ce34597e9f3e14..2d92c11eea7e47 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -387,7 +387,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_AES_GCM_P10=m
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index cfe39fc221cf81..662aed46f9c795 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -2,27 +2,6 @@
menu "Accelerated Cryptographic Algorithms for CPU (powerpc)"
-config CRYPTO_CURVE25519_PPC64
- tristate
- depends on PPC64 && CPU_LITTLE_ENDIAN
- select CRYPTO_KPP
- select CRYPTO_LIB_CURVE25519_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CURVE25519
- default CRYPTO_LIB_CURVE25519_INTERNAL
- help
- Curve25519 algorithm
-
- Architecture: PowerPC64
- - Little-endian
-
-config CRYPTO_MD5_PPC
- tristate "Digests: MD5"
- select CRYPTO_HASH
- help
- MD5 message digest algorithm (RFC1321)
-
- Architecture: powerpc
-
config CRYPTO_AES_PPC_SPE
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)"
depends on SPE
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index bc8fd27344b8bb..5960e5300db71e 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -6,16 +6,12 @@
#
obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
-obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
-obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
-md5-ppc-y := md5-asm.o md5-glue.o
aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
-curve25519-ppc64le-y := curve25519-ppc64le-core.o curve25519-ppc64le_asm.o
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
override flavour := linux-ppc64le
diff --git a/arch/powerpc/crypto/curve25519-ppc64le-core.c b/arch/powerpc/crypto/curve25519-ppc64le-core.c
deleted file mode 100644
index f7810be0b292b7..00000000000000
--- a/arch/powerpc/crypto/curve25519-ppc64le-core.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright 2024- IBM Corp.
- *
- * X25519 scalar multiplication with 51-bit limbs for PPC64le.
- * Based on RFC7748 and AArch64 optimized implementation for X25519
- * - Algorithm 1 Scalar multiplication of a variable point
- */
-
-#include <crypto/curve25519.h>
-#include <crypto/internal/kpp.h>
-
-#include <linux/types.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#include <linux/cpufeature.h>
-#include <linux/processor.h>
-
-typedef uint64_t fe51[5];
-
-asmlinkage void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
-asmlinkage void x25519_fe51_sqr(fe51 h, const fe51 f);
-asmlinkage void x25519_fe51_mul121666(fe51 h, fe51 f);
-asmlinkage void x25519_fe51_sqr_times(fe51 h, const fe51 f, int n);
-asmlinkage void x25519_fe51_frombytes(fe51 h, const uint8_t *s);
-asmlinkage void x25519_fe51_tobytes(uint8_t *s, const fe51 h);
-asmlinkage void x25519_cswap(fe51 p, fe51 q, unsigned int bit);
-
-#define fmul x25519_fe51_mul
-#define fsqr x25519_fe51_sqr
-#define fmul121666 x25519_fe51_mul121666
-#define fe51_tobytes x25519_fe51_tobytes
-
-static void fadd(fe51 h, const fe51 f, const fe51 g)
-{
- h[0] = f[0] + g[0];
- h[1] = f[1] + g[1];
- h[2] = f[2] + g[2];
- h[3] = f[3] + g[3];
- h[4] = f[4] + g[4];
-}
-
-/*
- * Prime = 2 ** 255 - 19, 255 bits
- * (0x7fffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffed)
- *
- * Prime in 5 51-bit limbs
- */
-static fe51 prime51 = { 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff};
-
-static void fsub(fe51 h, const fe51 f, const fe51 g)
-{
-	h[0] = (f[0] + prime51[0] * 2) - g[0];
-	h[1] = (f[1] + prime51[1] * 2) - g[1];
-	h[2] = (f[2] + prime51[2] * 2) - g[2];
-	h[3] = (f[3] + prime51[3] * 2) - g[3];
-	h[4] = (f[4] + prime51[4] * 2) - g[4];
-}
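
fsub() above adds 2*p limb-wise before subtracting: with unsigned 64-bit limbs a direct f - g could underflow, while adding twice the prime leaves the value unchanged mod p and keeps every intermediate nonnegative, provided each limb of g stays below twice the matching limb of p (true for reduced inputs). A minimal standalone sketch of the same trick (fsub_sketch is a hypothetical name):

#include <stdint.h>

typedef uint64_t fe51_sketch[5];

/* Biased subtraction in GF(2^255 - 19) with 51-bit limbs:
 * h = f + 2*p - g.  2*p is a multiple of the prime, so the result is
 * unchanged mod p, but no limb can underflow.
 */
static void fsub_sketch(fe51_sketch h, const fe51_sketch f,
			const fe51_sketch g)
{
	static const fe51_sketch two_p = {
		0xfffffffffffdaULL,	/* 2 * 0x7ffffffffffed */
		0xffffffffffffeULL,	/* 2 * 0x7ffffffffffff */
		0xffffffffffffeULL,
		0xffffffffffffeULL,
		0xffffffffffffeULL,
	};
	int i;

	for (i = 0; i < 5; i++)
		h[i] = f[i] + two_p[i] - g[i];
}
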
-
-static void fe51_frombytes(fe51 h, const uint8_t *s)
-{
- /*
- * Make sure 64-bit aligned.
- */
- unsigned char sbuf[32+8];
- unsigned char *sb = PTR_ALIGN((void *)sbuf, 8);
-
- memcpy(sb, s, 32);
- x25519_fe51_frombytes(h, sb);
-}
-
-static void finv(fe51 o, const fe51 i)
-{
- fe51 a0, b, c, t00;
-
- fsqr(a0, i);
- x25519_fe51_sqr_times(t00, a0, 2);
-
- fmul(b, t00, i);
- fmul(a0, b, a0);
-
- fsqr(t00, a0);
-
- fmul(b, t00, b);
- x25519_fe51_sqr_times(t00, b, 5);
-
- fmul(b, t00, b);
- x25519_fe51_sqr_times(t00, b, 10);
-
- fmul(c, t00, b);
- x25519_fe51_sqr_times(t00, c, 20);
-
- fmul(t00, t00, c);
- x25519_fe51_sqr_times(t00, t00, 10);
-
- fmul(b, t00, b);
- x25519_fe51_sqr_times(t00, b, 50);
-
- fmul(c, t00, b);
- x25519_fe51_sqr_times(t00, c, 100);
-
- fmul(t00, t00, c);
- x25519_fe51_sqr_times(t00, t00, 50);
-
- fmul(t00, t00, b);
- x25519_fe51_sqr_times(t00, t00, 5);
-
- fmul(o, t00, a0);
-}
-
-static void curve25519_fe51(uint8_t out[32], const uint8_t scalar[32],
- const uint8_t point[32])
-{
- fe51 x1, x2, z2, x3, z3;
- uint8_t s[32];
- unsigned int swap = 0;
- int i;
-
- memcpy(s, scalar, 32);
- s[0] &= 0xf8;
- s[31] &= 0x7f;
- s[31] |= 0x40;
- fe51_frombytes(x1, point);
-
- z2[0] = z2[1] = z2[2] = z2[3] = z2[4] = 0;
- x3[0] = x1[0];
- x3[1] = x1[1];
- x3[2] = x1[2];
- x3[3] = x1[3];
- x3[4] = x1[4];
-
- x2[0] = z3[0] = 1;
- x2[1] = z3[1] = 0;
- x2[2] = z3[2] = 0;
- x2[3] = z3[3] = 0;
- x2[4] = z3[4] = 0;
-
- for (i = 254; i >= 0; --i) {
- unsigned int k_t = 1 & (s[i / 8] >> (i & 7));
- fe51 a, b, c, d, e;
- fe51 da, cb, aa, bb;
- fe51 dacb_p, dacb_m;
-
- swap ^= k_t;
- x25519_cswap(x2, x3, swap);
- x25519_cswap(z2, z3, swap);
- swap = k_t;
-
- fsub(b, x2, z2); // B = x_2 - z_2
- fadd(a, x2, z2); // A = x_2 + z_2
- fsub(d, x3, z3); // D = x_3 - z_3
- fadd(c, x3, z3); // C = x_3 + z_3
-
- fsqr(bb, b); // BB = B^2
- fsqr(aa, a); // AA = A^2
- fmul(da, d, a); // DA = D * A
- fmul(cb, c, b); // CB = C * B
-
- fsub(e, aa, bb); // E = AA - BB
- fmul(x2, aa, bb); // x2 = AA * BB
- fadd(dacb_p, da, cb); // DA + CB
- fsub(dacb_m, da, cb); // DA - CB
-
- fmul121666(z3, e); // 121666 * E
- fsqr(z2, dacb_m); // (DA - CB)^2
- fsqr(x3, dacb_p); // x3 = (DA + CB)^2
- fadd(b, bb, z3); // BB + 121666 * E
- fmul(z3, x1, z2); // z3 = x1 * (DA - CB)^2
- fmul(z2, e, b); // z2 = e * (BB + (DA + CB)^2)
- }
-
- finv(z2, z2);
- fmul(x2, x2, z2);
- fe51_tobytes(out, x2);
-}
-
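
curve25519_fe51() above begins by clamping the scalar exactly as RFC 7748 prescribes for X25519. Pulled out as a standalone helper for clarity (clamp_scalar_sketch is a hypothetical name):

#include <stdint.h>

/* RFC 7748 scalar clamping for X25519. */
static void clamp_scalar_sketch(uint8_t s[32])
{
	s[0] &= 0xf8;	/* clear low 3 bits: multiple of the cofactor 8 */
	s[31] &= 0x7f;	/* clear bit 255 */
	s[31] |= 0x40;	/* set bit 254, fixing the scalar's bit length */
}
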
-void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE],
- const u8 basepoint[CURVE25519_KEY_SIZE])
-{
- curve25519_fe51(mypublic, secret, basepoint);
-}
-EXPORT_SYMBOL(curve25519_arch);
-
-void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE])
-{
- curve25519_fe51(pub, secret, curve25519_base_point);
-}
-EXPORT_SYMBOL(curve25519_base_arch);
-
-static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
- unsigned int len)
-{
- u8 *secret = kpp_tfm_ctx(tfm);
-
- if (!len)
- curve25519_generate_secret(secret);
- else if (len == CURVE25519_KEY_SIZE &&
- crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
- memcpy(secret, buf, CURVE25519_KEY_SIZE);
- else
- return -EINVAL;
- return 0;
-}
-
-static int curve25519_generate_public_key(struct kpp_request *req)
-{
- struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
- const u8 *secret = kpp_tfm_ctx(tfm);
- u8 buf[CURVE25519_KEY_SIZE];
- int copied, nbytes;
-
- if (req->src)
- return -EINVAL;
-
- curve25519_base_arch(buf, secret);
-
- /* might want less than we've got */
- nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
- copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
- nbytes),
- buf, nbytes);
- if (copied != nbytes)
- return -EINVAL;
- return 0;
-}
-
-static int curve25519_compute_shared_secret(struct kpp_request *req)
-{
- struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
- const u8 *secret = kpp_tfm_ctx(tfm);
- u8 public_key[CURVE25519_KEY_SIZE];
- u8 buf[CURVE25519_KEY_SIZE];
- int copied, nbytes;
-
- if (!req->src)
- return -EINVAL;
-
- copied = sg_copy_to_buffer(req->src,
- sg_nents_for_len(req->src,
- CURVE25519_KEY_SIZE),
- public_key, CURVE25519_KEY_SIZE);
- if (copied != CURVE25519_KEY_SIZE)
- return -EINVAL;
-
- curve25519_arch(buf, secret, public_key);
-
- /* might want less than we've got */
- nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
- copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
- nbytes),
- buf, nbytes);
- if (copied != nbytes)
- return -EINVAL;
- return 0;
-}
-
-static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
-{
- return CURVE25519_KEY_SIZE;
-}
-
-static struct kpp_alg curve25519_alg = {
- .base.cra_name = "curve25519",
- .base.cra_driver_name = "curve25519-ppc64le",
- .base.cra_priority = 200,
- .base.cra_module = THIS_MODULE,
- .base.cra_ctxsize = CURVE25519_KEY_SIZE,
-
- .set_secret = curve25519_set_secret,
- .generate_public_key = curve25519_generate_public_key,
- .compute_shared_secret = curve25519_compute_shared_secret,
- .max_size = curve25519_max_size,
-};
-
-static int __init curve25519_mod_init(void)
-{
- return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
- crypto_register_kpp(&curve25519_alg) : 0;
-}
-
-static void __exit curve25519_mod_exit(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_KPP))
- crypto_unregister_kpp(&curve25519_alg);
-}
-
-module_init(curve25519_mod_init);
-module_exit(curve25519_mod_exit);
-
-MODULE_ALIAS_CRYPTO("curve25519");
-MODULE_ALIAS_CRYPTO("curve25519-ppc64le");
-MODULE_DESCRIPTION("PPC64le Curve25519 scalar multiplication with 51 bits limbs");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Danny Tsen <dtsen@us.ibm.com>");
diff --git a/arch/powerpc/crypto/curve25519-ppc64le_asm.S b/arch/powerpc/crypto/curve25519-ppc64le_asm.S
deleted file mode 100644
index 06c1febe24b91a..00000000000000
--- a/arch/powerpc/crypto/curve25519-ppc64le_asm.S
+++ /dev/null
@@ -1,671 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#
-# This code is taken from CRYPTOGAMs[1] and is included here using the option
-# in the license to distribute the code under the GPL. Therefore this program
-# is free software; you can redistribute it and/or modify it under the terms of
-# the GNU General Public License version 2 as published by the Free Software
-# Foundation.
-#
-# [1] https://github.com/dot-asm/cryptogams/
-
-# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain copyright notices,
-# this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials
-# provided with the distribution.
-#
-# * Neither the name of the CRYPTOGAMS nor the names of its
-# copyright holder and contributors may be used to endorse or
-# promote products derived from this software without specific
-# prior written permission.
-#
-# ALTERNATIVELY, provided that this notice is retained in full, this
-# product may be distributed under the terms of the GNU General Public
-# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
-# those given above.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see https://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-#
-# ====================================================================
-# Written and Modified by Danny Tsen <dtsen@us.ibm.com>
-# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes
-# and x25519_cswap
-#
-# Copyright 2024- IBM Corp.
-#
-# X25519 lower-level primitives for PPC64.
-#
-
-#include <linux/linkage.h>
-
-.text
-
-.align 5
-SYM_FUNC_START(x25519_fe51_mul)
-
- stdu 1,-144(1)
- std 21,56(1)
- std 22,64(1)
- std 23,72(1)
- std 24,80(1)
- std 25,88(1)
- std 26,96(1)
- std 27,104(1)
- std 28,112(1)
- std 29,120(1)
- std 30,128(1)
- std 31,136(1)
-
- ld 6,0(5)
- ld 7,0(4)
- ld 8,8(4)
- ld 9,16(4)
- ld 10,24(4)
- ld 11,32(4)
-
- mulld 22,7,6
- mulhdu 23,7,6
-
- mulld 24,8,6
- mulhdu 25,8,6
-
- mulld 30,11,6
- mulhdu 31,11,6
- ld 4,8(5)
- mulli 11,11,19
-
- mulld 26,9,6
- mulhdu 27,9,6
-
- mulld 28,10,6
- mulhdu 29,10,6
- mulld 12,11,4
- mulhdu 21,11,4
- addc 22,22,12
- adde 23,23,21
-
- mulld 12,7,4
- mulhdu 21,7,4
- addc 24,24,12
- adde 25,25,21
-
- mulld 12,10,4
- mulhdu 21,10,4
- ld 6,16(5)
- mulli 10,10,19
- addc 30,30,12
- adde 31,31,21
-
- mulld 12,8,4
- mulhdu 21,8,4
- addc 26,26,12
- adde 27,27,21
-
- mulld 12,9,4
- mulhdu 21,9,4
- addc 28,28,12
- adde 29,29,21
- mulld 12,10,6
- mulhdu 21,10,6
- addc 22,22,12
- adde 23,23,21
-
- mulld 12,11,6
- mulhdu 21,11,6
- addc 24,24,12
- adde 25,25,21
-
- mulld 12,9,6
- mulhdu 21,9,6
- ld 4,24(5)
- mulli 9,9,19
- addc 30,30,12
- adde 31,31,21
-
- mulld 12,7,6
- mulhdu 21,7,6
- addc 26,26,12
- adde 27,27,21
-
- mulld 12,8,6
- mulhdu 21,8,6
- addc 28,28,12
- adde 29,29,21
- mulld 12,9,4
- mulhdu 21,9,4
- addc 22,22,12
- adde 23,23,21
-
- mulld 12,10,4
- mulhdu 21,10,4
- addc 24,24,12
- adde 25,25,21
-
- mulld 12,8,4
- mulhdu 21,8,4
- ld 6,32(5)
- mulli 8,8,19
- addc 30,30,12
- adde 31,31,21
-
- mulld 12,11,4
- mulhdu 21,11,4
- addc 26,26,12
- adde 27,27,21
-
- mulld 12,7,4
- mulhdu 21,7,4
- addc 28,28,12
- adde 29,29,21
- mulld 12,8,6
- mulhdu 21,8,6
- addc 22,22,12
- adde 23,23,21
-
- mulld 12,9,6
- mulhdu 21,9,6
- addc 24,24,12
- adde 25,25,21
-
- mulld 12,10,6
- mulhdu 21,10,6
- addc 26,26,12
- adde 27,27,21
-
- mulld 12,11,6
- mulhdu 21,11,6
- addc 28,28,12
- adde 29,29,21
-
- mulld 12,7,6
- mulhdu 21,7,6
- addc 30,30,12
- adde 31,31,21
-
-.Lfe51_reduce:
- li 0,-1
- srdi 0,0,13
-
- srdi 12,26,51
- and 9,26,0
- insrdi 12,27,51,0
- srdi 21,22,51
- and 7,22,0
- insrdi 21,23,51,0
- addc 28,28,12
- addze 29,29
- addc 24,24,21
- addze 25,25
-
- srdi 12,28,51
- and 10,28,0
- insrdi 12,29,51,0
- srdi 21,24,51
- and 8,24,0
- insrdi 21,25,51,0
- addc 30,30,12
- addze 31,31
- add 9,9,21
-
- srdi 12,30,51
- and 11,30,0
- insrdi 12,31,51,0
- mulli 12,12,19
-
- add 7,7,12
-
- srdi 21,9,51
- and 9,9,0
- add 10,10,21
-
- srdi 12,7,51
- and 7,7,0
- add 8,8,12
-
- std 9,16(3)
- std 10,24(3)
- std 11,32(3)
- std 7,0(3)
- std 8,8(3)
-
- ld 21,56(1)
- ld 22,64(1)
- ld 23,72(1)
- ld 24,80(1)
- ld 25,88(1)
- ld 26,96(1)
- ld 27,104(1)
- ld 28,112(1)
- ld 29,120(1)
- ld 30,128(1)
- ld 31,136(1)
- addi 1,1,144
- blr
-SYM_FUNC_END(x25519_fe51_mul)
-
-.align 5
-SYM_FUNC_START(x25519_fe51_sqr)
-
- stdu 1,-144(1)
- std 21,56(1)
- std 22,64(1)
- std 23,72(1)
- std 24,80(1)
- std 25,88(1)
- std 26,96(1)
- std 27,104(1)
- std 28,112(1)
- std 29,120(1)
- std 30,128(1)
- std 31,136(1)
-
- ld 7,0(4)
- ld 8,8(4)
- ld 9,16(4)
- ld 10,24(4)
- ld 11,32(4)
-
- add 6,7,7
- mulli 21,11,19
-
- mulld 22,7,7
- mulhdu 23,7,7
- mulld 24,8,6
- mulhdu 25,8,6
- mulld 26,9,6
- mulhdu 27,9,6
- mulld 28,10,6
- mulhdu 29,10,6
- mulld 30,11,6
- mulhdu 31,11,6
- add 6,8,8
- mulld 12,11,21
- mulhdu 11,11,21
- addc 28,28,12
- adde 29,29,11
-
- mulli 5,10,19
-
- mulld 12,8,8
- mulhdu 11,8,8
- addc 26,26,12
- adde 27,27,11
- mulld 12,9,6
- mulhdu 11,9,6
- addc 28,28,12
- adde 29,29,11
- mulld 12,10,6
- mulhdu 11,10,6
- addc 30,30,12
- adde 31,31,11
- mulld 12,21,6
- mulhdu 11,21,6
- add 6,10,10
- addc 22,22,12
- adde 23,23,11
- mulld 12,10,5
- mulhdu 10,10,5
- addc 24,24,12
- adde 25,25,10
- mulld 12,6,21
- mulhdu 10,6,21
- add 6,9,9
- addc 26,26,12
- adde 27,27,10
-
- mulld 12,9,9
- mulhdu 10,9,9
- addc 30,30,12
- adde 31,31,10
- mulld 12,5,6
- mulhdu 10,5,6
- addc 22,22,12
- adde 23,23,10
- mulld 12,21,6
- mulhdu 10,21,6
- addc 24,24,12
- adde 25,25,10
-
- b .Lfe51_reduce
-SYM_FUNC_END(x25519_fe51_sqr)
-
-.align 5
-SYM_FUNC_START(x25519_fe51_mul121666)
-
- stdu 1,-144(1)
- std 21,56(1)
- std 22,64(1)
- std 23,72(1)
- std 24,80(1)
- std 25,88(1)
- std 26,96(1)
- std 27,104(1)
- std 28,112(1)
- std 29,120(1)
- std 30,128(1)
- std 31,136(1)
-
- lis 6,1
- ori 6,6,56130
- ld 7,0(4)
- ld 8,8(4)
- ld 9,16(4)
- ld 10,24(4)
- ld 11,32(4)
-
- mulld 22,7,6
- mulhdu 23,7,6
- mulld 24,8,6
- mulhdu 25,8,6
- mulld 26,9,6
- mulhdu 27,9,6
- mulld 28,10,6
- mulhdu 29,10,6
- mulld 30,11,6
- mulhdu 31,11,6
-
- b .Lfe51_reduce
-SYM_FUNC_END(x25519_fe51_mul121666)
-
-.align 5
-SYM_FUNC_START(x25519_fe51_sqr_times)
-
- stdu 1,-144(1)
- std 21,56(1)
- std 22,64(1)
- std 23,72(1)
- std 24,80(1)
- std 25,88(1)
- std 26,96(1)
- std 27,104(1)
- std 28,112(1)
- std 29,120(1)
- std 30,128(1)
- std 31,136(1)
-
- ld 7,0(4)
- ld 8,8(4)
- ld 9,16(4)
- ld 10,24(4)
- ld 11,32(4)
-
- mtctr 5
-
-.Lsqr_times_loop:
- add 6,7,7
- mulli 21,11,19
-
- mulld 22,7,7
- mulhdu 23,7,7
- mulld 24,8,6
- mulhdu 25,8,6
- mulld 26,9,6
- mulhdu 27,9,6
- mulld 28,10,6
- mulhdu 29,10,6
- mulld 30,11,6
- mulhdu 31,11,6
- add 6,8,8
- mulld 12,11,21
- mulhdu 11,11,21
- addc 28,28,12
- adde 29,29,11
-
- mulli 5,10,19
-
- mulld 12,8,8
- mulhdu 11,8,8
- addc 26,26,12
- adde 27,27,11
- mulld 12,9,6
- mulhdu 11,9,6
- addc 28,28,12
- adde 29,29,11
- mulld 12,10,6
- mulhdu 11,10,6
- addc 30,30,12
- adde 31,31,11
- mulld 12,21,6
- mulhdu 11,21,6
- add 6,10,10
- addc 22,22,12
- adde 23,23,11
- mulld 12,10,5
- mulhdu 10,10,5
- addc 24,24,12
- adde 25,25,10
- mulld 12,6,21
- mulhdu 10,6,21
- add 6,9,9
- addc 26,26,12
- adde 27,27,10
-
- mulld 12,9,9
- mulhdu 10,9,9
- addc 30,30,12
- adde 31,31,10
- mulld 12,5,6
- mulhdu 10,5,6
- addc 22,22,12
- adde 23,23,10
- mulld 12,21,6
- mulhdu 10,21,6
- addc 24,24,12
- adde 25,25,10
-
- # fe51_reduce
- li 0,-1
- srdi 0,0,13
-
- srdi 12,26,51
- and 9,26,0
- insrdi 12,27,51,0
- srdi 21,22,51
- and 7,22,0
- insrdi 21,23,51,0
- addc 28,28,12
- addze 29,29
- addc 24,24,21
- addze 25,25
-
- srdi 12,28,51
- and 10,28,0
- insrdi 12,29,51,0
- srdi 21,24,51
- and 8,24,0
- insrdi 21,25,51,0
- addc 30,30,12
- addze 31,31
- add 9,9,21
-
- srdi 12,30,51
- and 11,30,0
- insrdi 12,31,51,0
- mulli 12,12,19
-
- add 7,7,12
-
- srdi 21,9,51
- and 9,9,0
- add 10,10,21
-
- srdi 12,7,51
- and 7,7,0
- add 8,8,12
-
- bdnz .Lsqr_times_loop
-
- std 9,16(3)
- std 10,24(3)
- std 11,32(3)
- std 7,0(3)
- std 8,8(3)
-
- ld 21,56(1)
- ld 22,64(1)
- ld 23,72(1)
- ld 24,80(1)
- ld 25,88(1)
- ld 26,96(1)
- ld 27,104(1)
- ld 28,112(1)
- ld 29,120(1)
- ld 30,128(1)
- ld 31,136(1)
- addi 1,1,144
- blr
-SYM_FUNC_END(x25519_fe51_sqr_times)
-
-.align 5
-SYM_FUNC_START(x25519_fe51_frombytes)
-
- li 12, -1
- srdi 12, 12, 13 # 0x7ffffffffffff
-
- ld 5, 0(4)
- ld 6, 8(4)
- ld 7, 16(4)
- ld 8, 24(4)
-
- srdi 10, 5, 51
- and 5, 5, 12 # h0
-
- sldi 11, 6, 13
- or 11, 10, 11 # h1t
- srdi 10, 6, 38
- and 6, 11, 12 # h1
-
- sldi 11, 7, 26
- or 10, 10, 11 # h2t
-
- srdi 11, 7, 25
- and 7, 10, 12 # h2
- sldi 10, 8, 39
- or 11, 11, 10 # h3t
-
- srdi 9, 8, 12
- and 8, 11, 12 # h3
- and 9, 9, 12 # h4
-
- std 5, 0(3)
- std 6, 8(3)
- std 7, 16(3)
- std 8, 24(3)
- std 9, 32(3)
-
- blr
-SYM_FUNC_END(x25519_fe51_frombytes)
-
-.align 5
-SYM_FUNC_START(x25519_fe51_tobytes)
-
- ld 5, 0(4)
- ld 6, 8(4)
- ld 7, 16(4)
- ld 8, 24(4)
- ld 9, 32(4)
-
- li 12, -1
- srdi 12, 12, 13 # 0x7ffffffffffff
-
-	# Full reduction
- addi 10, 5, 19
- srdi 10, 10, 51
- add 10, 10, 6
- srdi 10, 10, 51
- add 10, 10, 7
- srdi 10, 10, 51
- add 10, 10, 8
- srdi 10, 10, 51
- add 10, 10, 9
- srdi 10, 10, 51
-
- mulli 10, 10, 19
- add 5, 5, 10
- srdi 11, 5, 51
- add 6, 6, 11
- srdi 11, 6, 51
- add 7, 7, 11
- srdi 11, 7, 51
- add 8, 8, 11
- srdi 11, 8, 51
- add 9, 9, 11
-
- and 5, 5, 12
- and 6, 6, 12
- and 7, 7, 12
- and 8, 8, 12
- and 9, 9, 12
-
- sldi 10, 6, 51
- or 5, 5, 10 # s0
-
- srdi 11, 6, 13
- sldi 10, 7, 38
- or 6, 11, 10 # s1
-
- srdi 11, 7, 26
- sldi 10, 8, 25
- or 7, 11, 10 # s2
-
- srdi 11, 8, 39
- sldi 10, 9, 12
-	or	8, 11, 10	# s3
-
- std 5, 0(3)
- std 6, 8(3)
- std 7, 16(3)
- std 8, 24(3)
-
- blr
-SYM_FUNC_END(x25519_fe51_tobytes)
-
-.align 5
-SYM_FUNC_START(x25519_cswap)
-
- li 7, 5
- neg 6, 5
- mtctr 7
-
-.Lswap_loop:
- ld 8, 0(3)
- ld 9, 0(4)
- xor 10, 8, 9
- and 10, 10, 6
- xor 11, 8, 10
- xor 12, 9, 10
- std 11, 0(3)
- addi 3, 3, 8
- std 12, 0(4)
- addi 4, 4, 8
- bdnz .Lswap_loop
-
- blr
-SYM_FUNC_END(x25519_cswap)
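
x25519_cswap above is the constant-time conditional swap the Montgomery ladder depends on: the swap must cost the same whether or not it happens, because the condition is a secret key bit. The same mask-and-xor idea in C, as a sketch of the technique rather than a drop-in for the asm:

#include <stdint.h>

/* Swap two 5-limb field elements iff bit is 1, without branching:
 * mask is all-ones or all-zero, so the xors move either every bit or
 * none, and the memory access pattern is identical either way.
 */
static void cswap_sketch(uint64_t p[5], uint64_t q[5], unsigned int bit)
{
	uint64_t mask = 0 - (uint64_t)bit;
	uint64_t t;
	int i;

	for (i = 0; i < 5; i++) {
		t = mask & (p[i] ^ q[i]);
		p[i] ^= t;
		q[i] ^= t;
	}
}
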
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
deleted file mode 100644
index fa6bc440cf4acf..00000000000000
--- a/arch/powerpc/crypto/md5-asm.S
+++ /dev/null
@@ -1,235 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Fast MD5 implementation for PPC
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/asm-compat.h>
-
-#define rHP r3
-#define rWP r4
-
-#define rH0 r0
-#define rH1 r6
-#define rH2 r7
-#define rH3 r5
-
-#define rW00 r8
-#define rW01 r9
-#define rW02 r10
-#define rW03 r11
-#define rW04 r12
-#define rW05 r14
-#define rW06 r15
-#define rW07 r16
-#define rW08 r17
-#define rW09 r18
-#define rW10 r19
-#define rW11 r20
-#define rW12 r21
-#define rW13 r22
-#define rW14 r23
-#define rW15 r24
-
-#define rT0 r25
-#define rT1 r26
-
-#define INITIALIZE \
- PPC_STLU r1,-INT_FRAME_SIZE(r1); \
- SAVE_GPRS(14, 26, r1) /* push registers onto stack */
-
-#define FINALIZE \
- REST_GPRS(14, 26, r1); /* pop registers from stack */ \
- addi r1,r1,INT_FRAME_SIZE
-
-#ifdef __BIG_ENDIAN__
-#define LOAD_DATA(reg, off) \
- lwbrx reg,0,rWP; /* load data */
-#define INC_PTR \
- addi rWP,rWP,4; /* increment per word */
-#define NEXT_BLOCK /* nothing to do */
-#else
-#define LOAD_DATA(reg, off) \
- lwz reg,off(rWP); /* load data */
-#define INC_PTR /* nothing to do */
-#define NEXT_BLOCK \
- addi rWP,rWP,64; /* increment per block */
-#endif
-
-#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
- LOAD_DATA(w0, off) /* W */ \
- and rT0,b,c; /* 1: f = b and c */ \
- INC_PTR /* ptr++ */ \
- andc rT1,d,b; /* 1: f' = ~b and d */ \
- LOAD_DATA(w1, off+4) /* W */ \
- or rT0,rT0,rT1; /* 1: f = f or f' */ \
- addi w0,w0,k0l; /* 1: wk = w + k */ \
- add a,a,rT0; /* 1: a = a + f */ \
- addis w0,w0,k0h; /* 1: wk = w + k' */ \
- addis w1,w1,k1h; /* 2: wk = w + k */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addi w1,w1,k1l; /* 2: wk = w + k' */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add d,d,w1; /* 2: a = a + wk */ \
- add a,a,b; /* 1: a = a + b */ \
- and rT0,a,b; /* 2: f = b and c */ \
- andc rT1,c,a; /* 2: f' = ~b and d */ \
- or rT0,rT0,rT1; /* 2: f = f or f' */ \
- add d,d,rT0; /* 2: a = a + f */ \
- INC_PTR /* ptr++ */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
- add d,d,a; /* 2: a = a + b */
-
-#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
- andc rT0,c,d; /* 1: f = c and ~d */ \
- and rT1,b,d; /* 1: f' = b and d */ \
- addi w0,w0,k0l; /* 1: wk = w + k */ \
- or rT0,rT0,rT1; /* 1: f = f or f' */ \
- addis w0,w0,k0h; /* 1: wk = w + k' */ \
- add a,a,rT0; /* 1: a = a + f */ \
- addi w1,w1,k1l; /* 2: wk = w + k */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addis w1,w1,k1h; /* 2: wk = w + k' */ \
- andc rT0,b,c; /* 2: f = c and ~d */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add a,a,b; /* 1: a = a + b */ \
- add d,d,w1; /* 2: a = a + wk */ \
- and rT1,a,c; /* 2: f' = b and d */ \
- or rT0,rT0,rT1; /* 2: f = f or f' */ \
- add d,d,rT0; /* 2: a = a + f */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
-	add	d,d,a;		/* 2: a = a + b */
-
-#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
- xor rT0,b,c; /* 1: f' = b xor c */ \
- addi w0,w0,k0l; /* 1: wk = w + k */ \
- xor rT1,rT0,d; /* 1: f = f xor f' */ \
- addis w0,w0,k0h; /* 1: wk = w + k' */ \
- add a,a,rT1; /* 1: a = a + f */ \
- addi w1,w1,k1l; /* 2: wk = w + k */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addis w1,w1,k1h; /* 2: wk = w + k' */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add d,d,w1; /* 2: a = a + wk */ \
- add a,a,b; /* 1: a = a + b */ \
- xor rT1,rT0,a; /* 2: f = b xor f' */ \
- add d,d,rT1; /* 2: a = a + f */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
- add d,d,a; /* 2: a = a + b */
-
-#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
- addi w0,w0,k0l; /* 1: w = w + k */ \
- orc rT0,b,d; /* 1: f = b or ~d */ \
- addis w0,w0,k0h; /* 1: w = w + k' */ \
- xor rT0,rT0,c; /* 1: f = f xor c */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addi w1,w1,k1l; /* 2: w = w + k */ \
- add a,a,rT0; /* 1: a = a + f */ \
- addis w1,w1,k1h; /* 2: w = w + k' */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add a,a,b; /* 1: a = a + b */ \
- orc rT0,a,c; /* 2: f = b or ~d */ \
- add d,d,w1; /* 2: a = a + wk */ \
- xor rT0,rT0,b; /* 2: f = f xor c */ \
- add d,d,rT0; /* 2: a = a + f */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
- add d,d,a; /* 2: a = a + b */
-
-_GLOBAL(ppc_md5_transform)
- INITIALIZE
-
- mtctr r5
- lwz rH0,0(rHP)
- lwz rH1,4(rHP)
- lwz rH2,8(rHP)
- lwz rH3,12(rHP)
-
-ppc_md5_main:
- R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
- 0xd76b, -23432, 0xe8c8, -18602)
- R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
- 0x2420, 0x70db, 0xc1be, -12562)
- R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
- 0xf57c, 0x0faf, 0x4788, -14806)
- R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
- 0xa830, 0x4613, 0xfd47, -27391)
- R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
- 0x6981, -26408, 0x8b45, -2129)
- R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
- 0xffff, 0x5bb1, 0x895d, -10306)
- R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
- 0x6b90, 0x1122, 0xfd98, 0x7193)
- R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
- 0xa679, 0x438e, 0x49b4, 0x0821)
-
- R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
- 0x0d56, 0x6e0c, 0x1810, 0x6d2d)
- R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
- 0x9d02, -32109, 0x124c, 0x2332)
- R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
- 0x8ea7, 0x4a33, 0x0245, -18270)
- R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
- 0x8eee, -8608, 0xf258, -5095)
- R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
- 0x969d, -10697, 0x1cbe, -15288)
- R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
- 0x3317, 0x3e99, 0xdbd9, 0x7c15)
- R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
- 0xac4b, 0x7772, 0xd8cf, 0x331d)
- R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
- 0x6a28, 0x6dd8, 0x219a, 0x3b68)
-
- R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
- 0x29cb, 0x28e5, 0x4218, -7788)
- R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
- 0x473f, 0x06d1, 0x3aae, 0x3036)
- R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
- 0xaea1, -15134, 0x640b, -11295)
- R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
- 0x8f4c, 0x4887, 0xbc7c, -22499)
- R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
- 0x7eb8, -27199, 0x00ea, 0x6050)
- R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
- 0xe01a, 0x22fe, 0x4447, 0x69c5)
- R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
- 0xb7f3, 0x0253, 0x59b1, 0x4d5b)
- R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
- 0x4701, -27017, 0xc7bd, -19859)
-
- R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
- 0x0988, -1462, 0x4c70, -19401)
- R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
- 0xadaf, -5221, 0xfc99, 0x66f7)
- R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
- 0x7e80, -16418, 0xba1e, -25587)
- R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
- 0x4130, 0x380d, 0xe0c5, 0x738d)
- lwz rW00,0(rHP)
- R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
- 0xe837, -30770, 0xde8a, 0x69e8)
- lwz rW14,4(rHP)
- R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
- 0x9e79, 0x260f, 0x256d, -27941)
- lwz rW12,8(rHP)
- R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
- 0xab75, -20775, 0x4f9e, -28397)
- lwz rW10,12(rHP)
- R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
- 0x662b, 0x7c56, 0x11b2, 0x0358)
-
- add rH0,rH0,rW00
- stw rH0,0(rHP)
- add rH1,rH1,rW14
- stw rH1,4(rHP)
- add rH2,rH2,rW12
- stw rH2,8(rHP)
- add rH3,rH3,rW10
- stw rH3,12(rHP)
- NEXT_BLOCK
-
- bdnz ppc_md5_main
-
- FINALIZE
- blr
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
deleted file mode 100644
index 204440a90cd84c..00000000000000
--- a/arch/powerpc/crypto/md5-glue.c
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Glue code for MD5 implementation for PPC assembler
- *
- * Based on generic implementation.
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/md5.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
-
-static int ppc_md5_init(struct shash_desc *desc)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- sctx->hash[0] = MD5_H0;
- sctx->hash[1] = MD5_H1;
- sctx->hash[2] = MD5_H2;
- sctx->hash[3] = MD5_H3;
- sctx->byte_count = 0;
-
- return 0;
-}
-
-static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE);
- ppc_md5_transform(sctx->hash, data, len >> 6);
- return len - round_down(len, MD5_HMAC_BLOCK_SIZE);
-}
-
-static int ppc_md5_finup(struct shash_desc *desc, const u8 *src,
- unsigned int offset, u8 *out)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
- __le64 block[MD5_BLOCK_WORDS] = {};
- u8 *p = memcpy(block, src, offset);
- __le32 *dst = (__le32 *)out;
- __le64 *pbits;
-
- src = p;
- p += offset;
- *p++ = 0x80;
- sctx->byte_count += offset;
- pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1];
- *pbits = cpu_to_le64(sctx->byte_count << 3);
- ppc_md5_transform(sctx->hash, src, (pbits - block + 1) / 8);
- memzero_explicit(block, sizeof(block));
-
- dst[0] = cpu_to_le32(sctx->hash[0]);
- dst[1] = cpu_to_le32(sctx->hash[1]);
- dst[2] = cpu_to_le32(sctx->hash[2]);
- dst[3] = cpu_to_le32(sctx->hash[3]);
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = MD5_DIGEST_SIZE,
- .init = ppc_md5_init,
- .update = ppc_md5_update,
- .finup = ppc_md5_finup,
- .descsize = MD5_STATE_SIZE,
- .base = {
- .cra_name = "md5",
- .cra_driver_name= "md5-ppc",
- .cra_priority = 200,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init ppc_md5_mod_init(void)
-{
- return crypto_register_shash(&alg);
-}
-
-static void __exit ppc_md5_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(ppc_md5_mod_init);
-module_exit(ppc_md5_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
-
-MODULE_ALIAS_CRYPTO("md5");
-MODULE_ALIAS_CRYPTO("md5-ppc");
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 8ceef305f13875..3c942945729e02 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -758,7 +758,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARIA=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 3fc12b0af55b25..c752cc114747fc 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -742,7 +742,6 @@ CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARIA=m
diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig
index f5b2e720fec3c1..f755da97953462 100644
--- a/arch/sparc/crypto/Kconfig
+++ b/arch/sparc/crypto/Kconfig
@@ -16,16 +16,6 @@ config CRYPTO_DES_SPARC64
Architecture: sparc64
-config CRYPTO_MD5_SPARC64
- tristate "Digests: MD5"
- depends on SPARC64
- select CRYPTO_MD5
- select CRYPTO_HASH
- help
- MD5 message digest algorithm (RFC1321)
-
- Architecture: sparc64 using crypto instructions, when available
-
config CRYPTO_AES_SPARC64
tristate "Ciphers: AES, modes: ECB, CBC, CTR"
depends on SPARC64
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
index 0d05a17988c4cd..7b4796842ddd7c 100644
--- a/arch/sparc/crypto/Makefile
+++ b/arch/sparc/crypto/Makefile
@@ -3,14 +3,10 @@
# Arch-specific CryptoAPI modules.
#
-obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
-
obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o
-md5-sparc64-y := md5_asm.o md5_glue.o
-
aes-sparc64-y := aes_asm.o aes_glue.o
des-sparc64-y := des_asm.o des_glue.o
camellia-sparc64-y := camellia_asm.o camellia_glue.o
diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S
deleted file mode 100644
index 60b544e4d205b1..00000000000000
--- a/arch/sparc/crypto/md5_asm.S
+++ /dev/null
@@ -1,70 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/linkage.h>
-#include <asm/opcodes.h>
-#include <asm/visasm.h>
-
-ENTRY(md5_sparc64_transform)
- /* %o0 = digest, %o1 = data, %o2 = rounds */
- VISEntryHalf
- ld [%o0 + 0x00], %f0
- ld [%o0 + 0x04], %f1
- andcc %o1, 0x7, %g0
- ld [%o0 + 0x08], %f2
- bne,pn %xcc, 10f
- ld [%o0 + 0x0c], %f3
-
-1:
- ldd [%o1 + 0x00], %f8
- ldd [%o1 + 0x08], %f10
- ldd [%o1 + 0x10], %f12
- ldd [%o1 + 0x18], %f14
- ldd [%o1 + 0x20], %f16
- ldd [%o1 + 0x28], %f18
- ldd [%o1 + 0x30], %f20
- ldd [%o1 + 0x38], %f22
-
- MD5
-
- subcc %o2, 1, %o2
- bne,pt %xcc, 1b
- add %o1, 0x40, %o1
-
-5:
- st %f0, [%o0 + 0x00]
- st %f1, [%o0 + 0x04]
- st %f2, [%o0 + 0x08]
- st %f3, [%o0 + 0x0c]
- retl
- VISExitHalf
-10:
- alignaddr %o1, %g0, %o1
-
- ldd [%o1 + 0x00], %f10
-1:
- ldd [%o1 + 0x08], %f12
- ldd [%o1 + 0x10], %f14
- ldd [%o1 + 0x18], %f16
- ldd [%o1 + 0x20], %f18
- ldd [%o1 + 0x28], %f20
- ldd [%o1 + 0x30], %f22
- ldd [%o1 + 0x38], %f24
- ldd [%o1 + 0x40], %f26
-
- faligndata %f10, %f12, %f8
- faligndata %f12, %f14, %f10
- faligndata %f14, %f16, %f12
- faligndata %f16, %f18, %f14
- faligndata %f18, %f20, %f16
- faligndata %f20, %f22, %f18
- faligndata %f22, %f24, %f20
- faligndata %f24, %f26, %f22
-
- MD5
-
- subcc %o2, 1, %o2
- fsrc2 %f26, %f10
- bne,pt %xcc, 1b
- add %o1, 0x40, %o1
-
- ba,a,pt %xcc, 5b
-ENDPROC(md5_sparc64_transform)
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
deleted file mode 100644
index b3615f0cdf6262..00000000000000
--- a/arch/sparc/crypto/md5_glue.c
+++ /dev/null
@@ -1,174 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes.
- *
- * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
- * and crypto/md5.c which are:
- *
- * Copyright (c) Alan Smithee.
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- * Copyright (c) Mathias Krause <minipli@googlemail.com>
- * Copyright (c) Cryptoapi developers.
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <asm/elf.h>
-#include <asm/opcodes.h>
-#include <asm/pstate.h>
-#include <crypto/internal/hash.h>
-#include <crypto/md5.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/unaligned.h>
-
-struct sparc_md5_state {
- __le32 hash[MD5_HASH_WORDS];
- u64 byte_count;
-};
-
-asmlinkage void md5_sparc64_transform(__le32 *digest, const char *data,
- unsigned int rounds);
-
-static int md5_sparc64_init(struct shash_desc *desc)
-{
- struct sparc_md5_state *mctx = shash_desc_ctx(desc);
-
- mctx->hash[0] = cpu_to_le32(MD5_H0);
- mctx->hash[1] = cpu_to_le32(MD5_H1);
- mctx->hash[2] = cpu_to_le32(MD5_H2);
- mctx->hash[3] = cpu_to_le32(MD5_H3);
- mctx->byte_count = 0;
-
- return 0;
-}
-
-static int md5_sparc64_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sparc_md5_state *sctx = shash_desc_ctx(desc);
-
- sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE);
- md5_sparc64_transform(sctx->hash, data, len / MD5_HMAC_BLOCK_SIZE);
- return len - round_down(len, MD5_HMAC_BLOCK_SIZE);
-}
-
-/* Add padding and return the message digest. */
-static int md5_sparc64_finup(struct shash_desc *desc, const u8 *src,
- unsigned int offset, u8 *out)
-{
- struct sparc_md5_state *sctx = shash_desc_ctx(desc);
- __le64 block[MD5_BLOCK_WORDS] = {};
- u8 *p = memcpy(block, src, offset);
- __le32 *dst = (__le32 *)out;
- __le64 *pbits;
- int i;
-
- src = p;
- p += offset;
- *p++ = 0x80;
- sctx->byte_count += offset;
- pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1];
- *pbits = cpu_to_le64(sctx->byte_count << 3);
- md5_sparc64_transform(sctx->hash, src, (pbits - block + 1) / 8);
- memzero_explicit(block, sizeof(block));
-
- /* Store state in digest */
- for (i = 0; i < MD5_HASH_WORDS; i++)
- dst[i] = sctx->hash[i];
-
- return 0;
-}
-
-static int md5_sparc64_export(struct shash_desc *desc, void *out)
-{
- struct sparc_md5_state *sctx = shash_desc_ctx(desc);
- union {
- u8 *u8;
- u32 *u32;
- u64 *u64;
- } p = { .u8 = out };
- int i;
-
- for (i = 0; i < MD5_HASH_WORDS; i++)
- put_unaligned(le32_to_cpu(sctx->hash[i]), p.u32++);
- put_unaligned(sctx->byte_count, p.u64);
- return 0;
-}
-
-static int md5_sparc64_import(struct shash_desc *desc, const void *in)
-{
- struct sparc_md5_state *sctx = shash_desc_ctx(desc);
- union {
- const u8 *u8;
- const u32 *u32;
- const u64 *u64;
- } p = { .u8 = in };
- int i;
-
- for (i = 0; i < MD5_HASH_WORDS; i++)
- sctx->hash[i] = cpu_to_le32(get_unaligned(p.u32++));
- sctx->byte_count = get_unaligned(p.u64);
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = MD5_DIGEST_SIZE,
- .init = md5_sparc64_init,
- .update = md5_sparc64_update,
- .finup = md5_sparc64_finup,
- .export = md5_sparc64_export,
- .import = md5_sparc64_import,
- .descsize = sizeof(struct sparc_md5_state),
- .statesize = sizeof(struct sparc_md5_state),
- .base = {
- .cra_name = "md5",
- .cra_driver_name= "md5-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static bool __init sparc64_has_md5_opcode(void)
-{
- unsigned long cfr;
-
- if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
- return false;
-
- __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
- if (!(cfr & CFR_MD5))
- return false;
-
- return true;
-}
-
-static int __init md5_sparc64_mod_init(void)
-{
- if (sparc64_has_md5_opcode()) {
- pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n");
- return crypto_register_shash(&alg);
- }
- pr_info("sparc64 md5 opcode not available.\n");
- return -ENODEV;
-}
-
-static void __exit md5_sparc64_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(md5_sparc64_mod_init);
-module_exit(md5_sparc64_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MD5 Message Digest Algorithm, sparc64 md5 opcode accelerated");
-
-MODULE_ALIAS_CRYPTO("md5");
-
-#include "crop_devid.c"
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 94016c60561e28..d9c6fc78cf3324 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -2,19 +2,6 @@
menu "Accelerated Cryptographic Algorithms for CPU (x86)"
-config CRYPTO_CURVE25519_X86
- tristate
- depends on 64BIT
- select CRYPTO_KPP
- select CRYPTO_LIB_CURVE25519_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CURVE25519
- default CRYPTO_LIB_CURVE25519_INTERNAL
- help
- Curve25519 algorithm
-
- Architecture: x86_64 using:
- - ADX (large integer arithmetic)
-
config CRYPTO_AES_NI_INTEL
tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XCTR, XTS, GCM (AES-NI/VAES)"
select CRYPTO_AEAD
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index d402963d6b579a..dfba7e5e88ea69 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -62,8 +62,6 @@ nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
-obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
-
obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
@@ -81,6 +79,3 @@ aria-aesni-avx2-x86_64-y := aria-aesni-avx2-asm_64.o aria_aesni_avx2_glue.o
obj-$(CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64) += aria-gfni-avx512-x86_64.o
aria-gfni-avx512-x86_64-y := aria-gfni-avx512-asm_64.o aria_gfni_avx512_glue.o
-
-# Disable GCOV in odd or sensitive code
-GCOV_PROFILE_curve25519-x86_64.o := n
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
deleted file mode 100644
index d587f05c3c8c36..00000000000000
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ /dev/null
@@ -1,1726 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
- */
-
-#include <crypto/curve25519.h>
-#include <crypto/internal/kpp.h>
-
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-
-static __always_inline u64 eq_mask(u64 a, u64 b)
-{
- u64 x = a ^ b;
- u64 minus_x = ~x + (u64)1U;
- u64 x_or_minus_x = x | minus_x;
- u64 xnx = x_or_minus_x >> (u32)63U;
- return xnx - (u64)1U;
-}
-
-static __always_inline u64 gte_mask(u64 a, u64 b)
-{
- u64 x = a;
- u64 y = b;
- u64 x_xor_y = x ^ y;
- u64 x_sub_y = x - y;
- u64 x_sub_y_xor_y = x_sub_y ^ y;
- u64 q = x_xor_y | x_sub_y_xor_y;
- u64 x_xor_q = x ^ q;
- u64 x_xor_q_ = x_xor_q >> (u32)63U;
- return x_xor_q_ - (u64)1U;
-}
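
These branch-free helpers return an all-ones mask on true and zero on false; store_felem below uses them to decide a conditional subtraction without a secret-dependent branch. A minimal standalone sketch of how such a mask is consumed (plain C, not part of the original file):

	#include <stdint.h>

	/* Returns a when mask is all-ones, b when mask is zero. */
	static uint64_t ct_select(uint64_t mask, uint64_t a, uint64_t b)
	{
		return (a & mask) | (b & ~mask);
	}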
-
-/* Computes the addition of the four-limb value f1 and the scalar f2,
- * returning the carry (if any) */
-static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
-{
- u64 carry_r;
-
- asm volatile(
- /* Clear registers to propagate the carry bit */
- " xor %%r8d, %%r8d;"
- " xor %%r9d, %%r9d;"
- " xor %%r10d, %%r10d;"
- " xor %%r11d, %%r11d;"
- " xor %k1, %k1;"
-
- /* Begin addition chain */
- " addq 0(%3), %0;"
- " movq %0, 0(%2);"
- " adcxq 8(%3), %%r8;"
- " movq %%r8, 8(%2);"
- " adcxq 16(%3), %%r9;"
- " movq %%r9, 16(%2);"
- " adcxq 24(%3), %%r10;"
- " movq %%r10, 24(%2);"
-
- /* Return the carry bit in a register */
- " adcx %%r11, %1;"
- : "+&r"(f2), "=&r"(carry_r)
- : "r"(out), "r"(f1)
- : "%r8", "%r9", "%r10", "%r11", "memory", "cc");
-
- return carry_r;
-}
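
For readers less fluent in ADX assembly, a portable sketch of the same computation, assuming a compiler with unsigned __int128 support (not part of the original file):

	#include <stdint.h>

	/* out = f1 + f2 across four little-endian 64-bit limbs; returns the carry. */
	static uint64_t add_scalar_ref(uint64_t out[4], const uint64_t f1[4], uint64_t f2)
	{
		unsigned __int128 acc = f2;

		for (int i = 0; i < 4; i++) {
			acc += f1[i];
			out[i] = (uint64_t)acc;
			acc >>= 64;
		}
		return (uint64_t)acc;	/* 0 or 1 */
	}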
-
-/* Computes the field addition of two field elements */
-static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
-{
- asm volatile(
- /* Compute the raw addition of f1 + f2 */
- " movq 0(%0), %%r8;"
- " addq 0(%2), %%r8;"
- " movq 8(%0), %%r9;"
- " adcxq 8(%2), %%r9;"
- " movq 16(%0), %%r10;"
- " adcxq 16(%2), %%r10;"
- " movq 24(%0), %%r11;"
- " adcxq 24(%2), %%r11;"
-
- /* Wrap the result back into the field */
-
- /* Step 1: Compute carry*38 */
- " mov $0, %%rax;"
- " mov $38, %0;"
- " cmovc %0, %%rax;"
-
- /* Step 2: Add carry*38 to the original sum */
- " xor %%ecx, %%ecx;"
- " add %%rax, %%r8;"
- " adcx %%rcx, %%r9;"
- " movq %%r9, 8(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 16(%1);"
- " adcx %%rcx, %%r11;"
- " movq %%r11, 24(%1);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %0, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 0(%1);"
- : "+&r"(f2)
- : "r"(out), "r"(f1)
- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
-}
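
For reference, the carry*38 trick works because the field prime is p = 2^255 - 19:

	2^256 = 2 * 2^255 ≡ 2 * 19 = 38  (mod 2^255 - 19)

so a value t = t_hi * 2^256 + t_lo is congruent to t_lo + 38 * t_hi, and a one-bit carry out of the 256-bit sum folds back in as a single conditional +38.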
-
-/* Computes the field subtraction of two field elements */
-static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
-{
- asm volatile(
- /* Compute the raw subtraction of f1-f2 */
- " movq 0(%1), %%r8;"
- " subq 0(%2), %%r8;"
- " movq 8(%1), %%r9;"
- " sbbq 8(%2), %%r9;"
- " movq 16(%1), %%r10;"
- " sbbq 16(%2), %%r10;"
- " movq 24(%1), %%r11;"
- " sbbq 24(%2), %%r11;"
-
- /* Wrap the result back into the field */
-
- /* Step 1: Compute carry*38 */
- " mov $0, %%rax;"
- " mov $38, %%rcx;"
- " cmovc %%rcx, %%rax;"
-
- /* Step 2: Subtract carry*38 from the original difference */
- " sub %%rax, %%r8;"
- " sbb $0, %%r9;"
- " sbb $0, %%r10;"
- " sbb $0, %%r11;"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rcx, %%rax;"
- " sub %%rax, %%r8;"
-
- /* Store the result */
- " movq %%r8, 0(%0);"
- " movq %%r9, 8(%0);"
- " movq %%r10, 16(%0);"
- " movq %%r11, 24(%0);"
- :
- : "r"(out), "r"(f1), "r"(f2)
- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
-}
-
-/* Computes a field multiplication: out <- f1 * f2
- * Uses the 8-element buffer tmp for intermediate results */
-static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
-{
- asm volatile(
-
- /* Compute the raw multiplication: tmp <- src1 * src2 */
-
- /* Compute src1[0] * src2 */
- " movq 0(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " movq %%r8, 0(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " movq %%r10, 8(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
-
- /* Compute src1[1] * src2 */
- " movq 8(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 8(%2), %%r8;"
- " movq %%r8, 8(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 16(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " mov $0, %%r8;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
-
- /* Compute src1[2] * src2 */
- " movq 16(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 16(%2), %%r8;"
- " movq %%r8, 16(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 24(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " mov $0, %%r8;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
-
- /* Compute src1[3] * src2 */
- " movq 24(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 24(%2), %%r8;"
- " movq %%r8, 24(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 32(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " movq %%rbx, 40(%2);"
- " mov $0, %%r8;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " movq %%r14, 48(%2);"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
- " movq %%rax, 56(%2);"
-
- /* Line up pointers */
- " mov %2, %0;"
- " mov %3, %2;"
-
- /* Wrap the result back into the field */
-
- /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
- " mov $38, %%rdx;"
- " mulxq 32(%0), %%r8, %%r13;"
- " xor %k1, %k1;"
- " adoxq 0(%0), %%r8;"
- " mulxq 40(%0), %%r9, %%rbx;"
- " adcx %%r13, %%r9;"
- " adoxq 8(%0), %%r9;"
- " mulxq 48(%0), %%r10, %%r13;"
- " adcx %%rbx, %%r10;"
- " adoxq 16(%0), %%r10;"
- " mulxq 56(%0), %%r11, %%rax;"
- " adcx %%r13, %%r11;"
- " adoxq 24(%0), %%r11;"
- " adcx %1, %%rax;"
- " adox %1, %%rax;"
- " imul %%rdx, %%rax;"
-
- /* Step 2: Fold the carry back into dst */
- " add %%rax, %%r8;"
- " adcx %1, %%r9;"
- " movq %%r9, 8(%2);"
- " adcx %1, %%r10;"
- " movq %%r10, 16(%2);"
- " adcx %1, %%r11;"
- " movq %%r11, 24(%2);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rdx, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 0(%2);"
- : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
- : "r"(out)
- : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
- "%r14", "memory", "cc");
-}
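
The post-multiplication wrap applies the same identity one level up: the 512-bit product t = t_hi * 2^256 + t_lo reduces to t_lo + 38 * t_hi. A portable sketch under the same unsigned __int128 assumption (same mathematics, not the asm's exact carry schedule; as in the original, the result is only weakly reduced below 2^256):

	/* Reduce an 8-limb product t to 4 limbs, congruent mod p = 2^255 - 19. */
	static void fold38_ref(uint64_t out[4], const uint64_t t[8])
	{
		unsigned __int128 acc = 0;
		uint64_t c;
		int i;

		for (i = 0; i < 4; i++) {	/* out = t_lo + 38 * t_hi */
			acc += t[i];
			acc += (unsigned __int128)38 * t[i + 4];
			out[i] = (uint64_t)acc;
			acc >>= 64;
		}
		c = (uint64_t)acc * 38;		/* fold the carry (<= 38) back in */
		for (i = 0; i < 4; i++) {
			acc = (unsigned __int128)out[i] + c;
			out[i] = (uint64_t)acc;
			c = (uint64_t)(acc >> 64);
		}
		out[0] += c * 38;		/* final one-bit fold; cannot carry again */
	}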
-
-/* Computes two field multiplications:
- * out[0] <- f1[0] * f2[0]
- * out[1] <- f1[1] * f2[1]
- * Uses the 16-element buffer tmp for intermediate results */
-static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
-{
- asm volatile(
-
- /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */
-
- /* Compute src1[0] * src2 */
- " movq 0(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " movq %%r8, 0(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " movq %%r10, 8(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
-
- /* Compute src1[1] * src2 */
- " movq 8(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 8(%2), %%r8;"
- " movq %%r8, 8(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 16(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " mov $0, %%r8;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
-
- /* Compute src1[2] * src2 */
- " movq 16(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 16(%2), %%r8;"
- " movq %%r8, 16(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 24(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " mov $0, %%r8;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
-
- /* Compute src1[3] * src2 */
- " movq 24(%0), %%rdx;"
- " mulxq 0(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 24(%2), %%r8;"
- " movq %%r8, 24(%2);"
- " mulxq 8(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 32(%2);"
- " mulxq 16(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " movq %%rbx, 40(%2);"
- " mov $0, %%r8;"
- " mulxq 24(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " movq %%r14, 48(%2);"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
- " movq %%rax, 56(%2);"
-
- /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */
-
- /* Compute src1[0] * src2 */
- " movq 32(%0), %%rdx;"
- " mulxq 32(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " movq %%r8, 64(%2);"
- " mulxq 40(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " movq %%r10, 72(%2);"
- " mulxq 48(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " mulxq 56(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
-
- /* Compute src1[1] * src2 */
- " movq 40(%0), %%rdx;"
- " mulxq 32(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 72(%2), %%r8;"
- " movq %%r8, 72(%2);"
- " mulxq 40(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 80(%2);"
- " mulxq 48(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " mov $0, %%r8;"
- " mulxq 56(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
-
- /* Compute src1[2] * src2 */
- " movq 48(%0), %%rdx;"
- " mulxq 32(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 80(%2), %%r8;"
- " movq %%r8, 80(%2);"
- " mulxq 40(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 88(%2);"
- " mulxq 48(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " mov $0, %%r8;"
- " mulxq 56(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
-
- /* Compute src1[3] * src2 */
- " movq 56(%0), %%rdx;"
- " mulxq 32(%1), %%r8, %%r9;"
- " xor %%r10d, %%r10d;"
- " adcxq 88(%2), %%r8;"
- " movq %%r8, 88(%2);"
- " mulxq 40(%1), %%r10, %%r11;"
- " adox %%r9, %%r10;"
- " adcx %%rbx, %%r10;"
- " movq %%r10, 96(%2);"
- " mulxq 48(%1), %%rbx, %%r13;"
- " adox %%r11, %%rbx;"
- " adcx %%r14, %%rbx;"
- " movq %%rbx, 104(%2);"
- " mov $0, %%r8;"
- " mulxq 56(%1), %%r14, %%rdx;"
- " adox %%r13, %%r14;"
- " adcx %%rax, %%r14;"
- " movq %%r14, 112(%2);"
- " mov $0, %%rax;"
- " adox %%rdx, %%rax;"
- " adcx %%r8, %%rax;"
- " movq %%rax, 120(%2);"
-
- /* Line up pointers */
- " mov %2, %0;"
- " mov %3, %2;"
-
- /* Wrap the results back into the field */
-
- /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
- " mov $38, %%rdx;"
- " mulxq 32(%0), %%r8, %%r13;"
- " xor %k1, %k1;"
- " adoxq 0(%0), %%r8;"
- " mulxq 40(%0), %%r9, %%rbx;"
- " adcx %%r13, %%r9;"
- " adoxq 8(%0), %%r9;"
- " mulxq 48(%0), %%r10, %%r13;"
- " adcx %%rbx, %%r10;"
- " adoxq 16(%0), %%r10;"
- " mulxq 56(%0), %%r11, %%rax;"
- " adcx %%r13, %%r11;"
- " adoxq 24(%0), %%r11;"
- " adcx %1, %%rax;"
- " adox %1, %%rax;"
- " imul %%rdx, %%rax;"
-
- /* Step 2: Fold the carry back into dst */
- " add %%rax, %%r8;"
- " adcx %1, %%r9;"
- " movq %%r9, 8(%2);"
- " adcx %1, %%r10;"
- " movq %%r10, 16(%2);"
- " adcx %1, %%r11;"
- " movq %%r11, 24(%2);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rdx, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 0(%2);"
-
- /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
- " mov $38, %%rdx;"
- " mulxq 96(%0), %%r8, %%r13;"
- " xor %k1, %k1;"
- " adoxq 64(%0), %%r8;"
- " mulxq 104(%0), %%r9, %%rbx;"
- " adcx %%r13, %%r9;"
- " adoxq 72(%0), %%r9;"
- " mulxq 112(%0), %%r10, %%r13;"
- " adcx %%rbx, %%r10;"
- " adoxq 80(%0), %%r10;"
- " mulxq 120(%0), %%r11, %%rax;"
- " adcx %%r13, %%r11;"
- " adoxq 88(%0), %%r11;"
- " adcx %1, %%rax;"
- " adox %1, %%rax;"
- " imul %%rdx, %%rax;"
-
- /* Step 2: Fold the carry back into dst */
- " add %%rax, %%r8;"
- " adcx %1, %%r9;"
- " movq %%r9, 40(%2);"
- " adcx %1, %%r10;"
- " movq %%r10, 48(%2);"
- " adcx %1, %%r11;"
- " movq %%r11, 56(%2);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rdx, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 32(%2);"
- : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
- : "r"(out)
- : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13",
- "%r14", "memory", "cc");
-}
-
-/* Computes the field multiplication of the four-limb value f1 by the scalar f2.
- * Requires f2 to be smaller than 2^17 */
-static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
-{
- register u64 f2_r asm("rdx") = f2;
-
- asm volatile(
- /* Compute the raw multiplication of f1*f2 */
- " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
- " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
- " add %%rcx, %%r9;"
- " mov $0, %%rcx;"
- " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
- " adcx %%rbx, %%r10;"
- " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
- " adcx %%r13, %%r11;"
- " adcx %%rcx, %%rax;"
-
- /* Wrap the result back into the field */
-
- /* Step 1: Compute carry*38 */
- " mov $38, %%rdx;"
- " imul %%rdx, %%rax;"
-
- /* Step 2: Fold the carry back into dst */
- " add %%rax, %%r8;"
- " adcx %%rcx, %%r9;"
- " movq %%r9, 8(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 16(%1);"
- " adcx %%rcx, %%r11;"
- " movq %%r11, 24(%1);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rdx, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 0(%1);"
- : "+&r"(f2_r)
- : "r"(out), "r"(f1)
- : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13",
- "memory", "cc");
-}
-
-/* Conditionally swaps p1 and p2 in constant time: p1 <-> p2 when bit is set */
-static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
-{
- asm volatile(
- /* Transfer bit into CF flag */
- " add $18446744073709551615, %0;"
-
- /* cswap p1[0], p2[0] */
- " movq 0(%1), %%r8;"
- " movq 0(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 0(%1);"
- " movq %%r9, 0(%2);"
-
- /* cswap p1[1], p2[1] */
- " movq 8(%1), %%r8;"
- " movq 8(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 8(%1);"
- " movq %%r9, 8(%2);"
-
- /* cswap p1[2], p2[2] */
- " movq 16(%1), %%r8;"
- " movq 16(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 16(%1);"
- " movq %%r9, 16(%2);"
-
- /* cswap p1[3], p2[3] */
- " movq 24(%1), %%r8;"
- " movq 24(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 24(%1);"
- " movq %%r9, 24(%2);"
-
- /* cswap p1[4], p2[4] */
- " movq 32(%1), %%r8;"
- " movq 32(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 32(%1);"
- " movq %%r9, 32(%2);"
-
- /* cswap p1[5], p2[5] */
- " movq 40(%1), %%r8;"
- " movq 40(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 40(%1);"
- " movq %%r9, 40(%2);"
-
- /* cswap p1[6], p2[6] */
- " movq 48(%1), %%r8;"
- " movq 48(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 48(%1);"
- " movq %%r9, 48(%2);"
-
- /* cswap p1[7], p2[7] */
- " movq 56(%1), %%r8;"
- " movq 56(%2), %%r9;"
- " mov %%r8, %%r10;"
- " cmovc %%r9, %%r8;"
- " cmovc %%r10, %%r9;"
- " movq %%r8, 56(%1);"
- " movq %%r9, 56(%2);"
- : "+&r"(bit)
- : "r"(p1), "r"(p2)
- : "%r8", "%r9", "%r10", "memory", "cc");
-}
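
A branch-free portable reference for the same operation (mask from the swap bit, XOR-swap under the mask; not part of the original file):

	/* Conditionally swap two 8-limb buffers in constant time. */
	static void cswap2_ref(uint64_t bit, uint64_t p1[8], uint64_t p2[8])
	{
		uint64_t mask = 0 - bit;	/* all-ones when bit == 1 */

		for (int i = 0; i < 8; i++) {
			uint64_t x = (p1[i] ^ p2[i]) & mask;

			p1[i] ^= x;
			p2[i] ^= x;
		}
	}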
-
-/* Computes the square of a field element: out <- f * f
- * Uses the 8-element buffer tmp for intermediate results */
-static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
-{
- asm volatile(
- /* Compute the raw multiplication: tmp <- f * f */
-
- /* Step 1: Compute all partial products */
- " movq 0(%0), %%rdx;" /* f[0] */
- " mulxq 8(%0), %%r8, %%r14;"
- " xor %%r15d, %%r15d;" /* f[1]*f[0] */
- " mulxq 16(%0), %%r9, %%r10;"
- " adcx %%r14, %%r9;" /* f[2]*f[0] */
- " mulxq 24(%0), %%rax, %%rcx;"
- " adcx %%rax, %%r10;" /* f[3]*f[0] */
- " movq 24(%0), %%rdx;" /* f[3] */
- " mulxq 8(%0), %%r11, %%rbx;"
- " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 16(%0), %%rax, %%r13;"
- " adcx %%rax, %%rbx;" /* f[2]*f[3] */
- " movq 8(%0), %%rdx;"
- " adcx %%r15, %%r13;" /* f1 */
- " mulxq 16(%0), %%rax, %%rcx;"
- " mov $0, %%r14;" /* f[2]*f[1] */
-
- /* Step 2: Compute two parallel carry chains */
- " xor %%r15d, %%r15d;"
- " adox %%rax, %%r10;"
- " adcx %%r8, %%r8;"
- " adox %%rcx, %%r11;"
- " adcx %%r9, %%r9;"
- " adox %%r15, %%rbx;"
- " adcx %%r10, %%r10;"
- " adox %%r15, %%r13;"
- " adcx %%r11, %%r11;"
- " adox %%r15, %%r14;"
- " adcx %%rbx, %%rbx;"
- " adcx %%r13, %%r13;"
- " adcx %%r14, %%r14;"
-
- /* Step 3: Compute intermediate squares */
- " movq 0(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
- " movq %%rax, 0(%1);"
- " add %%rcx, %%r8;"
- " movq %%r8, 8(%1);"
- " movq 8(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
- " adcx %%rax, %%r9;"
- " movq %%r9, 16(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 24(%1);"
- " movq 16(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
- " adcx %%rax, %%r11;"
- " movq %%r11, 32(%1);"
- " adcx %%rcx, %%rbx;"
- " movq %%rbx, 40(%1);"
- " movq 24(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
- " adcx %%rax, %%r13;"
- " movq %%r13, 48(%1);"
- " adcx %%rcx, %%r14;"
- " movq %%r14, 56(%1);"
-
- /* Line up pointers */
- " mov %1, %0;"
- " mov %2, %1;"
-
- /* Wrap the result back into the field */
-
- /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
- " mov $38, %%rdx;"
- " mulxq 32(%0), %%r8, %%r13;"
- " xor %%ecx, %%ecx;"
- " adoxq 0(%0), %%r8;"
- " mulxq 40(%0), %%r9, %%rbx;"
- " adcx %%r13, %%r9;"
- " adoxq 8(%0), %%r9;"
- " mulxq 48(%0), %%r10, %%r13;"
- " adcx %%rbx, %%r10;"
- " adoxq 16(%0), %%r10;"
- " mulxq 56(%0), %%r11, %%rax;"
- " adcx %%r13, %%r11;"
- " adoxq 24(%0), %%r11;"
- " adcx %%rcx, %%rax;"
- " adox %%rcx, %%rax;"
- " imul %%rdx, %%rax;"
-
- /* Step 2: Fold the carry back into dst */
- " add %%rax, %%r8;"
- " adcx %%rcx, %%r9;"
- " movq %%r9, 8(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 16(%1);"
- " adcx %%rcx, %%r11;"
- " movq %%r11, 24(%1);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rdx, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 0(%1);"
- : "+&r"(f), "+&r"(tmp)
- : "r"(out)
- : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
- "%r13", "%r14", "%r15", "memory", "cc");
-}
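
The squaring routines halve the multiplication work by computing each cross product once and doubling it:

	(a0 + a1*2^64 + a2*2^128 + a3*2^192)^2
	    = sum_i a_i^2 * 2^(128*i) + 2 * sum_{i<j} a_i*a_j * 2^(64*(i+j))

Step 1 gathers the a_i*a_j (i < j) partial products, Step 2 doubles them via the self-adding carry chain, and Step 3 adds in the diagonal a_i^2 terms.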
-
-/* Computes two field squarings:
- * out[0] <- f[0] * f[0]
- * out[1] <- f[1] * f[1]
- * Uses the 16-element buffer tmp for intermediate results */
-static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
-{
- asm volatile(
- /* Step 1: Compute all partial products */
- " movq 0(%0), %%rdx;" /* f[0] */
- " mulxq 8(%0), %%r8, %%r14;"
- " xor %%r15d, %%r15d;" /* f[1]*f[0] */
- " mulxq 16(%0), %%r9, %%r10;"
- " adcx %%r14, %%r9;" /* f[2]*f[0] */
- " mulxq 24(%0), %%rax, %%rcx;"
- " adcx %%rax, %%r10;" /* f[3]*f[0] */
- " movq 24(%0), %%rdx;" /* f[3] */
- " mulxq 8(%0), %%r11, %%rbx;"
- " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 16(%0), %%rax, %%r13;"
- " adcx %%rax, %%rbx;" /* f[2]*f[3] */
- " movq 8(%0), %%rdx;"
- " adcx %%r15, %%r13;" /* f1 */
- " mulxq 16(%0), %%rax, %%rcx;"
- " mov $0, %%r14;" /* f[2]*f[1] */
-
- /* Step 2: Compute two parallel carry chains */
- " xor %%r15d, %%r15d;"
- " adox %%rax, %%r10;"
- " adcx %%r8, %%r8;"
- " adox %%rcx, %%r11;"
- " adcx %%r9, %%r9;"
- " adox %%r15, %%rbx;"
- " adcx %%r10, %%r10;"
- " adox %%r15, %%r13;"
- " adcx %%r11, %%r11;"
- " adox %%r15, %%r14;"
- " adcx %%rbx, %%rbx;"
- " adcx %%r13, %%r13;"
- " adcx %%r14, %%r14;"
-
- /* Step 3: Compute intermediate squares */
- " movq 0(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
- " movq %%rax, 0(%1);"
- " add %%rcx, %%r8;"
- " movq %%r8, 8(%1);"
- " movq 8(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
- " adcx %%rax, %%r9;"
- " movq %%r9, 16(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 24(%1);"
- " movq 16(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
- " adcx %%rax, %%r11;"
- " movq %%r11, 32(%1);"
- " adcx %%rcx, %%rbx;"
- " movq %%rbx, 40(%1);"
- " movq 24(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
- " adcx %%rax, %%r13;"
- " movq %%r13, 48(%1);"
- " adcx %%rcx, %%r14;"
- " movq %%r14, 56(%1);"
-
- /* Step 1: Compute all partial products */
- " movq 32(%0), %%rdx;" /* f[0] */
- " mulxq 40(%0), %%r8, %%r14;"
- " xor %%r15d, %%r15d;" /* f[1]*f[0] */
- " mulxq 48(%0), %%r9, %%r10;"
- " adcx %%r14, %%r9;" /* f[2]*f[0] */
- " mulxq 56(%0), %%rax, %%rcx;"
- " adcx %%rax, %%r10;" /* f[3]*f[0] */
- " movq 56(%0), %%rdx;" /* f[3] */
- " mulxq 40(%0), %%r11, %%rbx;"
- " adcx %%rcx, %%r11;" /* f[1]*f[3] */
- " mulxq 48(%0), %%rax, %%r13;"
- " adcx %%rax, %%rbx;" /* f[2]*f[3] */
- " movq 40(%0), %%rdx;"
- " adcx %%r15, %%r13;" /* f1 */
- " mulxq 48(%0), %%rax, %%rcx;"
- " mov $0, %%r14;" /* f[2]*f[1] */
-
- /* Step 2: Compute two parallel carry chains */
- " xor %%r15d, %%r15d;"
- " adox %%rax, %%r10;"
- " adcx %%r8, %%r8;"
- " adox %%rcx, %%r11;"
- " adcx %%r9, %%r9;"
- " adox %%r15, %%rbx;"
- " adcx %%r10, %%r10;"
- " adox %%r15, %%r13;"
- " adcx %%r11, %%r11;"
- " adox %%r15, %%r14;"
- " adcx %%rbx, %%rbx;"
- " adcx %%r13, %%r13;"
- " adcx %%r14, %%r14;"
-
- /* Step 3: Compute intermediate squares */
- " movq 32(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
- " movq %%rax, 64(%1);"
- " add %%rcx, %%r8;"
- " movq %%r8, 72(%1);"
- " movq 40(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
- " adcx %%rax, %%r9;"
- " movq %%r9, 80(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 88(%1);"
- " movq 48(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
- " adcx %%rax, %%r11;"
- " movq %%r11, 96(%1);"
- " adcx %%rcx, %%rbx;"
- " movq %%rbx, 104(%1);"
- " movq 56(%0), %%rdx;"
- " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
- " adcx %%rax, %%r13;"
- " movq %%r13, 112(%1);"
- " adcx %%rcx, %%r14;"
- " movq %%r14, 120(%1);"
-
- /* Line up pointers */
- " mov %1, %0;"
- " mov %2, %1;"
-
- /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
- " mov $38, %%rdx;"
- " mulxq 32(%0), %%r8, %%r13;"
- " xor %%ecx, %%ecx;"
- " adoxq 0(%0), %%r8;"
- " mulxq 40(%0), %%r9, %%rbx;"
- " adcx %%r13, %%r9;"
- " adoxq 8(%0), %%r9;"
- " mulxq 48(%0), %%r10, %%r13;"
- " adcx %%rbx, %%r10;"
- " adoxq 16(%0), %%r10;"
- " mulxq 56(%0), %%r11, %%rax;"
- " adcx %%r13, %%r11;"
- " adoxq 24(%0), %%r11;"
- " adcx %%rcx, %%rax;"
- " adox %%rcx, %%rax;"
- " imul %%rdx, %%rax;"
-
- /* Step 2: Fold the carry back into dst */
- " add %%rax, %%r8;"
- " adcx %%rcx, %%r9;"
- " movq %%r9, 8(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 16(%1);"
- " adcx %%rcx, %%r11;"
- " movq %%r11, 24(%1);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rdx, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 0(%1);"
-
- /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
- " mov $38, %%rdx;"
- " mulxq 96(%0), %%r8, %%r13;"
- " xor %%ecx, %%ecx;"
- " adoxq 64(%0), %%r8;"
- " mulxq 104(%0), %%r9, %%rbx;"
- " adcx %%r13, %%r9;"
- " adoxq 72(%0), %%r9;"
- " mulxq 112(%0), %%r10, %%r13;"
- " adcx %%rbx, %%r10;"
- " adoxq 80(%0), %%r10;"
- " mulxq 120(%0), %%r11, %%rax;"
- " adcx %%r13, %%r11;"
- " adoxq 88(%0), %%r11;"
- " adcx %%rcx, %%rax;"
- " adox %%rcx, %%rax;"
- " imul %%rdx, %%rax;"
-
- /* Step 2: Fold the carry back into dst */
- " add %%rax, %%r8;"
- " adcx %%rcx, %%r9;"
- " movq %%r9, 40(%1);"
- " adcx %%rcx, %%r10;"
- " movq %%r10, 48(%1);"
- " adcx %%rcx, %%r11;"
- " movq %%r11, 56(%1);"
-
- /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
- " mov $0, %%rax;"
- " cmovc %%rdx, %%rax;"
- " add %%rax, %%r8;"
- " movq %%r8, 32(%1);"
- : "+&r"(f), "+&r"(tmp)
- : "r"(out)
- : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11",
- "%r13", "%r14", "%r15", "memory", "cc");
-}
-
-static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2)
-{
- u64 *nq = p01_tmp1;
- u64 *nq_p1 = p01_tmp1 + (u32)8U;
- u64 *tmp1 = p01_tmp1 + (u32)16U;
- u64 *x1 = q;
- u64 *x2 = nq;
- u64 *z2 = nq + (u32)4U;
- u64 *z3 = nq_p1 + (u32)4U;
- u64 *a = tmp1;
- u64 *b = tmp1 + (u32)4U;
- u64 *ab = tmp1;
- u64 *dc = tmp1 + (u32)8U;
- u64 *x3;
- u64 *z31;
- u64 *d0;
- u64 *c0;
- u64 *a1;
- u64 *b1;
- u64 *d;
- u64 *c;
- u64 *ab1;
- u64 *dc1;
- fadd(a, x2, z2);
- fsub(b, x2, z2);
- x3 = nq_p1;
- z31 = nq_p1 + (u32)4U;
- d0 = dc;
- c0 = dc + (u32)4U;
- fadd(c0, x3, z31);
- fsub(d0, x3, z31);
- fmul2(dc, dc, ab, tmp2);
- fadd(x3, d0, c0);
- fsub(z31, d0, c0);
- a1 = tmp1;
- b1 = tmp1 + (u32)4U;
- d = tmp1 + (u32)8U;
- c = tmp1 + (u32)12U;
- ab1 = tmp1;
- dc1 = tmp1 + (u32)8U;
- fsqr2(dc1, ab1, tmp2);
- fsqr2(nq_p1, nq_p1, tmp2);
- a1[0U] = c[0U];
- a1[1U] = c[1U];
- a1[2U] = c[2U];
- a1[3U] = c[3U];
- fsub(c, d, c);
- fmul_scalar(b1, c, (u64)121665U);
- fadd(b1, b1, d);
- fmul2(nq, dc1, ab1, tmp2);
- fmul(z3, z3, x1, tmp2);
-}
-
-static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2)
-{
- u64 *x2 = nq;
- u64 *z2 = nq + (u32)4U;
- u64 *a = tmp1;
- u64 *b = tmp1 + (u32)4U;
- u64 *d = tmp1 + (u32)8U;
- u64 *c = tmp1 + (u32)12U;
- u64 *ab = tmp1;
- u64 *dc = tmp1 + (u32)8U;
- fadd(a, x2, z2);
- fsub(b, x2, z2);
- fsqr2(dc, ab, tmp2);
- a[0U] = c[0U];
- a[1U] = c[1U];
- a[2U] = c[2U];
- a[3U] = c[3U];
- fsub(c, d, c);
- fmul_scalar(b, c, (u64)121665U);
- fadd(b, b, d);
- fmul2(nq, dc, ab, tmp2);
-}
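
For reference, point_double and point_add_and_double above implement the standard x-only Montgomery ladder formulas (cf. RFC 7748, with a24 = (486662 - 2)/4 = 121665). With A = x2 + z2 and B = x2 - z2, the doubling is

	AA = A^2,  BB = B^2,  E = AA - BB
	x2' = AA * BB
	z2' = E * (AA + 121665 * E)

and the differential addition, with C = x3 + z3 and D = x3 - z3, is

	DA = D * A,  CB = C * B
	x3' = (DA + CB)^2
	z3' = x1 * (DA - CB)^2

The paired fsqr2/fmul2 calls evaluate the two halves of each formula in one pass over the [a|b] and [d|c] buffer layout.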
-
-static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1)
-{
- u64 tmp2[16U] = { 0U };
- u64 p01_tmp1_swap[33U] = { 0U };
- u64 *p0 = p01_tmp1_swap;
- u64 *p01 = p01_tmp1_swap;
- u64 *p03 = p01;
- u64 *p11 = p01 + (u32)8U;
- u64 *x0;
- u64 *z0;
- u64 *p01_tmp1;
- u64 *p01_tmp11;
- u64 *nq10;
- u64 *nq_p11;
- u64 *swap1;
- u64 sw0;
- u64 *nq1;
- u64 *tmp1;
- memcpy(p11, init1, (u32)8U * sizeof(init1[0U]));
- x0 = p03;
- z0 = p03 + (u32)4U;
- x0[0U] = (u64)1U;
- x0[1U] = (u64)0U;
- x0[2U] = (u64)0U;
- x0[3U] = (u64)0U;
- z0[0U] = (u64)0U;
- z0[1U] = (u64)0U;
- z0[2U] = (u64)0U;
- z0[3U] = (u64)0U;
- p01_tmp1 = p01_tmp1_swap;
- p01_tmp11 = p01_tmp1_swap;
- nq10 = p01_tmp1_swap;
- nq_p11 = p01_tmp1_swap + (u32)8U;
- swap1 = p01_tmp1_swap + (u32)32U;
- cswap2((u64)1U, nq10, nq_p11);
- point_add_and_double(init1, p01_tmp11, tmp2);
- swap1[0U] = (u64)1U;
- {
- u32 i;
- for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) {
- u64 *p01_tmp12 = p01_tmp1_swap;
- u64 *swap2 = p01_tmp1_swap + (u32)32U;
- u64 *nq2 = p01_tmp12;
- u64 *nq_p12 = p01_tmp12 + (u32)8U;
- u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U);
- u64 sw = swap2[0U] ^ bit;
- cswap2(sw, nq2, nq_p12);
- point_add_and_double(init1, p01_tmp12, tmp2);
- swap2[0U] = bit;
- }
- }
- sw0 = swap1[0U];
- cswap2(sw0, nq10, nq_p11);
- nq1 = p01_tmp1;
- tmp1 = p01_tmp1 + (u32)16U;
- point_double(nq1, tmp1, tmp2);
- point_double(nq1, tmp1, tmp2);
- point_double(nq1, tmp1, tmp2);
- memcpy(out, p0, (u32)8U * sizeof(p0[0U]));
-
- memzero_explicit(tmp2, sizeof(tmp2));
- memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap));
-}
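
The step count adds up as follows: the unconditional first swap-and-step consumes bit 254, the loop covers bits 253 down through 3 (251 iterations), and the three trailing point_double calls stand in for bits 2..0, so 1 + 251 + 3 = 255 ladder steps in total. This layout assumes a clamped scalar (bit 255 clear, bit 254 set, bits 2..0 clear), which is why the low bits can be handled as plain doublings.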
-
-static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1)
-{
- u32 i;
- fsqr(o, inp, tmp);
- for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U)
- fsqr(o, o, tmp);
-}
-
-static void finv(u64 *o, const u64 *i, u64 *tmp)
-{
- u64 t1[16U] = { 0U };
- u64 *a0 = t1;
- u64 *b = t1 + (u32)4U;
- u64 *c = t1 + (u32)8U;
- u64 *t00 = t1 + (u32)12U;
- u64 *tmp1 = tmp;
- u64 *a;
- u64 *t0;
- fsquare_times(a0, i, tmp1, (u32)1U);
- fsquare_times(t00, a0, tmp1, (u32)2U);
- fmul(b, t00, i, tmp);
- fmul(a0, b, a0, tmp);
- fsquare_times(t00, a0, tmp1, (u32)1U);
- fmul(b, t00, b, tmp);
- fsquare_times(t00, b, tmp1, (u32)5U);
- fmul(b, t00, b, tmp);
- fsquare_times(t00, b, tmp1, (u32)10U);
- fmul(c, t00, b, tmp);
- fsquare_times(t00, c, tmp1, (u32)20U);
- fmul(t00, t00, c, tmp);
- fsquare_times(t00, t00, tmp1, (u32)10U);
- fmul(b, t00, b, tmp);
- fsquare_times(t00, b, tmp1, (u32)50U);
- fmul(c, t00, b, tmp);
- fsquare_times(t00, c, tmp1, (u32)100U);
- fmul(t00, t00, c, tmp);
- fsquare_times(t00, t00, tmp1, (u32)50U);
- fmul(t00, t00, b, tmp);
- fsquare_times(t00, t00, tmp1, (u32)5U);
- a = t1;
- t0 = t1 + (u32)12U;
- fmul(o, t0, a, tmp);
-}
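
finv computes the modular inverse via Fermat's little theorem, z^-1 = z^(p-2) mod p with p - 2 = 2^255 - 21. The addition chain above spends 254 squarings (the fsquare_times counts sum to 1+2+1+5+10+20+10+50+100+50+5 = 254) and 11 multiplications.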
-
-static void store_felem(u64 *b, u64 *f)
-{
- u64 f30 = f[3U];
- u64 top_bit0 = f30 >> (u32)63U;
- u64 f31;
- u64 top_bit;
- u64 f0;
- u64 f1;
- u64 f2;
- u64 f3;
- u64 m0;
- u64 m1;
- u64 m2;
- u64 m3;
- u64 mask;
- u64 f0_;
- u64 f1_;
- u64 f2_;
- u64 f3_;
- u64 o0;
- u64 o1;
- u64 o2;
- u64 o3;
- f[3U] = f30 & (u64)0x7fffffffffffffffU;
- add_scalar(f, f, (u64)19U * top_bit0);
- f31 = f[3U];
- top_bit = f31 >> (u32)63U;
- f[3U] = f31 & (u64)0x7fffffffffffffffU;
- add_scalar(f, f, (u64)19U * top_bit);
- f0 = f[0U];
- f1 = f[1U];
- f2 = f[2U];
- f3 = f[3U];
- m0 = gte_mask(f0, (u64)0xffffffffffffffedU);
- m1 = eq_mask(f1, (u64)0xffffffffffffffffU);
- m2 = eq_mask(f2, (u64)0xffffffffffffffffU);
- m3 = eq_mask(f3, (u64)0x7fffffffffffffffU);
- mask = ((m0 & m1) & m2) & m3;
- f0_ = f0 - (mask & (u64)0xffffffffffffffedU);
- f1_ = f1 - (mask & (u64)0xffffffffffffffffU);
- f2_ = f2 - (mask & (u64)0xffffffffffffffffU);
- f3_ = f3 - (mask & (u64)0x7fffffffffffffffU);
- o0 = f0_;
- o1 = f1_;
- o2 = f2_;
- o3 = f3_;
- b[0U] = o0;
- b[1U] = o1;
- b[2U] = o2;
- b[3U] = o3;
-}
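
store_felem performs the canonical reduction that the weak wraps above leave undone: after folding the top bit twice the value lies in [0, 2^255), so at most one subtraction of p = 2^255 - 19 remains. The eq_mask/gte_mask helpers detect f >= p limb by limb, so the subtraction happens (or not) without a secret-dependent branch.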
-
-static void encode_point(u8 *o, const u64 *i)
-{
- const u64 *x = i;
- const u64 *z = i + (u32)4U;
- u64 tmp[4U] = { 0U };
- u64 tmp_w[16U] = { 0U };
- finv(tmp, z, tmp_w);
- fmul(tmp, tmp, x, tmp_w);
- store_felem((u64 *)o, tmp);
-}
-
-static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub)
-{
- u64 init1[8U] = { 0U };
- u64 tmp[4U] = { 0U };
- u64 tmp3;
- u64 *x;
- u64 *z;
- {
- u32 i;
- for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) {
- u64 *os = tmp;
- const u8 *bj = pub + i * (u32)8U;
- u64 u = *(u64 *)bj;
- u64 r = u;
- u64 x0 = r;
- os[i] = x0;
- }
- }
- tmp3 = tmp[3U];
- tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU;
- x = init1;
- z = init1 + (u32)4U;
- z[0U] = (u64)1U;
- z[1U] = (u64)0U;
- z[2U] = (u64)0U;
- z[3U] = (u64)0U;
- x[0U] = tmp[0U];
- x[1U] = tmp[1U];
- x[2U] = tmp[2U];
- x[3U] = tmp[3U];
- montgomery_ladder(init1, priv, init1);
- encode_point(out, init1);
-}
-
-/* The below constants were generated using this sage script:
- *
- * #!/usr/bin/env sage
- * import sys
- * from sage.all import *
- * def limbs(n):
- * n = int(n)
- * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64)
- * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l
- * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0])
- * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0]
- * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s))
- * print("static const u64 table_ladder[] = {")
- * p = ec.lift_x(9)
- * for i in range(252):
- * l = (p[0] + p[2]) / (p[0] - p[2])
- * print(("\t%s" + ("," if i != 251 else "")) % limbs(l))
- * p = p * 2
- * print("};")
- *
- */
-
-static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL };
-
-static const u64 table_ladder[] = {
- 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL,
- 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL,
- 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL,
- 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL,
- 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL,
- 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL,
- 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL,
- 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL,
- 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL,
- 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL,
- 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL,
- 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL,
- 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL,
- 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL,
- 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL,
- 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL,
- 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL,
- 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL,
- 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL,
- 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL,
- 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL,
- 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL,
- 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL,
- 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL,
- 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL,
- 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL,
- 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL,
- 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL,
- 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL,
- 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL,
- 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL,
- 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL,
- 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL,
- 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL,
- 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL,
- 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL,
- 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL,
- 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL,
- 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL,
- 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL,
- 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL,
- 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL,
- 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL,
- 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL,
- 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL,
- 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL,
- 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL,
- 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL,
- 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL,
- 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL,
- 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL,
- 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL,
- 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL,
- 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL,
- 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL,
- 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL,
- 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL,
- 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL,
- 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL,
- 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL,
- 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL,
- 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL,
- 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL,
- 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL,
- 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL,
- 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL,
- 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL,
- 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL,
- 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL,
- 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL,
- 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL,
- 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL,
- 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL,
- 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL,
- 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL,
- 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL,
- 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL,
- 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL,
- 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL,
- 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL,
- 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL,
- 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL,
- 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL,
- 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL,
- 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL,
- 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL,
- 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL,
- 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL,
- 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL,
- 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL,
- 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL,
- 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL,
- 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL,
- 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL,
- 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL,
- 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL,
- 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL,
- 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL,
- 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL,
- 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL,
- 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL,
- 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL,
- 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL,
- 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL,
- 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL,
- 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL,
- 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL,
- 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL,
- 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL,
- 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL,
- 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL,
- 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL,
- 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL,
- 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL,
- 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL,
- 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL,
- 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL,
- 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL,
- 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL,
- 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL,
- 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL,
- 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL,
- 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL,
- 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL,
- 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL,
- 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL,
- 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL,
- 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL,
- 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL,
- 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL,
- 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL,
- 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL,
- 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL,
- 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL,
- 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL,
- 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL,
- 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL,
- 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL,
- 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL,
- 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL,
- 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL,
- 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL,
- 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL,
- 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL,
- 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL,
- 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL,
- 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL,
- 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL,
- 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL,
- 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL,
- 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL,
- 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
- 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
- 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
- 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
- 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
- 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
- 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
- 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
- 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
- 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
- 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
- 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
- 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
- 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
- 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
- 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
- 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
- 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
- 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
- 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
- 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
- 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
- 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
- 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
- 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
- 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
- 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
- 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
- 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
- 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
- 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
- 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
- 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
- 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
- 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
- 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
- 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
- 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
- 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
- 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
- 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
- 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
- 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
- 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
- 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
- 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
- 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
- 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
- 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
- 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
- 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
- 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
- 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
- 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
- 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
- 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
- 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
- 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
- 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
- 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
- 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
- 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
- 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
- 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
- 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
- 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
- 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
- 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
- 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
- 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
- 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
- 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
- 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
- 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
- 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
- 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
- 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
- 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
- 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
- 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
- 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
- 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
- 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
- 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
- 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
- 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
- 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
- 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
- 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
- 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
- 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
- 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
- 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
- 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
- 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
- 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
- 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
- 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
- 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
- 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
- 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
-};
-
-static void curve25519_ever64_base(u8 *out, const u8 *priv)
-{
- u64 swap = 1;
- int i, j, k;
- u64 tmp[16 + 32 + 4];
- u64 *x1 = &tmp[0];
- u64 *z1 = &tmp[4];
- u64 *x2 = &tmp[8];
- u64 *z2 = &tmp[12];
- u64 *xz1 = &tmp[0];
- u64 *xz2 = &tmp[8];
- u64 *a = &tmp[0 + 16];
- u64 *b = &tmp[4 + 16];
- u64 *c = &tmp[8 + 16];
- u64 *ab = &tmp[0 + 16];
- u64 *abcd = &tmp[0 + 16];
- u64 *ef = &tmp[16 + 16];
- u64 *efgh = &tmp[16 + 16];
- u64 *key = &tmp[0 + 16 + 32];
-
- memcpy(key, priv, 32);
- ((u8 *)key)[0] &= 248;
- ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64;
-
- x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
- z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
- z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
- memcpy(x2, p_minus_s, sizeof(p_minus_s));
-
- j = 3;
- for (i = 0; i < 4; ++i) {
- while (j < (const int[]){ 64, 64, 64, 63 }[i]) {
- u64 bit = (key[i] >> j) & 1;
- k = (64 * i + j - 3);
- swap = swap ^ bit;
- cswap2(swap, xz1, xz2);
- swap = bit;
- fsub(b, x1, z1);
- fadd(a, x1, z1);
- fmul(c, &table_ladder[4 * k], b, ef);
- fsub(b, a, c);
- fadd(a, a, c);
- fsqr2(ab, ab, efgh);
- fmul2(xz1, xz2, ab, efgh);
- ++j;
- }
- j = 0;
- }
-
- point_double(xz1, abcd, efgh);
- point_double(xz1, abcd, efgh);
- point_double(xz1, abcd, efgh);
- encode_point(out, xz1);
-
- memzero_explicit(tmp, sizeof(tmp));
-}
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);
-
-void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE],
- const u8 basepoint[CURVE25519_KEY_SIZE])
-{
- if (static_branch_likely(&curve25519_use_bmi2_adx))
- curve25519_ever64(mypublic, secret, basepoint);
- else
- curve25519_generic(mypublic, secret, basepoint);
-}
-EXPORT_SYMBOL(curve25519_arch);
-
-void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE])
-{
- if (static_branch_likely(&curve25519_use_bmi2_adx))
- curve25519_ever64_base(pub, secret);
- else
- curve25519_generic(pub, secret, curve25519_base_point);
-}
-EXPORT_SYMBOL(curve25519_base_arch);
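
A minimal sketch of how an in-kernel consumer would drive these two entry points for a Diffie-Hellman exchange (hypothetical caller; bob_public is assumed to arrive from the peer):

	u8 secret[CURVE25519_KEY_SIZE];
	u8 mypublic[CURVE25519_KEY_SIZE];
	u8 shared[CURVE25519_KEY_SIZE];

	curve25519_generate_secret(secret);		/* random, pre-clamped */
	curve25519_base_arch(mypublic, secret);		/* mypublic = secret * G */
	/* ... send mypublic, receive bob_public, then: */
	curve25519_arch(shared, secret, bob_public);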
-
-static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
- unsigned int len)
-{
- u8 *secret = kpp_tfm_ctx(tfm);
-
- if (!len)
- curve25519_generate_secret(secret);
- else if (len == CURVE25519_KEY_SIZE &&
- crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
- memcpy(secret, buf, CURVE25519_KEY_SIZE);
- else
- return -EINVAL;
- return 0;
-}
-
-static int curve25519_generate_public_key(struct kpp_request *req)
-{
- struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
- const u8 *secret = kpp_tfm_ctx(tfm);
- u8 buf[CURVE25519_KEY_SIZE];
- int copied, nbytes;
-
- if (req->src)
- return -EINVAL;
-
- curve25519_base_arch(buf, secret);
-
- /* might want less than we've got */
- nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
- copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
- nbytes),
- buf, nbytes);
- if (copied != nbytes)
- return -EINVAL;
- return 0;
-}
-
-static int curve25519_compute_shared_secret(struct kpp_request *req)
-{
- struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
- const u8 *secret = kpp_tfm_ctx(tfm);
- u8 public_key[CURVE25519_KEY_SIZE];
- u8 buf[CURVE25519_KEY_SIZE];
- int copied, nbytes;
-
- if (!req->src)
- return -EINVAL;
-
- copied = sg_copy_to_buffer(req->src,
- sg_nents_for_len(req->src,
- CURVE25519_KEY_SIZE),
- public_key, CURVE25519_KEY_SIZE);
- if (copied != CURVE25519_KEY_SIZE)
- return -EINVAL;
-
- curve25519_arch(buf, secret, public_key);
-
- /* might want less than we've got */
- nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
- copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
- nbytes),
- buf, nbytes);
- if (copied != nbytes)
- return -EINVAL;
- return 0;
-}
-
-static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
-{
- return CURVE25519_KEY_SIZE;
-}
-
-static struct kpp_alg curve25519_alg = {
- .base.cra_name = "curve25519",
- .base.cra_driver_name = "curve25519-x86",
- .base.cra_priority = 200,
- .base.cra_module = THIS_MODULE,
- .base.cra_ctxsize = CURVE25519_KEY_SIZE,
-
- .set_secret = curve25519_set_secret,
- .generate_public_key = curve25519_generate_public_key,
- .compute_shared_secret = curve25519_compute_shared_secret,
- .max_size = curve25519_max_size,
-};
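
For context, the registered "curve25519" instance is driven through the generic KPP API; a sketch of the calling sequence (helper names from include/crypto/kpp.h; the secret, public_buf and peer_public buffers are hypothetical, and error handling is omitted):

	struct crypto_kpp *tfm = crypto_alloc_kpp("curve25519", 0, 0);
	struct kpp_request *req = kpp_request_alloc(tfm, GFP_KERNEL);
	struct scatterlist src, dst;

	/* Raw 32-byte secret; a zero-length buffer asks for a random one. */
	crypto_kpp_set_secret(tfm, secret, CURVE25519_KEY_SIZE);

	/* No input means "derive my public key" (see the req->src check). */
	sg_init_one(&dst, public_buf, CURVE25519_KEY_SIZE);
	kpp_request_set_input(req, NULL, 0);
	kpp_request_set_output(req, &dst, CURVE25519_KEY_SIZE);
	crypto_kpp_generate_public_key(req);

	/* With the peer's public key as input, compute the shared secret. */
	sg_init_one(&src, peer_public, CURVE25519_KEY_SIZE);
	kpp_request_set_input(req, &src, CURVE25519_KEY_SIZE);
	crypto_kpp_compute_shared_secret(req);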
-
-
-static int __init curve25519_mod_init(void)
-{
- if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
- static_branch_enable(&curve25519_use_bmi2_adx);
- else
- return 0;
- return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
- crypto_register_kpp(&curve25519_alg) : 0;
-}
-
-static void __exit curve25519_mod_exit(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
- static_branch_likely(&curve25519_use_bmi2_adx))
- crypto_unregister_kpp(&curve25519_alg);
-}
-
-module_init(curve25519_mod_init);
-module_exit(curve25519_mod_exit);
-
-MODULE_ALIAS_CRYPTO("curve25519");
-MODULE_ALIAS_CRYPTO("curve25519-x86");
-MODULE_DESCRIPTION("Curve25519 algorithm, ADX optimized");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");