author     Linus Torvalds <torvalds@linux-foundation.org>  2025-07-28 17:43:29 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-07-28 17:43:29 -0700
commit     a578dd095dfe8b56c167201d9aea43e47d27f807
tree       6d4ee6e286b92ebad6d10572af74eb15fd31973f /arch
parent     8e736a2eeaf261213b4557778e015699da1e1c8c
parent     118da22eb6fbd48f896d17411f942399283d600c
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers:

 - Reorganize the architecture-optimized CRC code

   It now lives in lib/crc/$(SRCARCH)/ rather than arch/$(SRCARCH)/lib/,
   and it is no longer artificially split into separate generic and arch
   modules. This allows better inlining and dead code elimination.

   The generic CRC code is also no longer exported, simplifying the API.

   (This mirrors the similar changes to SHA-1 and SHA-2 in lib/crypto/,
   which can be found in the "Crypto library updates" pull request)

 - Improve crc32c() performance on newer x86_64 CPUs on long messages by
   enabling the VPCLMULQDQ optimized code

 - Simplify the crypto_shash wrappers for crc32_le() and crc32c()

   Register just one shash algorithm for each that uses the (fully
   optimized) library functions, instead of unnecessarily providing
   direct access to the generic CRC code

 - Remove unused and obsolete drivers for hardware CRC engines

 - Remove CRC-32 combination functions that are no longer used

 - Add kerneldoc for crc32_le(), crc32_be(), and crc32c()

 - Convert the crc32() macro to an inline function

* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (26 commits)
  lib/crc: x86/crc32c: Enable VPCLMULQDQ optimization where beneficial
  lib/crc: x86: Reorganize crc-pclmul static_call initialization
  lib/crc: crc64: Add include/linux/crc64.h to kernel-api.rst
  lib/crc: crc32: Change crc32() from macro to inline function and remove cast
  nvmem: layouts: Switch from crc32() to crc32_le()
  lib/crc: crc32: Document crc32_le(), crc32_be(), and crc32c()
  lib/crc: Explicitly include <linux/export.h>
  lib/crc: Remove ARCH_HAS_* kconfig symbols
  lib/crc: x86: Migrate optimized CRC code into lib/crc/
  lib/crc: sparc: Migrate optimized CRC code into lib/crc/
  lib/crc: s390: Migrate optimized CRC code into lib/crc/
  lib/crc: riscv: Migrate optimized CRC code into lib/crc/
  lib/crc: powerpc: Migrate optimized CRC code into lib/crc/
  lib/crc: mips: Migrate optimized CRC code into lib/crc/
  lib/crc: loongarch: Migrate optimized CRC code into lib/crc/
  lib/crc: arm64: Migrate optimized CRC code into lib/crc/
  lib/crc: arm: Migrate optimized CRC code into lib/crc/
  lib/crc: Prepare for arch-optimized code in subdirs of lib/crc/
  lib/crc: Move files into lib/crc/
  lib/crc32: Remove unused combination support
  ...
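[ Illustration, not part of the commit: a minimal, hypothetical caller of
  the library interface that the kerneldoc bullet above refers to. The
  seed-with-all-ones-and-invert convention shown is the common CRC-32
  usage, not something this merge mandates. ]

#include <linux/crc32.h>

/* Hypothetical helper: plain CRC-32 (little-endian bit order) of a
 * buffer, via the fully optimized code now living in lib/crc/. */
static u32 example_crc32(const u8 *buf, size_t len)
{
	return ~crc32_le(~0, buf, len);
}

/* Hypothetical helper: CRC-32C (Castagnoli polynomial, as used by
 * iSCSI, ext4 and btrfs), through the same library API. */
static u32 example_crc32c(const u8 *buf, size_t len)
{
	return ~crc32c(~0, buf, len);
}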
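[ Illustration, not part of the commit: the approximate shape of the
  "Convert the crc32() macro to an inline function" change mentioned
  above, reconstructed for clarity; see the lib/crc commit itself for
  the exact definitions. ]

/* Before (approximate): a macro that cast its data argument */
#define crc32(seed, data, length) \
	crc32_le(seed, (unsigned char const *)(data), (length))

/* After (approximate): a type-checked static inline, with no cast
 * needed since const void * converts implicitly in C */
static inline u32 crc32(u32 crc, const void *p, size_t len)
{
	return crc32_le(crc, p, len);
}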
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/configs/multi_v7_defconfig | 1
-rw-r--r--  arch/arm/lib/Makefile | 6
-rw-r--r--  arch/arm/lib/crc-t10dif-core.S | 468
-rw-r--r--  arch/arm/lib/crc-t10dif.c | 72
-rw-r--r--  arch/arm/lib/crc32-core.S | 306
-rw-r--r--  arch/arm/lib/crc32.c | 123
-rw-r--r--  arch/arm64/Kconfig | 2
-rw-r--r--  arch/arm64/lib/Makefile | 6
-rw-r--r--  arch/arm64/lib/crc-t10dif-core.S | 469
-rw-r--r--  arch/arm64/lib/crc-t10dif.c | 73
-rw-r--r--  arch/arm64/lib/crc32-core.S | 362
-rw-r--r--  arch/arm64/lib/crc32.c | 99
-rw-r--r--  arch/loongarch/Kconfig | 1
-rw-r--r--  arch/loongarch/lib/Makefile | 2
-rw-r--r--  arch/loongarch/lib/crc32-loongarch.c | 136
-rw-r--r--  arch/mips/Kconfig | 1
-rw-r--r--  arch/mips/lib/Makefile | 2
-rw-r--r--  arch/mips/lib/crc32-mips.c | 183
-rw-r--r--  arch/powerpc/Kconfig | 2
-rw-r--r--  arch/powerpc/lib/Makefile | 6
-rw-r--r--  arch/powerpc/lib/crc-t10dif.c | 83
-rw-r--r--  arch/powerpc/lib/crc-vpmsum-template.S | 746
-rw-r--r--  arch/powerpc/lib/crc32.c | 93
-rw-r--r--  arch/powerpc/lib/crc32c-vpmsum_asm.S | 842
-rw-r--r--  arch/powerpc/lib/crct10dif-vpmsum_asm.S | 845
-rw-r--r--  arch/riscv/Kconfig | 3
-rw-r--r--  arch/riscv/lib/Makefile | 6
-rw-r--r--  arch/riscv/lib/crc-clmul-consts.h | 122
-rw-r--r--  arch/riscv/lib/crc-clmul-template.h | 265
-rw-r--r--  arch/riscv/lib/crc-clmul.h | 23
-rw-r--r--  arch/riscv/lib/crc-t10dif.c | 24
-rw-r--r--  arch/riscv/lib/crc16_msb.c | 18
-rw-r--r--  arch/riscv/lib/crc32.c | 53
-rw-r--r--  arch/riscv/lib/crc32_lsb.c | 18
-rw-r--r--  arch/riscv/lib/crc32_msb.c | 18
-rw-r--r--  arch/riscv/lib/crc64.c | 34
-rw-r--r--  arch/riscv/lib/crc64_lsb.c | 18
-rw-r--r--  arch/riscv/lib/crc64_msb.c | 18
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/lib/Makefile | 3
-rw-r--r--  arch/s390/lib/crc32-vx.h | 12
-rw-r--r--  arch/s390/lib/crc32.c | 77
-rw-r--r--  arch/s390/lib/crc32be-vx.c | 174
-rw-r--r--  arch/s390/lib/crc32le-vx.c | 240
-rw-r--r--  arch/sparc/Kconfig | 1
-rw-r--r--  arch/sparc/lib/Makefile | 2
-rw-r--r--  arch/sparc/lib/crc32.c | 93
-rw-r--r--  arch/sparc/lib/crc32c_asm.S | 20
-rw-r--r--  arch/x86/Kconfig | 3
-rw-r--r--  arch/x86/lib/Makefile | 10
-rw-r--r--  arch/x86/lib/crc-pclmul-consts.h | 195
-rw-r--r--  arch/x86/lib/crc-pclmul-template.S | 582
-rw-r--r--  arch/x86/lib/crc-pclmul-template.h | 76
-rw-r--r--  arch/x86/lib/crc-t10dif.c | 40
-rw-r--r--  arch/x86/lib/crc16-msb-pclmul.S | 6
-rw-r--r--  arch/x86/lib/crc32-pclmul.S | 6
-rw-r--r--  arch/x86/lib/crc32.c | 111
-rw-r--r--  arch/x86/lib/crc32c-3way.S | 360
-rw-r--r--  arch/x86/lib/crc64-pclmul.S | 7
-rw-r--r--  arch/x86/lib/crc64.c | 50
61 files changed, 0 insertions, 7620 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6136d4dc211a20..a6e80653abd1b3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,8 +8,6 @@ config ARM
select ARCH_HAS_CACHE_LINE_SIZE if OF
select ARCH_HAS_CPU_CACHE_ALIASING
select ARCH_HAS_CPU_FINALIZE_INIT if MMU
- select ARCH_HAS_CRC32 if KERNEL_MODE_NEON
- select ARCH_HAS_CRC_T10DIF if KERNEL_MODE_NEON
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DMA_ALLOC if MMU
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 50c170b4619f72..c79495e113c85d 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1298,7 +1298,6 @@ CONFIG_CRYPTO_DEV_MARVELL_CESA=m
CONFIG_CRYPTO_DEV_QCE=m
CONFIG_CRYPTO_DEV_QCOM_RNG=m
CONFIG_CRYPTO_DEV_ROCKCHIP=m
-CONFIG_CRYPTO_DEV_STM32_CRC=m
CONFIG_CRYPTO_DEV_STM32_HASH=m
CONFIG_CRYPTO_DEV_STM32_CRYP=m
CONFIG_CMA_SIZE_MBYTES=64
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 91ea0e29107afc..d1b9ea20236486 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -47,9 +47,3 @@ ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
endif
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
-
-obj-$(CONFIG_CRC32_ARCH) += crc32-arm.o
-crc32-arm-y := crc32.o crc32-core.o
-
-obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm.o
-crc-t10dif-arm-y := crc-t10dif.o crc-t10dif-core.o
diff --git a/arch/arm/lib/crc-t10dif-core.S b/arch/arm/lib/crc-t10dif-core.S
deleted file mode 100644
index 2bbf2df9c1e2fe..00000000000000
--- a/arch/arm/lib/crc-t10dif-core.S
+++ /dev/null
@@ -1,468 +0,0 @@
-//
-// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
-//
-// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
-// Copyright (C) 2019 Google LLC <ebiggers@google.com>
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License version 2 as
-// published by the Free Software Foundation.
-//
-
-// Derived from the x86 version:
-//
-// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-//
-// Copyright (c) 2013, Intel Corporation
-//
-// Authors:
-// Erdinc Ozturk <erdinc.ozturk@intel.com>
-// Vinodh Gopal <vinodh.gopal@intel.com>
-// James Guilford <james.guilford@intel.com>
-// Tim Chen <tim.c.chen@linux.intel.com>
-//
-// This software is available to you under a choice of one of two
-// licenses. You may choose to be licensed under the terms of the GNU
-// General Public License (GPL) Version 2, available from the file
-// COPYING in the main directory of this source tree, or the
-// OpenIB.org BSD license below:
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the
-// distribution.
-//
-// * Neither the name of the Intel Corporation nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-//
-// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Reference paper titled "Fast CRC Computation for Generic
-// Polynomials Using PCLMULQDQ Instruction"
-// URL: http://www.intel.com/content/dam/www/public/us/en/documents
-// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-//
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-#ifdef CONFIG_CPU_ENDIAN_BE8
-#define CPU_LE(code...)
-#else
-#define CPU_LE(code...) code
-#endif
-
- .text
- .arch armv8-a
- .fpu crypto-neon-fp-armv8
-
- init_crc .req r0
- buf .req r1
- len .req r2
-
- fold_consts_ptr .req ip
-
- q0l .req d0
- q0h .req d1
- q1l .req d2
- q1h .req d3
- q2l .req d4
- q2h .req d5
- q3l .req d6
- q3h .req d7
- q4l .req d8
- q4h .req d9
- q5l .req d10
- q5h .req d11
- q6l .req d12
- q6h .req d13
- q7l .req d14
- q7h .req d15
- q8l .req d16
- q8h .req d17
- q9l .req d18
- q9h .req d19
- q10l .req d20
- q10h .req d21
- q11l .req d22
- q11h .req d23
- q12l .req d24
- q12h .req d25
-
- FOLD_CONSTS .req q10
- FOLD_CONST_L .req q10l
- FOLD_CONST_H .req q10h
-
- /*
- * Pairwise long polynomial multiplication of two 16-bit values
- *
- * { w0, w1 }, { y0, y1 }
- *
- * by two 64-bit values
- *
- * { x0, x1, x2, x3, x4, x5, x6, x7 }, { z0, z1, z2, z3, z4, z5, z6, z7 }
- *
- * where each vector element is a byte, ordered from least to most
- * significant. The resulting 80-bit vectors are XOR'ed together.
- *
- * This can be implemented using 8x8 long polynomial multiplication, by
- * reorganizing the input so that each pairwise 8x8 multiplication
- * produces one of the terms from the decomposition below, and
- * combining the results of each rank and shifting them into place.
- *
- * Rank
- * 0 w0*x0 ^ | y0*z0 ^
- * 1 (w0*x1 ^ w1*x0) << 8 ^ | (y0*z1 ^ y1*z0) << 8 ^
- * 2 (w0*x2 ^ w1*x1) << 16 ^ | (y0*z2 ^ y1*z1) << 16 ^
- * 3 (w0*x3 ^ w1*x2) << 24 ^ | (y0*z3 ^ y1*z2) << 24 ^
- * 4 (w0*x4 ^ w1*x3) << 32 ^ | (y0*z4 ^ y1*z3) << 32 ^
- * 5 (w0*x5 ^ w1*x4) << 40 ^ | (y0*z5 ^ y1*z4) << 40 ^
- * 6 (w0*x6 ^ w1*x5) << 48 ^ | (y0*z6 ^ y1*z5) << 48 ^
- * 7 (w0*x7 ^ w1*x6) << 56 ^ | (y0*z7 ^ y1*z6) << 56 ^
- * 8 w1*x7 << 64 | y1*z7 << 64
- *
- * The inputs can be reorganized into
- *
- * { w0, w0, w0, w0, y0, y0, y0, y0 }, { w1, w1, w1, w1, y1, y1, y1, y1 }
- * { x0, x2, x4, x6, z0, z2, z4, z6 }, { x1, x3, x5, x7, z1, z3, z5, z7 }
- *
- * and after performing 8x8->16 bit long polynomial multiplication of
- * each of the halves of the first vector with those of the second one,
- * we obtain the following four vectors of 16-bit elements:
- *
- * a := { w0*x0, w0*x2, w0*x4, w0*x6 }, { y0*z0, y0*z2, y0*z4, y0*z6 }
- * b := { w0*x1, w0*x3, w0*x5, w0*x7 }, { y0*z1, y0*z3, y0*z5, y0*z7 }
- * c := { w1*x0, w1*x2, w1*x4, w1*x6 }, { y1*z0, y1*z2, y1*z4, y1*z6 }
- * d := { w1*x1, w1*x3, w1*x5, w1*x7 }, { y1*z1, y1*z3, y1*z5, y1*z7 }
- *
- * Results b and c can be XORed together, as the vector elements have
- * matching ranks. Then, the final XOR can be pulled forward, and
- * applied between the halves of each of the remaining three vectors,
- * which are then shifted into place, and XORed together to produce the
- * final 80-bit result.
- */
- .macro pmull16x64_p8, v16, v64
- vext.8 q11, \v64, \v64, #1
- vld1.64 {q12}, [r4, :128]
- vuzp.8 q11, \v64
- vtbl.8 d24, {\v16\()_L-\v16\()_H}, d24
- vtbl.8 d25, {\v16\()_L-\v16\()_H}, d25
- bl __pmull16x64_p8
- veor \v64, q12, q14
- .endm
-
-__pmull16x64_p8:
- vmull.p8 q13, d23, d24
- vmull.p8 q14, d23, d25
- vmull.p8 q15, d22, d24
- vmull.p8 q12, d22, d25
-
- veor q14, q14, q15
- veor d24, d24, d25
- veor d26, d26, d27
- veor d28, d28, d29
- vmov.i32 d25, #0
- vmov.i32 d29, #0
- vext.8 q12, q12, q12, #14
- vext.8 q14, q14, q14, #15
- veor d24, d24, d26
- bx lr
-ENDPROC(__pmull16x64_p8)
-
- .macro pmull16x64_p64, v16, v64
- vmull.p64 q11, \v64\()l, \v16\()_L
- vmull.p64 \v64, \v64\()h, \v16\()_H
- veor \v64, \v64, q11
- .endm
-
- // Fold reg1, reg2 into the next 32 data bytes, storing the result back
- // into reg1, reg2.
- .macro fold_32_bytes, reg1, reg2, p
- vld1.64 {q8-q9}, [buf]!
-
- pmull16x64_\p FOLD_CONST, \reg1
- pmull16x64_\p FOLD_CONST, \reg2
-
-CPU_LE( vrev64.8 q8, q8 )
-CPU_LE( vrev64.8 q9, q9 )
- vswp q8l, q8h
- vswp q9l, q9h
-
- veor.8 \reg1, \reg1, q8
- veor.8 \reg2, \reg2, q9
- .endm
-
- // Fold src_reg into dst_reg, optionally loading the next fold constants
- .macro fold_16_bytes, src_reg, dst_reg, p, load_next_consts
- pmull16x64_\p FOLD_CONST, \src_reg
- .ifnb \load_next_consts
- vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]!
- .endif
- veor.8 \dst_reg, \dst_reg, \src_reg
- .endm
-
- .macro crct10dif, p
- // For sizes less than 256 bytes, we can't fold 128 bytes at a time.
- cmp len, #256
- blt .Lless_than_256_bytes\@
-
- mov_l fold_consts_ptr, .Lfold_across_128_bytes_consts
-
- // Load the first 128 data bytes. Byte swapping is necessary to make
- // the bit order match the polynomial coefficient order.
- vld1.64 {q0-q1}, [buf]!
- vld1.64 {q2-q3}, [buf]!
- vld1.64 {q4-q5}, [buf]!
- vld1.64 {q6-q7}, [buf]!
-CPU_LE( vrev64.8 q0, q0 )
-CPU_LE( vrev64.8 q1, q1 )
-CPU_LE( vrev64.8 q2, q2 )
-CPU_LE( vrev64.8 q3, q3 )
-CPU_LE( vrev64.8 q4, q4 )
-CPU_LE( vrev64.8 q5, q5 )
-CPU_LE( vrev64.8 q6, q6 )
-CPU_LE( vrev64.8 q7, q7 )
- vswp q0l, q0h
- vswp q1l, q1h
- vswp q2l, q2h
- vswp q3l, q3h
- vswp q4l, q4h
- vswp q5l, q5h
- vswp q6l, q6h
- vswp q7l, q7h
-
- // XOR the first 16 data *bits* with the initial CRC value.
- vmov.i8 q8h, #0
- vmov.u16 q8h[3], init_crc
- veor q0h, q0h, q8h
-
- // Load the constants for folding across 128 bytes.
- vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]!
-
- // Subtract 128 for the 128 data bytes just consumed. Subtract another
- // 128 to simplify the termination condition of the following loop.
- sub len, len, #256
-
- // While >= 128 data bytes remain (not counting q0-q7), fold the 128
- // bytes q0-q7 into them, storing the result back into q0-q7.
-.Lfold_128_bytes_loop\@:
- fold_32_bytes q0, q1, \p
- fold_32_bytes q2, q3, \p
- fold_32_bytes q4, q5, \p
- fold_32_bytes q6, q7, \p
- subs len, len, #128
- bge .Lfold_128_bytes_loop\@
-
- // Now fold the 112 bytes in q0-q6 into the 16 bytes in q7.
-
- // Fold across 64 bytes.
- vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]!
- fold_16_bytes q0, q4, \p
- fold_16_bytes q1, q5, \p
- fold_16_bytes q2, q6, \p
- fold_16_bytes q3, q7, \p, 1
- // Fold across 32 bytes.
- fold_16_bytes q4, q6, \p
- fold_16_bytes q5, q7, \p, 1
- // Fold across 16 bytes.
- fold_16_bytes q6, q7, \p
-
- // Add 128 to get the correct number of data bytes remaining in 0...127
- // (not counting q7), following the previous extra subtraction by 128.
- // Then subtract 16 to simplify the termination condition of the
- // following loop.
- adds len, len, #(128-16)
-
- // While >= 16 data bytes remain (not counting q7), fold the 16 bytes q7
- // into them, storing the result back into q7.
- blt .Lfold_16_bytes_loop_done\@
-.Lfold_16_bytes_loop\@:
- pmull16x64_\p FOLD_CONST, q7
- vld1.64 {q0}, [buf]!
-CPU_LE( vrev64.8 q0, q0 )
- vswp q0l, q0h
- veor.8 q7, q7, q0
- subs len, len, #16
- bge .Lfold_16_bytes_loop\@
-
-.Lfold_16_bytes_loop_done\@:
- // Add 16 to get the correct number of data bytes remaining in 0...15
- // (not counting q7), following the previous extra subtraction by 16.
- adds len, len, #16
- beq .Lreduce_final_16_bytes\@
-
-.Lhandle_partial_segment\@:
- // Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first
- // 16 bytes are in q7 and the rest are the remaining data in 'buf'. To
- // do this without needing a fold constant for each possible 'len',
- // redivide the bytes into a first chunk of 'len' bytes and a second
- // chunk of 16 bytes, then fold the first chunk into the second.
-
- // q0 = last 16 original data bytes
- add buf, buf, len
- sub buf, buf, #16
- vld1.64 {q0}, [buf]
-CPU_LE( vrev64.8 q0, q0 )
- vswp q0l, q0h
-
- // q1 = high order part of second chunk: q7 left-shifted by 'len' bytes.
- mov_l r1, .Lbyteshift_table + 16
- sub r1, r1, len
- vld1.8 {q2}, [r1]
- vtbl.8 q1l, {q7l-q7h}, q2l
- vtbl.8 q1h, {q7l-q7h}, q2h
-
- // q3 = first chunk: q7 right-shifted by '16-len' bytes.
- vmov.i8 q3, #0x80
- veor.8 q2, q2, q3
- vtbl.8 q3l, {q7l-q7h}, q2l
- vtbl.8 q3h, {q7l-q7h}, q2h
-
- // Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes.
- vshr.s8 q2, q2, #7
-
- // q2 = second chunk: 'len' bytes from q0 (low-order bytes),
- // then '16-len' bytes from q1 (high-order bytes).
- vbsl.8 q2, q1, q0
-
- // Fold the first chunk into the second chunk, storing the result in q7.
- pmull16x64_\p FOLD_CONST, q3
- veor.8 q7, q3, q2
- b .Lreduce_final_16_bytes\@
-
-.Lless_than_256_bytes\@:
- // Checksumming a buffer of length 16...255 bytes
-
- mov_l fold_consts_ptr, .Lfold_across_16_bytes_consts
-
- // Load the first 16 data bytes.
- vld1.64 {q7}, [buf]!
-CPU_LE( vrev64.8 q7, q7 )
- vswp q7l, q7h
-
- // XOR the first 16 data *bits* with the initial CRC value.
- vmov.i8 q0h, #0
- vmov.u16 q0h[3], init_crc
- veor.8 q7h, q7h, q0h
-
- // Load the fold-across-16-bytes constants.
- vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]!
-
- cmp len, #16
- beq .Lreduce_final_16_bytes\@ // len == 16
- subs len, len, #32
- addlt len, len, #16
- blt .Lhandle_partial_segment\@ // 17 <= len <= 31
- b .Lfold_16_bytes_loop\@ // 32 <= len <= 255
-
-.Lreduce_final_16_bytes\@:
- .endm
-
-//
-// u16 crc_t10dif_pmull(u16 init_crc, const u8 *buf, size_t len);
-//
-// Assumes len >= 16.
-//
-ENTRY(crc_t10dif_pmull64)
- crct10dif p64
-
- // Reduce the 128-bit value M(x), stored in q7, to the final 16-bit CRC.
-
- // Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
- vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]!
-
- // Fold the high 64 bits into the low 64 bits, while also multiplying by
- // x^64. This produces a 128-bit value congruent to x^64 * M(x) and
- // whose low 48 bits are 0.
- vmull.p64 q0, q7h, FOLD_CONST_H // high bits * x^48 * (x^80 mod G(x))
- veor.8 q0h, q0h, q7l // + low bits * x^64
-
- // Fold the high 32 bits into the low 96 bits. This produces a 96-bit
- // value congruent to x^64 * M(x) and whose low 48 bits are 0.
- vmov.i8 q1, #0
- vmov s4, s3 // extract high 32 bits
- vmov s3, s5 // zero high 32 bits
- vmull.p64 q1, q1l, FOLD_CONST_L // high 32 bits * x^48 * (x^48 mod G(x))
- veor.8 q0, q0, q1 // + low bits
-
- // Load G(x) and floor(x^48 / G(x)).
- vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]
-
- // Use Barrett reduction to compute the final CRC value.
- vmull.p64 q1, q0h, FOLD_CONST_H // high 32 bits * floor(x^48 / G(x))
- vshr.u64 q1l, q1l, #32 // /= x^32
- vmull.p64 q1, q1l, FOLD_CONST_L // *= G(x)
- vshr.u64 q0l, q0l, #48
- veor.8 q0l, q0l, q1l // + low 16 nonzero bits
- // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of q0.
-
- vmov.u16 r0, q0l[0]
- bx lr
-ENDPROC(crc_t10dif_pmull64)
-
-ENTRY(crc_t10dif_pmull8)
- push {r4, lr}
- mov_l r4, .L16x64perm
-
- crct10dif p8
-
-CPU_LE( vrev64.8 q7, q7 )
- vswp q7l, q7h
- vst1.64 {q7}, [r3, :128]
- pop {r4, pc}
-ENDPROC(crc_t10dif_pmull8)
-
- .section ".rodata", "a"
- .align 4
-
-// Fold constants precomputed from the polynomial 0x18bb7
-// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
-.Lfold_across_128_bytes_consts:
- .quad 0x0000000000006123 // x^(8*128) mod G(x)
- .quad 0x0000000000002295 // x^(8*128+64) mod G(x)
-// .Lfold_across_64_bytes_consts:
- .quad 0x0000000000001069 // x^(4*128) mod G(x)
- .quad 0x000000000000dd31 // x^(4*128+64) mod G(x)
-// .Lfold_across_32_bytes_consts:
- .quad 0x000000000000857d // x^(2*128) mod G(x)
- .quad 0x0000000000007acc // x^(2*128+64) mod G(x)
-.Lfold_across_16_bytes_consts:
- .quad 0x000000000000a010 // x^(1*128) mod G(x)
- .quad 0x0000000000001faa // x^(1*128+64) mod G(x)
-// .Lfinal_fold_consts:
- .quad 0x1368000000000000 // x^48 * (x^48 mod G(x))
- .quad 0x2d56000000000000 // x^48 * (x^80 mod G(x))
-// .Lbarrett_reduction_consts:
- .quad 0x0000000000018bb7 // G(x)
- .quad 0x00000001f65a57f8 // floor(x^48 / G(x))
-
-// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 -
-// len] is the index vector to shift left by 'len' bytes, and is also {0x80,
-// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes.
-.Lbyteshift_table:
- .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
- .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
- .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
- .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0
-
-.L16x64perm:
- .quad 0x808080800000000, 0x909090901010101
diff --git a/arch/arm/lib/crc-t10dif.c b/arch/arm/lib/crc-t10dif.c
deleted file mode 100644
index 1093f8ec13b0be..00000000000000
--- a/arch/arm/lib/crc-t10dif.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/crc-t10dif.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/simd.h>
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
-
-#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
-
-asmlinkage u16 crc_t10dif_pmull64(u16 init_crc, const u8 *buf, size_t len);
-asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
- u8 out[16]);
-
-u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
-{
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
- if (static_branch_likely(&have_pmull)) {
- if (crypto_simd_usable()) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
- } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
- static_branch_likely(&have_neon) &&
- crypto_simd_usable()) {
- u8 buf[16] __aligned(16);
-
- kernel_neon_begin();
- crc_t10dif_pmull8(crc, data, length, buf);
- kernel_neon_end();
-
- return crc_t10dif_generic(0, buf, sizeof(buf));
- }
- }
- return crc_t10dif_generic(crc, data, length);
-}
-EXPORT_SYMBOL(crc_t10dif_arch);
-
-static int __init crc_t10dif_arm_init(void)
-{
- if (elf_hwcap & HWCAP_NEON) {
- static_branch_enable(&have_neon);
- if (elf_hwcap2 & HWCAP2_PMULL)
- static_branch_enable(&have_pmull);
- }
- return 0;
-}
-subsys_initcall(crc_t10dif_arm_init);
-
-static void __exit crc_t10dif_arm_exit(void)
-{
-}
-module_exit(crc_t10dif_arm_exit);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/lib/crc32-core.S b/arch/arm/lib/crc32-core.S
deleted file mode 100644
index 6f674f30c70b33..00000000000000
--- a/arch/arm/lib/crc32-core.S
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
- * at:
- * https://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- * Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .text
- .align 6
- .arch armv8-a
- .arch_extension crc
- .fpu crypto-neon-fp-armv8
-
-.Lcrc32_constants:
- /*
- * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
- * #define CONSTANT_R1 0x154442bd4LL
- *
- * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
- * #define CONSTANT_R2 0x1c6e41596LL
- */
- .quad 0x0000000154442bd4
- .quad 0x00000001c6e41596
-
- /*
- * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
- * #define CONSTANT_R3 0x1751997d0LL
- *
- * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
- * #define CONSTANT_R4 0x0ccaa009eLL
- */
- .quad 0x00000001751997d0
- .quad 0x00000000ccaa009e
-
- /*
- * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
- * #define CONSTANT_R5 0x163cd6124LL
- */
- .quad 0x0000000163cd6124
- .quad 0x00000000FFFFFFFF
-
- /*
- * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
- *
- * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
- * = 0x1F7011641LL
- * #define CONSTANT_RU 0x1F7011641LL
- */
- .quad 0x00000001DB710641
- .quad 0x00000001F7011641
-
-.Lcrc32c_constants:
- .quad 0x00000000740eef02
- .quad 0x000000009e4addf8
- .quad 0x00000000f20c0dfe
- .quad 0x000000014cd00bd6
- .quad 0x00000000dd45aab8
- .quad 0x00000000FFFFFFFF
- .quad 0x0000000105ec76f0
- .quad 0x00000000dea713f1
-
- dCONSTANTl .req d0
- dCONSTANTh .req d1
- qCONSTANT .req q0
-
- BUF .req r0
- LEN .req r1
- CRC .req r2
-
- qzr .req q9
-
- /**
- * Calculate crc32
- * BUF - buffer
- * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
- * CRC - initial crc32
- * return %eax crc32
- * uint crc32_pmull_le(unsigned char const *buffer,
- * size_t len, uint crc32)
- */
-SYM_FUNC_START(crc32_pmull_le)
- adr r3, .Lcrc32_constants
- b 0f
-SYM_FUNC_END(crc32_pmull_le)
-
-SYM_FUNC_START(crc32c_pmull_le)
- adr r3, .Lcrc32c_constants
-
-0: bic LEN, LEN, #15
- vld1.8 {q1-q2}, [BUF, :128]!
- vld1.8 {q3-q4}, [BUF, :128]!
- vmov.i8 qzr, #0
- vmov.i8 qCONSTANT, #0
- vmov.32 dCONSTANTl[0], CRC
- veor.8 d2, d2, dCONSTANTl
- sub LEN, LEN, #0x40
- cmp LEN, #0x40
- blt less_64
-
- vld1.64 {qCONSTANT}, [r3]
-
-loop_64: /* 64 bytes Full cache line folding */
- sub LEN, LEN, #0x40
-
- vmull.p64 q5, d3, dCONSTANTh
- vmull.p64 q6, d5, dCONSTANTh
- vmull.p64 q7, d7, dCONSTANTh
- vmull.p64 q8, d9, dCONSTANTh
-
- vmull.p64 q1, d2, dCONSTANTl
- vmull.p64 q2, d4, dCONSTANTl
- vmull.p64 q3, d6, dCONSTANTl
- vmull.p64 q4, d8, dCONSTANTl
-
- veor.8 q1, q1, q5
- vld1.8 {q5}, [BUF, :128]!
- veor.8 q2, q2, q6
- vld1.8 {q6}, [BUF, :128]!
- veor.8 q3, q3, q7
- vld1.8 {q7}, [BUF, :128]!
- veor.8 q4, q4, q8
- vld1.8 {q8}, [BUF, :128]!
-
- veor.8 q1, q1, q5
- veor.8 q2, q2, q6
- veor.8 q3, q3, q7
- veor.8 q4, q4, q8
-
- cmp LEN, #0x40
- bge loop_64
-
-less_64: /* Folding cache line into 128bit */
- vldr dCONSTANTl, [r3, #16]
- vldr dCONSTANTh, [r3, #24]
-
- vmull.p64 q5, d3, dCONSTANTh
- vmull.p64 q1, d2, dCONSTANTl
- veor.8 q1, q1, q5
- veor.8 q1, q1, q2
-
- vmull.p64 q5, d3, dCONSTANTh
- vmull.p64 q1, d2, dCONSTANTl
- veor.8 q1, q1, q5
- veor.8 q1, q1, q3
-
- vmull.p64 q5, d3, dCONSTANTh
- vmull.p64 q1, d2, dCONSTANTl
- veor.8 q1, q1, q5
- veor.8 q1, q1, q4
-
- teq LEN, #0
- beq fold_64
-
-loop_16: /* Folding rest buffer into 128bit */
- subs LEN, LEN, #0x10
-
- vld1.8 {q2}, [BUF, :128]!
- vmull.p64 q5, d3, dCONSTANTh
- vmull.p64 q1, d2, dCONSTANTl
- veor.8 q1, q1, q5
- veor.8 q1, q1, q2
-
- bne loop_16
-
-fold_64:
- /* perform the last 64 bit fold, also adds 32 zeroes
- * to the input stream */
- vmull.p64 q2, d2, dCONSTANTh
- vext.8 q1, q1, qzr, #8
- veor.8 q1, q1, q2
-
- /* final 32-bit fold */
- vldr dCONSTANTl, [r3, #32]
- vldr d6, [r3, #40]
- vmov.i8 d7, #0
-
- vext.8 q2, q1, qzr, #4
- vand.8 d2, d2, d6
- vmull.p64 q1, d2, dCONSTANTl
- veor.8 q1, q1, q2
-
- /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
- vldr dCONSTANTl, [r3, #48]
- vldr dCONSTANTh, [r3, #56]
-
- vand.8 q2, q1, q3
- vext.8 q2, qzr, q2, #8
- vmull.p64 q2, d5, dCONSTANTh
- vand.8 q2, q2, q3
- vmull.p64 q2, d4, dCONSTANTl
- veor.8 q1, q1, q2
- vmov r0, s5
-
- bx lr
-SYM_FUNC_END(crc32c_pmull_le)
-
- .macro __crc32, c
- subs ip, r2, #8
- bmi .Ltail\c
-
- tst r1, #3
- bne .Lunaligned\c
-
- teq ip, #0
-.Laligned8\c:
- ldrd r2, r3, [r1], #8
-ARM_BE8(rev r2, r2 )
-ARM_BE8(rev r3, r3 )
- crc32\c\()w r0, r0, r2
- crc32\c\()w r0, r0, r3
- bxeq lr
- subs ip, ip, #8
- bpl .Laligned8\c
-
-.Ltail\c:
- tst ip, #4
- beq 2f
- ldr r3, [r1], #4
-ARM_BE8(rev r3, r3 )
- crc32\c\()w r0, r0, r3
-
-2: tst ip, #2
- beq 1f
- ldrh r3, [r1], #2
-ARM_BE8(rev16 r3, r3 )
- crc32\c\()h r0, r0, r3
-
-1: tst ip, #1
- bxeq lr
- ldrb r3, [r1]
- crc32\c\()b r0, r0, r3
- bx lr
-
-.Lunaligned\c:
- tst r1, #1
- beq 2f
- ldrb r3, [r1], #1
- subs r2, r2, #1
- crc32\c\()b r0, r0, r3
-
- tst r1, #2
- beq 0f
-2: ldrh r3, [r1], #2
- subs r2, r2, #2
-ARM_BE8(rev16 r3, r3 )
- crc32\c\()h r0, r0, r3
-
-0: subs ip, r2, #8
- bpl .Laligned8\c
- b .Ltail\c
- .endm
-
- .align 5
-SYM_FUNC_START(crc32_armv8_le)
- __crc32
-SYM_FUNC_END(crc32_armv8_le)
-
- .align 5
-SYM_FUNC_START(crc32c_armv8_le)
- __crc32 c
-SYM_FUNC_END(crc32c_armv8_le)
diff --git a/arch/arm/lib/crc32.c b/arch/arm/lib/crc32.c
deleted file mode 100644
index f2bef8849c7c3a..00000000000000
--- a/arch/arm/lib/crc32.c
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/simd.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
-
-#define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */
-
-asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc);
-asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len);
-
-asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc);
-asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len);
-
-static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len)
-{
- if (static_branch_likely(&have_crc32))
- return crc32_armv8_le(crc, p, len);
- return crc32_le_base(crc, p, len);
-}
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- if (len >= PMULL_MIN_LEN + 15 &&
- static_branch_likely(&have_pmull) && crypto_simd_usable()) {
- size_t n = -(uintptr_t)p & 15;
-
- /* align p to 16-byte boundary */
- if (n) {
- crc = crc32_le_scalar(crc, p, n);
- p += n;
- len -= n;
- }
- n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32_pmull_le(p, n, crc);
- kernel_neon_end();
- p += n;
- len -= n;
- }
- return crc32_le_scalar(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len)
-{
- if (static_branch_likely(&have_crc32))
- return crc32c_armv8_le(crc, p, len);
- return crc32c_base(crc, p, len);
-}
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- if (len >= PMULL_MIN_LEN + 15 &&
- static_branch_likely(&have_pmull) && crypto_simd_usable()) {
- size_t n = -(uintptr_t)p & 15;
-
- /* align p to 16-byte boundary */
- if (n) {
- crc = crc32c_scalar(crc, p, n);
- p += n;
- len -= n;
- }
- n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32c_pmull_le(p, n, crc);
- kernel_neon_end();
- p += n;
- len -= n;
- }
- return crc32c_scalar(crc, p, len);
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-static int __init crc32_arm_init(void)
-{
- if (elf_hwcap2 & HWCAP2_CRC32)
- static_branch_enable(&have_crc32);
- if (elf_hwcap2 & HWCAP2_PMULL)
- static_branch_enable(&have_pmull);
- return 0;
-}
-subsys_initcall(crc32_arm_init);
-
-static void __exit crc32_arm_exit(void)
-{
-}
-module_exit(crc32_arm_exit);
-
-u32 crc32_optimizations(void)
-{
- if (elf_hwcap2 & (HWCAP2_CRC32 | HWCAP2_PMULL))
- return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e09d48eb68e076..7c456aa838b9a3 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,8 +21,6 @@ config ARM64
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_CC_PLATFORM
- select ARCH_HAS_CRC32
- select ARCH_HAS_CRC_T10DIF if KERNEL_MODE_NEON
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 027bfa9689c6a2..9b255d9332479c 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -16,12 +16,6 @@ endif
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
-obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o
-crc32-arm64-y := crc32.o crc32-core.o
-
-obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-arm64.o
-crc-t10dif-arm64-y := crc-t10dif.o crc-t10dif-core.o
-
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_ARM64_MTE) += mte.o
diff --git a/arch/arm64/lib/crc-t10dif-core.S b/arch/arm64/lib/crc-t10dif-core.S
deleted file mode 100644
index 87dd6d46224d8c..00000000000000
--- a/arch/arm64/lib/crc-t10dif-core.S
+++ /dev/null
@@ -1,469 +0,0 @@
-//
-// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
-//
-// Copyright (C) 2016 Linaro Ltd
-// Copyright (C) 2019-2024 Google LLC
-//
-// Authors: Ard Biesheuvel <ardb@google.com>
-// Eric Biggers <ebiggers@google.com>
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License version 2 as
-// published by the Free Software Foundation.
-//
-
-// Derived from the x86 version:
-//
-// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-//
-// Copyright (c) 2013, Intel Corporation
-//
-// Authors:
-// Erdinc Ozturk <erdinc.ozturk@intel.com>
-// Vinodh Gopal <vinodh.gopal@intel.com>
-// James Guilford <james.guilford@intel.com>
-// Tim Chen <tim.c.chen@linux.intel.com>
-//
-// This software is available to you under a choice of one of two
-// licenses. You may choose to be licensed under the terms of the GNU
-// General Public License (GPL) Version 2, available from the file
-// COPYING in the main directory of this source tree, or the
-// OpenIB.org BSD license below:
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the
-// distribution.
-//
-// * Neither the name of the Intel Corporation nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-//
-// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Reference paper titled "Fast CRC Computation for Generic
-// Polynomials Using PCLMULQDQ Instruction"
-// URL: http://www.intel.com/content/dam/www/public/us/en/documents
-// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-//
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .text
- .arch armv8-a+crypto
-
- init_crc .req w0
- buf .req x1
- len .req x2
- fold_consts_ptr .req x5
-
- fold_consts .req v10
-
- t3 .req v17
- t4 .req v18
- t5 .req v19
- t6 .req v20
- t7 .req v21
- t8 .req v22
-
- perm .req v27
-
- .macro pmull16x64_p64, a16, b64, c64
- pmull2 \c64\().1q, \a16\().2d, \b64\().2d
- pmull \b64\().1q, \a16\().1d, \b64\().1d
- .endm
-
- /*
- * Pairwise long polynomial multiplication of two 16-bit values
- *
- * { w0, w1 }, { y0, y1 }
- *
- * by two 64-bit values
- *
- * { x0, x1, x2, x3, x4, x5, x6, x7 }, { z0, z1, z2, z3, z4, z5, z6, z7 }
- *
- * where each vector element is a byte, ordered from least to most
- * significant.
- *
- * This can be implemented using 8x8 long polynomial multiplication, by
- * reorganizing the input so that each pairwise 8x8 multiplication
- * produces one of the terms from the decomposition below, and
- * combining the results of each rank and shifting them into place.
- *
- * Rank
- * 0 w0*x0 ^ | y0*z0 ^
- * 1 (w0*x1 ^ w1*x0) << 8 ^ | (y0*z1 ^ y1*z0) << 8 ^
- * 2 (w0*x2 ^ w1*x1) << 16 ^ | (y0*z2 ^ y1*z1) << 16 ^
- * 3 (w0*x3 ^ w1*x2) << 24 ^ | (y0*z3 ^ y1*z2) << 24 ^
- * 4 (w0*x4 ^ w1*x3) << 32 ^ | (y0*z4 ^ y1*z3) << 32 ^
- * 5 (w0*x5 ^ w1*x4) << 40 ^ | (y0*z5 ^ y1*z4) << 40 ^
- * 6 (w0*x6 ^ w1*x5) << 48 ^ | (y0*z6 ^ y1*z5) << 48 ^
- * 7 (w0*x7 ^ w1*x6) << 56 ^ | (y0*z7 ^ y1*z6) << 56 ^
- * 8 w1*x7 << 64 | y1*z7 << 64
- *
- * The inputs can be reorganized into
- *
- * { w0, w0, w0, w0, y0, y0, y0, y0 }, { w1, w1, w1, w1, y1, y1, y1, y1 }
- * { x0, x2, x4, x6, z0, z2, z4, z6 }, { x1, x3, x5, x7, z1, z3, z5, z7 }
- *
- * and after performing 8x8->16 bit long polynomial multiplication of
- * each of the halves of the first vector with those of the second one,
- * we obtain the following four vectors of 16-bit elements:
- *
- * a := { w0*x0, w0*x2, w0*x4, w0*x6 }, { y0*z0, y0*z2, y0*z4, y0*z6 }
- * b := { w0*x1, w0*x3, w0*x5, w0*x7 }, { y0*z1, y0*z3, y0*z5, y0*z7 }
- * c := { w1*x0, w1*x2, w1*x4, w1*x6 }, { y1*z0, y1*z2, y1*z4, y1*z6 }
- * d := { w1*x1, w1*x3, w1*x5, w1*x7 }, { y1*z1, y1*z3, y1*z5, y1*z7 }
- *
- * Results b and c can be XORed together, as the vector elements have
- * matching ranks. Then, the final XOR (*) can be pulled forward, and
- * applied between the halves of each of the remaining three vectors,
- * which are then shifted into place, and combined to produce two
- * 80-bit results.
- *
- * (*) NOTE: the 16x64 bit polynomial multiply below is not equivalent
- * to the 64x64 bit one above, but XOR'ing the outputs together will
- * produce the expected result, and this is sufficient in the context of
- * this algorithm.
- */
- .macro pmull16x64_p8, a16, b64, c64
- ext t7.16b, \b64\().16b, \b64\().16b, #1
- tbl t5.16b, {\a16\().16b}, perm.16b
- uzp1 t7.16b, \b64\().16b, t7.16b
- bl __pmull_p8_16x64
- ext \b64\().16b, t4.16b, t4.16b, #15
- eor \c64\().16b, t8.16b, t5.16b
- .endm
-
-SYM_FUNC_START_LOCAL(__pmull_p8_16x64)
- ext t6.16b, t5.16b, t5.16b, #8
-
- pmull t3.8h, t7.8b, t5.8b
- pmull t4.8h, t7.8b, t6.8b
- pmull2 t5.8h, t7.16b, t5.16b
- pmull2 t6.8h, t7.16b, t6.16b
-
- ext t8.16b, t3.16b, t3.16b, #8
- eor t4.16b, t4.16b, t6.16b
- ext t7.16b, t5.16b, t5.16b, #8
- ext t6.16b, t4.16b, t4.16b, #8
- eor t8.8b, t8.8b, t3.8b
- eor t5.8b, t5.8b, t7.8b
- eor t4.8b, t4.8b, t6.8b
- ext t5.16b, t5.16b, t5.16b, #14
- ret
-SYM_FUNC_END(__pmull_p8_16x64)
-
-
- // Fold reg1, reg2 into the next 32 data bytes, storing the result back
- // into reg1, reg2.
- .macro fold_32_bytes, p, reg1, reg2
- ldp q11, q12, [buf], #0x20
-
- pmull16x64_\p fold_consts, \reg1, v8
-
-CPU_LE( rev64 v11.16b, v11.16b )
-CPU_LE( rev64 v12.16b, v12.16b )
-
- pmull16x64_\p fold_consts, \reg2, v9
-
-CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
-CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
-
- eor \reg1\().16b, \reg1\().16b, v8.16b
- eor \reg2\().16b, \reg2\().16b, v9.16b
- eor \reg1\().16b, \reg1\().16b, v11.16b
- eor \reg2\().16b, \reg2\().16b, v12.16b
- .endm
-
- // Fold src_reg into dst_reg, optionally loading the next fold constants
- .macro fold_16_bytes, p, src_reg, dst_reg, load_next_consts
- pmull16x64_\p fold_consts, \src_reg, v8
- .ifnb \load_next_consts
- ld1 {fold_consts.2d}, [fold_consts_ptr], #16
- .endif
- eor \dst_reg\().16b, \dst_reg\().16b, v8.16b
- eor \dst_reg\().16b, \dst_reg\().16b, \src_reg\().16b
- .endm
-
- .macro crc_t10dif_pmull, p
-
- // For sizes less than 256 bytes, we can't fold 128 bytes at a time.
- cmp len, #256
- b.lt .Lless_than_256_bytes_\@
-
- adr_l fold_consts_ptr, .Lfold_across_128_bytes_consts
-
- // Load the first 128 data bytes. Byte swapping is necessary to make
- // the bit order match the polynomial coefficient order.
- ldp q0, q1, [buf]
- ldp q2, q3, [buf, #0x20]
- ldp q4, q5, [buf, #0x40]
- ldp q6, q7, [buf, #0x60]
- add buf, buf, #0x80
-CPU_LE( rev64 v0.16b, v0.16b )
-CPU_LE( rev64 v1.16b, v1.16b )
-CPU_LE( rev64 v2.16b, v2.16b )
-CPU_LE( rev64 v3.16b, v3.16b )
-CPU_LE( rev64 v4.16b, v4.16b )
-CPU_LE( rev64 v5.16b, v5.16b )
-CPU_LE( rev64 v6.16b, v6.16b )
-CPU_LE( rev64 v7.16b, v7.16b )
-CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
-CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
-CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
-CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 )
-CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 )
-CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
-CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
-CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
-
- // XOR the first 16 data *bits* with the initial CRC value.
- movi v8.16b, #0
- mov v8.h[7], init_crc
- eor v0.16b, v0.16b, v8.16b
-
- // Load the constants for folding across 128 bytes.
- ld1 {fold_consts.2d}, [fold_consts_ptr]
-
- // Subtract 128 for the 128 data bytes just consumed. Subtract another
- // 128 to simplify the termination condition of the following loop.
- sub len, len, #256
-
- // While >= 128 data bytes remain (not counting v0-v7), fold the 128
- // bytes v0-v7 into them, storing the result back into v0-v7.
-.Lfold_128_bytes_loop_\@:
- fold_32_bytes \p, v0, v1
- fold_32_bytes \p, v2, v3
- fold_32_bytes \p, v4, v5
- fold_32_bytes \p, v6, v7
-
- subs len, len, #128
- b.ge .Lfold_128_bytes_loop_\@
-
- // Now fold the 112 bytes in v0-v6 into the 16 bytes in v7.
-
- // Fold across 64 bytes.
- add fold_consts_ptr, fold_consts_ptr, #16
- ld1 {fold_consts.2d}, [fold_consts_ptr], #16
- fold_16_bytes \p, v0, v4
- fold_16_bytes \p, v1, v5
- fold_16_bytes \p, v2, v6
- fold_16_bytes \p, v3, v7, 1
- // Fold across 32 bytes.
- fold_16_bytes \p, v4, v6
- fold_16_bytes \p, v5, v7, 1
- // Fold across 16 bytes.
- fold_16_bytes \p, v6, v7
-
- // Add 128 to get the correct number of data bytes remaining in 0...127
- // (not counting v7), following the previous extra subtraction by 128.
- // Then subtract 16 to simplify the termination condition of the
- // following loop.
- adds len, len, #(128-16)
-
- // While >= 16 data bytes remain (not counting v7), fold the 16 bytes v7
- // into them, storing the result back into v7.
- b.lt .Lfold_16_bytes_loop_done_\@
-.Lfold_16_bytes_loop_\@:
- pmull16x64_\p fold_consts, v7, v8
- eor v7.16b, v7.16b, v8.16b
- ldr q0, [buf], #16
-CPU_LE( rev64 v0.16b, v0.16b )
-CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
- eor v7.16b, v7.16b, v0.16b
- subs len, len, #16
- b.ge .Lfold_16_bytes_loop_\@
-
-.Lfold_16_bytes_loop_done_\@:
- // Add 16 to get the correct number of data bytes remaining in 0...15
- // (not counting v7), following the previous extra subtraction by 16.
- adds len, len, #16
- b.eq .Lreduce_final_16_bytes_\@
-
-.Lhandle_partial_segment_\@:
- // Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first
- // 16 bytes are in v7 and the rest are the remaining data in 'buf'. To
- // do this without needing a fold constant for each possible 'len',
- // redivide the bytes into a first chunk of 'len' bytes and a second
- // chunk of 16 bytes, then fold the first chunk into the second.
-
- // v0 = last 16 original data bytes
- add buf, buf, len
- ldr q0, [buf, #-16]
-CPU_LE( rev64 v0.16b, v0.16b )
-CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
-
- // v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
- adr_l x4, .Lbyteshift_table + 16
- sub x4, x4, len
- ld1 {v2.16b}, [x4]
- tbl v1.16b, {v7.16b}, v2.16b
-
- // v3 = first chunk: v7 right-shifted by '16-len' bytes.
- movi v3.16b, #0x80
- eor v2.16b, v2.16b, v3.16b
- tbl v3.16b, {v7.16b}, v2.16b
-
- // Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes.
- sshr v2.16b, v2.16b, #7
-
- // v2 = second chunk: 'len' bytes from v0 (low-order bytes),
- // then '16-len' bytes from v1 (high-order bytes).
- bsl v2.16b, v1.16b, v0.16b
-
- // Fold the first chunk into the second chunk, storing the result in v7.
- pmull16x64_\p fold_consts, v3, v0
- eor v7.16b, v3.16b, v0.16b
- eor v7.16b, v7.16b, v2.16b
- b .Lreduce_final_16_bytes_\@
-
-.Lless_than_256_bytes_\@:
- // Checksumming a buffer of length 16...255 bytes
-
- adr_l fold_consts_ptr, .Lfold_across_16_bytes_consts
-
- // Load the first 16 data bytes.
- ldr q7, [buf], #0x10
-CPU_LE( rev64 v7.16b, v7.16b )
-CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
-
- // XOR the first 16 data *bits* with the initial CRC value.
- movi v0.16b, #0
- mov v0.h[7], init_crc
- eor v7.16b, v7.16b, v0.16b
-
- // Load the fold-across-16-bytes constants.
- ld1 {fold_consts.2d}, [fold_consts_ptr], #16
-
- cmp len, #16
- b.eq .Lreduce_final_16_bytes_\@ // len == 16
- subs len, len, #32
- b.ge .Lfold_16_bytes_loop_\@ // 32 <= len <= 255
- add len, len, #16
- b .Lhandle_partial_segment_\@ // 17 <= len <= 31
-
-.Lreduce_final_16_bytes_\@:
- .endm
-
-//
-// u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len);
-//
-// Assumes len >= 16.
-//
-SYM_FUNC_START(crc_t10dif_pmull_p8)
- frame_push 1
-
- // Compose { 0,0,0,0, 8,8,8,8, 1,1,1,1, 9,9,9,9 }
- movi perm.4h, #8, lsl #8
- orr perm.2s, #1, lsl #16
- orr perm.2s, #1, lsl #24
- zip1 perm.16b, perm.16b, perm.16b
- zip1 perm.16b, perm.16b, perm.16b
-
- crc_t10dif_pmull p8
-
-CPU_LE( rev64 v7.16b, v7.16b )
-CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
- str q7, [x3]
-
- frame_pop
- ret
-SYM_FUNC_END(crc_t10dif_pmull_p8)
-
- .align 5
-//
-// u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
-//
-// Assumes len >= 16.
-//
-SYM_FUNC_START(crc_t10dif_pmull_p64)
- crc_t10dif_pmull p64
-
- // Reduce the 128-bit value M(x), stored in v7, to the final 16-bit CRC.
-
- movi v2.16b, #0 // init zero register
-
- // Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
- ld1 {fold_consts.2d}, [fold_consts_ptr], #16
-
- // Fold the high 64 bits into the low 64 bits, while also multiplying by
- // x^64. This produces a 128-bit value congruent to x^64 * M(x) and
- // whose low 48 bits are 0.
- ext v0.16b, v2.16b, v7.16b, #8
- pmull2 v7.1q, v7.2d, fold_consts.2d // high bits * x^48 * (x^80 mod G(x))
- eor v0.16b, v0.16b, v7.16b // + low bits * x^64
-
- // Fold the high 32 bits into the low 96 bits. This produces a 96-bit
- // value congruent to x^64 * M(x) and whose low 48 bits are 0.
- ext v1.16b, v0.16b, v2.16b, #12 // extract high 32 bits
- mov v0.s[3], v2.s[0] // zero high 32 bits
- pmull v1.1q, v1.1d, fold_consts.1d // high 32 bits * x^48 * (x^48 mod G(x))
- eor v0.16b, v0.16b, v1.16b // + low bits
-
- // Load G(x) and floor(x^48 / G(x)).
- ld1 {fold_consts.2d}, [fold_consts_ptr]
-
- // Use Barrett reduction to compute the final CRC value.
- pmull2 v1.1q, v0.2d, fold_consts.2d // high 32 bits * floor(x^48 / G(x))
- ushr v1.2d, v1.2d, #32 // /= x^32
- pmull v1.1q, v1.1d, fold_consts.1d // *= G(x)
- ushr v0.2d, v0.2d, #48
- eor v0.16b, v0.16b, v1.16b // + low 16 nonzero bits
- // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0.
-
- umov w0, v0.h[0]
- ret
-SYM_FUNC_END(crc_t10dif_pmull_p64)
-
- .section ".rodata", "a"
- .align 4
-
-// Fold constants precomputed from the polynomial 0x18bb7
-// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
-.Lfold_across_128_bytes_consts:
- .quad 0x0000000000006123 // x^(8*128) mod G(x)
- .quad 0x0000000000002295 // x^(8*128+64) mod G(x)
-// .Lfold_across_64_bytes_consts:
- .quad 0x0000000000001069 // x^(4*128) mod G(x)
- .quad 0x000000000000dd31 // x^(4*128+64) mod G(x)
-// .Lfold_across_32_bytes_consts:
- .quad 0x000000000000857d // x^(2*128) mod G(x)
- .quad 0x0000000000007acc // x^(2*128+64) mod G(x)
-.Lfold_across_16_bytes_consts:
- .quad 0x000000000000a010 // x^(1*128) mod G(x)
- .quad 0x0000000000001faa // x^(1*128+64) mod G(x)
-// .Lfinal_fold_consts:
- .quad 0x1368000000000000 // x^48 * (x^48 mod G(x))
- .quad 0x2d56000000000000 // x^48 * (x^80 mod G(x))
-// .Lbarrett_reduction_consts:
- .quad 0x0000000000018bb7 // G(x)
- .quad 0x00000001f65a57f8 // floor(x^48 / G(x))
-
-// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 -
-// len] is the index vector to shift left by 'len' bytes, and is also {0x80,
-// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes.
-.Lbyteshift_table:
- .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
- .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
- .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
- .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0
diff --git a/arch/arm64/lib/crc-t10dif.c b/arch/arm64/lib/crc-t10dif.c
deleted file mode 100644
index c2ffe4fdb59d14..00000000000000
--- a/arch/arm64/lib/crc-t10dif.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc-t10dif.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/simd.h>
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
-
-#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
-
-asmlinkage void crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len,
- u8 out[16]);
-asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
-
-u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
-{
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
- if (static_branch_likely(&have_pmull)) {
- if (crypto_simd_usable()) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull_p64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
- } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
- static_branch_likely(&have_asimd) &&
- crypto_simd_usable()) {
- u8 buf[16];
-
- kernel_neon_begin();
- crc_t10dif_pmull_p8(crc, data, length, buf);
- kernel_neon_end();
-
- return crc_t10dif_generic(0, buf, sizeof(buf));
- }
- }
- return crc_t10dif_generic(crc, data, length);
-}
-EXPORT_SYMBOL(crc_t10dif_arch);
-
-static int __init crc_t10dif_arm64_init(void)
-{
- if (cpu_have_named_feature(ASIMD)) {
- static_branch_enable(&have_asimd);
- if (cpu_have_named_feature(PMULL))
- static_branch_enable(&have_pmull);
- }
- return 0;
-}
-subsys_initcall(crc_t10dif_arm64_init);
-
-static void __exit crc_t10dif_arm64_exit(void)
-{
-}
-module_exit(crc_t10dif_arm64_exit);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/lib/crc32-core.S b/arch/arm64/lib/crc32-core.S
deleted file mode 100644
index 68825317460fc6..00000000000000
--- a/arch/arm64/lib/crc32-core.S
+++ /dev/null
@@ -1,362 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions
- *
- * Copyright (C) 2016 - 2018 Linaro Ltd.
- * Copyright (C) 2024 Google LLC
- *
- * Author: Ard Biesheuvel <ardb@kernel.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .cpu generic+crc+crypto
-
- .macro bitle, reg
- .endm
-
- .macro bitbe, reg
- rbit \reg, \reg
- .endm
-
- .macro bytele, reg
- .endm
-
- .macro bytebe, reg
- rbit \reg, \reg
- lsr \reg, \reg, #24
- .endm
-
- .macro hwordle, reg
-CPU_BE( rev16 \reg, \reg )
- .endm
-
- .macro hwordbe, reg
-CPU_LE( rev \reg, \reg )
- rbit \reg, \reg
-CPU_BE( lsr \reg, \reg, #16 )
- .endm
-
- .macro le, regs:vararg
- .irp r, \regs
-CPU_BE( rev \r, \r )
- .endr
- .endm
-
- .macro be, regs:vararg
- .irp r, \regs
-CPU_LE( rev \r, \r )
- .endr
- .irp r, \regs
- rbit \r, \r
- .endr
- .endm
-
- .macro __crc32, c, order=le
- bit\order w0
- cmp x2, #16
- b.lt 8f // less than 16 bytes
-
- and x7, x2, #0x1f
- and x2, x2, #~0x1f
- cbz x7, 32f // multiple of 32 bytes
-
- and x8, x7, #0xf
- ldp x3, x4, [x1]
- add x8, x8, x1
- add x1, x1, x7
- ldp x5, x6, [x8]
- \order x3, x4, x5, x6
-
- tst x7, #8
- crc32\c\()x w8, w0, x3
- csel x3, x3, x4, eq
- csel w0, w0, w8, eq
- tst x7, #4
- lsr x4, x3, #32
- crc32\c\()w w8, w0, w3
- csel x3, x3, x4, eq
- csel w0, w0, w8, eq
- tst x7, #2
- lsr w4, w3, #16
- crc32\c\()h w8, w0, w3
- csel w3, w3, w4, eq
- csel w0, w0, w8, eq
- tst x7, #1
- crc32\c\()b w8, w0, w3
- csel w0, w0, w8, eq
- tst x7, #16
- crc32\c\()x w8, w0, x5
- crc32\c\()x w8, w8, x6
- csel w0, w0, w8, eq
- cbz x2, 0f
-
-32: ldp x3, x4, [x1], #32
- sub x2, x2, #32
- ldp x5, x6, [x1, #-16]
- \order x3, x4, x5, x6
- crc32\c\()x w0, w0, x3
- crc32\c\()x w0, w0, x4
- crc32\c\()x w0, w0, x5
- crc32\c\()x w0, w0, x6
- cbnz x2, 32b
-0: bit\order w0
- ret
-
-8: tbz x2, #3, 4f
- ldr x3, [x1], #8
- \order x3
- crc32\c\()x w0, w0, x3
-4: tbz x2, #2, 2f
- ldr w3, [x1], #4
- \order w3
- crc32\c\()w w0, w0, w3
-2: tbz x2, #1, 1f
- ldrh w3, [x1], #2
- hword\order w3
- crc32\c\()h w0, w0, w3
-1: tbz x2, #0, 0f
- ldrb w3, [x1]
- byte\order w3
- crc32\c\()b w0, w0, w3
-0: bit\order w0
- ret
- .endm
-
- .align 5
-SYM_FUNC_START(crc32_le_arm64)
- __crc32
-SYM_FUNC_END(crc32_le_arm64)
-
- .align 5
-SYM_FUNC_START(crc32c_le_arm64)
- __crc32 c
-SYM_FUNC_END(crc32c_le_arm64)
-
- .align 5
-SYM_FUNC_START(crc32_be_arm64)
- __crc32 order=be
-SYM_FUNC_END(crc32_be_arm64)
-
- in .req x1
- len .req x2
-
- /*
- * w0: input CRC at entry, output CRC at exit
- * x1: pointer to input buffer
- * x2: length of input in bytes
- */
- .macro crc4way, insn, table, order=le
- bit\order w0
- lsr len, len, #6 // len := # of 64-byte blocks
-
- /* Process up to 64 blocks of 64 bytes at a time */
-.La\@: mov x3, #64
- cmp len, #64
- csel x3, x3, len, hi // x3 := min(len, 64)
- sub len, len, x3
-
- /* Divide the input into 4 contiguous blocks */
- add x4, x3, x3, lsl #1 // x4 := 3 * x3
- add x7, in, x3, lsl #4 // x7 := in + 16 * x3
- add x8, in, x3, lsl #5 // x8 := in + 32 * x3
- add x9, in, x4, lsl #4 // x9 := in + 16 * x4
-
- /* Load the folding coefficients from the lookup table */
- adr_l x5, \table - 12 // entry 0 omitted
- add x5, x5, x4, lsl #2 // x5 += 12 * x3
- ldp s0, s1, [x5]
- ldr s2, [x5, #8]
-
- /* Zero init partial CRCs for this iteration */
- mov w4, wzr
- mov w5, wzr
- mov w6, wzr
- mov x17, xzr
-
-.Lb\@: sub x3, x3, #1
- \insn w6, w6, x17
- ldp x10, x11, [in], #16
- ldp x12, x13, [x7], #16
- ldp x14, x15, [x8], #16
- ldp x16, x17, [x9], #16
-
- \order x10, x11, x12, x13, x14, x15, x16, x17
-
- /* Apply the CRC transform to 4 16-byte blocks in parallel */
- \insn w0, w0, x10
- \insn w4, w4, x12
- \insn w5, w5, x14
- \insn w6, w6, x16
- \insn w0, w0, x11
- \insn w4, w4, x13
- \insn w5, w5, x15
- cbnz x3, .Lb\@
-
- /* Combine the 4 partial results into w0 */
- mov v3.d[0], x0
- mov v4.d[0], x4
- mov v5.d[0], x5
- pmull v0.1q, v0.1d, v3.1d
- pmull v1.1q, v1.1d, v4.1d
- pmull v2.1q, v2.1d, v5.1d
- eor v0.8b, v0.8b, v1.8b
- eor v0.8b, v0.8b, v2.8b
- mov x5, v0.d[0]
- eor x5, x5, x17
- \insn w0, w6, x5
-
- mov in, x9
- cbnz len, .La\@
-
- bit\order w0
- ret
- .endm
-
- .align 5
-SYM_FUNC_START(crc32c_le_arm64_4way)
- crc4way crc32cx, .L0
-SYM_FUNC_END(crc32c_le_arm64_4way)
-
- .align 5
-SYM_FUNC_START(crc32_le_arm64_4way)
- crc4way crc32x, .L1
-SYM_FUNC_END(crc32_le_arm64_4way)
-
- .align 5
-SYM_FUNC_START(crc32_be_arm64_4way)
- crc4way crc32x, .L1, be
-SYM_FUNC_END(crc32_be_arm64_4way)
-
- .section .rodata, "a", %progbits
- .align 6
-.L0: .long 0xddc0152b, 0xba4fc28e, 0x493c7d27
- .long 0x0715ce53, 0x9e4addf8, 0xba4fc28e
- .long 0xc96cfdc0, 0x0715ce53, 0xddc0152b
- .long 0xab7aff2a, 0x0d3b6092, 0x9e4addf8
- .long 0x299847d5, 0x878a92a7, 0x39d3b296
- .long 0xb6dd949b, 0xab7aff2a, 0x0715ce53
- .long 0xa60ce07b, 0x83348832, 0x47db8317
- .long 0xd270f1a2, 0xb9e02b86, 0x0d3b6092
- .long 0x65863b64, 0xb6dd949b, 0xc96cfdc0
- .long 0xb3e32c28, 0xbac2fd7b, 0x878a92a7
- .long 0xf285651c, 0xce7f39f4, 0xdaece73e
- .long 0x271d9844, 0xd270f1a2, 0xab7aff2a
- .long 0x6cb08e5c, 0x2b3cac5d, 0x2162d385
- .long 0xcec3662e, 0x1b03397f, 0x83348832
- .long 0x8227bb8a, 0xb3e32c28, 0x299847d5
- .long 0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86
- .long 0xf6076544, 0x10746f3c, 0x18b33a4e
- .long 0x98d8d9cb, 0x271d9844, 0xb6dd949b
- .long 0x57a3d037, 0x93a5f730, 0x78d9ccb7
- .long 0x3771e98f, 0x6b749fb2, 0xbac2fd7b
- .long 0xe0ac139e, 0xcec3662e, 0xa60ce07b
- .long 0x6f345e45, 0xe6fc4e6a, 0xce7f39f4
- .long 0xa2b73df1, 0xb0cd4768, 0x61d82e56
- .long 0x86d8e4d2, 0xd7a4825c, 0xd270f1a2
- .long 0xa90fd27a, 0x0167d312, 0xc619809d
- .long 0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d
- .long 0x4597456a, 0x98d8d9cb, 0x65863b64
- .long 0xc9c8b782, 0x68bce87a, 0x1b03397f
- .long 0x62ec6c6d, 0x6956fc3b, 0xebb883bd
- .long 0x2342001e, 0x3771e98f, 0xb3e32c28
- .long 0xe8b6368b, 0x2178513a, 0x064f7f26
- .long 0x9ef68d35, 0x170076fa, 0xdd7e3b0c
- .long 0x0b0bf8ca, 0x6f345e45, 0xf285651c
- .long 0x02ee03b2, 0xff0dba97, 0x10746f3c
- .long 0x135c83fd, 0xf872e54c, 0xc7a68855
- .long 0x00bcf5f6, 0x86d8e4d2, 0x271d9844
- .long 0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c
- .long 0xded288f8, 0xb3af077a, 0x93a5f730
- .long 0x37170390, 0xca6ef3ac, 0x6cb08e5c
- .long 0xf48642e9, 0xdd66cbbb, 0x6b749fb2
- .long 0xb25b29f2, 0xe9e28eb4, 0x1393e203
- .long 0x45cddf4e, 0xc9c8b782, 0xcec3662e
- .long 0xdfd94fb2, 0x93e106a4, 0x96c515bb
- .long 0x021ac5ef, 0xd813b325, 0xe6fc4e6a
- .long 0x8e1450f7, 0x2342001e, 0x8227bb8a
- .long 0xe0cdcf86, 0x6d9a4957, 0xb0cd4768
- .long 0x613eee91, 0xd2c3ed1a, 0x39c7ff35
- .long 0xbedc6ba1, 0x9ef68d35, 0xd7a4825c
- .long 0x0cd1526a, 0xf2271e60, 0x0ab3844b
- .long 0xd6c3a807, 0x2664fd8b, 0x0167d312
- .long 0x1d31175f, 0x02ee03b2, 0xf6076544
- .long 0x4be7fd90, 0x363bd6b3, 0x26f6a60a
- .long 0x6eeed1c9, 0x5fabe670, 0xa741c1bf
- .long 0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb
- .long 0x2e7d11a7, 0x17f27698, 0x49c3cc9c
- .long 0x889774e1, 0xaa7c7ad5, 0x68bce87a
- .long 0x8a074012, 0xded288f8, 0x57a3d037
- .long 0xbd0bb25f, 0x6d390dec, 0x6956fc3b
- .long 0x3be3c09b, 0x6353c1cc, 0x42d98888
- .long 0x465a4eee, 0xf48642e9, 0x3771e98f
- .long 0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9
- .long 0xa52f58ec, 0x9a5ede41, 0x2178513a
- .long 0x47972100, 0x45cddf4e, 0xe0ac139e
- .long 0x359674f7, 0xa51b6135, 0x170076fa
-
-.L1: .long 0xaf449247, 0x81256527, 0xccaa009e
- .long 0x57c54819, 0x1d9513d7, 0x81256527
- .long 0x3f41287a, 0x57c54819, 0xaf449247
- .long 0xf5e48c85, 0x910eeec1, 0x1d9513d7
- .long 0x1f0c2cdd, 0x9026d5b1, 0xae0b5394
- .long 0x71d54a59, 0xf5e48c85, 0x57c54819
- .long 0x1c63267b, 0xfe807bbd, 0x0cbec0ed
- .long 0xd31343ea, 0xe95c1271, 0x910eeec1
- .long 0xf9d9c7ee, 0x71d54a59, 0x3f41287a
- .long 0x9ee62949, 0xcec97417, 0x9026d5b1
- .long 0xa55d1514, 0xf183c71b, 0xd1df2327
- .long 0x21aa2b26, 0xd31343ea, 0xf5e48c85
- .long 0x9d842b80, 0xeea395c4, 0x3c656ced
- .long 0xd8110ff1, 0xcd669a40, 0xfe807bbd
- .long 0x3f9e9356, 0x9ee62949, 0x1f0c2cdd
- .long 0x1d6708a0, 0x0c30f51d, 0xe95c1271
- .long 0xef82aa68, 0xdb3935ea, 0xb918a347
- .long 0xd14bcc9b, 0x21aa2b26, 0x71d54a59
- .long 0x99cce860, 0x356d209f, 0xff6f2fc2
- .long 0xd8af8e46, 0xc352f6de, 0xcec97417
- .long 0xf1996890, 0xd8110ff1, 0x1c63267b
- .long 0x631bc508, 0xe95c7216, 0xf183c71b
- .long 0x8511c306, 0x8e031a19, 0x9b9bdbd0
- .long 0xdb3839f3, 0x1d6708a0, 0xd31343ea
- .long 0x7a92fffb, 0xf7003835, 0x4470ac44
- .long 0x6ce68f2a, 0x00eba0c8, 0xeea395c4
- .long 0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee
- .long 0xb46f7cff, 0x9a1b53c8, 0xcd669a40
- .long 0x60290934, 0x81b6f443, 0x6d40f445
- .long 0x8e976a7d, 0xd8af8e46, 0x9ee62949
- .long 0xdcf5088a, 0x9dbdc100, 0x145575d5
- .long 0x1753ab84, 0xbbf2f6d6, 0x0c30f51d
- .long 0x255b139e, 0x631bc508, 0xa55d1514
- .long 0xd784eaa8, 0xce26786c, 0xdb3935ea
- .long 0x6d2c864a, 0x8068c345, 0x2586d334
- .long 0x02072e24, 0xdb3839f3, 0x21aa2b26
- .long 0x06689b0a, 0x5efd72f5, 0xe0575528
- .long 0x1e52f5ea, 0x4117915b, 0x356d209f
- .long 0x1d3d1db6, 0x6ce68f2a, 0x9d842b80
- .long 0x3796455c, 0xb8e0e4a8, 0xc352f6de
- .long 0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c
- .long 0x28ae0976, 0xb46f7cff, 0xd8110ff1
- .long 0x9764bc8d, 0xd7e7a22c, 0x712510f0
- .long 0x13a13e18, 0x3e9a43cd, 0xe95c7216
- .long 0xb8ee242e, 0x8e976a7d, 0x3f9e9356
- .long 0x0c540e7b, 0x753c81ff, 0x8e031a19
- .long 0x9924c781, 0xb9220208, 0x3edcde65
- .long 0x3954de39, 0x1753ab84, 0x1d6708a0
- .long 0xf32238b5, 0xbec81497, 0x9e70b943
- .long 0xbbd2cd2c, 0x0925d861, 0xf7003835
- .long 0xcc401304, 0xd784eaa8, 0xef82aa68
- .long 0x4987e684, 0x6044fbb0, 0x00eba0c8
- .long 0x3aa11427, 0x18fe3b4a, 0x87441142
- .long 0x297aad60, 0x02072e24, 0xd14bcc9b
- .long 0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a
- .long 0x632d78c5, 0x3fc33de4, 0x9a1b53c8
- .long 0x25b8822a, 0x1e52f5ea, 0x99cce860
- .long 0xd4fc84bc, 0x1af62fb8, 0x81b6f443
- .long 0x5690aa32, 0xa91fdefb, 0x688a110e
- .long 0x1357a093, 0x3796455c, 0xd8af8e46
- .long 0x798fdd33, 0xaaa18a37, 0x357b9517
- .long 0xc2815395, 0x54d42691, 0x9dbdc100
- .long 0x21cfc0f7, 0x28ae0976, 0xf1996890
- .long 0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6
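
The crc4way macro in the file deleted above splits the input into four contiguous lanes that are CRC'd in lockstep, then merges the three leading partial results into the last one with PMULL and the folding coefficients from .L0/.L1. The pointer arithmetic is the non-obvious half; here is a sketch of just that part in C, under the assumption that blocks64 is the 64-byte block count produced by 'lsr len, len, #6' (the CRC and PMULL work itself is elided):

#include <stddef.h>
#include <stdint.h>

void crc4way_split(const uint8_t *in, size_t blocks64)
{
        while (blocks64) {
                size_t units = blocks64 > 64 ? 64 : blocks64; /* x3 = min(len, 64) */
                const uint8_t *lane0 = in;                  /* folds into w0       */
                const uint8_t *lane1 = in + 16 * units;     /* x7 = in + 16*x3     */
                const uint8_t *lane2 = in + 32 * units;     /* x8 = in + 32*x3     */
                const uint8_t *lane3 = in + 48 * units;     /* x9 = in + 16*(3*x3) */

                /* ... each lane feeds one CRC accumulator, 16 bytes/step ... */
                (void)lane0; (void)lane1; (void)lane2; (void)lane3;

                in += 64 * units;       /* 'mov in, x9' after the inner loop */
                blocks64 -= units;
        }
}
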
diff --git a/arch/arm64/lib/crc32.c b/arch/arm64/lib/crc32.c
deleted file mode 100644
index ed3acd71178f8d..00000000000000
--- a/arch/arm64/lib/crc32.c
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include <linux/crc32.h>
-#include <linux/linkage.h>
-#include <linux/module.h>
-
-#include <asm/alternative.h>
-#include <asm/cpufeature.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-#include <crypto/internal/simd.h>
-
-// The minimum input length to consider the 4-way interleaved code path
-static const size_t min_len = 1024;
-
-asmlinkage u32 crc32_le_arm64(u32 crc, unsigned char const *p, size_t len);
-asmlinkage u32 crc32c_le_arm64(u32 crc, unsigned char const *p, size_t len);
-asmlinkage u32 crc32_be_arm64(u32 crc, unsigned char const *p, size_t len);
-
-asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
-asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
-asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
- return crc32_le_base(crc, p, len);
-
- if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
- kernel_neon_begin();
- crc = crc32_le_arm64_4way(crc, p, len);
- kernel_neon_end();
-
- p += round_down(len, 64);
- len %= 64;
-
- if (!len)
- return crc;
- }
-
- return crc32_le_arm64(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
- return crc32c_base(crc, p, len);
-
- if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
- kernel_neon_begin();
- crc = crc32c_le_arm64_4way(crc, p, len);
- kernel_neon_end();
-
- p += round_down(len, 64);
- len %= 64;
-
- if (!len)
- return crc;
- }
-
- return crc32c_le_arm64(crc, p, len);
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
- return crc32_be_base(crc, p, len);
-
- if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
- kernel_neon_begin();
- crc = crc32_be_arm64_4way(crc, p, len);
- kernel_neon_end();
-
- p += round_down(len, 64);
- len %= 64;
-
- if (!len)
- return crc;
- }
-
- return crc32_be_arm64(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-u32 crc32_optimizations(void)
-{
- if (alternative_has_cap_likely(ARM64_HAS_CRC32))
- return CRC32_LE_OPTIMIZATION |
- CRC32_BE_OPTIMIZATION |
- CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 1514789bea4a19..fa3eb87f0999d6 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -15,7 +15,6 @@ config LOONGARCH
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
- select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_FAST_MULTIPLIER
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index fae77809048b80..ccea3bbd435313 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -11,5 +11,3 @@ obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
-
-obj-$(CONFIG_CRC32_ARCH) += crc32-loongarch.o
diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c
deleted file mode 100644
index db22c2ec55e2b2..00000000000000
--- a/arch/loongarch/lib/crc32-loongarch.c
+++ /dev/null
@@ -1,136 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CRC32 and CRC32C using LoongArch crc* instructions
- *
- * Module based on mips/crypto/crc32-mips.c
- *
- * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
- * Copyright (C) 2018 MIPS Tech, LLC
- * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
- */
-
-#include <asm/cpu-features.h>
-#include <linux/crc32.h>
-#include <linux/export.h>
-#include <linux/module.h>
-#include <linux/unaligned.h>
-
-#define _CRC32(crc, value, size, type) \
-do { \
- __asm__ __volatile__( \
- #type ".w." #size ".w" " %0, %1, %0\n\t"\
- : "+r" (crc) \
- : "r" (value) \
- : "memory"); \
-} while (0)
-
-#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
-#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- if (!static_branch_likely(&have_crc32))
- return crc32_le_base(crc, p, len);
-
- while (len >= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32(crc, value, d);
- p += sizeof(u64);
- len -= sizeof(u64);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32(crc, value, w);
- p += sizeof(u32);
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32(crc, value, b);
- }
-
- return crc;
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- if (!static_branch_likely(&have_crc32))
- return crc32c_base(crc, p, len);
-
- while (len >= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32C(crc, value, d);
- p += sizeof(u64);
- len -= sizeof(u64);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32C(crc, value, w);
- p += sizeof(u32);
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32C(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32C(crc, value, b);
- }
-
- return crc;
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-static int __init crc32_loongarch_init(void)
-{
- if (cpu_has_crc32)
- static_branch_enable(&have_crc32);
- return 0;
-}
-subsys_initcall(crc32_loongarch_init);
-
-static void __exit crc32_loongarch_exit(void)
-{
-}
-module_exit(crc32_loongarch_exit);
-
-u32 crc32_optimizations(void)
-{
- if (static_key_enabled(&have_crc32))
- return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>");
-MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
-MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions");
-MODULE_LICENSE("GPL v2");
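
One easy-to-miss detail in the loops deleted above: after the u64 loop exits, len is strictly less than 8, so the 'len & sizeof(u32)', 'len & sizeof(u16)' and 'len & sizeof(u8)' tests read the remaining length one bit at a time and together consume the tail exactly once, with no further loop needed. A portable sketch of the same descent, with a bitwise byte step standing in for the crc.w.{b,h,w,d}.w instructions (0xedb88320 is the reflected CRC32 polynomial):

#include <stddef.h>
#include <stdint.h>

static uint32_t crc32_byte(uint32_t crc, uint8_t b)     /* reference step */
{
        crc ^= b;
        for (int i = 0; i < 8; i++)
                crc = (crc >> 1) ^ (0xedb88320u & (0u - (crc & 1)));
        return crc;
}

static uint32_t crc32_bytes(uint32_t crc, const uint8_t *p, size_t n)
{
        while (n--)
                crc = crc32_byte(crc, *p++);
        return crc;
}

uint32_t crc32_tail_descent(uint32_t crc, const uint8_t *p, size_t len)
{
        while (len >= 8) {              /* the CRC32(crc, value, d) loop */
                crc = crc32_bytes(crc, p, 8);
                p += 8;
                len -= 8;
        }
        /* len < 8 here: bits 2, 1 and 0 of len describe the tail exactly. */
        if (len & 4) { crc = crc32_bytes(crc, p, 4); p += 4; }
        if (len & 2) { crc = crc32_bytes(crc, p, 2); p += 2; }
        if (len & 1)
                crc = crc32_byte(crc, *p);
        return crc;
}
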
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 1e48184ecf1ec8..934eb961bd0dd3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2024,7 +2024,6 @@ config CPU_MIPSR5
config CPU_MIPSR6
bool
default y if CPU_MIPS32_R6 || CPU_MIPS64_R6
- select ARCH_HAS_CRC32
select CPU_HAS_RIXI
select CPU_HAS_DIEI if !CPU_DIEI_BROKEN
select HAVE_ARCH_BITREVERSE
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 9d75845ef78e18..8c40ffb09c4200 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -16,7 +16,5 @@ lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y))
obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o
obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o
-obj-$(CONFIG_CRC32_ARCH) += crc32-mips.o
-
# libgcc-style stuff needed in the kernel
obj-y += bswapsi.o bswapdi.o multi3.o
diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c
deleted file mode 100644
index 45e4d2c9fbf549..00000000000000
--- a/arch/mips/lib/crc32-mips.c
+++ /dev/null
@@ -1,183 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
- *
- * Module based on arm64/crypto/crc32-arm.c
- *
- * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
- * Copyright (C) 2018 MIPS Tech, LLC
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/mipsregs.h>
-#include <linux/unaligned.h>
-
-#ifndef TOOLCHAIN_SUPPORTS_CRC
-#define _ASM_SET_CRC(OP, SZ, TYPE) \
-_ASM_MACRO_3R(OP, rt, rs, rt2, \
- ".ifnc \\rt, \\rt2\n\t" \
- ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \
- ".endif\n\t" \
- _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \
- ((SZ) << 6) | ((TYPE) << 8)) \
- _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \
- ((SZ) << 14) | ((TYPE) << 3)))
-#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t"
-#else /* !TOOLCHAIN_SUPPORTS_CRC */
-#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t"
-#define _ASM_UNSET_CRC(op, SZ, TYPE)
-#endif
-
-#define __CRC32(crc, value, op, SZ, TYPE) \
-do { \
- __asm__ __volatile__( \
- ".set push\n\t" \
- _ASM_SET_CRC(op, SZ, TYPE) \
- #op " %0, %1, %0\n\t" \
- _ASM_UNSET_CRC(op, SZ, TYPE) \
- ".set pop" \
- : "+r" (crc) \
- : "r" (value)); \
-} while (0)
-
-#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0)
-#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0)
-#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0)
-#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0)
-#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1)
-#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1)
-#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1)
-#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1)
-
-#define _CRC32(crc, value, size, op) \
- _CRC32_##op##size(crc, value)
-
-#define CRC32(crc, value, size) \
- _CRC32(crc, value, size, crc32)
-
-#define CRC32C(crc, value, size) \
- _CRC32(crc, value, size, crc32c)
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- if (!static_branch_likely(&have_crc32))
- return crc32_le_base(crc, p, len);
-
- if (IS_ENABLED(CONFIG_64BIT)) {
- for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32(crc, value, d);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32(crc, value, w);
- p += sizeof(u32);
- }
- } else {
- for (; len >= sizeof(u32); len -= sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32(crc, value, w);
- p += sizeof(u32);
- }
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32(crc, value, b);
- }
-
- return crc;
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- if (!static_branch_likely(&have_crc32))
- return crc32c_base(crc, p, len);
-
- if (IS_ENABLED(CONFIG_64BIT)) {
- for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32C(crc, value, d);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32C(crc, value, w);
- p += sizeof(u32);
- }
- } else {
- for (; len >= sizeof(u32); len -= sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32C(crc, value, w);
- p += sizeof(u32);
- }
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32C(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32C(crc, value, b);
- }
- return crc;
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-static int __init crc32_mips_init(void)
-{
- if (cpu_have_feature(cpu_feature(MIPS_CRC32)))
- static_branch_enable(&have_crc32);
- return 0;
-}
-subsys_initcall(crc32_mips_init);
-
-static void __exit crc32_mips_exit(void)
-{
-}
-module_exit(crc32_mips_exit);
-
-u32 crc32_optimizations(void)
-{
- if (static_key_enabled(&have_crc32))
- return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com");
-MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
-MODULE_LICENSE("GPL v2");
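
Unlike the LoongArch version, the MIPS loop above wraps its u64 path in IS_ENABLED(CONFIG_64BIT): both branches are compiled and type-checked, but the condition is a compile-time constant, so the dead branch is eliminated on 32-bit builds. A minimal model of that mechanism (the macro here is a stand-in keyed off pointer width, not the kernel's IS_ENABLED()):

#include <stdio.h>

/* Stand-in for IS_ENABLED(CONFIG_64BIT); the real macro expands from
 * the kernel config system. */
#define CONFIG_64BIT_ENABLED (sizeof(void *) == 8)

int main(void)
{
        /* Both branches compile, but the condition folds to a constant,
         * so the dead one is eliminated -- the same mechanism that drops
         * the u64 loop from 32-bit MIPS builds. */
        if (CONFIG_64BIT_ENABLED)
                puts("64-bit build: 8-byte CRC steps available");
        else
                puts("32-bit build: widest step is 4 bytes");
        return 0;
}
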
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c3e0cc83f1205e..45b4fa7b9b02fd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -127,8 +127,6 @@ config PPC
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_COPY_MC if PPC64
- select ARCH_HAS_CRC32 if PPC64 && ALTIVEC
- select ARCH_HAS_CRC_T10DIF if PPC64 && ALTIVEC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 481f968e42c7bc..59de2e2232df69 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -80,10 +80,4 @@ CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
# Enable <altivec.h>
CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
-obj-$(CONFIG_CRC32_ARCH) += crc32-powerpc.o
-crc32-powerpc-y := crc32.o crc32c-vpmsum_asm.o
-
-obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-powerpc.o
-crc-t10dif-powerpc-y := crc-t10dif.o crct10dif-vpmsum_asm.o
-
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/crc-t10dif.c b/arch/powerpc/lib/crc-t10dif.c
deleted file mode 100644
index be23ded3a9df69..00000000000000
--- a/arch/powerpc/lib/crc-t10dif.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Calculate a CRC T10-DIF with vpmsum acceleration
- *
- * Copyright 2017, Daniel Axtens, IBM Corporation.
- * [based on crc32c-vpmsum_glue.c]
- */
-
-#include <asm/switch_to.h>
-#include <crypto/internal/simd.h>
-#include <linux/cpufeature.h>
-#include <linux/crc-t10dif.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/preempt.h>
-#include <linux/uaccess.h>
-
-#define VMX_ALIGN 16
-#define VMX_ALIGN_MASK (VMX_ALIGN-1)
-
-#define VECTOR_BREAKPOINT 64
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
-
-u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len);
-
-u16 crc_t10dif_arch(u16 crci, const u8 *p, size_t len)
-{
- unsigned int prealign;
- unsigned int tail;
- u32 crc = crci;
-
- if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) ||
- !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable())
- return crc_t10dif_generic(crc, p, len);
-
- if ((unsigned long)p & VMX_ALIGN_MASK) {
- prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
- crc = crc_t10dif_generic(crc, p, prealign);
- len -= prealign;
- p += prealign;
- }
-
- if (len & ~VMX_ALIGN_MASK) {
- crc <<= 16;
- preempt_disable();
- pagefault_disable();
- enable_kernel_altivec();
- crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
- disable_kernel_altivec();
- pagefault_enable();
- preempt_enable();
- crc >>= 16;
- }
-
- tail = len & VMX_ALIGN_MASK;
- if (tail) {
- p += len & ~VMX_ALIGN_MASK;
- crc = crc_t10dif_generic(crc, p, tail);
- }
-
- return crc & 0xffff;
-}
-EXPORT_SYMBOL(crc_t10dif_arch);
-
-static int __init crc_t10dif_powerpc_init(void)
-{
- if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
- (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
- static_branch_enable(&have_vec_crypto);
- return 0;
-}
-subsys_initcall(crc_t10dif_powerpc_init);
-
-static void __exit crc_t10dif_powerpc_exit(void)
-{
-}
-module_exit(crc_t10dif_powerpc_exit);
-
-MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
-MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
-MODULE_LICENSE("GPL");
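
The shift pair around __crct10dif_vpmsum() deleted above is the subtle part: the vector kernel works in a 32-bit-wide, non-reflected polynomial domain, so the 16-bit running CRC is staged into the top half of a u32 before the call and shifted back down afterwards. A sketch of that staging, with vpmsum_kernel() as an illustrative placeholder for the AltiVec assembly:

#include <stddef.h>
#include <stdint.h>

/* Placeholder for __crct10dif_vpmsum(); a real build links the AltiVec
 * routine instead. Identity stub only to keep the sketch compilable. */
static uint32_t vpmsum_kernel(uint32_t crc, const uint8_t *p, size_t len)
{
        (void)p; (void)len;
        return crc;
}

uint16_t crc_t10dif_staged(uint16_t crc16, const uint8_t *p, size_t len)
{
        /* len must be a multiple of 16 and p 16-byte aligned; the
         * sub-16-byte tail goes back through the generic routine. */
        uint32_t crc = (uint32_t)crc16 << 16;  /* 'crc <<= 16' before the call */
        crc = vpmsum_kernel(crc, p, len);
        return (uint16_t)(crc >> 16);          /* 'crc >>= 16' afterwards */
}
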
diff --git a/arch/powerpc/lib/crc-vpmsum-template.S b/arch/powerpc/lib/crc-vpmsum-template.S
deleted file mode 100644
index b0f87f595b269f..00000000000000
--- a/arch/powerpc/lib/crc-vpmsum-template.S
+++ /dev/null
@@ -1,746 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Core of the accelerated CRC algorithm.
- * In your file, define the constants and CRC_FUNCTION_NAME.
- * Then include this file.
- *
- * Calculate the checksum of data that is 16 byte aligned and a multiple of
- * 16 bytes.
- *
- * The first step is to reduce it to 1024 bits. We do this in 8 parallel
- * chunks in order to mask the latency of the vpmsum instructions. If we
- * have more than 32 kB of data to checksum we repeat this step multiple
- * times, passing in the previous 1024 bits.
- *
- * The next step is to reduce the 1024 bits to 64 bits. This step adds
- * 32 bits of 0s to the end - this matches what a CRC does. We just
- * calculate constants that land the data in this 32 bits.
- *
- * We then use fixed point Barrett reduction to compute a mod n over GF(2)
- * for n = CRC using POWER8 instructions. We use x = 32.
- *
- * https://en.wikipedia.org/wiki/Barrett_reduction
- *
- * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/ppc-opcode.h>
-
-#define MAX_SIZE 32768
-
- .text
-
-#if defined(__BIG_ENDIAN__) && defined(REFLECT)
-#define BYTESWAP_DATA
-#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
-#define BYTESWAP_DATA
-#else
-#undef BYTESWAP_DATA
-#endif
-
-#define off16 r25
-#define off32 r26
-#define off48 r27
-#define off64 r28
-#define off80 r29
-#define off96 r30
-#define off112 r31
-
-#define const1 v24
-#define const2 v25
-
-#define byteswap v26
-#define mask_32bit v27
-#define mask_64bit v28
-#define zeroes v29
-
-#ifdef BYTESWAP_DATA
-#define VPERM(A, B, C, D) vperm A, B, C, D
-#else
-#define VPERM(A, B, C, D)
-#endif
-
-/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */
-FUNC_START(CRC_FUNCTION_NAME)
- std r31,-8(r1)
- std r30,-16(r1)
- std r29,-24(r1)
- std r28,-32(r1)
- std r27,-40(r1)
- std r26,-48(r1)
- std r25,-56(r1)
-
- li off16,16
- li off32,32
- li off48,48
- li off64,64
- li off80,80
- li off96,96
- li off112,112
- li r0,0
-
-	/* Enough room for saving 10 non-volatile VMX registers */
- subi r6,r1,56+10*16
- subi r7,r1,56+2*16
-
- stvx v20,0,r6
- stvx v21,off16,r6
- stvx v22,off32,r6
- stvx v23,off48,r6
- stvx v24,off64,r6
- stvx v25,off80,r6
- stvx v26,off96,r6
- stvx v27,off112,r6
- stvx v28,0,r7
- stvx v29,off16,r7
-
- mr r10,r3
-
- vxor zeroes,zeroes,zeroes
- vspltisw v0,-1
-
- vsldoi mask_32bit,zeroes,v0,4
- vsldoi mask_64bit,zeroes,v0,8
-
- /* Get the initial value into v8 */
- vxor v8,v8,v8
- MTVRD(v8, R3)
-#ifdef REFLECT
- vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
-#else
- vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
-#endif
-
-#ifdef BYTESWAP_DATA
- LOAD_REG_ADDR(r3, .byteswap_constant)
- lvx byteswap,0,r3
- addi r3,r3,16
-#endif
-
- cmpdi r5,256
- blt .Lshort
-
- rldicr r6,r5,0,56
-
- /* Checksum in blocks of MAX_SIZE */
-1: lis r7,MAX_SIZE@h
- ori r7,r7,MAX_SIZE@l
- mr r9,r7
- cmpd r6,r7
- bgt 2f
- mr r7,r6
-2: subf r6,r7,r6
-
- /* our main loop does 128 bytes at a time */
- srdi r7,r7,7
-
- /*
- * Work out the offset into the constants table to start at. Each
- * constant is 16 bytes, and it is used against 128 bytes of input
- * data - 128 / 16 = 8
- */
- sldi r8,r7,4
- srdi r9,r9,3
- subf r8,r8,r9
-
- /* We reduce our final 128 bytes in a separate step */
- addi r7,r7,-1
- mtctr r7
-
- LOAD_REG_ADDR(r3, .constants)
-
- /* Find the start of our constants */
- add r3,r3,r8
-
- /* zero v0-v7 which will contain our checksums */
- vxor v0,v0,v0
- vxor v1,v1,v1
- vxor v2,v2,v2
- vxor v3,v3,v3
- vxor v4,v4,v4
- vxor v5,v5,v5
- vxor v6,v6,v6
- vxor v7,v7,v7
-
- lvx const1,0,r3
-
- /*
- * If we are looping back to consume more data we use the values
- * already in v16-v23.
- */
- cmpdi r0,1
- beq 2f
-
- /* First warm up pass */
- lvx v16,0,r4
- lvx v17,off16,r4
- VPERM(v16,v16,v16,byteswap)
- VPERM(v17,v17,v17,byteswap)
- lvx v18,off32,r4
- lvx v19,off48,r4
- VPERM(v18,v18,v18,byteswap)
- VPERM(v19,v19,v19,byteswap)
- lvx v20,off64,r4
- lvx v21,off80,r4
- VPERM(v20,v20,v20,byteswap)
- VPERM(v21,v21,v21,byteswap)
- lvx v22,off96,r4
- lvx v23,off112,r4
- VPERM(v22,v22,v22,byteswap)
- VPERM(v23,v23,v23,byteswap)
- addi r4,r4,8*16
-
- /* xor in initial value */
- vxor v16,v16,v8
-
-2: bdz .Lfirst_warm_up_done
-
- addi r3,r3,16
- lvx const2,0,r3
-
- /* Second warm up pass */
- VPMSUMD(v8,v16,const1)
- lvx v16,0,r4
- VPERM(v16,v16,v16,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v9,v17,const1)
- lvx v17,off16,r4
- VPERM(v17,v17,v17,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v10,v18,const1)
- lvx v18,off32,r4
- VPERM(v18,v18,v18,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v11,v19,const1)
- lvx v19,off48,r4
- VPERM(v19,v19,v19,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v12,v20,const1)
- lvx v20,off64,r4
- VPERM(v20,v20,v20,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v13,v21,const1)
- lvx v21,off80,r4
- VPERM(v21,v21,v21,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v14,v22,const1)
- lvx v22,off96,r4
- VPERM(v22,v22,v22,byteswap)
- ori r2,r2,0
-
- VPMSUMD(v15,v23,const1)
- lvx v23,off112,r4
- VPERM(v23,v23,v23,byteswap)
-
- addi r4,r4,8*16
-
- bdz .Lfirst_cool_down
-
- /*
- * main loop. We modulo schedule it such that it takes three iterations
- * to complete - first iteration load, second iteration vpmsum, third
- * iteration xor.
- */
- .balign 16
-4: lvx const1,0,r3
- addi r3,r3,16
- ori r2,r2,0
-
- vxor v0,v0,v8
- VPMSUMD(v8,v16,const2)
- lvx v16,0,r4
- VPERM(v16,v16,v16,byteswap)
- ori r2,r2,0
-
- vxor v1,v1,v9
- VPMSUMD(v9,v17,const2)
- lvx v17,off16,r4
- VPERM(v17,v17,v17,byteswap)
- ori r2,r2,0
-
- vxor v2,v2,v10
- VPMSUMD(v10,v18,const2)
- lvx v18,off32,r4
- VPERM(v18,v18,v18,byteswap)
- ori r2,r2,0
-
- vxor v3,v3,v11
- VPMSUMD(v11,v19,const2)
- lvx v19,off48,r4
- VPERM(v19,v19,v19,byteswap)
- lvx const2,0,r3
- ori r2,r2,0
-
- vxor v4,v4,v12
- VPMSUMD(v12,v20,const1)
- lvx v20,off64,r4
- VPERM(v20,v20,v20,byteswap)
- ori r2,r2,0
-
- vxor v5,v5,v13
- VPMSUMD(v13,v21,const1)
- lvx v21,off80,r4
- VPERM(v21,v21,v21,byteswap)
- ori r2,r2,0
-
- vxor v6,v6,v14
- VPMSUMD(v14,v22,const1)
- lvx v22,off96,r4
- VPERM(v22,v22,v22,byteswap)
- ori r2,r2,0
-
- vxor v7,v7,v15
- VPMSUMD(v15,v23,const1)
- lvx v23,off112,r4
- VPERM(v23,v23,v23,byteswap)
-
- addi r4,r4,8*16
-
- bdnz 4b
-
-.Lfirst_cool_down:
- /* First cool down pass */
- lvx const1,0,r3
- addi r3,r3,16
-
- vxor v0,v0,v8
- VPMSUMD(v8,v16,const1)
- ori r2,r2,0
-
- vxor v1,v1,v9
- VPMSUMD(v9,v17,const1)
- ori r2,r2,0
-
- vxor v2,v2,v10
- VPMSUMD(v10,v18,const1)
- ori r2,r2,0
-
- vxor v3,v3,v11
- VPMSUMD(v11,v19,const1)
- ori r2,r2,0
-
- vxor v4,v4,v12
- VPMSUMD(v12,v20,const1)
- ori r2,r2,0
-
- vxor v5,v5,v13
- VPMSUMD(v13,v21,const1)
- ori r2,r2,0
-
- vxor v6,v6,v14
- VPMSUMD(v14,v22,const1)
- ori r2,r2,0
-
- vxor v7,v7,v15
- VPMSUMD(v15,v23,const1)
- ori r2,r2,0
-
-.Lsecond_cool_down:
- /* Second cool down pass */
- vxor v0,v0,v8
- vxor v1,v1,v9
- vxor v2,v2,v10
- vxor v3,v3,v11
- vxor v4,v4,v12
- vxor v5,v5,v13
- vxor v6,v6,v14
- vxor v7,v7,v15
-
-#ifdef REFLECT
- /*
- * vpmsumd produces a 96 bit result in the least significant bits
- * of the register. Since we are bit reflected we have to shift it
- * left 32 bits so it occupies the least significant bits in the
- * bit reflected domain.
- */
- vsldoi v0,v0,zeroes,4
- vsldoi v1,v1,zeroes,4
- vsldoi v2,v2,zeroes,4
- vsldoi v3,v3,zeroes,4
- vsldoi v4,v4,zeroes,4
- vsldoi v5,v5,zeroes,4
- vsldoi v6,v6,zeroes,4
- vsldoi v7,v7,zeroes,4
-#endif
-
- /* xor with last 1024 bits */
- lvx v8,0,r4
- lvx v9,off16,r4
- VPERM(v8,v8,v8,byteswap)
- VPERM(v9,v9,v9,byteswap)
- lvx v10,off32,r4
- lvx v11,off48,r4
- VPERM(v10,v10,v10,byteswap)
- VPERM(v11,v11,v11,byteswap)
- lvx v12,off64,r4
- lvx v13,off80,r4
- VPERM(v12,v12,v12,byteswap)
- VPERM(v13,v13,v13,byteswap)
- lvx v14,off96,r4
- lvx v15,off112,r4
- VPERM(v14,v14,v14,byteswap)
- VPERM(v15,v15,v15,byteswap)
-
- addi r4,r4,8*16
-
- vxor v16,v0,v8
- vxor v17,v1,v9
- vxor v18,v2,v10
- vxor v19,v3,v11
- vxor v20,v4,v12
- vxor v21,v5,v13
- vxor v22,v6,v14
- vxor v23,v7,v15
-
- li r0,1
- cmpdi r6,0
- addi r6,r6,128
- bne 1b
-
- /* Work out how many bytes we have left */
- andi. r5,r5,127
-
- /* Calculate where in the constant table we need to start */
- subfic r6,r5,128
- add r3,r3,r6
-
- /* How many 16 byte chunks are in the tail */
- srdi r7,r5,4
- mtctr r7
-
- /*
- * Reduce the previously calculated 1024 bits to 64 bits, shifting
- * 32 bits to include the trailing 32 bits of zeros
- */
- lvx v0,0,r3
- lvx v1,off16,r3
- lvx v2,off32,r3
- lvx v3,off48,r3
- lvx v4,off64,r3
- lvx v5,off80,r3
- lvx v6,off96,r3
- lvx v7,off112,r3
- addi r3,r3,8*16
-
- VPMSUMW(v0,v16,v0)
- VPMSUMW(v1,v17,v1)
- VPMSUMW(v2,v18,v2)
- VPMSUMW(v3,v19,v3)
- VPMSUMW(v4,v20,v4)
- VPMSUMW(v5,v21,v5)
- VPMSUMW(v6,v22,v6)
- VPMSUMW(v7,v23,v7)
-
- /* Now reduce the tail (0 - 112 bytes) */
- cmpdi r7,0
- beq 1f
-
- lvx v16,0,r4
- lvx v17,0,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off16,r4
- lvx v17,off16,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off32,r4
- lvx v17,off32,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off48,r4
- lvx v17,off48,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off64,r4
- lvx v17,off64,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off80,r4
- lvx v17,off80,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
- bdz 1f
-
- lvx v16,off96,r4
- lvx v17,off96,r3
- VPERM(v16,v16,v16,byteswap)
- VPMSUMW(v16,v16,v17)
- vxor v0,v0,v16
-
- /* Now xor all the parallel chunks together */
-1: vxor v0,v0,v1
- vxor v2,v2,v3
- vxor v4,v4,v5
- vxor v6,v6,v7
-
- vxor v0,v0,v2
- vxor v4,v4,v6
-
- vxor v0,v0,v4
-
-.Lbarrett_reduction:
- /* Barrett constants */
- LOAD_REG_ADDR(r3, .barrett_constants)
-
- lvx const1,0,r3
- lvx const2,off16,r3
-
- vsldoi v1,v0,v0,8
- vxor v0,v0,v1 /* xor two 64 bit results together */
-
-#ifdef REFLECT
- /* shift left one bit */
- vspltisb v1,1
- vsl v0,v0,v1
-#endif
-
- vand v0,v0,mask_64bit
-#ifndef REFLECT
- /*
- * Now for the Barrett reduction algorithm. The idea is to calculate q,
- * the multiple of our polynomial that we need to subtract. By
- * doing the computation 2x bits higher (ie 64 bits) and shifting the
- * result back down 2x bits, we round down to the nearest multiple.
- */
- VPMSUMD(v1,v0,const1) /* ma */
- vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
- VPMSUMD(v1,v1,const2) /* qn */
- vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
-
- /*
- * Get the result into r3. We need to shift it left 8 bytes:
- * V0 [ 0 1 2 X ]
- * V0 [ 0 X 2 3 ]
- */
- vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
-#else
- /*
- * The reflected version of Barrett reduction. Instead of bit
- * reflecting our data (which is expensive to do), we bit reflect our
- * constants and our algorithm, which means the intermediate data in
- * our vector registers goes from 0-63 instead of 63-0. We can reflect
- * the algorithm because we don't carry in mod 2 arithmetic.
- */
- vand v1,v0,mask_32bit /* bottom 32 bits of a */
- VPMSUMD(v1,v1,const1) /* ma */
- vand v1,v1,mask_32bit /* bottom 32bits of ma */
- VPMSUMD(v1,v1,const2) /* qn */
- vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
-
- /*
- * Since we are bit reflected, the result (ie the low 32 bits) is in
- * the high 32 bits. We just need to shift it left 4 bytes
- * V0 [ 0 1 X 3 ]
- * V0 [ 0 X 2 3 ]
- */
-	vsldoi	v0,v0,zeroes,4		/* shift result into top 64 bits */
-#endif
-
- /* Get it into r3 */
- MFVRD(R3, v0)
-
-.Lout:
- subi r6,r1,56+10*16
- subi r7,r1,56+2*16
-
- lvx v20,0,r6
- lvx v21,off16,r6
- lvx v22,off32,r6
- lvx v23,off48,r6
- lvx v24,off64,r6
- lvx v25,off80,r6
- lvx v26,off96,r6
- lvx v27,off112,r6
- lvx v28,0,r7
- lvx v29,off16,r7
-
- ld r31,-8(r1)
- ld r30,-16(r1)
- ld r29,-24(r1)
- ld r28,-32(r1)
- ld r27,-40(r1)
- ld r26,-48(r1)
- ld r25,-56(r1)
-
- blr
-
-.Lfirst_warm_up_done:
- lvx const1,0,r3
- addi r3,r3,16
-
- VPMSUMD(v8,v16,const1)
- VPMSUMD(v9,v17,const1)
- VPMSUMD(v10,v18,const1)
- VPMSUMD(v11,v19,const1)
- VPMSUMD(v12,v20,const1)
- VPMSUMD(v13,v21,const1)
- VPMSUMD(v14,v22,const1)
- VPMSUMD(v15,v23,const1)
-
- b .Lsecond_cool_down
-
-.Lshort:
- cmpdi r5,0
- beq .Lzero
-
- LOAD_REG_ADDR(r3, .short_constants)
-
- /* Calculate where in the constant table we need to start */
- subfic r6,r5,256
- add r3,r3,r6
-
- /* How many 16 byte chunks? */
- srdi r7,r5,4
- mtctr r7
-
- vxor v19,v19,v19
- vxor v20,v20,v20
-
- lvx v0,0,r4
- lvx v16,0,r3
- VPERM(v0,v0,v16,byteswap)
- vxor v0,v0,v8 /* xor in initial value */
- VPMSUMW(v0,v0,v16)
- bdz .Lv0
-
- lvx v1,off16,r4
- lvx v17,off16,r3
- VPERM(v1,v1,v17,byteswap)
- VPMSUMW(v1,v1,v17)
- bdz .Lv1
-
- lvx v2,off32,r4
- lvx v16,off32,r3
- VPERM(v2,v2,v16,byteswap)
- VPMSUMW(v2,v2,v16)
- bdz .Lv2
-
- lvx v3,off48,r4
- lvx v17,off48,r3
- VPERM(v3,v3,v17,byteswap)
- VPMSUMW(v3,v3,v17)
- bdz .Lv3
-
- lvx v4,off64,r4
- lvx v16,off64,r3
- VPERM(v4,v4,v16,byteswap)
- VPMSUMW(v4,v4,v16)
- bdz .Lv4
-
- lvx v5,off80,r4
- lvx v17,off80,r3
- VPERM(v5,v5,v17,byteswap)
- VPMSUMW(v5,v5,v17)
- bdz .Lv5
-
- lvx v6,off96,r4
- lvx v16,off96,r3
- VPERM(v6,v6,v16,byteswap)
- VPMSUMW(v6,v6,v16)
- bdz .Lv6
-
- lvx v7,off112,r4
- lvx v17,off112,r3
- VPERM(v7,v7,v17,byteswap)
- VPMSUMW(v7,v7,v17)
- bdz .Lv7
-
- addi r3,r3,128
- addi r4,r4,128
-
- lvx v8,0,r4
- lvx v16,0,r3
- VPERM(v8,v8,v16,byteswap)
- VPMSUMW(v8,v8,v16)
- bdz .Lv8
-
- lvx v9,off16,r4
- lvx v17,off16,r3
- VPERM(v9,v9,v17,byteswap)
- VPMSUMW(v9,v9,v17)
- bdz .Lv9
-
- lvx v10,off32,r4
- lvx v16,off32,r3
- VPERM(v10,v10,v16,byteswap)
- VPMSUMW(v10,v10,v16)
- bdz .Lv10
-
- lvx v11,off48,r4
- lvx v17,off48,r3
- VPERM(v11,v11,v17,byteswap)
- VPMSUMW(v11,v11,v17)
- bdz .Lv11
-
- lvx v12,off64,r4
- lvx v16,off64,r3
- VPERM(v12,v12,v16,byteswap)
- VPMSUMW(v12,v12,v16)
- bdz .Lv12
-
- lvx v13,off80,r4
- lvx v17,off80,r3
- VPERM(v13,v13,v17,byteswap)
- VPMSUMW(v13,v13,v17)
- bdz .Lv13
-
- lvx v14,off96,r4
- lvx v16,off96,r3
- VPERM(v14,v14,v16,byteswap)
- VPMSUMW(v14,v14,v16)
- bdz .Lv14
-
- lvx v15,off112,r4
- lvx v17,off112,r3
- VPERM(v15,v15,v17,byteswap)
- VPMSUMW(v15,v15,v17)
-
-.Lv15: vxor v19,v19,v15
-.Lv14: vxor v20,v20,v14
-.Lv13: vxor v19,v19,v13
-.Lv12: vxor v20,v20,v12
-.Lv11: vxor v19,v19,v11
-.Lv10: vxor v20,v20,v10
-.Lv9: vxor v19,v19,v9
-.Lv8: vxor v20,v20,v8
-.Lv7: vxor v19,v19,v7
-.Lv6: vxor v20,v20,v6
-.Lv5: vxor v19,v19,v5
-.Lv4: vxor v20,v20,v4
-.Lv3: vxor v19,v19,v3
-.Lv2: vxor v20,v20,v2
-.Lv1: vxor v19,v19,v1
-.Lv0: vxor v20,v20,v0
-
- vxor v0,v19,v20
-
- b .Lbarrett_reduction
-
-.Lzero:
- mr r3,r10
- b .Lout
-
-FUNC_END(CRC_FUNCTION_NAME)
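
The template's header comment describes the final Barrett step only in prose. In symbols, for the non-reflected case (REFLECT undefined), with $n(x)$ the degree-32 CRC polynomial, $a(x)$ the 64-bit folded remainder, and $m(x)$ the precomputed constant from .barrett_constants, the .Lbarrett_reduction sequence computes (a sketch of the standard GF(2) Barrett identity; all products are carry-less, so subtraction is XOR):

$$
m(x) = \left\lfloor \frac{x^{64}}{n(x)} \right\rfloor, \qquad
q(x) = \left\lfloor \frac{a(x)\,m(x)}{x^{64}} \right\rfloor, \qquad
a(x) \bmod n(x) = \bigl(a(x) \oplus q(x)\,n(x)\bigr) \bmod x^{32}
$$

The two VPMSUMD instructions supply the products, the vsldoi performs the divide-by-$x^{64}$ shift, and the final vxor is the subtraction.
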
diff --git a/arch/powerpc/lib/crc32.c b/arch/powerpc/lib/crc32.c
deleted file mode 100644
index 0d9befb6e7b83e..00000000000000
--- a/arch/powerpc/lib/crc32.c
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <asm/switch_to.h>
-#include <crypto/internal/simd.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/preempt.h>
-#include <linux/uaccess.h>
-
-#define VMX_ALIGN 16
-#define VMX_ALIGN_MASK (VMX_ALIGN-1)
-
-#define VECTOR_BREAKPOINT 512
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
-
-u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len);
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_le_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- unsigned int prealign;
- unsigned int tail;
-
- if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) ||
- !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable())
- return crc32c_base(crc, p, len);
-
- if ((unsigned long)p & VMX_ALIGN_MASK) {
- prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
- crc = crc32c_base(crc, p, prealign);
- len -= prealign;
- p += prealign;
- }
-
- if (len & ~VMX_ALIGN_MASK) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_altivec();
- crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
- disable_kernel_altivec();
- pagefault_enable();
- preempt_enable();
- }
-
- tail = len & VMX_ALIGN_MASK;
- if (tail) {
- p += len & ~VMX_ALIGN_MASK;
- crc = crc32c_base(crc, p, tail);
- }
-
- return crc;
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-static int __init crc32_powerpc_init(void)
-{
- if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
- (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
- static_branch_enable(&have_vec_crypto);
- return 0;
-}
-subsys_initcall(crc32_powerpc_init);
-
-static void __exit crc32_powerpc_exit(void)
-{
-}
-module_exit(crc32_powerpc_exit);
-
-u32 crc32_optimizations(void)
-{
- if (static_key_enabled(&have_vec_crypto))
- return CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
-MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions");
-MODULE_LICENSE("GPL");
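
The deleted crc32c_arch() is the standard vector-prologue pattern: checksum bytes one at a time until p reaches a 16-byte boundary, run the vector kernel over whole aligned blocks, then finish the sub-block tail generically; the 528-byte (VECTOR_BREAKPOINT + VMX_ALIGN) cutoff keeps short inputs away from the AltiVec save/restore cost. A compilable sketch with crc_bytes()/crc_vector() as illustrative stand-ins (0x82f63b78 is the reflected Castagnoli polynomial):

#include <stddef.h>
#include <stdint.h>

#define VMX_ALIGN       16
#define VMX_ALIGN_MASK  (VMX_ALIGN - 1)

static uint32_t crc_bytes(uint32_t crc, const uint8_t *p, size_t n)
{
        while (n--) {   /* generic byte-at-a-time fallback */
                crc ^= *p++;
                for (int i = 0; i < 8; i++)
                        crc = (crc >> 1) ^ (0x82f63b78u & (0u - (crc & 1)));
        }
        return crc;
}

static uint32_t crc_vector(uint32_t crc, const uint8_t *p, size_t n)
{
        return crc_bytes(crc, p, n);    /* placeholder for __crc32c_vpmsum() */
}

uint32_t crc32c_split(uint32_t crc, const uint8_t *p, size_t len)
{
        size_t head = (uintptr_t)p & VMX_ALIGN_MASK;

        if (head) {
                head = VMX_ALIGN - head;        /* 'prealign' above */
                if (head > len)                 /* cannot happen in the kernel
                                                   code: len >= 528 there */
                        head = len;
                crc = crc_bytes(crc, p, head);
                p += head;
                len -= head;
        }
        crc = crc_vector(crc, p, len & ~(size_t)VMX_ALIGN_MASK); /* aligned bulk */
        p += len & ~(size_t)VMX_ALIGN_MASK;
        return crc_bytes(crc, p, len & VMX_ALIGN_MASK);          /* tail */
}
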
diff --git a/arch/powerpc/lib/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc32c-vpmsum_asm.S
deleted file mode 100644
index 1b35c55cce0a61..00000000000000
--- a/arch/powerpc/lib/crc32c-vpmsum_asm.S
+++ /dev/null
@@ -1,842 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Calculate a crc32c with vpmsum acceleration
- *
- * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
- */
- .section .rodata
-.balign 16
-
-.byteswap_constant:
- /* byte reverse permute constant */
- .octa 0x0F0E0D0C0B0A09080706050403020100
-
-.constants:
-
-	/* Reduce 262144 bits to 1024 bits */
- /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
- .octa 0x00000000b6ca9e20000000009c37c408
-
- /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
- .octa 0x00000000350249a800000001b51df26c
-
- /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
- .octa 0x00000001862dac54000000000724b9d0
-
- /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
- .octa 0x00000001d87fb48c00000001c00532fe
-
- /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
- .octa 0x00000001f39b699e00000000f05a9362
-
- /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
- .octa 0x0000000101da11b400000001e1007970
-
- /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
- .octa 0x00000001cab571e000000000a57366ee
-
- /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
- .octa 0x00000000c7020cfe0000000192011284
-
- /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
- .octa 0x00000000cdaed1ae0000000162716d9a
-
- /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
- .octa 0x00000001e804effc00000000cd97ecde
-
- /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
- .octa 0x0000000077c3ea3a0000000058812bc0
-
- /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
- .octa 0x0000000068df31b40000000088b8c12e
-
- /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
- .octa 0x00000000b059b6c200000001230b234c
-
- /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
- .octa 0x0000000145fb8ed800000001120b416e
-
- /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
- .octa 0x00000000cbc0916800000001974aecb0
-
- /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
- .octa 0x000000005ceeedc2000000008ee3f226
-
- /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
- .octa 0x0000000047d74e8600000001089aba9a
-
- /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
- .octa 0x00000001407e9e220000000065113872
-
- /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
- .octa 0x00000001da967bda000000005c07ec10
-
- /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
- .octa 0x000000006c8983680000000187590924
-
- /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
- .octa 0x00000000f2d14c9800000000e35da7c6
-
- /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
- .octa 0x00000001993c6ad4000000000415855a
-
- /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
- .octa 0x000000014683d1ac0000000073617758
-
- /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
- .octa 0x00000001a7c93e6c0000000176021d28
-
- /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
- .octa 0x000000010211e90a00000001c358fd0a
-
- /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
- .octa 0x000000001119403e00000001ff7a2c18
-
- /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
- .octa 0x000000001c3261aa00000000f2d9f7e4
-
- /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
- .octa 0x000000014e37a634000000016cf1f9c8
-
- /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
- .octa 0x0000000073786c0c000000010af9279a
-
- /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
- .octa 0x000000011dc037f80000000004f101e8
-
- /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
- .octa 0x0000000031433dfc0000000070bcf184
-
- /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
- .octa 0x000000009cde8348000000000a8de642
-
- /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
- .octa 0x0000000038d3c2a60000000062ea130c
-
- /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
- .octa 0x000000011b25f26000000001eb31cbb2
-
- /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
- .octa 0x000000001629e6f00000000170783448
-
- /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
- .octa 0x0000000160838b4c00000001a684b4c6
-
- /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
- .octa 0x000000007a44011c00000000253ca5b4
-
- /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
- .octa 0x00000000226f417a0000000057b4b1e2
-
- /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
- .octa 0x0000000045eb2eb400000000b6bd084c
-
- /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
- .octa 0x000000014459d70c0000000123c2d592
-
- /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
- .octa 0x00000001d406ed8200000000159dafce
-
- /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
- .octa 0x0000000160c8e1a80000000127e1a64e
-
- /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
- .octa 0x0000000027ba80980000000056860754
-
- /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
- .octa 0x000000006d92d01800000001e661aae8
-
- /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
- .octa 0x000000012ed7e3f200000000f82c6166
-
- /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
- .octa 0x000000002dc8778800000000c4f9c7ae
-
- /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
- .octa 0x0000000018240bb80000000074203d20
-
- /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
- .octa 0x000000001ad381580000000198173052
-
- /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
- .octa 0x00000001396b78f200000001ce8aba54
-
- /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
- .octa 0x000000011a68133400000001850d5d94
-
- /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
- .octa 0x000000012104732e00000001d609239c
-
- /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
- .octa 0x00000000a140d90c000000001595f048
-
- /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
- .octa 0x00000001b7215eda0000000042ccee08
-
- /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
- .octa 0x00000001aaf1df3c000000010a389d74
-
- /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
- .octa 0x0000000029d15b8a000000012a840da6
-
- /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
- .octa 0x00000000f1a96922000000001d181c0c
-
- /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
- .octa 0x00000001ac80d03c0000000068b7d1f6
-
- /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
- .octa 0x000000000f11d56a000000005b0f14fc
-
- /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
- .octa 0x00000001f1c022a20000000179e9e730
-
- /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
- .octa 0x0000000173d00ae200000001ce1368d6
-
- /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
- .octa 0x00000001d4ffe4ac0000000112c3a84c
-
- /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
- .octa 0x000000016edc5ae400000000de940fee
-
- /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
- .octa 0x00000001f1a0214000000000fe896b7e
-
- /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
- .octa 0x00000000ca0b28a000000001f797431c
-
- /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
- .octa 0x00000001928e30a20000000053e989ba
-
- /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
- .octa 0x0000000097b1b002000000003920cd16
-
- /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
- .octa 0x00000000b15bf90600000001e6f579b8
-
- /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
- .octa 0x00000000411c5d52000000007493cb0a
-
- /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
- .octa 0x00000001c36f330000000001bdd376d8
-
- /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
- .octa 0x00000001119227e0000000016badfee6
-
- /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
- .octa 0x00000000114d47020000000071de5c58
-
- /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
- .octa 0x00000000458b5b9800000000453f317c
-
- /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
- .octa 0x000000012e31fb8e0000000121675cce
-
- /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
- .octa 0x000000005cf619d800000001f409ee92
-
- /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
- .octa 0x0000000063f4d8b200000000f36b9c88
-
- /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
- .octa 0x000000004138dc8a0000000036b398f4
-
- /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
- .octa 0x00000001d29ee8e000000001748f9adc
-
- /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
- .octa 0x000000006a08ace800000001be94ec00
-
- /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
- .octa 0x0000000127d4201000000000b74370d6
-
- /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
- .octa 0x0000000019d76b6200000001174d0b98
-
- /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
- .octa 0x00000001b1471f6e00000000befc06a4
-
- /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
- .octa 0x00000001f64c19cc00000001ae125288
-
- /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
- .octa 0x00000000003c0ea00000000095c19b34
-
- /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
- .octa 0x000000014d73abf600000001a78496f2
-
- /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
- .octa 0x00000001620eb84400000001ac5390a0
-
- /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
- .octa 0x0000000147655048000000002a80ed6e
-
- /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
- .octa 0x0000000067b5077e00000001fa9b0128
-
- /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
- .octa 0x0000000010ffe20600000001ea94929e
-
- /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
- .octa 0x000000000fee8f1e0000000125f4305c
-
- /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
- .octa 0x00000001da26fbae00000001471e2002
-
- /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
- .octa 0x00000001b3a8bd880000000132d2253a
-
- /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
- .octa 0x00000000e8f3898e00000000f26b3592
-
- /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
- .octa 0x00000000b0d0d28c00000000bc8b67b0
-
- /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
- .octa 0x0000000030f2a798000000013a826ef2
-
- /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
- .octa 0x000000000fba10020000000081482c84
-
- /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
- .octa 0x00000000bdb9bd7200000000e77307c2
-
- /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
- .octa 0x0000000075d3bf5a00000000d4a07ec8
-
- /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
- .octa 0x00000000ef1f98a00000000017102100
-
- /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
- .octa 0x00000000689c760200000000db406486
-
- /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
- .octa 0x000000016d5fa5fe0000000192db7f88
-
- /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
- .octa 0x00000001d0d2b9ca000000018bf67b1e
-
- /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
- .octa 0x0000000041e7b470000000007c09163e
-
- /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
- .octa 0x00000001cbb6495e000000000adac060
-
- /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
- .octa 0x000000010052a0b000000000bd8316ae
-
- /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
- .octa 0x00000001d8effb5c000000019f09ab54
-
- /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
- .octa 0x00000001d969853c0000000125155542
-
- /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
- .octa 0x00000000523ccce2000000018fdb5882
-
- /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
- .octa 0x000000001e2436bc00000000e794b3f4
-
- /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
- .octa 0x00000000ddd1c3a2000000016f9bb022
-
- /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
- .octa 0x0000000019fcfe3800000000290c9978
-
- /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
- .octa 0x00000001ce95db640000000083c0f350
-
- /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
- .octa 0x00000000af5828060000000173ea6628
-
- /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
- .octa 0x00000001006388f600000001c8b4e00a
-
- /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
- .octa 0x0000000179eca00a00000000de95d6aa
-
- /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
- .octa 0x0000000122410a6a000000010b7f7248
-
- /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
- .octa 0x000000004288e87c00000001326e3a06
-
- /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
- .octa 0x000000016c5490da00000000bb62c2e6
-
- /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
- .octa 0x00000000d1c71f6e0000000156a4b2c2
-
- /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
- .octa 0x00000001b4ce08a6000000011dfe763a
-
- /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
- .octa 0x00000001466ba60c000000007bcca8e2
-
- /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
- .octa 0x00000001f6c488a40000000186118faa
-
- /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
- .octa 0x000000013bfb06820000000111a65a88
-
- /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
- .octa 0x00000000690e9e54000000003565e1c4
-
- /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
- .octa 0x00000000281346b6000000012ed02a82
-
- /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
- .octa 0x000000015646402400000000c486ecfc
-
- /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
- .octa 0x000000016063a8dc0000000001b951b2
-
- /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
- .octa 0x0000000116a663620000000048143916
-
- /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
- .octa 0x000000017e8aa4d200000001dc2ae124
-
- /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
- .octa 0x00000001728eb10c00000001416c58d6
-
- /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
- .octa 0x00000001b08fd7fa00000000a479744a
-
- /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
- .octa 0x00000001092a16e80000000096ca3a26
-
- /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
- .octa 0x00000000a505637c00000000ff223d4e
-
- /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
- .octa 0x00000000d94869b2000000010e84da42
-
- /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
- .octa 0x00000001c8b203ae00000001b61ba3d0
-
- /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
- .octa 0x000000005704aea000000000680f2de8
-
- /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
- .octa 0x000000012e295fa2000000008772a9a8
-
- /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
- .octa 0x000000011d0908bc0000000155f295bc
-
- /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
- .octa 0x0000000193ed97ea00000000595f9282
-
- /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
- .octa 0x000000013a0f1c520000000164b1c25a
-
- /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
- .octa 0x000000010c2c40c000000000fbd67c50
-
- /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
- .octa 0x00000000ff6fac3e0000000096076268
-
- /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
- .octa 0x000000017b3609c000000001d288e4cc
-
- /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
- .octa 0x0000000088c8c92200000001eaac1bdc
-
- /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
- .octa 0x00000001751baae600000001f1ea39e2
-
- /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
- .octa 0x000000010795297200000001eb6506fc
-
- /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
- .octa 0x0000000162b00abe000000010f806ffe
-
- /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
- .octa 0x000000000d7b404c000000010408481e
-
- /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
- .octa 0x00000000763b13d40000000188260534
-
- /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
- .octa 0x00000000f6dc22d80000000058fc73e0
-
- /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
- .octa 0x000000007daae06000000000391c59b8
-
- /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
- .octa 0x000000013359ab7c000000018b638400
-
- /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
- .octa 0x000000008add438a000000011738f5c4
-
- /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
- .octa 0x00000001edbefdea000000008cf7c6da
-
- /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
- .octa 0x000000004104e0f800000001ef97fb16
-
- /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
- .octa 0x00000000b48a82220000000102130e20
-
- /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
- .octa 0x00000001bcb4684400000000db968898
-
- /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
- .octa 0x000000013293ce0a00000000b5047b5e
-
- /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
- .octa 0x00000001710d0844000000010b90fdb2
-
- /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
- .octa 0x0000000117907f6e000000004834a32e
-
- /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
- .octa 0x0000000087ddf93e0000000059c8f2b0
-
- /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
- .octa 0x000000005970e9b00000000122cec508
-
- /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
- .octa 0x0000000185b2b7d0000000000a330cda
-
- /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
- .octa 0x00000001dcee0efc000000014a47148c
-
- /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
- .octa 0x0000000030da27220000000042c61cb8
-
- /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
- .octa 0x000000012f925a180000000012fe6960
-
- /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
- .octa 0x00000000dd2e357c00000000dbda2c20
-
- /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
- .octa 0x00000000071c80de000000011122410c
-
- /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
- .octa 0x000000011513140a00000000977b2070
-
- /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
- .octa 0x00000001df876e8e000000014050438e
-
- /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
- .octa 0x000000015f81d6ce0000000147c840e8
-
- /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
- .octa 0x000000019dd94dbe00000001cc7c88ce
-
- /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
- .octa 0x00000001373d206e00000001476b35a4
-
- /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
- .octa 0x00000000668ccade000000013d52d508
-
- /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
- .octa 0x00000001b192d268000000008e4be32e
-
- /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
- .octa 0x00000000e30f3a7800000000024120fe
-
- /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
- .octa 0x000000010ef1f7bc00000000ddecddb4
-
- /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
- .octa 0x00000001f5ac738000000000d4d403bc
-
- /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
- .octa 0x000000011822ea7000000001734b89aa
-
- /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
- .octa 0x00000000c3a33848000000010e7a58d6
-
- /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
- .octa 0x00000001bd151c2400000001f9f04e9c
-
- /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
- .octa 0x0000000056002d7600000000b692225e
-
- /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
- .octa 0x000000014657c4f4000000019b8d3f3e
-
- /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
- .octa 0x0000000113742d7c00000001a874f11e
-
- /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
- .octa 0x000000019c5920ba000000010d5a4254
-
- /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
- .octa 0x000000005216d2d600000000bbb2f5d6
-
- /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
- .octa 0x0000000136f5ad8a0000000179cc0e36
-
- /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
- .octa 0x000000018b07beb600000001dca1da4a
-
- /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
- .octa 0x00000000db1e93b000000000feb1a192
-
- /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
- .octa 0x000000000b96fa3a00000000d1eeedd6
-
- /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
- .octa 0x00000001d9968af0000000008fad9bb4
-
- /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
- .octa 0x000000000e4a77a200000001884938e4
-
- /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
- .octa 0x00000000508c2ac800000001bc2e9bc0
-
- /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
- .octa 0x0000000021572a8000000001f9658a68
-
- /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
- .octa 0x00000001b859daf2000000001b9224fc
-
- /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
- .octa 0x000000016f7884740000000055b2fb84
-
- /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
- .octa 0x00000001b438810e000000018b090348
-
- /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
- .octa 0x0000000095ddc6f2000000011ccbd5ea
-
- /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
- .octa 0x00000001d977c20c0000000007ae47f8
-
- /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
- .octa 0x00000000ebedb99a0000000172acbec0
-
- /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
- .octa 0x00000001df9e9e9200000001c6e3ff20
-
- /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
- .octa 0x00000001a4a3f95200000000e1b38744
-
- /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
- .octa 0x00000000e2f5122000000000791585b2
-
- /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
- .octa 0x000000004aa01f3e00000000ac53b894
-
- /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
- .octa 0x00000000b3e90a5800000001ed5f2cf4
-
- /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
- .octa 0x000000000c9ca2aa00000001df48b2e0
-
- /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
- .octa 0x000000015168231600000000049c1c62
-
- /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
- .octa 0x0000000036fce78c000000017c460c12
-
- /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
- .octa 0x000000009037dc10000000015be4da7e
-
- /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
- .octa 0x00000000d3298582000000010f38f668
-
- /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
- .octa 0x00000001b42e8ad60000000039f40a00
-
- /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
- .octa 0x00000000142a983800000000bd4c10c4
-
- /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
- .octa 0x0000000109c7f1900000000042db1d98
-
- /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
- .octa 0x0000000056ff931000000001c905bae6
-
- /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
- .octa 0x00000001594513aa00000000069d40ea
-
- /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
- .octa 0x00000001e3b5b1e8000000008e4fbad0
-
- /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
- .octa 0x000000011dd5fc080000000047bedd46
-
- /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
- .octa 0x00000001675f0cc20000000026396bf8
-
- /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
- .octa 0x00000000d1c8dd4400000000379beb92
-
- /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
- .octa 0x0000000115ebd3d8000000000abae54a
-
- /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
- .octa 0x00000001ecbd0dac0000000007e6a128
-
- /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
- .octa 0x00000000cdf67af2000000000ade29d2
-
- /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
- .octa 0x000000004c01ff4c00000000f974c45c
-
- /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
- .octa 0x00000000f2d8657e00000000e77ac60a
-
- /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
- .octa 0x000000006bae74c40000000145895816
-
- /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
- .octa 0x0000000152af8aa00000000038e362be
-
- /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
- .octa 0x0000000004663802000000007f991a64
-
- /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
- .octa 0x00000001ab2f5afc00000000fa366d3a
-
- /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
- .octa 0x0000000074a4ebd400000001a2bb34f0
-
- /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
- .octa 0x00000001d7ab3a4c0000000028a9981e
-
- /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
- .octa 0x00000001a8da60c600000001dbc672be
-
- /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
- .octa 0x000000013cf6382000000000b04d77f6
-
- /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
- .octa 0x00000000bec12e1e0000000124400d96
-
- /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
- .octa 0x00000001c6368010000000014ca4b414
-
- /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
- .octa 0x00000001e6e78758000000012fe2c938
-
- /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
- .octa 0x000000008d7f2b3c00000001faed01e6
-
- /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
- .octa 0x000000016b4a156e000000007e80ecfe
-
- /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
- .octa 0x00000001c63cfeb60000000098daee94
-
- /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
- .octa 0x000000015f902670000000010a04edea
-
- /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
- .octa 0x00000001cd5de11e00000001c00b4524
-
- /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
- .octa 0x000000001acaec540000000170296550
-
- /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
- .octa 0x000000002bd0ca780000000181afaa48
-
- /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
- .octa 0x0000000032d63d5c0000000185a31ffa
-
- /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
- .octa 0x000000001c6d4e4c000000002469f608
-
- /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
- .octa 0x0000000106a60b92000000006980102a
-
- /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
- .octa 0x00000000d3855e120000000111ea9ca8
-
- /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
- .octa 0x00000000e312563600000001bd1d29ce
-
- /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
- .octa 0x000000009e8f7ea400000001b34b9580
-
- /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
- .octa 0x00000001c82e562c000000003076054e
-
- /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
- .octa 0x00000000ca9f09ce000000012a608ea4
-
- /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
- .octa 0x00000000c63764e600000000784d05fe
-
- /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
- .octa 0x0000000168d2e49e000000016ef0d82a
-
- /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
- .octa 0x00000000e986c1480000000075bda454
-
- /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
- .octa 0x00000000cfb65894000000003dc0a1c4
-
- /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
- .octa 0x0000000111cadee400000000e9a5d8be
-
- /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
- .octa 0x0000000171fb63ce00000001609bc4b4
-
-.short_constants:
-
- /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
- /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
- .octa 0x7fec2963e5bf80485cf015c388e56f72
-
- /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
- .octa 0x38e888d4844752a9963a18920246e2e6
-
- /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
- .octa 0x42316c00730206ad419a441956993a31
-
- /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
- .octa 0x543d5c543e65ddf9924752ba2b830011
-
- /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
- .octa 0x78e87aaf56767c9255bd7f9518e4a304
-
- /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
- .octa 0x8f68fcec1903da7f6d76739fe0553f1e
-
- /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
- .octa 0x3f4840246791d588c133722b1fe0b5c3
-
- /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
- .octa 0x34c96751b04de25a64b67ee0e55ef1f3
-
- /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
- .octa 0x156c8e180b4a395b069db049b8fdb1e7
-
- /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
- .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
-
- /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
- .octa 0x041d37768cd75659817cdc5119b29a35
-
- /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
- .octa 0x3a0777818cfaa9651ce9d94b36c41f1c
-
- /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
- .octa 0x0e148e8252377a554f256efcb82be955
-
- /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
- .octa 0x9c25531d19e65ddeec1631edb2dea967
-
- /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
- .octa 0x790606ff9957c0a65d27e147510ac59a
-
- /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
- .octa 0x82f63b786ea2d55ca66805eb18b8ea18
-
-
-.barrett_constants:
- /* 33 bit reflected Barrett constant m - (4^32)/n */
- .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
- /* 33 bit reflected Barrett constant n */
- .octa 0x00000000000000000000000105ec76f1
-
-#define CRC_FUNCTION_NAME __crc32c_vpmsum
-#define REFLECT
-#include "crc-vpmsum-template.S"
diff --git a/arch/powerpc/lib/crct10dif-vpmsum_asm.S b/arch/powerpc/lib/crct10dif-vpmsum_asm.S
deleted file mode 100644
index 47a6266d89a8a7..00000000000000
--- a/arch/powerpc/lib/crct10dif-vpmsum_asm.S
+++ /dev/null
@@ -1,845 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Calculate a CRC T10DIF with vpmsum acceleration
- *
- * Constants generated by crc32-vpmsum, available at
- * https://github.com/antonblanchard/crc32-vpmsum
- *
- * crc32-vpmsum is
- * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
- */
- .section .rodata
-.balign 16
-
-.byteswap_constant:
- /* byte reverse permute constant */
- .octa 0x0F0E0D0C0B0A09080706050403020100
-
-.constants:
-
-	/* Reduce 262144 bits to 1024 bits */
- /* x^261184 mod p(x), x^261120 mod p(x) */
- .octa 0x0000000056d300000000000052550000
-
- /* x^260160 mod p(x), x^260096 mod p(x) */
- .octa 0x00000000ee67000000000000a1e40000
-
- /* x^259136 mod p(x), x^259072 mod p(x) */
- .octa 0x0000000060830000000000004ad10000
-
- /* x^258112 mod p(x), x^258048 mod p(x) */
- .octa 0x000000008cfe0000000000009ab40000
-
- /* x^257088 mod p(x), x^257024 mod p(x) */
- .octa 0x000000003e93000000000000fdb50000
-
- /* x^256064 mod p(x), x^256000 mod p(x) */
- .octa 0x000000003c2000000000000045480000
-
- /* x^255040 mod p(x), x^254976 mod p(x) */
- .octa 0x00000000b1fc0000000000008d690000
-
- /* x^254016 mod p(x), x^253952 mod p(x) */
- .octa 0x00000000f82b00000000000024ad0000
-
- /* x^252992 mod p(x), x^252928 mod p(x) */
- .octa 0x0000000044420000000000009f1a0000
-
- /* x^251968 mod p(x), x^251904 mod p(x) */
- .octa 0x00000000e88c00000000000066ec0000
-
- /* x^250944 mod p(x), x^250880 mod p(x) */
- .octa 0x00000000385c000000000000c87d0000
-
- /* x^249920 mod p(x), x^249856 mod p(x) */
- .octa 0x000000003227000000000000c8ff0000
-
- /* x^248896 mod p(x), x^248832 mod p(x) */
- .octa 0x00000000a9a900000000000033440000
-
- /* x^247872 mod p(x), x^247808 mod p(x) */
- .octa 0x00000000abaa00000000000066eb0000
-
- /* x^246848 mod p(x), x^246784 mod p(x) */
- .octa 0x000000001ac3000000000000c4ef0000
-
- /* x^245824 mod p(x), x^245760 mod p(x) */
- .octa 0x0000000063f000000000000056f30000
-
- /* x^244800 mod p(x), x^244736 mod p(x) */
- .octa 0x0000000032cc00000000000002050000
-
- /* x^243776 mod p(x), x^243712 mod p(x) */
- .octa 0x00000000f8b5000000000000568e0000
-
- /* x^242752 mod p(x), x^242688 mod p(x) */
- .octa 0x000000008db100000000000064290000
-
- /* x^241728 mod p(x), x^241664 mod p(x) */
- .octa 0x0000000059ca0000000000006b660000
-
- /* x^240704 mod p(x), x^240640 mod p(x) */
- .octa 0x000000005f5c00000000000018f80000
-
- /* x^239680 mod p(x), x^239616 mod p(x) */
- .octa 0x0000000061af000000000000b6090000
-
- /* x^238656 mod p(x), x^238592 mod p(x) */
- .octa 0x00000000e29e000000000000099a0000
-
- /* x^237632 mod p(x), x^237568 mod p(x) */
- .octa 0x000000000975000000000000a8360000
-
- /* x^236608 mod p(x), x^236544 mod p(x) */
- .octa 0x0000000043900000000000004f570000
-
- /* x^235584 mod p(x), x^235520 mod p(x) */
- .octa 0x00000000f9cd000000000000134c0000
-
- /* x^234560 mod p(x), x^234496 mod p(x) */
- .octa 0x000000007c29000000000000ec380000
-
- /* x^233536 mod p(x), x^233472 mod p(x) */
- .octa 0x000000004c6a000000000000b0d10000
-
- /* x^232512 mod p(x), x^232448 mod p(x) */
- .octa 0x00000000e7290000000000007d3e0000
-
- /* x^231488 mod p(x), x^231424 mod p(x) */
- .octa 0x00000000f1ab000000000000f0b20000
-
- /* x^230464 mod p(x), x^230400 mod p(x) */
- .octa 0x0000000039db0000000000009c270000
-
- /* x^229440 mod p(x), x^229376 mod p(x) */
- .octa 0x000000005e2800000000000092890000
-
- /* x^228416 mod p(x), x^228352 mod p(x) */
- .octa 0x00000000d44e000000000000d5ee0000
-
- /* x^227392 mod p(x), x^227328 mod p(x) */
- .octa 0x00000000cd0a00000000000041f50000
-
- /* x^226368 mod p(x), x^226304 mod p(x) */
- .octa 0x00000000c5b400000000000010520000
-
- /* x^225344 mod p(x), x^225280 mod p(x) */
- .octa 0x00000000fd2100000000000042170000
-
- /* x^224320 mod p(x), x^224256 mod p(x) */
- .octa 0x000000002f2500000000000095c20000
-
- /* x^223296 mod p(x), x^223232 mod p(x) */
- .octa 0x000000001b0100000000000001ce0000
-
- /* x^222272 mod p(x), x^222208 mod p(x) */
- .octa 0x000000000d430000000000002aca0000
-
- /* x^221248 mod p(x), x^221184 mod p(x) */
- .octa 0x0000000030a6000000000000385e0000
-
- /* x^220224 mod p(x), x^220160 mod p(x) */
- .octa 0x00000000e37b0000000000006f7a0000
-
- /* x^219200 mod p(x), x^219136 mod p(x) */
- .octa 0x00000000873600000000000024320000
-
- /* x^218176 mod p(x), x^218112 mod p(x) */
- .octa 0x00000000e9fb000000000000bd9c0000
-
- /* x^217152 mod p(x), x^217088 mod p(x) */
- .octa 0x000000003b9500000000000054bc0000
-
- /* x^216128 mod p(x), x^216064 mod p(x) */
- .octa 0x00000000133e000000000000a4660000
-
- /* x^215104 mod p(x), x^215040 mod p(x) */
- .octa 0x00000000784500000000000079930000
-
- /* x^214080 mod p(x), x^214016 mod p(x) */
- .octa 0x00000000b9800000000000001bb80000
-
- /* x^213056 mod p(x), x^212992 mod p(x) */
- .octa 0x00000000687600000000000024400000
-
- /* x^212032 mod p(x), x^211968 mod p(x) */
- .octa 0x00000000aff300000000000029e10000
-
- /* x^211008 mod p(x), x^210944 mod p(x) */
- .octa 0x0000000024b50000000000005ded0000
-
- /* x^209984 mod p(x), x^209920 mod p(x) */
- .octa 0x0000000017e8000000000000b12e0000
-
- /* x^208960 mod p(x), x^208896 mod p(x) */
- .octa 0x00000000128400000000000026d20000
-
- /* x^207936 mod p(x), x^207872 mod p(x) */
- .octa 0x000000002115000000000000a32a0000
-
- /* x^206912 mod p(x), x^206848 mod p(x) */
- .octa 0x000000009595000000000000a1210000
-
- /* x^205888 mod p(x), x^205824 mod p(x) */
- .octa 0x00000000281e000000000000ee8b0000
-
- /* x^204864 mod p(x), x^204800 mod p(x) */
- .octa 0x0000000006010000000000003d0d0000
-
- /* x^203840 mod p(x), x^203776 mod p(x) */
- .octa 0x00000000e2b600000000000034e90000
-
- /* x^202816 mod p(x), x^202752 mod p(x) */
- .octa 0x000000001bd40000000000004cdb0000
-
- /* x^201792 mod p(x), x^201728 mod p(x) */
- .octa 0x00000000df2800000000000030e90000
-
- /* x^200768 mod p(x), x^200704 mod p(x) */
- .octa 0x0000000049c200000000000042590000
-
- /* x^199744 mod p(x), x^199680 mod p(x) */
- .octa 0x000000009b97000000000000df950000
-
- /* x^198720 mod p(x), x^198656 mod p(x) */
- .octa 0x000000006184000000000000da7b0000
-
- /* x^197696 mod p(x), x^197632 mod p(x) */
- .octa 0x00000000461700000000000012510000
-
- /* x^196672 mod p(x), x^196608 mod p(x) */
- .octa 0x000000009b40000000000000f37e0000
-
- /* x^195648 mod p(x), x^195584 mod p(x) */
- .octa 0x00000000eeb2000000000000ecf10000
-
- /* x^194624 mod p(x), x^194560 mod p(x) */
- .octa 0x00000000b2e800000000000050f20000
-
- /* x^193600 mod p(x), x^193536 mod p(x) */
- .octa 0x00000000f59a000000000000e0b30000
-
- /* x^192576 mod p(x), x^192512 mod p(x) */
- .octa 0x00000000467f0000000000004d5a0000
-
- /* x^191552 mod p(x), x^191488 mod p(x) */
- .octa 0x00000000da92000000000000bb010000
-
- /* x^190528 mod p(x), x^190464 mod p(x) */
- .octa 0x000000001e1000000000000022a40000
-
- /* x^189504 mod p(x), x^189440 mod p(x) */
- .octa 0x0000000058fe000000000000836f0000
-
- /* x^188480 mod p(x), x^188416 mod p(x) */
- .octa 0x00000000b9ce000000000000d78d0000
-
- /* x^187456 mod p(x), x^187392 mod p(x) */
- .octa 0x0000000022210000000000004f8d0000
-
- /* x^186432 mod p(x), x^186368 mod p(x) */
- .octa 0x00000000744600000000000033760000
-
- /* x^185408 mod p(x), x^185344 mod p(x) */
- .octa 0x000000001c2e000000000000a1e50000
-
- /* x^184384 mod p(x), x^184320 mod p(x) */
- .octa 0x00000000dcc8000000000000a1a40000
-
- /* x^183360 mod p(x), x^183296 mod p(x) */
- .octa 0x00000000910f00000000000019a20000
-
- /* x^182336 mod p(x), x^182272 mod p(x) */
- .octa 0x0000000055d5000000000000f6ae0000
-
- /* x^181312 mod p(x), x^181248 mod p(x) */
- .octa 0x00000000c8ba000000000000a7ac0000
-
- /* x^180288 mod p(x), x^180224 mod p(x) */
- .octa 0x0000000031f8000000000000eea20000
-
- /* x^179264 mod p(x), x^179200 mod p(x) */
- .octa 0x000000001966000000000000c4d90000
-
- /* x^178240 mod p(x), x^178176 mod p(x) */
- .octa 0x00000000b9810000000000002b470000
-
- /* x^177216 mod p(x), x^177152 mod p(x) */
- .octa 0x000000008303000000000000f7cf0000
-
- /* x^176192 mod p(x), x^176128 mod p(x) */
- .octa 0x000000002ce500000000000035b30000
-
- /* x^175168 mod p(x), x^175104 mod p(x) */
- .octa 0x000000002fae0000000000000c7c0000
-
- /* x^174144 mod p(x), x^174080 mod p(x) */
- .octa 0x00000000f50c0000000000009edf0000
-
- /* x^173120 mod p(x), x^173056 mod p(x) */
- .octa 0x00000000714f00000000000004cd0000
-
- /* x^172096 mod p(x), x^172032 mod p(x) */
- .octa 0x00000000c161000000000000541b0000
-
- /* x^171072 mod p(x), x^171008 mod p(x) */
- .octa 0x0000000021c8000000000000e2700000
-
- /* x^170048 mod p(x), x^169984 mod p(x) */
- .octa 0x00000000b93d00000000000009a60000
-
- /* x^169024 mod p(x), x^168960 mod p(x) */
- .octa 0x00000000fbcf000000000000761c0000
-
- /* x^168000 mod p(x), x^167936 mod p(x) */
- .octa 0x0000000026350000000000009db30000
-
- /* x^166976 mod p(x), x^166912 mod p(x) */
- .octa 0x00000000b64f0000000000003e9f0000
-
- /* x^165952 mod p(x), x^165888 mod p(x) */
- .octa 0x00000000bd0e00000000000078590000
-
- /* x^164928 mod p(x), x^164864 mod p(x) */
- .octa 0x00000000d9360000000000008bc80000
-
- /* x^163904 mod p(x), x^163840 mod p(x) */
- .octa 0x000000002f140000000000008c9f0000
-
- /* x^162880 mod p(x), x^162816 mod p(x) */
- .octa 0x000000006a270000000000006af70000
-
- /* x^161856 mod p(x), x^161792 mod p(x) */
- .octa 0x000000006685000000000000e5210000
-
- /* x^160832 mod p(x), x^160768 mod p(x) */
- .octa 0x0000000062da00000000000008290000
-
- /* x^159808 mod p(x), x^159744 mod p(x) */
- .octa 0x00000000bb4b000000000000e4d00000
-
- /* x^158784 mod p(x), x^158720 mod p(x) */
- .octa 0x00000000d2490000000000004ae10000
-
- /* x^157760 mod p(x), x^157696 mod p(x) */
- .octa 0x00000000c85b00000000000000e70000
-
- /* x^156736 mod p(x), x^156672 mod p(x) */
- .octa 0x00000000c37a00000000000015650000
-
- /* x^155712 mod p(x), x^155648 mod p(x) */
- .octa 0x0000000018530000000000001c2f0000
-
- /* x^154688 mod p(x), x^154624 mod p(x) */
- .octa 0x00000000b46600000000000037bd0000
-
- /* x^153664 mod p(x), x^153600 mod p(x) */
- .octa 0x00000000439b00000000000012190000
-
- /* x^152640 mod p(x), x^152576 mod p(x) */
- .octa 0x00000000b1260000000000005ece0000
-
- /* x^151616 mod p(x), x^151552 mod p(x) */
- .octa 0x00000000d8110000000000002a5e0000
-
- /* x^150592 mod p(x), x^150528 mod p(x) */
- .octa 0x00000000099f00000000000052330000
-
- /* x^149568 mod p(x), x^149504 mod p(x) */
- .octa 0x00000000f9f9000000000000f9120000
-
- /* x^148544 mod p(x), x^148480 mod p(x) */
- .octa 0x000000005cc00000000000000ddc0000
-
- /* x^147520 mod p(x), x^147456 mod p(x) */
- .octa 0x00000000343b00000000000012200000
-
- /* x^146496 mod p(x), x^146432 mod p(x) */
- .octa 0x000000009222000000000000d12b0000
-
- /* x^145472 mod p(x), x^145408 mod p(x) */
- .octa 0x00000000d781000000000000eb2d0000
-
- /* x^144448 mod p(x), x^144384 mod p(x) */
- .octa 0x000000000bf400000000000058970000
-
- /* x^143424 mod p(x), x^143360 mod p(x) */
- .octa 0x00000000094200000000000013690000
-
- /* x^142400 mod p(x), x^142336 mod p(x) */
- .octa 0x00000000d55100000000000051950000
-
- /* x^141376 mod p(x), x^141312 mod p(x) */
- .octa 0x000000008f11000000000000954b0000
-
- /* x^140352 mod p(x), x^140288 mod p(x) */
- .octa 0x00000000140f000000000000b29e0000
-
- /* x^139328 mod p(x), x^139264 mod p(x) */
- .octa 0x00000000c6db000000000000db5d0000
-
- /* x^138304 mod p(x), x^138240 mod p(x) */
- .octa 0x00000000715b000000000000dfaf0000
-
- /* x^137280 mod p(x), x^137216 mod p(x) */
- .octa 0x000000000dea000000000000e3b60000
-
- /* x^136256 mod p(x), x^136192 mod p(x) */
- .octa 0x000000006f94000000000000ddaf0000
-
- /* x^135232 mod p(x), x^135168 mod p(x) */
- .octa 0x0000000024e1000000000000e4f70000
-
- /* x^134208 mod p(x), x^134144 mod p(x) */
- .octa 0x000000008810000000000000aa110000
-
- /* x^133184 mod p(x), x^133120 mod p(x) */
- .octa 0x0000000030c2000000000000a8e60000
-
- /* x^132160 mod p(x), x^132096 mod p(x) */
- .octa 0x00000000e6d0000000000000ccf30000
-
- /* x^131136 mod p(x), x^131072 mod p(x) */
- .octa 0x000000004da000000000000079bf0000
-
- /* x^130112 mod p(x), x^130048 mod p(x) */
- .octa 0x000000007759000000000000b3a30000
-
- /* x^129088 mod p(x), x^129024 mod p(x) */
- .octa 0x00000000597400000000000028790000
-
- /* x^128064 mod p(x), x^128000 mod p(x) */
- .octa 0x000000007acd000000000000b5820000
-
- /* x^127040 mod p(x), x^126976 mod p(x) */
- .octa 0x00000000e6e400000000000026ad0000
-
- /* x^126016 mod p(x), x^125952 mod p(x) */
- .octa 0x000000006d49000000000000985b0000
-
- /* x^124992 mod p(x), x^124928 mod p(x) */
- .octa 0x000000000f0800000000000011520000
-
- /* x^123968 mod p(x), x^123904 mod p(x) */
- .octa 0x000000002c7f000000000000846c0000
-
- /* x^122944 mod p(x), x^122880 mod p(x) */
- .octa 0x000000005ce7000000000000ae1d0000
-
- /* x^121920 mod p(x), x^121856 mod p(x) */
- .octa 0x00000000d4cb000000000000e21d0000
-
- /* x^120896 mod p(x), x^120832 mod p(x) */
- .octa 0x000000003a2300000000000019bb0000
-
- /* x^119872 mod p(x), x^119808 mod p(x) */
- .octa 0x000000000e1700000000000095290000
-
- /* x^118848 mod p(x), x^118784 mod p(x) */
- .octa 0x000000006e6400000000000050d20000
-
- /* x^117824 mod p(x), x^117760 mod p(x) */
- .octa 0x000000008d5c0000000000000cd10000
-
- /* x^116800 mod p(x), x^116736 mod p(x) */
- .octa 0x00000000ef310000000000007b570000
-
- /* x^115776 mod p(x), x^115712 mod p(x) */
- .octa 0x00000000645d00000000000053d60000
-
- /* x^114752 mod p(x), x^114688 mod p(x) */
- .octa 0x0000000018fc00000000000077510000
-
- /* x^113728 mod p(x), x^113664 mod p(x) */
- .octa 0x000000000cb3000000000000a7b70000
-
- /* x^112704 mod p(x), x^112640 mod p(x) */
- .octa 0x00000000991b000000000000d0780000
-
- /* x^111680 mod p(x), x^111616 mod p(x) */
- .octa 0x00000000845a000000000000be3c0000
-
- /* x^110656 mod p(x), x^110592 mod p(x) */
- .octa 0x00000000d3a9000000000000df020000
-
- /* x^109632 mod p(x), x^109568 mod p(x) */
- .octa 0x0000000017d7000000000000063e0000
-
- /* x^108608 mod p(x), x^108544 mod p(x) */
- .octa 0x000000007a860000000000008ab40000
-
- /* x^107584 mod p(x), x^107520 mod p(x) */
- .octa 0x00000000fd7c000000000000c7bd0000
-
- /* x^106560 mod p(x), x^106496 mod p(x) */
- .octa 0x00000000a56b000000000000efd60000
-
- /* x^105536 mod p(x), x^105472 mod p(x) */
- .octa 0x0000000010e400000000000071380000
-
- /* x^104512 mod p(x), x^104448 mod p(x) */
- .octa 0x00000000994500000000000004d30000
-
- /* x^103488 mod p(x), x^103424 mod p(x) */
- .octa 0x00000000b83c0000000000003b0e0000
-
- /* x^102464 mod p(x), x^102400 mod p(x) */
- .octa 0x00000000d6c10000000000008b020000
-
- /* x^101440 mod p(x), x^101376 mod p(x) */
- .octa 0x000000009efc000000000000da940000
-
- /* x^100416 mod p(x), x^100352 mod p(x) */
- .octa 0x000000005e87000000000000f9f70000
-
- /* x^99392 mod p(x), x^99328 mod p(x) */
- .octa 0x000000006c9b00000000000045e40000
-
- /* x^98368 mod p(x), x^98304 mod p(x) */
- .octa 0x00000000178a00000000000083940000
-
- /* x^97344 mod p(x), x^97280 mod p(x) */
- .octa 0x00000000f0c8000000000000f0a00000
-
- /* x^96320 mod p(x), x^96256 mod p(x) */
- .octa 0x00000000f699000000000000b74b0000
-
- /* x^95296 mod p(x), x^95232 mod p(x) */
- .octa 0x00000000316d000000000000c1cf0000
-
- /* x^94272 mod p(x), x^94208 mod p(x) */
- .octa 0x00000000987e00000000000072680000
-
- /* x^93248 mod p(x), x^93184 mod p(x) */
- .octa 0x00000000acff000000000000e0ab0000
-
- /* x^92224 mod p(x), x^92160 mod p(x) */
- .octa 0x00000000a1f6000000000000c5a80000
-
- /* x^91200 mod p(x), x^91136 mod p(x) */
- .octa 0x0000000061bd000000000000cf690000
-
- /* x^90176 mod p(x), x^90112 mod p(x) */
- .octa 0x00000000c9f2000000000000cbcc0000
-
- /* x^89152 mod p(x), x^89088 mod p(x) */
- .octa 0x000000005a33000000000000de050000
-
- /* x^88128 mod p(x), x^88064 mod p(x) */
- .octa 0x00000000e416000000000000ccd70000
-
- /* x^87104 mod p(x), x^87040 mod p(x) */
- .octa 0x0000000058930000000000002f670000
-
- /* x^86080 mod p(x), x^86016 mod p(x) */
- .octa 0x00000000a9d3000000000000152f0000
-
- /* x^85056 mod p(x), x^84992 mod p(x) */
- .octa 0x00000000c114000000000000ecc20000
-
- /* x^84032 mod p(x), x^83968 mod p(x) */
- .octa 0x00000000b9270000000000007c890000
-
- /* x^83008 mod p(x), x^82944 mod p(x) */
- .octa 0x000000002e6000000000000006ee0000
-
- /* x^81984 mod p(x), x^81920 mod p(x) */
- .octa 0x00000000dfc600000000000009100000
-
- /* x^80960 mod p(x), x^80896 mod p(x) */
- .octa 0x000000004911000000000000ad4e0000
-
- /* x^79936 mod p(x), x^79872 mod p(x) */
- .octa 0x00000000ae1b000000000000b04d0000
-
- /* x^78912 mod p(x), x^78848 mod p(x) */
- .octa 0x0000000005fa000000000000e9900000
-
- /* x^77888 mod p(x), x^77824 mod p(x) */
- .octa 0x0000000004a1000000000000cc6f0000
-
- /* x^76864 mod p(x), x^76800 mod p(x) */
- .octa 0x00000000af73000000000000ed110000
-
- /* x^75840 mod p(x), x^75776 mod p(x) */
- .octa 0x0000000082530000000000008f7e0000
-
- /* x^74816 mod p(x), x^74752 mod p(x) */
- .octa 0x00000000cfdc000000000000594f0000
-
- /* x^73792 mod p(x), x^73728 mod p(x) */
- .octa 0x00000000a6b6000000000000a8750000
-
- /* x^72768 mod p(x), x^72704 mod p(x) */
- .octa 0x00000000fd76000000000000aa0c0000
-
- /* x^71744 mod p(x), x^71680 mod p(x) */
- .octa 0x0000000006f500000000000071db0000
-
- /* x^70720 mod p(x), x^70656 mod p(x) */
- .octa 0x0000000037ca000000000000ab0c0000
-
- /* x^69696 mod p(x), x^69632 mod p(x) */
- .octa 0x00000000d7ab000000000000b7a00000
-
- /* x^68672 mod p(x), x^68608 mod p(x) */
- .octa 0x00000000440800000000000090d30000
-
- /* x^67648 mod p(x), x^67584 mod p(x) */
- .octa 0x00000000186100000000000054730000
-
- /* x^66624 mod p(x), x^66560 mod p(x) */
- .octa 0x000000007368000000000000a3a20000
-
- /* x^65600 mod p(x), x^65536 mod p(x) */
- .octa 0x0000000026d0000000000000f9040000
-
- /* x^64576 mod p(x), x^64512 mod p(x) */
- .octa 0x00000000fe770000000000009c0a0000
-
- /* x^63552 mod p(x), x^63488 mod p(x) */
- .octa 0x000000002cba000000000000d1e70000
-
- /* x^62528 mod p(x), x^62464 mod p(x) */
- .octa 0x00000000f8bd0000000000005ac10000
-
- /* x^61504 mod p(x), x^61440 mod p(x) */
- .octa 0x000000007372000000000000d68d0000
-
- /* x^60480 mod p(x), x^60416 mod p(x) */
- .octa 0x00000000f37f00000000000089f60000
-
- /* x^59456 mod p(x), x^59392 mod p(x) */
- .octa 0x00000000078400000000000008a90000
-
- /* x^58432 mod p(x), x^58368 mod p(x) */
- .octa 0x00000000d3e400000000000042360000
-
- /* x^57408 mod p(x), x^57344 mod p(x) */
- .octa 0x00000000eba800000000000092d50000
-
- /* x^56384 mod p(x), x^56320 mod p(x) */
- .octa 0x00000000afbe000000000000b4d50000
-
- /* x^55360 mod p(x), x^55296 mod p(x) */
- .octa 0x00000000d8ca000000000000c9060000
-
- /* x^54336 mod p(x), x^54272 mod p(x) */
- .octa 0x00000000c2d00000000000008f4f0000
-
- /* x^53312 mod p(x), x^53248 mod p(x) */
- .octa 0x00000000373200000000000028690000
-
- /* x^52288 mod p(x), x^52224 mod p(x) */
- .octa 0x0000000046ae000000000000c3b30000
-
- /* x^51264 mod p(x), x^51200 mod p(x) */
- .octa 0x00000000b243000000000000f8700000
-
- /* x^50240 mod p(x), x^50176 mod p(x) */
- .octa 0x00000000f7f500000000000029eb0000
-
- /* x^49216 mod p(x), x^49152 mod p(x) */
- .octa 0x000000000c7e000000000000fe730000
-
- /* x^48192 mod p(x), x^48128 mod p(x) */
- .octa 0x00000000c38200000000000096000000
-
- /* x^47168 mod p(x), x^47104 mod p(x) */
- .octa 0x000000008956000000000000683c0000
-
- /* x^46144 mod p(x), x^46080 mod p(x) */
- .octa 0x00000000422d0000000000005f1e0000
-
- /* x^45120 mod p(x), x^45056 mod p(x) */
- .octa 0x00000000ac0f0000000000006f810000
-
- /* x^44096 mod p(x), x^44032 mod p(x) */
- .octa 0x00000000ce30000000000000031f0000
-
- /* x^43072 mod p(x), x^43008 mod p(x) */
- .octa 0x000000003d43000000000000455a0000
-
- /* x^42048 mod p(x), x^41984 mod p(x) */
- .octa 0x000000007ebe000000000000a6050000
-
- /* x^41024 mod p(x), x^40960 mod p(x) */
- .octa 0x00000000976e00000000000077eb0000
-
- /* x^40000 mod p(x), x^39936 mod p(x) */
- .octa 0x000000000872000000000000389c0000
-
- /* x^38976 mod p(x), x^38912 mod p(x) */
- .octa 0x000000008979000000000000c7b20000
-
- /* x^37952 mod p(x), x^37888 mod p(x) */
- .octa 0x000000005c1e0000000000001d870000
-
- /* x^36928 mod p(x), x^36864 mod p(x) */
- .octa 0x00000000aebb00000000000045810000
-
- /* x^35904 mod p(x), x^35840 mod p(x) */
- .octa 0x000000004f7e0000000000006d4a0000
-
- /* x^34880 mod p(x), x^34816 mod p(x) */
- .octa 0x00000000ea98000000000000b9200000
-
- /* x^33856 mod p(x), x^33792 mod p(x) */
- .octa 0x00000000f39600000000000022f20000
-
- /* x^32832 mod p(x), x^32768 mod p(x) */
- .octa 0x000000000bc500000000000041ca0000
-
- /* x^31808 mod p(x), x^31744 mod p(x) */
- .octa 0x00000000786400000000000078500000
-
- /* x^30784 mod p(x), x^30720 mod p(x) */
- .octa 0x00000000be970000000000009e7e0000
-
- /* x^29760 mod p(x), x^29696 mod p(x) */
- .octa 0x00000000dd6d000000000000a53c0000
-
- /* x^28736 mod p(x), x^28672 mod p(x) */
- .octa 0x000000004c3f00000000000039340000
-
- /* x^27712 mod p(x), x^27648 mod p(x) */
- .octa 0x0000000093a4000000000000b58e0000
-
- /* x^26688 mod p(x), x^26624 mod p(x) */
- .octa 0x0000000050fb00000000000062d40000
-
- /* x^25664 mod p(x), x^25600 mod p(x) */
- .octa 0x00000000f505000000000000a26f0000
-
- /* x^24640 mod p(x), x^24576 mod p(x) */
- .octa 0x0000000064f900000000000065e60000
-
- /* x^23616 mod p(x), x^23552 mod p(x) */
- .octa 0x00000000e8c2000000000000aad90000
-
- /* x^22592 mod p(x), x^22528 mod p(x) */
- .octa 0x00000000720b000000000000a3b00000
-
- /* x^21568 mod p(x), x^21504 mod p(x) */
- .octa 0x00000000e992000000000000d2680000
-
- /* x^20544 mod p(x), x^20480 mod p(x) */
- .octa 0x000000009132000000000000cf4c0000
-
- /* x^19520 mod p(x), x^19456 mod p(x) */
- .octa 0x00000000608a00000000000076610000
-
- /* x^18496 mod p(x), x^18432 mod p(x) */
- .octa 0x000000009948000000000000fb9f0000
-
- /* x^17472 mod p(x), x^17408 mod p(x) */
- .octa 0x00000000173000000000000003770000
-
- /* x^16448 mod p(x), x^16384 mod p(x) */
- .octa 0x000000006fe300000000000004880000
-
- /* x^15424 mod p(x), x^15360 mod p(x) */
- .octa 0x00000000e15300000000000056a70000
-
- /* x^14400 mod p(x), x^14336 mod p(x) */
- .octa 0x0000000092d60000000000009dfd0000
-
- /* x^13376 mod p(x), x^13312 mod p(x) */
- .octa 0x0000000002fd00000000000074c80000
-
- /* x^12352 mod p(x), x^12288 mod p(x) */
- .octa 0x00000000c78b000000000000a3ec0000
-
- /* x^11328 mod p(x), x^11264 mod p(x) */
- .octa 0x000000009262000000000000b3530000
-
- /* x^10304 mod p(x), x^10240 mod p(x) */
- .octa 0x0000000084f200000000000047bf0000
-
- /* x^9280 mod p(x), x^9216 mod p(x) */
- .octa 0x0000000067ee000000000000e97c0000
-
- /* x^8256 mod p(x), x^8192 mod p(x) */
- .octa 0x00000000535b00000000000091e10000
-
- /* x^7232 mod p(x), x^7168 mod p(x) */
- .octa 0x000000007ebb00000000000055060000
-
- /* x^6208 mod p(x), x^6144 mod p(x) */
- .octa 0x00000000c6a1000000000000fd360000
-
- /* x^5184 mod p(x), x^5120 mod p(x) */
- .octa 0x000000001be500000000000055860000
-
- /* x^4160 mod p(x), x^4096 mod p(x) */
- .octa 0x00000000ae0e0000000000005bd00000
-
- /* x^3136 mod p(x), x^3072 mod p(x) */
- .octa 0x0000000022040000000000008db20000
-
- /* x^2112 mod p(x), x^2048 mod p(x) */
- .octa 0x00000000c9eb000000000000efe20000
-
- /* x^1088 mod p(x), x^1024 mod p(x) */
- .octa 0x0000000039b400000000000051d10000
-
-.short_constants:
-
- /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
- /* x^2048 mod p(x), x^2016 mod p(x), x^1984 mod p(x), x^1952 mod p(x) */
- .octa 0xefe20000dccf00009440000033590000
-
- /* x^1920 mod p(x), x^1888 mod p(x), x^1856 mod p(x), x^1824 mod p(x) */
- .octa 0xee6300002f3f000062180000e0ed0000
-
- /* x^1792 mod p(x), x^1760 mod p(x), x^1728 mod p(x), x^1696 mod p(x) */
- .octa 0xcf5f000017ef0000ccbe000023d30000
-
- /* x^1664 mod p(x), x^1632 mod p(x), x^1600 mod p(x), x^1568 mod p(x) */
- .octa 0x6d0c0000a30e00000920000042630000
-
- /* x^1536 mod p(x), x^1504 mod p(x), x^1472 mod p(x), x^1440 mod p(x) */
- .octa 0x21d30000932b0000a7a00000efcc0000
-
- /* x^1408 mod p(x), x^1376 mod p(x), x^1344 mod p(x), x^1312 mod p(x) */
- .octa 0x10be00000b310000666f00000d1c0000
-
- /* x^1280 mod p(x), x^1248 mod p(x), x^1216 mod p(x), x^1184 mod p(x) */
- .octa 0x1f240000ce9e0000caad0000589e0000
-
- /* x^1152 mod p(x), x^1120 mod p(x), x^1088 mod p(x), x^1056 mod p(x) */
- .octa 0x29610000d02b000039b400007cf50000
-
- /* x^1024 mod p(x), x^992 mod p(x), x^960 mod p(x), x^928 mod p(x) */
- .octa 0x51d100009d9d00003c0e0000bfd60000
-
- /* x^896 mod p(x), x^864 mod p(x), x^832 mod p(x), x^800 mod p(x) */
- .octa 0xda390000ceae000013830000713c0000
-
- /* x^768 mod p(x), x^736 mod p(x), x^704 mod p(x), x^672 mod p(x) */
- .octa 0xb67800001e16000085c0000080a60000
-
- /* x^640 mod p(x), x^608 mod p(x), x^576 mod p(x), x^544 mod p(x) */
- .octa 0x0db40000f7f90000371d0000e6580000
-
- /* x^512 mod p(x), x^480 mod p(x), x^448 mod p(x), x^416 mod p(x) */
- .octa 0x87e70000044c0000aadb0000a4970000
-
- /* x^384 mod p(x), x^352 mod p(x), x^320 mod p(x), x^288 mod p(x) */
- .octa 0x1f990000ad180000d8b30000e7b50000
-
- /* x^256 mod p(x), x^224 mod p(x), x^192 mod p(x), x^160 mod p(x) */
- .octa 0xbe6c00006ee300004c1a000006df0000
-
- /* x^128 mod p(x), x^96 mod p(x), x^64 mod p(x), x^32 mod p(x) */
- .octa 0xfb0b00002d560000136800008bb70000
-
-
-.barrett_constants:
- /* Barrett constant m - (4^32)/n */
- .octa 0x000000000000000000000001f65a57f8 /* x^64 div p(x) */
- /* Barrett constant n */
- .octa 0x0000000000000000000000018bb70000
-
-#define CRC_FUNCTION_NAME __crct10dif_vpmsum
-#include "crc-vpmsum-template.S"
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 2dd7bc0563a8c3..5352932badd881 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -24,9 +24,6 @@ config RISCV
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_BINFMT_FLAT
- select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
- select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC
- select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 0baec92d2f55b8..a4f4b48ed3a473 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -16,12 +16,6 @@ endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
-crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o
-obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o
-crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o
-obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o
-crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc-clmul-consts.h b/arch/riscv/lib/crc-clmul-consts.h
deleted file mode 100644
index 8d73449235ef2e..00000000000000
--- a/arch/riscv/lib/crc-clmul-consts.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * CRC constants generated by:
- *
- * ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
- *
- * Do not edit manually.
- */
-
-struct crc_clmul_consts {
- unsigned long fold_across_2_longs_const_hi;
- unsigned long fold_across_2_longs_const_lo;
- unsigned long barrett_reduction_const_1;
- unsigned long barrett_reduction_const_2;
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-16 using
- * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */
- .fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */
- .barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */
- .barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */
-#else
- .fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */
- .fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */
- .barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */
- .barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */
-#endif
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
- * x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */
- .fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */
- .barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */
- .barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */
-#else
- .fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */
- .fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */
- .barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */
- .barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */
-#endif
-};
-
-/*
- * Constants generated for least-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
- * x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */
- .fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */
- .barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */
- .barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */
-#else
- .fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */
- .fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */
- .barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */
- .barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */
-#endif
-};
-
-/*
- * Constants generated for least-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 +
- * x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0
- */
-static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = {
-#ifdef CONFIG_64BIT
- .fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */
- .fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */
- .barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */
- .barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */
-#else
- .fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */
- .fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */
- .barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */
- .barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */
-#endif
-};
-
-/*
- * Constants generated for most-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x^1 + x^0
- */
-#ifdef CONFIG_64BIT
-static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = {
- .fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */
- .fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */
- .barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */
- .barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */
-};
-#endif
-
-/*
- * Constants generated for least-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
- * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
- * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
- * x^4 + x^3 + x^0
- */
-#ifdef CONFIG_64BIT
-static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = {
- .fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */
- .fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */
- .barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */
- .barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */
-};
-#endif
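
The fold_across_2_longs entries above are again of the form x^e mod G, while each barrett_reduction_const_1 is a floored polynomial quotient floor(x^(BITS_PER_LONG-1+n) / G). A sketch of that division, assuming a hypothetical generation-time helper xdiv that is not kernel code; it covers the msb-first constants, the lsb-first entries being their bit-reversed analogues:

    #include <stdint.h>

    /*
     * floor(x^e / G) in GF(2)[x] by long division.  'g' carries the
     * generator including its x^n term (0x18bb7, n = 16 for the CRC-16
     * above), so xdiv(79, 0x18bb7, 16) should reproduce
     * barrett_reduction_const_1 = 0xfb2d2bfc0e99d245.  Assumes n < 64;
     * the CRC-64 constants would need wider arithmetic.
     */
    static uint64_t xdiv(unsigned int e, uint64_t g, unsigned int n)
    {
            uint64_t q = 0;                 /* quotient, built high bit first */
            uint64_t r = 1;                 /* remainder of the dividend prefix */
            unsigned int i;

            for (i = 0; i < e; i++) {       /* bring down one zero bit */
                    uint64_t quo = (r >> (n - 1)) & 1;

                    r = (r << 1) & ((1ULL << n) - 1);
                    if (quo)
                            r ^= g & ((1ULL << n) - 1);  /* subtract G */
                    q = (q << 1) | quo;
            }
            return q;
    }
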
diff --git a/arch/riscv/lib/crc-clmul-template.h b/arch/riscv/lib/crc-clmul-template.h
deleted file mode 100644
index 77187e7f176232..00000000000000
--- a/arch/riscv/lib/crc-clmul-template.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Copyright 2025 Google LLC */
-
-/*
- * This file is a "template" that generates a CRC function optimized using the
- * RISC-V Zbc (scalar carryless multiplication) extension. The includer of this
- * file must define the following parameters to specify the type of CRC:
- *
- * crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC
- * LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural
- * mapping between bits and polynomial coefficients
- * 1 for a lsb (least-significant-bit) first CRC, i.e. reflected
- * mapping between bits and polynomial coefficients
- */
-
-#include <asm/byteorder.h>
-#include <linux/minmax.h>
-
-#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */
-
-static inline unsigned long clmul(unsigned long a, unsigned long b)
-{
- unsigned long res;
-
- asm(".option push\n"
- ".option arch,+zbc\n"
- "clmul %0, %1, %2\n"
- ".option pop\n"
- : "=r" (res) : "r" (a), "r" (b));
- return res;
-}
-
-static inline unsigned long clmulh(unsigned long a, unsigned long b)
-{
- unsigned long res;
-
- asm(".option push\n"
- ".option arch,+zbc\n"
- "clmulh %0, %1, %2\n"
- ".option pop\n"
- : "=r" (res) : "r" (a), "r" (b));
- return res;
-}
-
-static inline unsigned long clmulr(unsigned long a, unsigned long b)
-{
- unsigned long res;
-
- asm(".option push\n"
- ".option arch,+zbc\n"
- "clmulr %0, %1, %2\n"
- ".option pop\n"
- : "=r" (res) : "r" (a), "r" (b));
- return res;
-}
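
The three asm wrappers above expose the Zbc scalar carry-less multiply instructions. Their semantics, restated as a portable sketch (assumes RV64, 64-bit unsigned long, and a compiler with __int128; illustration only, not kernel code):

    /*
     * The carry-less product of two 64-bit values is 127 bits wide.
     * Per the Zbc spec:
     *   clmul  returns product bits  63..0   (shift 0)
     *   clmulh returns product bits 127..64  (shift 64)
     *   clmulr returns product bits 126..63  (shift 63)
     */
    static unsigned long clmul_ref(unsigned long a, unsigned long b,
                                   unsigned int shift)
    {
            unsigned __int128 prod = 0;
            unsigned int i;

            for (i = 0; i < 64; i++)
                    if ((b >> i) & 1)
                            prod ^= (unsigned __int128)a << i;
            return (unsigned long)(prod >> shift);
    }
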
-
-/*
- * crc_load_long() loads one "unsigned long" of aligned data bytes, producing a
- * polynomial whose bit order matches the CRC's bit order.
- */
-#ifdef CONFIG_64BIT
-# if LSB_CRC
-# define crc_load_long(x) le64_to_cpup(x)
-# else
-# define crc_load_long(x) be64_to_cpup(x)
-# endif
-#else
-# if LSB_CRC
-# define crc_load_long(x) le32_to_cpup(x)
-# else
-# define crc_load_long(x) be32_to_cpup(x)
-# endif
-#endif
-
-/* XOR @crc into the end of @msgpoly that represents the high-order terms. */
-static inline unsigned long
-crc_clmul_prep(crc_t crc, unsigned long msgpoly)
-{
-#if LSB_CRC
- return msgpoly ^ crc;
-#else
- return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS));
-#endif
-}
-
-/*
- * Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it
- * modulo the generator polynomial G. This gives the CRC of @msgpoly.
- */
-static inline crc_t
-crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts)
-{
- unsigned long tmp;
-
- /*
- * First step of Barrett reduction with integrated multiplication by
- * x^n: calculate floor((msgpoly * x^n) / G). This is the value by
- * which G needs to be multiplied to cancel out the x^n and higher terms
- * of msgpoly * x^n. Do it using the following formula:
- *
- * msb-first:
- * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1))
- * lsb-first:
- * floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG)
- *
- * barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G),
- * which fits a long exactly. Using any lower power of x there would
- * not carry enough precision through the calculation, while using any
- * higher power of x would require extra instructions to handle a wider
- * multiplication. In the msb-first case, using this power of x results
- * in needing a floored division by x^(BITS_PER_LONG-1), which matches
- * what clmulr produces. In the lsb-first case, a factor of x gets
- * implicitly introduced by each carryless multiplication (shown as
- * '* x' above), and the floored division instead needs to be by
- * x^BITS_PER_LONG which matches what clmul produces.
- */
-#if LSB_CRC
- tmp = clmul(msgpoly, consts->barrett_reduction_const_1);
-#else
- tmp = clmulr(msgpoly, consts->barrett_reduction_const_1);
-#endif
-
- /*
- * Second step of Barrett reduction:
- *
- * crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G))
- *
- * This reduces (msgpoly * x^n) modulo G by adding the appropriate
- * multiple of G to it. The result uses only the x^0..x^(n-1) terms.
- * HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those
- * terms in the first place, it is more efficient to do the equivalent:
- *
- * crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n
- *
- * In the lsb-first case further modify it to the following which avoids
- * a shift, as the crc ends up in the physically low n bits from clmulr:
- *
- * product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x
- * crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n
- *
- * barrett_reduction_const_2 contains the constant multiplier (G - x^n)
- * or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The
- * cast of the result to crc_t is essential, as it applies the mod x^n!
- */
-#if LSB_CRC
- return clmulr(tmp, consts->barrett_reduction_const_2);
-#else
- return clmul(tmp, consts->barrett_reduction_const_2);
-#endif
-}
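
Restating the two comment blocks above as one formula (msb-first case; $W$ = BITS_PER_LONG, $n$ = CRC_BITS, $M$ = msgpoly, all arithmetic in GF(2)[x] so that subtraction is XOR; the middle equality is exact per the precision argument in the first comment):

$$q(x)=\left\lfloor\frac{M(x)\cdot\lfloor x^{W-1+n}/G(x)\rfloor}{x^{W-1}}\right\rfloor=\left\lfloor\frac{M(x)\,x^{n}}{G(x)}\right\rfloor,\qquad \mathrm{crc}(x)=\bigl(G(x)+x^{n}\bigr)\,q(x)\bmod x^{n}=M(x)\,x^{n}\bmod G(x).$$

In the code, the first quotient is the clmulr, the second product the clmul, and the trailing mod x^n is the truncating cast to crc_t.
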
-
-/* Update @crc with the data from @msgpoly. */
-static inline crc_t
-crc_clmul_update_long(crc_t crc, unsigned long msgpoly,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts);
-}
-
-/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */
-static inline crc_t
-crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- unsigned long msgpoly;
- size_t i;
-
-#if LSB_CRC
- msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8);
- for (i = 1; i < len; i++)
- msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8));
-#else
- msgpoly = p[0];
- for (i = 1; i < len; i++)
- msgpoly = (msgpoly << 8) ^ p[i];
-#endif
-
- if (len >= sizeof(crc_t)) {
- #if LSB_CRC
- msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
- #else
- msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS);
- #endif
- return crc_clmul_long(msgpoly, consts);
- }
-#if LSB_CRC
- msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
- return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len));
-#else
- msgpoly ^= crc >> (CRC_BITS - 8*len);
- return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len));
-#endif
-}
-
-static inline crc_t
-crc_clmul(crc_t crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- size_t align;
-
- /* This implementation assumes that the CRC fits in an unsigned long. */
- BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long));
-
- /* If the buffer is not long-aligned, align it. */
- align = (unsigned long)p % sizeof(unsigned long);
- if (align && len) {
- align = min(sizeof(unsigned long) - align, len);
- crc = crc_clmul_update_partial(crc, p, align, consts);
- p += align;
- len -= align;
- }
-
- if (len >= 4 * sizeof(unsigned long)) {
- unsigned long m0, m1;
-
- m0 = crc_clmul_prep(crc, crc_load_long(p));
- m1 = crc_load_long(p + sizeof(unsigned long));
- p += 2 * sizeof(unsigned long);
- len -= 2 * sizeof(unsigned long);
- /*
- * Main loop. Each iteration starts with a message polynomial
- * (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two
- * more longs of data to form x^(3*BITS_PER_LONG)*m0 +
- * x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then
- * "folds" that back into a congruent (modulo G) value that uses
- * just m0 and m1 again. This is done by multiplying m0 by the
- * precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by
- * the precomputed constant (x^(2*BITS_PER_LONG) mod G), then
- * adding the results to m2 and m3 as appropriate. Each such
- * multiplication produces a result twice the length of a long,
- * which in RISC-V is two instructions clmul and clmulh.
- *
- * This could be changed to fold across more than 2 longs at a
- * time if there is a CPU that can take advantage of it.
- */
- do {
- unsigned long p0, p1, p2, p3;
-
- p0 = clmulh(m0, consts->fold_across_2_longs_const_hi);
- p1 = clmul(m0, consts->fold_across_2_longs_const_hi);
- p2 = clmulh(m1, consts->fold_across_2_longs_const_lo);
- p3 = clmul(m1, consts->fold_across_2_longs_const_lo);
- m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p);
- m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^
- crc_load_long(p + sizeof(unsigned long));
-
- p += 2 * sizeof(unsigned long);
- len -= 2 * sizeof(unsigned long);
- } while (len >= 2 * sizeof(unsigned long));
-
- crc = crc_clmul_long(m0, consts);
- crc = crc_clmul_update_long(crc, m1, consts);
- }
-
- while (len >= sizeof(unsigned long)) {
- crc = crc_clmul_update_long(crc, crc_load_long(p), consts);
- p += sizeof(unsigned long);
- len -= sizeof(unsigned long);
- }
-
- if (len)
- crc = crc_clmul_update_partial(crc, p, len, consts);
-
- return crc;
-}
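
The fold step in the main loop, written out for the msb-first case ($W$ = BITS_PER_LONG, $\oplus$ = GF(2) addition):

$$x^{3W}m_0\oplus x^{2W}m_1\oplus x^{W}m_2\oplus m_3\;\equiv\;m_0\,(x^{3W}\bmod G)\;\oplus\;m_1\,(x^{2W}\bmod G)\;\oplus\;x^{W}m_2\oplus m_3\pmod{G(x)}.$$

Each product on the right is at most $2W-1$ bits, so clmulh/clmul split it into a high and a low long: the high halves, XORed with the first newly loaded long ($m_2$), become the next $m_0$; the low halves, XORed with the second ($m_3$), become the next $m_1$. The lsb-first constants use exponents $3W-1$ and $2W-1$ instead, absorbing the implicit factor of $x$ introduced by each carry-less multiplication.
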
diff --git a/arch/riscv/lib/crc-clmul.h b/arch/riscv/lib/crc-clmul.h
deleted file mode 100644
index dd17362458158e..00000000000000
--- a/arch/riscv/lib/crc-clmul.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Copyright 2025 Google LLC */
-
-#ifndef _RISCV_CRC_CLMUL_H
-#define _RISCV_CRC_CLMUL_H
-
-#include <linux/types.h>
-#include "crc-clmul-consts.h"
-
-u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-#ifdef CONFIG_64BIT
-u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts);
-#endif
-
-#endif /* _RISCV_CRC_CLMUL_H */
diff --git a/arch/riscv/lib/crc-t10dif.c b/arch/riscv/lib/crc-t10dif.c
deleted file mode 100644
index e6b0051ccd86ca..00000000000000
--- a/arch/riscv/lib/crc-t10dif.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC-T10DIF function
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc-t10dif.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts);
- return crc_t10dif_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc_t10dif_arch);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc16_msb.c b/arch/riscv/lib/crc16_msb.c
deleted file mode 100644
index 554d295e95f597..00000000000000
--- a/arch/riscv/lib/crc16_msb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC16
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u16 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c
deleted file mode 100644
index a3188b7d9c403e..00000000000000
--- a/arch/riscv/lib/crc32.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC32 functions
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc32.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc32_lsb_clmul(crc, p, len,
- &crc32_lsb_0xedb88320_consts);
- return crc32_le_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc32_msb_clmul(crc, p, len,
- &crc32_msb_0x04c11db7_consts);
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc32_lsb_clmul(crc, p, len,
- &crc32_lsb_0x82f63b78_consts);
- return crc32c_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_optimizations(void)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return CRC32_LE_OPTIMIZATION |
- CRC32_BE_OPTIMIZATION |
- CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC32 functions");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc32_lsb.c b/arch/riscv/lib/crc32_lsb.c
deleted file mode 100644
index 72fd67e7470caa..00000000000000
--- a/arch/riscv/lib/crc32_lsb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized least-significant-bit-first CRC32
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u32 crc_t;
-#define LSB_CRC 1
-#include "crc-clmul-template.h"
-
-u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc32_msb.c b/arch/riscv/lib/crc32_msb.c
deleted file mode 100644
index fdbeaccc369fd3..00000000000000
--- a/arch/riscv/lib/crc32_msb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC32
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u32 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc64.c b/arch/riscv/lib/crc64.c
deleted file mode 100644
index f0015a27836a43..00000000000000
--- a/arch/riscv/lib/crc64.c
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized CRC64 functions
- *
- * Copyright 2025 Google LLC
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <linux/crc64.h>
-#include <linux/module.h>
-
-#include "crc-clmul.h"
-
-u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc64_msb_clmul(crc, p, len,
- &crc64_msb_0x42f0e1eba9ea3693_consts);
- return crc64_be_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc64_be_arch);
-
-u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
-{
- if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
- return crc64_lsb_clmul(crc, p, len,
- &crc64_lsb_0x9a6c9329ac4bc9b5_consts);
- return crc64_nvme_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc64_nvme_arch);
-
-MODULE_DESCRIPTION("RISC-V optimized CRC64 functions");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/lib/crc64_lsb.c b/arch/riscv/lib/crc64_lsb.c
deleted file mode 100644
index c5371bb85d9011..00000000000000
--- a/arch/riscv/lib/crc64_lsb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized least-significant-bit-first CRC64
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u64 crc_t;
-#define LSB_CRC 1
-#include "crc-clmul-template.h"
-
-u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
diff --git a/arch/riscv/lib/crc64_msb.c b/arch/riscv/lib/crc64_msb.c
deleted file mode 100644
index 1925d1dbe225c9..00000000000000
--- a/arch/riscv/lib/crc64_msb.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RISC-V optimized most-significant-bit-first CRC64
- *
- * Copyright 2025 Google LLC
- */
-
-#include "crc-clmul.h"
-
-typedef u64 crc_t;
-#define LSB_CRC 0
-#include "crc-clmul-template.h"
-
-u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
- const struct crc_clmul_consts *consts)
-{
- return crc_clmul(crc, p, len, consts);
-}
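
The five *_lsb.c/*_msb.c files above are all one-screen instantiations of a shared template: define crc_t and LSB_CRC, include crc-clmul-template.h, and the preprocessor stamps out crc_clmul() for that width and bit order. A toy, self-contained rendition of the idiom (the template body here is a stand-in, not the real folding code):

#include <stdint.h>

typedef uint32_t crc_t;		/* parameter 1: CRC width */
#define LSB_CRC 1		/* parameter 2: bit order */

/* What an included template sees: both parameters are fixed at compile
 * time, so the generated code is fully specialized. */
static crc_t crc_step(crc_t crc, uint8_t byte)
{
#if LSB_CRC
	return crc ^ byte;	/* lsb-first: data enters at the bottom */
#else
	return crc ^ ((crc_t)byte << (8 * sizeof(crc_t) - 8));
#endif
}
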
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a8e74ed8e3ccf5..25a773e6596ea4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -75,7 +75,6 @@ config S390
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_HAS_CPU_FINALIZE_INIT
- select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index cd35cdbfa87134..7c8583d46eca1d 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -25,6 +25,3 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
-
-obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
-crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-vx.h b/arch/s390/lib/crc32-vx.h
deleted file mode 100644
index 652c96e1a822c0..00000000000000
--- a/arch/s390/lib/crc32-vx.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _CRC32_VX_S390_H
-#define _CRC32_VX_S390_H
-
-#include <linux/types.h>
-
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-
-#endif /* _CRC32_VX_S390_H */
diff --git a/arch/s390/lib/crc32.c b/arch/s390/lib/crc32.c
deleted file mode 100644
index 3c4b344417c114..00000000000000
--- a/arch/s390/lib/crc32.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CRC-32 implemented with the z/Architecture Vector Extension Facility.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-#define KMSG_COMPONENT "crc32-vx"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-#define VX_MIN_LEN 64
-#define VX_ALIGNMENT 16L
-#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-
-/*
- * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
- *
- * Creates a function to perform a particular CRC-32 computation.  Depending
- * on the message buffer length and hardware support, either the
- * hardware-accelerated or the software implementation is used.  Note that
- * the message buffer is aligned to improve fetch
- * operations of VECTOR LOAD MULTIPLE instructions.
- */
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
- u32 ___fname(u32 crc, const u8 *data, size_t datalen) \
- { \
- unsigned long prealign, aligned, remaining; \
- DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
- \
- if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx()) \
- return ___crc32_sw(crc, data, datalen); \
- \
- if ((unsigned long)data & VX_ALIGN_MASK) { \
- prealign = VX_ALIGNMENT - \
- ((unsigned long)data & VX_ALIGN_MASK); \
- datalen -= prealign; \
- crc = ___crc32_sw(crc, data, prealign); \
- data = (void *)((unsigned long)data + prealign); \
- } \
- \
- aligned = datalen & ~VX_ALIGN_MASK; \
- remaining = datalen & VX_ALIGN_MASK; \
- \
- kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
- crc = ___crc32_vx(crc, data, aligned); \
- kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
- \
- if (remaining) \
- crc = ___crc32_sw(crc, data + aligned, remaining); \
- \
- return crc; \
- } \
- EXPORT_SYMBOL(___fname);
-
-DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
-DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
-DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)
-
-u32 crc32_optimizations(void)
-{
- if (cpu_has_vx()) {
- return CRC32_LE_OPTIMIZATION |
- CRC32_BE_OPTIMIZATION |
- CRC32C_OPTIMIZATION;
- }
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
-MODULE_LICENSE("GPL");
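
DEFINE_CRC32_VX() shows the standard head/body/tail recipe for fixed-width vector kernels: scalar code covers the unaligned head and the sub-16-byte tail, and the vector routine only ever sees an aligned multiple of 16 bytes. The same split in plain C (a simplified sketch; the real macro additionally requires cpu_has_vx() and datalen >= VX_MIN_LEN + VX_ALIGN_MASK):

#include <stddef.h>
#include <stdint.h>

#define ALIGNMENT	16UL
#define ALIGN_MASK	(ALIGNMENT - 1)

typedef uint32_t (*crc_fn)(uint32_t, const uint8_t *, size_t);

/* 'sw' handles the unaligned head and short tail, 'vx' the aligned body. */
static uint32_t crc32_split(uint32_t crc, const uint8_t *p, size_t len,
			    crc_fn sw, crc_fn vx)
{
	size_t head = -(uintptr_t)p & ALIGN_MASK;	/* 0 if already aligned */
	size_t body;

	if (head > len)
		head = len;
	crc = sw(crc, p, head);
	p += head;
	len -= head;
	body = len & ~ALIGN_MASK;
	crc = vx(crc, p, body);
	return sw(crc, p + body, len & ALIGN_MASK);
}
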
diff --git a/arch/s390/lib/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c
deleted file mode 100644
index fed7c9c70d055c..00000000000000
--- a/arch/s390/lib/crc32be-vx.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of CRC-32 checksums.
- *
- * This CRC-32 implementation processes the most-significant bit first (BE).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_R1R2 9
-#define CONST_R3R4 10
-#define CONST_R5 11
-#define CONST_R6 12
-#define CONST_RU_POLY 13
-#define CONST_CRC_POLY 14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- *      R1 = x^(4*128+64) mod P(x)
- *      R2 = x^(4*128) mod P(x)
- *      R3 = x^(128+64) mod P(x)
- *      R4 = x^128 mod P(x)
- *      R5 = x^96 mod P(x)
- *      R6 = x^64 mod P(x)
- *
- * The Barrett reduction constant, u, is defined as floor(x^64 / P(x)),
- *
- * where P(x) is the polynomial in the normal domain and P'(x) is the
- * polynomial in the reversed (bitreflected) domain.
- *
- * Note that the constant definitions below are extended in order to compute
- * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
- * The rightmost doubleword can be 0 to prevent contribution to the result or
- * can be multiplied by 1 to perform an XOR without the need for a separate
- * VECTOR EXCLUSIVE OR instruction.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- * P(x) = 0x04C11DB7
- * P'(x) = 0xEDB88320
- */
-
-static unsigned long constants_CRC_32_BE[] = {
- 0x08833794c, 0x0e6228b11, /* R1, R2 */
- 0x0c5b9cd4c, 0x0e8a45605, /* R3, R4 */
- 0x0f200aa66, 1UL << 32, /* R5, x32 */
- 0x0490d678d, 1, /* R6, 1 */
- 0x104d101df, 0, /* u */
- 0x104C11DB7, 0, /* P(x) */
-};
-
-/**
- * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- *
- * Register usage:
- * V0: Initial CRC value and intermediate constants and results.
- * V1..V4: Data for CRC computation.
- * V5..V8: Next data chunks that are fetched from the input buffer.
- * V9..V14: CRC-32 constants.
- */
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- /* Load CRC-32 constants */
- fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
- fpu_vzero(0);
-
- /* Load the initial CRC value into the leftmost word of V0. */
- fpu_vlvgf(0, crc, 0);
-
- /* Load a 64-byte data chunk and XOR with CRC */
- fpu_vlm(1, 4, buf);
- fpu_vx(1, 0, 1);
- buf += 64;
- size -= 64;
-
- while (size >= 64) {
- /* Load the next 64-byte data chunk into V5 to V8 */
- fpu_vlm(5, 8, buf);
-
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the reduction constants in V0. The intermediate result is
- * then folded (accumulated) with the next data chunk in V5 and
- * stored in V1. Repeat this step for the register contents
- * in V2, V3, and V4 respectively.
- */
- fpu_vgfmag(1, CONST_R1R2, 1, 5);
- fpu_vgfmag(2, CONST_R1R2, 2, 6);
- fpu_vgfmag(3, CONST_R1R2, 3, 7);
- fpu_vgfmag(4, CONST_R1R2, 4, 8);
- buf += 64;
- size -= 64;
- }
-
- /* Fold V1 to V4 into a single 128-bit value in V1 */
- fpu_vgfmag(1, CONST_R3R4, 1, 2);
- fpu_vgfmag(1, CONST_R3R4, 1, 3);
- fpu_vgfmag(1, CONST_R3R4, 1, 4);
-
- while (size >= 16) {
- fpu_vl(2, buf);
- fpu_vgfmag(1, CONST_R3R4, 1, 2);
- buf += 16;
- size -= 16;
- }
-
- /*
-	 * The R5 constant is used to fold a 128-bit value into a 96-bit value
-	 * that is XORed with the next 96-bit input data chunk.  To use a single
-	 * VGFMG instruction, multiply the rightmost 64 bits by x^32 (1<<32) to
-	 * form an intermediate 96-bit value (with appended zeros) which is then
-	 * XORed with the intermediate reduction result.
- */
- fpu_vgfmg(1, CONST_R5, 1);
-
- /*
-	 * Further reduce the remaining 96-bit value to a 64-bit value using a
-	 * single VGFMG; the rightmost doubleword is multiplied by 0x1.  The
-	 * intermediate result is then XORed with the product of the leftmost
-	 * doubleword and R6.  The result is a 64-bit value and is subject to
-	 * the Barrett reduction.
- */
- fpu_vgfmg(1, CONST_R6, 1);
-
- /*
-	 * The input values to the Barrett reduction are the degree-63 polynomial
-	 * in V1 (R(x)), the degree-32 generator polynomial, and the reduction
-	 * constant u.  The Barrett reduction result is the CRC value of R(x) mod
-	 * P(x).
-	 *
-	 * The Barrett reduction algorithm is defined as:
- *
- * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
- * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
- * 3. C(x) = R(x) XOR T2(x) mod x^32
- *
-	 * Note: To compensate for the division by x^32, use the vector unpack
- * instruction to move the leftmost word into the leftmost doubleword
- * of the vector register. The rightmost doubleword is multiplied
- * with zero to not contribute to the intermediate results.
- */
-
- /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- fpu_vupllf(2, 1);
- fpu_vgfmg(2, CONST_RU_POLY, 2);
-
- /*
-	 * Compute the GF(2) product of the CRC polynomial with T1(x) in
- * V2 and XOR the intermediate result, T2(x), with the value in V1.
- * The final result is in the rightmost word of V2.
- */
- fpu_vupllf(2, 2);
- fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
- return fpu_vlgvf(2, 3);
-}
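
The three Barrett steps spelled out in the comments above map directly onto two carry-less multiplications plus shifts. A scalar rendition for this BE CRC-32 (editorial sketch, not patch code), using the u and P(x) entries from constants_CRC_32_BE (u = 0x104D101DF, P(x) = 0x104C11DB7); only the low 32 bits of each 33-bit constant are multiplied, since their x^32 terms reduce to shifts and XORs:

#include <stdint.h>

/* Carry-less (GF(2)) multiply: 32 x 32 -> 64-bit polynomial product. */
static uint64_t clmul32(uint32_t a, uint32_t b)
{
	uint64_t r = 0;

	for (int i = 0; i < 32; i++)
		if (b & (1u << i))
			r ^= (uint64_t)a << i;
	return r;
}

#define P_LO	0x04C11DB7u	/* P(x) = x^32 + P_LO */
#define U_LO	0x04D101DFu	/* u = floor(x^64 / P(x)) = x^32 + U_LO */

/* R is the degree-63 polynomial left in V1; returns R mod P(x). */
static uint32_t barrett_crc32_be(uint64_t R)
{
	uint32_t q = (uint32_t)(R >> 32);	/* floor(R / x^32) */
	/* Step 1: T1 = q * u; only floor(T1 / x^32) is needed below. */
	uint32_t t1 = q ^ (uint32_t)(clmul32(q, U_LO) >> 32);
	/* Steps 2 and 3: C = (R XOR t1 * P) mod x^32; the x^32 term of
	 * P only touches bits that the final mod x^32 discards. */
	return (uint32_t)R ^ (uint32_t)clmul32(t1, P_LO);
}
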
diff --git a/arch/s390/lib/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c
deleted file mode 100644
index 2f629f394df750..00000000000000
--- a/arch/s390/lib/crc32le-vx.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
- * and Castagnoli.
- *
- * This CRC-32 implementation is bitreflected and processes the
- * least-significant bit first (Little-Endian).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_PERM_LE2BE 9
-#define CONST_R2R1 10
-#define CONST_R4R3 11
-#define CONST_R5 12
-#define CONST_RU_POLY 13
-#define CONST_CRC_POLY 14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- *      R1 = [(x^(4*128+32) mod P'(x) << 32)]' << 1
- *      R2 = [(x^(4*128-32) mod P'(x) << 32)]' << 1
- *      R3 = [(x^(128+32) mod P'(x) << 32)]' << 1
- *      R4 = [(x^(128-32) mod P'(x) << 32)]' << 1
- *      R5 = [(x^64 mod P'(x) << 32)]' << 1
- *      R6 = [(x^32 mod P'(x) << 32)]' << 1
- *
- * The bitreflected Barrett reduction constant, u', is defined as
- * the bit reversal of floor(x^64 / P(x)),
- *
- * where P(x) is the polynomial in the normal domain and P'(x) is the
- * polynomial in the reversed (bitreflected) domain.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- * P(x) = 0x04C11DB7
- * P'(x) = 0xEDB88320
- *
- * CRC-32C (Castagnoli) polynomials:
- *
- * P(x) = 0x1EDC6F41
- * P'(x) = 0x82F63B78
- */
-
-static unsigned long constants_CRC_32_LE[] = {
- 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
- 0x1c6e41596, 0x154442bd4, /* R2, R1 */
- 0x0ccaa009e, 0x1751997d0, /* R4, R3 */
- 0x0, 0x163cd6124, /* R5 */
- 0x0, 0x1f7011641, /* u' */
- 0x0, 0x1db710641 /* P'(x) << 1 */
-};
-
-static unsigned long constants_CRC_32C_LE[] = {
- 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
- 0x09e4addf8, 0x740eef02, /* R2, R1 */
- 0x14cd00bd6, 0xf20c0dfe, /* R4, R3 */
- 0x0, 0x0dd45aab8, /* R5 */
- 0x0, 0x0dea713f1, /* u' */
- 0x0, 0x105ec76f0 /* P'(x) << 1 */
-};
-
-/**
- * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- * @constants: CRC-32 constant pool base pointer.
- *
- * Register usage:
- * V0: Initial CRC value and intermediate constants and results.
- * V1..V4: Data for CRC computation.
- * V5..V8: Next data chunks that are fetched from the input buffer.
- * V9: Constant for BE->LE conversion and shift operations
- * V10..V14: CRC-32 constants.
- */
-static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants)
-{
- /* Load CRC-32 constants */
- fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants);
-
- /*
- * Load the initial CRC value.
- *
- * The CRC value is loaded into the rightmost word of the
- * vector register and is later XORed with the LSB portion
- * of the loaded input data.
- */
- fpu_vzero(0); /* Clear V0 */
- fpu_vlvgf(0, crc, 3); /* Load CRC into rightmost word */
-
- /* Load a 64-byte data chunk and XOR with CRC */
- fpu_vlm(1, 4, buf);
- fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);
- fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
- fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);
- fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);
-
- fpu_vx(1, 0, 1); /* V1 ^= CRC */
- buf += 64;
- size -= 64;
-
- while (size >= 64) {
- fpu_vlm(5, 8, buf);
- fpu_vperm(5, 5, 5, CONST_PERM_LE2BE);
- fpu_vperm(6, 6, 6, CONST_PERM_LE2BE);
- fpu_vperm(7, 7, 7, CONST_PERM_LE2BE);
- fpu_vperm(8, 8, 8, CONST_PERM_LE2BE);
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the R1 and R2 reduction constants in V0. The intermediate
- * result is then folded (accumulated) with the next data chunk
- * in V5 and stored in V1. Repeat this step for the register
- * contents in V2, V3, and V4 respectively.
- */
- fpu_vgfmag(1, CONST_R2R1, 1, 5);
- fpu_vgfmag(2, CONST_R2R1, 2, 6);
- fpu_vgfmag(3, CONST_R2R1, 3, 7);
- fpu_vgfmag(4, CONST_R2R1, 4, 8);
- buf += 64;
- size -= 64;
- }
-
- /*
-	 * Fold V1 to V4 into a single 128-bit value in V1.  Multiply V1 by R3
-	 * and R4, accumulating the next 128-bit chunk, until a single 128-bit
- * value remains.
- */
- fpu_vgfmag(1, CONST_R4R3, 1, 2);
- fpu_vgfmag(1, CONST_R4R3, 1, 3);
- fpu_vgfmag(1, CONST_R4R3, 1, 4);
-
- while (size >= 16) {
- fpu_vl(2, buf);
- fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
- fpu_vgfmag(1, CONST_R4R3, 1, 2);
- buf += 16;
- size -= 16;
- }
-
- /*
- * Set up a vector register for byte shifts. The shift value must
- * be loaded in bits 1-4 in byte element 7 of a vector register.
- * Shift by 8 bytes: 0x40
- * Shift by 4 bytes: 0x20
- */
- fpu_vleib(9, 0x40, 7);
-
- /*
- * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
- * to move R4 into the rightmost doubleword and set the leftmost
- * doubleword to 0x1.
- */
- fpu_vsrlb(0, CONST_R4R3, 9);
- fpu_vleig(0, 1, 0);
-
- /*
-	 * Compute the GF(2) product of V1 and V0.  The rightmost doubleword
-	 * of V1 is multiplied by R4.  The leftmost doubleword of V1 is
-	 * multiplied by 0x1 and is then XORed with the rightmost product.
-	 * Implicitly, the intermediate leftmost product is padded with zeros.
- */
- fpu_vgfmg(1, 0, 1);
-
- /*
- * Now do the final 32-bit fold by multiplying the rightmost word
- * in V1 with R5 and XOR the result with the remaining bits in V1.
- *
- * To achieve this by a single VGFMAG, right shift V1 by a word
- * and store the result in V2 which is then accumulated. Use the
- * vector unpack instruction to load the rightmost half of the
- * doubleword into the rightmost doubleword element of V1; the other
- * half is loaded in the leftmost doubleword.
- * The vector register with CONST_R5 contains the R5 constant in the
- * rightmost doubleword and the leftmost doubleword is zero to ignore
- * the leftmost product of V1.
- */
- fpu_vleib(9, 0x20, 7); /* Shift by words */
- fpu_vsrlb(2, 1, 9); /* Store remaining bits in V2 */
- fpu_vupllf(1, 1); /* Split rightmost doubleword */
- fpu_vgfmag(1, CONST_R5, 1, 2); /* V1 = (V1 * R5) XOR V2 */
-
- /*
-	 * Apply a Barrett reduction to compute the final 32-bit CRC value.
-	 *
-	 * The input values to the Barrett reduction are the degree-63 polynomial
-	 * in V1 (R(x)), the degree-32 generator polynomial, and the reduction
-	 * constant u.  The Barrett reduction result is the CRC value of R(x) mod
-	 * P(x).
-	 *
-	 * The Barrett reduction algorithm is defined as:
- *
- * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
- * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
- * 3. C(x) = R(x) XOR T2(x) mod x^32
- *
-	 * Note: The leftmost doubleword of the vector register containing
- * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
- * is zero and does not contribute to the final result.
- */
-
- /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- fpu_vupllf(2, 1);
- fpu_vgfmg(2, CONST_RU_POLY, 2);
-
- /*
- * Compute the GF(2) product of the CRC polynomial with T1(x) in
- * V2 and XOR the intermediate result, T2(x), with the value in V1.
- * The final result is stored in word element 2 of V2.
- */
- fpu_vupllf(2, 2);
- fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
-
- return fpu_vlgvf(2, 2);
-}
-
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]);
-}
-
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]);
-}
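
A small invariant that ties the two constant tables above to the comment's P(x)/P'(x) pairs: the reversed-domain generator is exactly the 32-bit bit reversal of the normal-domain one. A quick editorial check:

#include <stdint.h>

/* Plain 32-bit bit reversal. */
static uint32_t rev32(uint32_t x)
{
	uint32_t r = 0;

	for (int i = 0; i < 32; i++)
		r = (r << 1) | ((x >> i) & 1);
	return r;
}

/* rev32(0x04C11DB7) == 0xEDB88320   CRC-32  (IEEE 802.3)   */
/* rev32(0x1EDC6F41) == 0x82F63B78   CRC-32C (Castagnoli)   */
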
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 0f88123925a4f9..dcfdb7f1dae976 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -110,7 +110,6 @@ config SPARC64
select HAVE_SETUP_PER_CPU_AREA
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
- select ARCH_HAS_CRC32
config ARCH_PROC_KCORE_TEXT
def_bool y
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5cf9781d68b40e..2d6c3c53527348 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -54,5 +54,3 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
obj-$(CONFIG_SPARC64) += iomap.o
obj-$(CONFIG_SPARC32) += atomic32.o
obj-$(CONFIG_SPARC64) += PeeCeeI.o
-obj-$(CONFIG_CRC32_ARCH) += crc32-sparc.o
-crc32-sparc-y := crc32.o crc32c_asm.o
diff --git a/arch/sparc/lib/crc32.c b/arch/sparc/lib/crc32.c
deleted file mode 100644
index 40d4720a42a1b4..00000000000000
--- a/arch/sparc/lib/crc32.c
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* CRC32c (Castagnoli), sparc64 crc32c opcode accelerated
- *
- * This is based largely upon arch/x86/crypto/crc32c-intel.c
- *
- * Copyright (C) 2008 Intel Corporation
- * Authors: Austin Zhang <austin_zhang@linux.intel.com>
- * Kent Liu <kent.liu@intel.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/crc32.h>
-#include <asm/pstate.h>
-#include <asm/elf.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode);
-
-u32 crc32_le_arch(u32 crc, const u8 *data, size_t len)
-{
- return crc32_le_base(crc, data, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len);
-
-u32 crc32c_arch(u32 crc, const u8 *data, size_t len)
-{
- size_t n = -(uintptr_t)data & 7;
-
- if (!static_branch_likely(&have_crc32c_opcode))
- return crc32c_base(crc, data, len);
-
- if (n) {
- /* Data isn't 8-byte aligned. Align it. */
- n = min(n, len);
- crc = crc32c_base(crc, data, n);
- data += n;
- len -= n;
- }
- n = len & ~7U;
- if (n) {
- crc32c_sparc64(&crc, (const u64 *)data, n);
- data += n;
- len -= n;
- }
- if (len)
- crc = crc32c_base(crc, data, len);
- return crc;
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *data, size_t len)
-{
- return crc32_be_base(crc, data, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-static int __init crc32_sparc_init(void)
-{
- unsigned long cfr;
-
- if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
- return 0;
-
- __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
- if (!(cfr & CFR_CRC32C))
- return 0;
-
- static_branch_enable(&have_crc32c_opcode);
- pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
- return 0;
-}
-subsys_initcall(crc32_sparc_init);
-
-static void __exit crc32_sparc_exit(void)
-{
-}
-module_exit(crc32_sparc_exit);
-
-u32 crc32_optimizations(void)
-{
- if (static_key_enabled(&have_crc32c_opcode))
- return CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
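
As the s390 kerneldoc earlier put it, the @crc argument is "typically ~0": these arch functions are raw state updates, and the all-ones seed plus final inversion that the full CRC-32C definition requires belong to the caller. A self-contained sketch of that framing (editorial; the bit-serial update stands in for crc32c_arch()):

#include <stddef.h>
#include <stdint.h>

/* Bit-serial CRC-32C update (lsb-first, generator 0x82F63B78). */
static uint32_t crc32c_update(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78u : 0);
	}
	return crc;
}

/* Standard framing: seed with all ones, invert the final state. */
static uint32_t crc32c_full(const uint8_t *p, size_t len)
{
	return crc32c_update(~0u, p, len) ^ ~0u;
}
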
diff --git a/arch/sparc/lib/crc32c_asm.S b/arch/sparc/lib/crc32c_asm.S
deleted file mode 100644
index 4db873850f44c9..00000000000000
--- a/arch/sparc/lib/crc32c_asm.S
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/linkage.h>
-#include <asm/opcodes.h>
-#include <asm/visasm.h>
-#include <asm/asi.h>
-
-ENTRY(crc32c_sparc64)
- /* %o0=crc32p, %o1=data_ptr, %o2=len */
- VISEntryHalf
- lda [%o0] ASI_PL, %f1
-1: ldd [%o1], %f2
- CRC32C(0,2,0)
- subcc %o2, 8, %o2
- bne,pt %icc, 1b
- add %o1, 0x8, %o1
- sta %f1, [%o0] ASI_PL
- VISExitHalf
-2: retl
- nop
-ENDPROC(crc32c_sparc64)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ce91d47fb55345..edaab220d9c147 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -79,9 +79,6 @@ config X86
select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CPU_PASID if IOMMU_SVA
- select ARCH_HAS_CRC32
- select ARCH_HAS_CRC64 if X86_64
- select ARCH_HAS_CRC_T10DIF
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 4fa5c4e1ba8a00..dc5ee2a6938c46 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -40,16 +40,6 @@ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o
-obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
-crc32-x86-y := crc32.o crc32-pclmul.o
-crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o
-
-obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o
-crc64-x86-y := crc64.o crc64-pclmul.o
-
-obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o
-crc-t10dif-x86-y := crc-t10dif.o crc16-msb-pclmul.o
-
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
obj-y += iomem.o
diff --git a/arch/x86/lib/crc-pclmul-consts.h b/arch/x86/lib/crc-pclmul-consts.h
deleted file mode 100644
index fcc63c0643330f..00000000000000
--- a/arch/x86/lib/crc-pclmul-consts.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * CRC constants generated by:
- *
- * ./scripts/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
- *
- * Do not edit manually.
- */
-
-/*
- * CRC folding constants generated for most-significant-bit-first CRC-16 using
- * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct {
- u8 bswap_mask[16];
- u64 fold_across_2048_bits_consts[2];
- u64 fold_across_1024_bits_consts[2];
- u64 fold_across_512_bits_consts[2];
- u64 fold_across_256_bits_consts[2];
- u64 fold_across_128_bits_consts[2];
- u8 shuf_table[48];
- u64 barrett_reduction_consts[2];
-} crc16_msb_0x8bb7_consts ____cacheline_aligned __maybe_unused = {
- .bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
- .fold_across_2048_bits_consts = {
- 0xdccf000000000000, /* LO64_TERMS: (x^2000 mod G) * x^48 */
- 0x4b0b000000000000, /* HI64_TERMS: (x^2064 mod G) * x^48 */
- },
- .fold_across_1024_bits_consts = {
- 0x9d9d000000000000, /* LO64_TERMS: (x^976 mod G) * x^48 */
- 0x7cf5000000000000, /* HI64_TERMS: (x^1040 mod G) * x^48 */
- },
- .fold_across_512_bits_consts = {
- 0x044c000000000000, /* LO64_TERMS: (x^464 mod G) * x^48 */
- 0xe658000000000000, /* HI64_TERMS: (x^528 mod G) * x^48 */
- },
- .fold_across_256_bits_consts = {
- 0x6ee3000000000000, /* LO64_TERMS: (x^208 mod G) * x^48 */
- 0xe7b5000000000000, /* HI64_TERMS: (x^272 mod G) * x^48 */
- },
- .fold_across_128_bits_consts = {
- 0x2d56000000000000, /* LO64_TERMS: (x^80 mod G) * x^48 */
- 0x06df000000000000, /* HI64_TERMS: (x^144 mod G) * x^48 */
- },
- .shuf_table = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- },
- .barrett_reduction_consts = {
- 0x8bb7000000000000, /* LO64_TERMS: (G - x^16) * x^48 */
- 0xf65a57f81d33a48a, /* HI64_TERMS: (floor(x^79 / G) * x) - x^64 */
- },
-};
-
-/*
- * CRC folding constants generated for least-significant-bit-first CRC-32 using
- * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
- * x^5 + x^4 + x^2 + x^1 + x^0
- */
-static const struct {
- u64 fold_across_2048_bits_consts[2];
- u64 fold_across_1024_bits_consts[2];
- u64 fold_across_512_bits_consts[2];
- u64 fold_across_256_bits_consts[2];
- u64 fold_across_128_bits_consts[2];
- u8 shuf_table[48];
- u64 barrett_reduction_consts[2];
-} crc32_lsb_0xedb88320_consts ____cacheline_aligned __maybe_unused = {
- .fold_across_2048_bits_consts = {
- 0x00000000ce3371cb, /* HI64_TERMS: (x^2079 mod G) * x^32 */
- 0x00000000e95c1271, /* LO64_TERMS: (x^2015 mod G) * x^32 */
- },
- .fold_across_1024_bits_consts = {
- 0x0000000033fff533, /* HI64_TERMS: (x^1055 mod G) * x^32 */
- 0x00000000910eeec1, /* LO64_TERMS: (x^991 mod G) * x^32 */
- },
- .fold_across_512_bits_consts = {
- 0x000000008f352d95, /* HI64_TERMS: (x^543 mod G) * x^32 */
- 0x000000001d9513d7, /* LO64_TERMS: (x^479 mod G) * x^32 */
- },
- .fold_across_256_bits_consts = {
- 0x00000000f1da05aa, /* HI64_TERMS: (x^287 mod G) * x^32 */
- 0x0000000081256527, /* LO64_TERMS: (x^223 mod G) * x^32 */
- },
- .fold_across_128_bits_consts = {
- 0x00000000ae689191, /* HI64_TERMS: (x^159 mod G) * x^32 */
- 0x00000000ccaa009e, /* LO64_TERMS: (x^95 mod G) * x^32 */
- },
- .shuf_table = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- },
- .barrett_reduction_consts = {
- 0xb4e5b025f7011641, /* HI64_TERMS: floor(x^95 / G) */
- 0x00000001db710640, /* LO64_TERMS: (G - x^32) * x^31 */
- },
-};
-
-/*
- * CRC folding constants generated for most-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x^1 + x^0
- */
-static const struct {
- u8 bswap_mask[16];
- u64 fold_across_2048_bits_consts[2];
- u64 fold_across_1024_bits_consts[2];
- u64 fold_across_512_bits_consts[2];
- u64 fold_across_256_bits_consts[2];
- u64 fold_across_128_bits_consts[2];
- u8 shuf_table[48];
- u64 barrett_reduction_consts[2];
-} crc64_msb_0x42f0e1eba9ea3693_consts ____cacheline_aligned __maybe_unused = {
- .bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
- .fold_across_2048_bits_consts = {
- 0x7f52691a60ddc70d, /* LO64_TERMS: (x^2048 mod G) * x^0 */
- 0x7036b0389f6a0c82, /* HI64_TERMS: (x^2112 mod G) * x^0 */
- },
- .fold_across_1024_bits_consts = {
- 0x05cf79dea9ac37d6, /* LO64_TERMS: (x^1024 mod G) * x^0 */
- 0x001067e571d7d5c2, /* HI64_TERMS: (x^1088 mod G) * x^0 */
- },
- .fold_across_512_bits_consts = {
- 0x5f6843ca540df020, /* LO64_TERMS: (x^512 mod G) * x^0 */
- 0xddf4b6981205b83f, /* HI64_TERMS: (x^576 mod G) * x^0 */
- },
- .fold_across_256_bits_consts = {
- 0x571bee0a227ef92b, /* LO64_TERMS: (x^256 mod G) * x^0 */
- 0x44bef2a201b5200c, /* HI64_TERMS: (x^320 mod G) * x^0 */
- },
- .fold_across_128_bits_consts = {
- 0x05f5c3c7eb52fab6, /* LO64_TERMS: (x^128 mod G) * x^0 */
- 0x4eb938a7d257740e, /* HI64_TERMS: (x^192 mod G) * x^0 */
- },
- .shuf_table = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- },
- .barrett_reduction_consts = {
- 0x42f0e1eba9ea3693, /* LO64_TERMS: (G - x^64) * x^0 */
- 0x578d29d06cc4f872, /* HI64_TERMS: (floor(x^127 / G) * x) - x^64 */
- },
-};
-
-/*
- * CRC folding constants generated for least-significant-bit-first CRC-64 using
- * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
- * x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
- * x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
- * x^4 + x^3 + x^0
- */
-static const struct {
- u64 fold_across_2048_bits_consts[2];
- u64 fold_across_1024_bits_consts[2];
- u64 fold_across_512_bits_consts[2];
- u64 fold_across_256_bits_consts[2];
- u64 fold_across_128_bits_consts[2];
- u8 shuf_table[48];
- u64 barrett_reduction_consts[2];
-} crc64_lsb_0x9a6c9329ac4bc9b5_consts ____cacheline_aligned __maybe_unused = {
- .fold_across_2048_bits_consts = {
- 0x37ccd3e14069cabc, /* HI64_TERMS: (x^2111 mod G) * x^0 */
- 0xa043808c0f782663, /* LO64_TERMS: (x^2047 mod G) * x^0 */
- },
- .fold_across_1024_bits_consts = {
- 0xa1ca681e733f9c40, /* HI64_TERMS: (x^1087 mod G) * x^0 */
- 0x5f852fb61e8d92dc, /* LO64_TERMS: (x^1023 mod G) * x^0 */
- },
- .fold_across_512_bits_consts = {
- 0x0c32cdb31e18a84a, /* HI64_TERMS: (x^575 mod G) * x^0 */
- 0x62242240ace5045a, /* LO64_TERMS: (x^511 mod G) * x^0 */
- },
- .fold_across_256_bits_consts = {
- 0xb0bc2e589204f500, /* HI64_TERMS: (x^319 mod G) * x^0 */
- 0xe1e0bb9d45d7a44c, /* LO64_TERMS: (x^255 mod G) * x^0 */
- },
- .fold_across_128_bits_consts = {
- 0xeadc41fd2ba3d420, /* HI64_TERMS: (x^191 mod G) * x^0 */
- 0x21e9761e252621ac, /* LO64_TERMS: (x^127 mod G) * x^0 */
- },
- .shuf_table = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- },
- .barrett_reduction_consts = {
- 0x27ecfa329aef9f77, /* HI64_TERMS: floor(x^127 / G) */
- 0x34d926535897936a, /* LO64_TERMS: (G - x^64 - x^0) / x */
- },
-};
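
Every "(x^N mod G) * x^K" entry in the tables above is reproducible with a few lines of C: start from x^0 and repeatedly multiply by x, reducing whenever the degree reaches deg(G). An editorial sketch (scripts/gen-crc-consts.py, cited in the header comment, is the authoritative generator):

#include <stdint.h>

/*
 * x^n mod G(x) for an msb-first generator of degree 'deg' (deg < 64),
 * with G given including its leading term, e.g. G = 0x18BB7, deg = 16
 * for the CRC-16 above.
 */
static uint64_t xpow_mod(unsigned int n, uint64_t G, int deg)
{
	uint64_t r = 1;				/* the polynomial x^0 */

	while (n--) {
		r <<= 1;			/* multiply by x */
		if (r & (1ULL << deg))
			r ^= G;			/* reduce mod G(x) */
	}
	return r;
}

/* xpow_mod(80, 0x18BB7, 16) == 0x2d56, the (x^80 mod G) factor in the
 * crc16_msb_0x8bb7 fold_across_128_bits_consts entry. */
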
diff --git a/arch/x86/lib/crc-pclmul-template.S b/arch/x86/lib/crc-pclmul-template.S
deleted file mode 100644
index ae0b6144c503c8..00000000000000
--- a/arch/x86/lib/crc-pclmul-template.S
+++ /dev/null
@@ -1,582 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-//
-// Template to generate [V]PCLMULQDQ-based CRC functions for x86
-//
-// Copyright 2025 Google LLC
-//
-// Author: Eric Biggers <ebiggers@google.com>
-
-#include <linux/linkage.h>
-#include <linux/objtool.h>
-
-// Offsets within the generated constants table
-.set OFFSETOF_BSWAP_MASK, -5*16 // msb-first CRCs only
-.set OFFSETOF_FOLD_ACROSS_2048_BITS_CONSTS, -4*16 // must precede next
-.set OFFSETOF_FOLD_ACROSS_1024_BITS_CONSTS, -3*16 // must precede next
-.set OFFSETOF_FOLD_ACROSS_512_BITS_CONSTS, -2*16 // must precede next
-.set OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS, -1*16 // must precede next
-.set OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS, 0*16 // must be 0
-.set OFFSETOF_SHUF_TABLE, 1*16
-.set OFFSETOF_BARRETT_REDUCTION_CONSTS, 4*16
-
-// Emit a VEX (or EVEX) coded instruction if allowed, or emulate it using the
-// corresponding non-VEX instruction plus any needed moves. The supported
-// instruction formats are:
-//
-// - Two-arg [src, dst], where the non-VEX format is the same.
-// - Three-arg [src1, src2, dst] where the non-VEX format is
-// [src1, src2_and_dst]. If src2 != dst, then src1 must != dst too.
-//
-// \insn gives the instruction without a "v" prefix and including any immediate
-// argument if needed to make the instruction follow one of the above formats.
-// If \unaligned_mem_tmp is given, then the emitted non-VEX code moves \arg1 to
-// it first; this is needed when \arg1 is an unaligned mem operand.
-.macro _cond_vex insn:req, arg1:req, arg2:req, arg3, unaligned_mem_tmp
-.if AVX_LEVEL == 0
- // VEX not allowed. Emulate it.
- .ifnb \arg3 // Three-arg [src1, src2, dst]
- .ifc "\arg2", "\arg3" // src2 == dst?
- .ifnb \unaligned_mem_tmp
- movdqu \arg1, \unaligned_mem_tmp
- \insn \unaligned_mem_tmp, \arg3
- .else
- \insn \arg1, \arg3
- .endif
- .else // src2 != dst
- .ifc "\arg1", "\arg3"
- .error "Can't have src1 == dst when src2 != dst"
- .endif
- .ifnb \unaligned_mem_tmp
- movdqu \arg1, \unaligned_mem_tmp
- movdqa \arg2, \arg3
- \insn \unaligned_mem_tmp, \arg3
- .else
- movdqa \arg2, \arg3
- \insn \arg1, \arg3
- .endif
- .endif
- .else // Two-arg [src, dst]
- .ifnb \unaligned_mem_tmp
- movdqu \arg1, \unaligned_mem_tmp
- \insn \unaligned_mem_tmp, \arg2
- .else
- \insn \arg1, \arg2
- .endif
- .endif
-.else
- // VEX is allowed. Emit the desired instruction directly.
- .ifnb \arg3
- v\insn \arg1, \arg2, \arg3
- .else
- v\insn \arg1, \arg2
- .endif
-.endif
-.endm
-
-// Broadcast an aligned 128-bit mem operand to all 128-bit lanes of a vector
-// register of length VL.
-.macro _vbroadcast src, dst
-.if VL == 16
- _cond_vex movdqa, \src, \dst
-.elseif VL == 32
- vbroadcasti128 \src, \dst
-.else
- vbroadcasti32x4 \src, \dst
-.endif
-.endm
-
-// Load \vl bytes from the unaligned mem operand \src into \dst, and if the CRC
-// is msb-first use \bswap_mask to reflect the bytes within each 128-bit lane.
-.macro _load_data vl, src, bswap_mask, dst
-.if \vl < 64
- _cond_vex movdqu, "\src", \dst
-.else
- vmovdqu8 \src, \dst
-.endif
-.if !LSB_CRC
- _cond_vex pshufb, \bswap_mask, \dst, \dst
-.endif
-.endm
-
-.macro _prepare_v0 vl, v0, v1, bswap_mask
-.if LSB_CRC
- .if \vl < 64
- _cond_vex pxor, (BUF), \v0, \v0, unaligned_mem_tmp=\v1
- .else
- vpxorq (BUF), \v0, \v0
- .endif
-.else
- _load_data \vl, (BUF), \bswap_mask, \v1
- .if \vl < 64
- _cond_vex pxor, \v1, \v0, \v0
- .else
- vpxorq \v1, \v0, \v0
- .endif
-.endif
-.endm
-
-// The x^0..x^63 terms, i.e. poly128 mod x^64, i.e. the physically low qword for
-// msb-first order or the physically high qword for lsb-first order
-#define LO64_TERMS 0
-
-// The x^64..x^127 terms, i.e. floor(poly128 / x^64), i.e. the physically high
-// qword for msb-first order or the physically low qword for lsb-first order
-#define HI64_TERMS 1
-
-// Multiply the given \src1_terms of each 128-bit lane of \src1 by the given
-// \src2_terms of each 128-bit lane of \src2, and write the result(s) to \dst.
-.macro _pclmulqdq src1, src1_terms, src2, src2_terms, dst
- _cond_vex "pclmulqdq $((\src1_terms ^ LSB_CRC) << 4) ^ (\src2_terms ^ LSB_CRC),", \
- \src1, \src2, \dst
-.endm
-
-// Fold \acc into \data and store the result back into \acc. \data can be an
-// unaligned mem operand if using VEX is allowed and the CRC is lsb-first so no
-// byte-reflection is needed; otherwise it must be a vector register. \consts
-// is a vector register containing the needed fold constants, and \tmp is a
-// temporary vector register. All arguments must be the same length.
-.macro _fold_vec acc, data, consts, tmp
- _pclmulqdq \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp
- _pclmulqdq \consts, LO64_TERMS, \acc, LO64_TERMS, \acc
-.if AVX_LEVEL <= 2
- _cond_vex pxor, \data, \tmp, \tmp
- _cond_vex pxor, \tmp, \acc, \acc
-.else
- vpternlogq $0x96, \data, \tmp, \acc
-.endif
-.endm
-
-// Fold \acc into \data and store the result back into \acc. \data is an
-// unaligned mem operand, \consts is a vector register containing the needed
-// fold constants, \bswap_mask is a vector register containing the
-// byte-reflection table if the CRC is msb-first, and \tmp1 and \tmp2 are
-// temporary vector registers. All arguments must have length \vl.
-.macro _fold_vec_mem vl, acc, data, consts, bswap_mask, tmp1, tmp2
-.if AVX_LEVEL == 0 || !LSB_CRC
- _load_data \vl, \data, \bswap_mask, \tmp1
- _fold_vec \acc, \tmp1, \consts, \tmp2
-.else
- _fold_vec \acc, \data, \consts, \tmp1
-.endif
-.endm
-
-// Load the constants for folding across 2**i vectors of length VL at a time
-// into all 128-bit lanes of the vector register CONSTS.
-.macro _load_vec_folding_consts i
- _vbroadcast OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS+(4-LOG2_VL-\i)*16(CONSTS_PTR), \
- CONSTS
-.endm
-
-// Given vector registers \v0 and \v1 of length \vl, fold \v0 into \v1 and store
-// the result back into \v0. If the remaining length mod \vl is nonzero, also
-// fold \vl data bytes from BUF. For both operations the fold distance is \vl.
-// \consts must be a register of length \vl containing the fold constants.
-.macro _fold_vec_final vl, v0, v1, consts, bswap_mask, tmp1, tmp2
- _fold_vec \v0, \v1, \consts, \tmp1
- test $\vl, LEN8
- jz .Lfold_vec_final_done\@
- _fold_vec_mem \vl, \v0, (BUF), \consts, \bswap_mask, \tmp1, \tmp2
- add $\vl, BUF
-.Lfold_vec_final_done\@:
-.endm
-
-// This macro generates the body of a CRC function with the following prototype:
-//
-// crc_t crc_func(crc_t crc, const u8 *buf, size_t len, const void *consts);
-//
-// |crc| is the initial CRC, and crc_t is a data type wide enough to hold it.
-// |buf| is the data to checksum. |len| is the data length in bytes, which must
-// be at least 16. |consts| is a pointer to the fold_across_128_bits_consts
-// field of the constants struct that was generated for the chosen CRC variant.
-//
-// Moving onto the macro parameters, \n is the number of bits in the CRC, e.g.
-// 32 for a CRC-32. Currently the supported values are 8, 16, 32, and 64. If
-// the file is compiled in i386 mode, then the maximum supported value is 32.
-//
-// \lsb_crc is 1 if the CRC processes the least significant bit of each byte
-// first, i.e. maps bit0 to x^7, bit1 to x^6, ..., bit7 to x^0. \lsb_crc is 0
-// if the CRC processes the most significant bit of each byte first, i.e. maps
-// bit0 to x^0, bit1 to x^1, ..., bit7 to x^7.
-//
-// \vl is the maximum length of vector register to use in bytes: 16, 32, or 64.
-//
-// \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or
-// 512 for AVX512.
-//
-// If \vl == 16 && \avx_level == 0, the generated code requires:
-// PCLMULQDQ && SSE4.1. (Note: all known CPUs with PCLMULQDQ also have SSE4.1.)
-//
-// If \vl == 32 && \avx_level == 2, the generated code requires:
-// VPCLMULQDQ && AVX2.
-//
-// If \vl == 64 && \avx_level == 512, the generated code requires:
-// VPCLMULQDQ && AVX512BW && AVX512VL.
-//
-// Other \vl and \avx_level combinations are either not supported or not useful.
-.macro _crc_pclmul n, lsb_crc, vl, avx_level
- .set LSB_CRC, \lsb_crc
- .set VL, \vl
- .set AVX_LEVEL, \avx_level
-
- // Define aliases for the xmm, ymm, or zmm registers according to VL.
-.irp i, 0,1,2,3,4,5,6,7
- .if VL == 16
- .set V\i, %xmm\i
- .set LOG2_VL, 4
- .elseif VL == 32
- .set V\i, %ymm\i
- .set LOG2_VL, 5
- .elseif VL == 64
- .set V\i, %zmm\i
- .set LOG2_VL, 6
- .else
- .error "Unsupported vector length"
- .endif
-.endr
- // Define aliases for the function parameters.
- // Note: when crc_t is shorter than u32, zero-extension to 32 bits is
- // guaranteed by the ABI. Zero-extension to 64 bits is *not* guaranteed
- // when crc_t is shorter than u64.
-#ifdef __x86_64__
-.if \n <= 32
- .set CRC, %edi
-.else
- .set CRC, %rdi
-.endif
- .set BUF, %rsi
- .set LEN, %rdx
- .set LEN32, %edx
- .set LEN8, %dl
- .set CONSTS_PTR, %rcx
-#else
- // 32-bit support, assuming -mregparm=3 and not including support for
- // CRC-64 (which would use both eax and edx to pass the crc parameter).
- .set CRC, %eax
- .set BUF, %edx
- .set LEN, %ecx
- .set LEN32, %ecx
- .set LEN8, %cl
- .set CONSTS_PTR, %ebx // Passed on stack
-#endif
-
- // Define aliases for some local variables. V0-V5 are used without
- // aliases (for accumulators, data, temporary values, etc). Staying
- // within the first 8 vector registers keeps the code 32-bit SSE
- // compatible and reduces the size of 64-bit SSE code slightly.
- .set BSWAP_MASK, V6
- .set BSWAP_MASK_YMM, %ymm6
- .set BSWAP_MASK_XMM, %xmm6
- .set CONSTS, V7
- .set CONSTS_YMM, %ymm7
- .set CONSTS_XMM, %xmm7
-
- // Use ANNOTATE_NOENDBR to suppress an objtool warning, since the
- // functions generated by this macro are called only by static_call.
- ANNOTATE_NOENDBR
-
-#ifdef __i386__
- push CONSTS_PTR
- mov 8(%esp), CONSTS_PTR
-#endif
-
- // Create a 128-bit vector that contains the initial CRC in the end
- // representing the high-order polynomial coefficients, and the rest 0.
- // If the CRC is msb-first, also load the byte-reflection table.
-.if \n <= 32
- _cond_vex movd, CRC, %xmm0
-.else
- _cond_vex movq, CRC, %xmm0
-.endif
-.if !LSB_CRC
- _cond_vex pslldq, $(128-\n)/8, %xmm0, %xmm0
- _vbroadcast OFFSETOF_BSWAP_MASK(CONSTS_PTR), BSWAP_MASK
-.endif
-
- // Load the first vector of data and XOR the initial CRC into the
- // appropriate end of the first 128-bit lane of data. If LEN < VL, then
- // use a short vector and jump ahead to the final reduction. (LEN >= 16
- // is guaranteed here but not necessarily LEN >= VL.)
-.if VL >= 32
- cmp $VL, LEN
- jae .Lat_least_1vec\@
- .if VL == 64
- cmp $32, LEN32
- jb .Lless_than_32bytes\@
- _prepare_v0 32, %ymm0, %ymm1, BSWAP_MASK_YMM
- add $32, BUF
- jmp .Lreduce_256bits_to_128bits\@
-.Lless_than_32bytes\@:
- .endif
- _prepare_v0 16, %xmm0, %xmm1, BSWAP_MASK_XMM
- add $16, BUF
- vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
- jmp .Lcheck_for_partial_block\@
-.Lat_least_1vec\@:
-.endif
- _prepare_v0 VL, V0, V1, BSWAP_MASK
-
- // Handle VL <= LEN < 4*VL.
- cmp $4*VL-1, LEN
- ja .Lat_least_4vecs\@
- add $VL, BUF
- // If VL <= LEN < 2*VL, then jump ahead to the reduction from 1 vector.
- // If VL==16 then load fold_across_128_bits_consts first, as the final
- // reduction depends on it and it won't be loaded anywhere else.
- cmp $2*VL-1, LEN32
-.if VL == 16
- _cond_vex movdqa, OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
-.endif
- jbe .Lreduce_1vec_to_128bits\@
- // Otherwise 2*VL <= LEN < 4*VL. Load one more vector and jump ahead to
- // the reduction from 2 vectors.
- _load_data VL, (BUF), BSWAP_MASK, V1
- add $VL, BUF
- jmp .Lreduce_2vecs_to_1\@
-
-.Lat_least_4vecs\@:
- // Load 3 more vectors of data.
- _load_data VL, 1*VL(BUF), BSWAP_MASK, V1
- _load_data VL, 2*VL(BUF), BSWAP_MASK, V2
- _load_data VL, 3*VL(BUF), BSWAP_MASK, V3
- sub $-4*VL, BUF // Shorter than 'add 4*VL' when VL=32
- add $-4*VL, LEN // Shorter than 'sub 4*VL' when VL=32
-
- // Main loop: while LEN >= 4*VL, fold the 4 vectors V0-V3 into the next
- // 4 vectors of data and write the result back to V0-V3.
- cmp $4*VL-1, LEN // Shorter than 'cmp 4*VL' when VL=32
- jbe .Lreduce_4vecs_to_2\@
- _load_vec_folding_consts 2
-.Lfold_4vecs_loop\@:
- _fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
- _fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
- _fold_vec_mem VL, V2, 2*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
- _fold_vec_mem VL, V3, 3*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
- sub $-4*VL, BUF
- add $-4*VL, LEN
- cmp $4*VL-1, LEN
- ja .Lfold_4vecs_loop\@
-
- // Fold V0,V1 into V2,V3 and write the result back to V0,V1. Then fold
- // two more vectors of data from BUF, if at least that much remains.
-.Lreduce_4vecs_to_2\@:
- _load_vec_folding_consts 1
- _fold_vec V0, V2, CONSTS, V4
- _fold_vec V1, V3, CONSTS, V4
- test $2*VL, LEN8
- jz .Lreduce_2vecs_to_1\@
- _fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
- _fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
- sub $-2*VL, BUF
-
- // Fold V0 into V1 and write the result back to V0. Then fold one more
- // vector of data from BUF, if at least that much remains.
-.Lreduce_2vecs_to_1\@:
- _load_vec_folding_consts 0
- _fold_vec_final VL, V0, V1, CONSTS, BSWAP_MASK, V4, V5
-
-.Lreduce_1vec_to_128bits\@:
-.if VL == 64
- // Reduce 512-bit %zmm0 to 256-bit %ymm0. Then fold 256 more bits of
- // data from BUF, if at least that much remains.
- vbroadcasti128 OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS(CONSTS_PTR), CONSTS_YMM
- vextracti64x4 $1, %zmm0, %ymm1
- _fold_vec_final 32, %ymm0, %ymm1, CONSTS_YMM, BSWAP_MASK_YMM, %ymm4, %ymm5
-.Lreduce_256bits_to_128bits\@:
-.endif
-.if VL >= 32
- // Reduce 256-bit %ymm0 to 128-bit %xmm0. Then fold 128 more bits of
- // data from BUF, if at least that much remains.
- vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
- vextracti128 $1, %ymm0, %xmm1
- _fold_vec_final 16, %xmm0, %xmm1, CONSTS_XMM, BSWAP_MASK_XMM, %xmm4, %xmm5
-.Lcheck_for_partial_block\@:
-.endif
- and $15, LEN32
- jz .Lreduce_128bits_to_crc\@
-
- // 1 <= LEN <= 15 data bytes remain in BUF. The polynomial is now
- // A*(x^(8*LEN)) + B, where A is the 128-bit polynomial stored in %xmm0
- // and B is the polynomial of the remaining LEN data bytes. To reduce
- // this to 128 bits without needing fold constants for each possible
- // LEN, rearrange this expression into C1*(x^128) + C2, where
- // C1 = floor(A / x^(128 - 8*LEN)) and C2 = A*x^(8*LEN) + B mod x^128.
- // Then fold C1 into C2, which is just another fold across 128 bits.
-
-.if !LSB_CRC || AVX_LEVEL == 0
- // Load the last 16 data bytes. Note that originally LEN was >= 16.
- _load_data 16, "-16(BUF,LEN)", BSWAP_MASK_XMM, %xmm2
-.endif // Else will use vpblendvb mem operand later.
-.if !LSB_CRC
- neg LEN // Needed for indexing shuf_table
-.endif
-
- // tmp = A*x^(8*LEN) mod x^128
- // lsb: pshufb by [LEN, LEN+1, ..., 15, -1, -1, ..., -1]
- // i.e. right-shift by LEN bytes.
- // msb: pshufb by [-1, -1, ..., -1, 0, 1, ..., 15-LEN]
- // i.e. left-shift by LEN bytes.
- _cond_vex movdqu, "OFFSETOF_SHUF_TABLE+16(CONSTS_PTR,LEN)", %xmm3
- _cond_vex pshufb, %xmm3, %xmm0, %xmm1
-
- // C1 = floor(A / x^(128 - 8*LEN))
- // lsb: pshufb by [-1, -1, ..., -1, 0, 1, ..., LEN-1]
- // i.e. left-shift by 16-LEN bytes.
- // msb: pshufb by [16-LEN, 16-LEN+1, ..., 15, -1, -1, ..., -1]
- // i.e. right-shift by 16-LEN bytes.
- _cond_vex pshufb, "OFFSETOF_SHUF_TABLE+32*!LSB_CRC(CONSTS_PTR,LEN)", \
- %xmm0, %xmm0, unaligned_mem_tmp=%xmm4
-
- // C2 = tmp + B. This is just a blend of tmp with the last 16 data
- // bytes (reflected if msb-first). The blend mask is the shuffle table
-	// that was used to create tmp.  0 selects tmp, and 1 selects last16databytes.
-.if AVX_LEVEL == 0
- movdqa %xmm0, %xmm4
- movdqa %xmm3, %xmm0
- pblendvb %xmm2, %xmm1 // uses %xmm0 as implicit operand
- movdqa %xmm4, %xmm0
-.elseif LSB_CRC
- vpblendvb %xmm3, -16(BUF,LEN), %xmm1, %xmm1
-.else
- vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
-.endif
-
- // Fold C1 into C2 and store the 128-bit result in %xmm0.
- _fold_vec %xmm0, %xmm1, CONSTS_XMM, %xmm4
-
-.Lreduce_128bits_to_crc\@:
- // Compute the CRC as %xmm0 * x^n mod G. Here %xmm0 means the 128-bit
- // polynomial stored in %xmm0 (using either lsb-first or msb-first bit
- // order according to LSB_CRC), and G is the CRC's generator polynomial.
-
- // First, multiply %xmm0 by x^n and reduce the result to 64+n bits:
- //
- // t0 := (x^(64+n) mod G) * floor(%xmm0 / x^64) +
- // x^n * (%xmm0 mod x^64)
- //
- // Store t0 * x^(64-n) in %xmm0. I.e., actually do:
- //
- // %xmm0 := ((x^(64+n) mod G) * x^(64-n)) * floor(%xmm0 / x^64) +
- // x^64 * (%xmm0 mod x^64)
- //
- // The extra unreduced factor of x^(64-n) makes floor(t0 / x^n) aligned
- // to the HI64_TERMS of %xmm0 so that the next pclmulqdq can easily
- // select it. The 64-bit constant (x^(64+n) mod G) * x^(64-n) in the
- // msb-first case, or (x^(63+n) mod G) * x^(64-n) in the lsb-first case
- // (considering the extra factor of x that gets implicitly introduced by
- // each pclmulqdq when using lsb-first order), is identical to the
- // constant that was used earlier for folding the LO64_TERMS across 128
- // bits. Thus it's already available in LO64_TERMS of CONSTS_XMM.
- _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm0, HI64_TERMS, %xmm1
-.if LSB_CRC
- _cond_vex psrldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64)
-.else
- _cond_vex pslldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64)
-.endif
- _cond_vex pxor, %xmm1, %xmm0, %xmm0
- // The HI64_TERMS of %xmm0 now contain floor(t0 / x^n).
- // The LO64_TERMS of %xmm0 now contain (t0 mod x^n) * x^(64-n).
-
- // First step of Barrett reduction: Compute floor(t0 / G). This is the
- // polynomial by which G needs to be multiplied to cancel out the x^n
- // and higher terms of t0, i.e. to reduce t0 mod G. First do:
- //
- // t1 := floor(x^(63+n) / G) * x * floor(t0 / x^n)
- //
- // Then the desired value floor(t0 / G) is floor(t1 / x^64). The 63 in
- // x^(63+n) is the maximum degree of floor(t0 / x^n) and thus the lowest
- // value that makes enough precision be carried through the calculation.
- //
- // The '* x' makes it so the result is floor(t1 / x^64) rather than
- // floor(t1 / x^63), making it qword-aligned in HI64_TERMS so that it
- // can be extracted much more easily in the next step. In the lsb-first
- // case the '* x' happens implicitly. In the msb-first case it must be
- // done explicitly; floor(x^(63+n) / G) * x is a 65-bit constant, so the
- // constant passed to pclmulqdq is (floor(x^(63+n) / G) * x) - x^64, and
- // the multiplication by the x^64 term is handled using a pxor. The
- // pxor causes the low 64 terms of t1 to be wrong, but they are unused.
- _cond_vex movdqa, OFFSETOF_BARRETT_REDUCTION_CONSTS(CONSTS_PTR), CONSTS_XMM
- _pclmulqdq CONSTS_XMM, HI64_TERMS, %xmm0, HI64_TERMS, %xmm1
-.if !LSB_CRC
- _cond_vex pxor, %xmm0, %xmm1, %xmm1 // += x^64 * floor(t0 / x^n)
-.endif
- // The HI64_TERMS of %xmm1 now contain floor(t1 / x^64) = floor(t0 / G).
-
- // Second step of Barrett reduction: Cancel out the x^n and higher terms
- // of t0 by subtracting the needed multiple of G. This gives the CRC:
- //
- // crc := t0 - (G * floor(t0 / G))
- //
- // But %xmm0 contains t0 * x^(64-n), so it's more convenient to do:
- //
- // crc := ((t0 * x^(64-n)) - ((G * x^(64-n)) * floor(t0 / G))) / x^(64-n)
- //
- // Furthermore, since the resulting CRC is n-bit, if mod x^n is
- // explicitly applied to it then the x^n term of G makes no difference
- // in the result and can be omitted. This helps keep the constant
- // multiplier in 64 bits in most cases. This gives the following:
- //
- // %xmm0 := %xmm0 - (((G - x^n) * x^(64-n)) * floor(t0 / G))
- // crc := (%xmm0 / x^(64-n)) mod x^n
- //
- // In the lsb-first case, each pclmulqdq implicitly introduces
- // an extra factor of x, so in that case the constant that needs to be
- // passed to pclmulqdq is actually '(G - x^n) * x^(63-n)' when n <= 63.
- // For lsb-first CRCs where n=64, the extra factor of x cannot be as
- // easily avoided. In that case, instead pass '(G - x^n - x^0) / x' to
- // pclmulqdq and handle the x^0 term (i.e. 1) separately. (All CRC
- // polynomials have nonzero x^n and x^0 terms.) It works out as: the
-	// CRC has to be XORed with the physically low qword of %xmm1, representing
- // floor(t0 / G). The most efficient way to do that is to move it to
- // the physically high qword and use a ternlog to combine the two XORs.
-.if LSB_CRC && \n == 64
- _cond_vex punpcklqdq, %xmm1, %xmm2, %xmm2
- _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
- .if AVX_LEVEL <= 2
- _cond_vex pxor, %xmm2, %xmm0, %xmm0
- _cond_vex pxor, %xmm1, %xmm0, %xmm0
- .else
- vpternlogq $0x96, %xmm2, %xmm1, %xmm0
- .endif
- _cond_vex "pextrq $1,", %xmm0, %rax // (%xmm0 / x^0) mod x^64
-.else
- _pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
- _cond_vex pxor, %xmm1, %xmm0, %xmm0
- .if \n == 8
- _cond_vex "pextrb $7 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^56) mod x^8
- .elseif \n == 16
- _cond_vex "pextrw $3 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^48) mod x^16
- .elseif \n == 32
- _cond_vex "pextrd $1 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^32) mod x^32
- .else // \n == 64 && !LSB_CRC
- _cond_vex movq, %xmm0, %rax // (%xmm0 / x^0) mod x^64
- .endif
-.endif
-
-.if VL > 16
- vzeroupper // Needed when ymm or zmm registers may have been used.
-.endif
-#ifdef __i386__
- pop CONSTS_PTR
-#endif
- RET
-.endm
-
-#ifdef CONFIG_AS_VPCLMULQDQ
-#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \
-SYM_FUNC_START(prefix##_pclmul_sse); \
- _crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \
-SYM_FUNC_END(prefix##_pclmul_sse); \
- \
-SYM_FUNC_START(prefix##_vpclmul_avx2); \
- _crc_pclmul n=bits, lsb_crc=lsb, vl=32, avx_level=2; \
-SYM_FUNC_END(prefix##_vpclmul_avx2); \
- \
-SYM_FUNC_START(prefix##_vpclmul_avx512); \
- _crc_pclmul n=bits, lsb_crc=lsb, vl=64, avx_level=512; \
-SYM_FUNC_END(prefix##_vpclmul_avx512);
-#else
-#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \
-SYM_FUNC_START(prefix##_pclmul_sse); \
- _crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \
-SYM_FUNC_END(prefix##_pclmul_sse);
-#endif // !CONFIG_AS_VPCLMULQDQ
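
The Barrett reduction described in the comments of the deleted template above is easier to follow in scalar code. Below is a standalone C model (not kernel code) of the same two-step reduction, using CRC-16-CCITT as the example polynomial; clmul64() stands in for pclmulqdq, and the constant floor(x^(63+n) / G) is derived at runtime by long division instead of being precomputed. The '* x' qword-alignment trick from the assembly is omitted, so this models the plain floor(t1 / x^63) form of the first step.

#include <stdint.h>
#include <stdio.h>

/* Carryless (GF(2)) multiply of two 64-bit polynomials -> 128 bits. */
static void clmul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
	uint64_t h = 0, l = 0;

	for (int i = 0; i < 64; i++) {
		if (b & (1ULL << i)) {
			l ^= a << i;
			h ^= i ? a >> (64 - i) : 0;
		}
	}
	*hi = h;
	*lo = l;
}

/* floor(x^k / g) for a degree-n g (g includes its x^n term). */
static uint64_t polydiv_xk(int k, uint64_t g, int n)
{
	uint64_t quot = 0, rem = 1;	/* invariant: x^i = quot*g + rem */

	for (int i = 0; i < k; i++) {
		rem <<= 1;
		quot <<= 1;
		if ((rem >> n) & 1) {
			rem ^= g;
			quot |= 1;
		}
	}
	return quot;
}

/* Two-step Barrett reduction: t0 mod G, for deg(t0) <= 63, deg(G) = n. */
static uint32_t barrett_crc(uint64_t t0, uint64_t g, int n)
{
	uint64_t mu = polydiv_xk(63 + n, g, n);	/* floor(x^(63+n) / G) */
	uint64_t hi, lo, q;

	/* Step 1: q := floor(floor(t0 / x^n) * mu / x^63) = floor(t0 / G). */
	clmul64(t0 >> n, mu, &hi, &lo);
	q = (hi << 1) | (lo >> 63);

	/* Step 2: crc := t0 - G * floor(t0 / G), keeping the low n terms. */
	clmul64(q, g, &hi, &lo);
	return (uint32_t)((t0 ^ lo) & ((1ULL << n) - 1));
}

int main(void)
{
	const uint64_t G = 0x11021;	/* x^16 + x^12 + x^5 + 1, CRC-16-CCITT */
	const int n = 16;
	uint64_t t0 = 0x0123456789abcdefULL;
	uint64_t rem = t0;

	/* Reference: plain shift-and-subtract reduction of t0 mod G. */
	for (int i = 63; i >= n; i--)
		if ((rem >> i) & 1)
			rem ^= G << (i - n);

	printf("barrett=%04x reference=%04x\n",
	       (unsigned)barrett_crc(t0, G, n), (unsigned)rem);
	return 0;
}

Both steps map one-to-one onto the pclmulqdq + pxor sequence in the assembly. Unlike integer Barrett reduction, the polynomial quotient is exact (proper fractions over GF(2) never carry), which is why no correction step is needed and the reduction finishes in two multiplies.
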
diff --git a/arch/x86/lib/crc-pclmul-template.h b/arch/x86/lib/crc-pclmul-template.h
deleted file mode 100644
index c5b3bfe11d8da0..00000000000000
--- a/arch/x86/lib/crc-pclmul-template.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are
- * instantiated by crc-pclmul-template.S
- *
- * Copyright 2025 Google LLC
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-#ifndef _CRC_PCLMUL_TEMPLATE_H
-#define _CRC_PCLMUL_TEMPLATE_H
-
-#include <asm/cpufeatures.h>
-#include <asm/simd.h>
-#include <crypto/internal/simd.h>
-#include <linux/static_call.h>
-#include "crc-pclmul-consts.h"
-
-#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t) \
-crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \
- const void *consts_ptr); \
-crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \
- const void *consts_ptr); \
-crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \
- const void *consts_ptr); \
-DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
-
-#define INIT_CRC_PCLMUL(prefix) \
-do { \
- if (IS_ENABLED(CONFIG_AS_VPCLMULQDQ) && \
- boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && \
- boot_cpu_has(X86_FEATURE_AVX2) && \
- cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) { \
- if (boot_cpu_has(X86_FEATURE_AVX512BW) && \
- boot_cpu_has(X86_FEATURE_AVX512VL) && \
- !boot_cpu_has(X86_FEATURE_PREFER_YMM) && \
- cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) { \
- static_call_update(prefix##_pclmul, \
- prefix##_vpclmul_avx512); \
- } else { \
- static_call_update(prefix##_pclmul, \
- prefix##_vpclmul_avx2); \
- } \
- } \
-} while (0)
-
-/*
- * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16
- * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.
- *
- * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.
- * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),
- * varying by CPU and factors such as which parts of the "FPU" state userspace
- * has touched, which could result in a larger cutoff being better. Indeed, a
- * larger cutoff is usually better for a *single* message. However, the
- * overhead of the FPU section gets amortized if multiple FPU sections get
- * executed before returning to userspace, since the XSAVE and XRSTOR occur only
- * once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on
- * the dcache than the table-based code is, a 16-byte cutoff seems to work well.
- */
-#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \
-do { \
- if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \
- crypto_simd_usable()) { \
- const void *consts_ptr; \
- \
- consts_ptr = (consts).fold_across_128_bits_consts; \
- kernel_fpu_begin(); \
- crc = static_call(prefix##_pclmul)((crc), (p), (len), \
- consts_ptr); \
- kernel_fpu_end(); \
- return crc; \
- } \
-} while (0)
-
-#endif /* _CRC_PCLMUL_TEMPLATE_H */
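
The DECLARE/INIT pair in the deleted header implements one-time dispatch: at boot, INIT_CRC_PCLMUL() probes CPU features once and retargets the static call, so the per-call path carries no feature checks. Here is a rough userspace model of that selection logic, using a plain function pointer where the kernel uses static_call() (which patches the call site rather than loading a pointer); all names below are illustrative stand-ins, not kernel APIs.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t (*crc_fn)(uint32_t crc, const uint8_t *p, size_t len,
			   const void *consts);

/* Stand-ins for the three bodies DEFINE_CRC_PCLMUL_FUNCS() generates. */
static uint32_t crc_pclmul_sse(uint32_t c, const uint8_t *p, size_t len,
			       const void *k)
{ (void)p; (void)len; (void)k; return c; }

static uint32_t crc_vpclmul_avx2(uint32_t c, const uint8_t *p, size_t len,
				 const void *k)
{ (void)p; (void)len; (void)k; return c; }

static uint32_t crc_vpclmul_avx512(uint32_t c, const uint8_t *p, size_t len,
				   const void *k)
{ (void)p; (void)len; (void)k; return c; }

static crc_fn crc_impl = crc_pclmul_sse;	/* safe baseline */

/* One-time selection, mirroring INIT_CRC_PCLMUL()'s feature checks. */
static void crc_select_impl(bool vpclmulqdq, bool avx2, bool avx512bw_vl,
			    bool prefer_ymm)
{
	if (vpclmulqdq && avx2) {
		/* Skip zmm on CPUs that run 512-bit code at lower clocks. */
		if (avx512bw_vl && !prefer_ymm)
			crc_impl = crc_vpclmul_avx512;
		else
			crc_impl = crc_vpclmul_avx2;
	}
}
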
diff --git a/arch/x86/lib/crc-t10dif.c b/arch/x86/lib/crc-t10dif.c
deleted file mode 100644
index db7ce59c31ace0..00000000000000
--- a/arch/x86/lib/crc-t10dif.c
+++ /dev/null
@@ -1,40 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * CRC-T10DIF using [V]PCLMULQDQ instructions
- *
- * Copyright 2024 Google LLC
- */
-
-#include <linux/crc-t10dif.h>
-#include <linux/module.h>
-#include "crc-pclmul-template.h"
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
-
-DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16);
-
-u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
-{
- CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts,
- have_pclmulqdq);
- return crc_t10dif_generic(crc, p, len);
-}
-EXPORT_SYMBOL(crc_t10dif_arch);
-
-static int __init crc_t10dif_x86_init(void)
-{
- if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
- static_branch_enable(&have_pclmulqdq);
- INIT_CRC_PCLMUL(crc16_msb);
- }
- return 0;
-}
-subsys_initcall(crc_t10dif_x86_init);
-
-static void __exit crc_t10dif_x86_exit(void)
-{
-}
-module_exit(crc_t10dif_x86_exit);
-
-MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions");
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/lib/crc16-msb-pclmul.S b/arch/x86/lib/crc16-msb-pclmul.S
deleted file mode 100644
index e9fe248093a887..00000000000000
--- a/arch/x86/lib/crc16-msb-pclmul.S
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-// Copyright 2025 Google LLC
-
-#include "crc-pclmul-template.S"
-
-DEFINE_CRC_PCLMUL_FUNCS(crc16_msb, /* bits= */ 16, /* lsb= */ 0)
diff --git a/arch/x86/lib/crc32-pclmul.S b/arch/x86/lib/crc32-pclmul.S
deleted file mode 100644
index f20f40fb0172d7..00000000000000
--- a/arch/x86/lib/crc32-pclmul.S
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-// Copyright 2025 Google LLC
-
-#include "crc-pclmul-template.S"
-
-DEFINE_CRC_PCLMUL_FUNCS(crc32_lsb, /* bits= */ 32, /* lsb= */ 1)
diff --git a/arch/x86/lib/crc32.c b/arch/x86/lib/crc32.c
deleted file mode 100644
index d09343e2cea932..00000000000000
--- a/arch/x86/lib/crc32.c
+++ /dev/null
@@ -1,111 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * x86-optimized CRC32 functions
- *
- * Copyright (C) 2008 Intel Corporation
- * Copyright 2012 Xyratex Technology Limited
- * Copyright 2024 Google LLC
- */
-
-#include <linux/crc32.h>
-#include <linux/module.h>
-#include "crc-pclmul-template.h"
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
-
-DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
-
-u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
-{
- CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
- have_pclmulqdq);
- return crc32_le_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_le_arch);
-
-#ifdef CONFIG_X86_64
-#define CRC32_INST "crc32q %1, %q0"
-#else
-#define CRC32_INST "crc32l %1, %0"
-#endif
-
-/*
- * Use the carryless multiply version of crc32c when the buffer size is
- * >= 512 bytes, to account for the FPU state save/restore overhead.
- */
-#define CRC32C_PCLMUL_BREAKEVEN 512
-
-asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-
-u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
-{
- size_t num_longs;
-
- if (!static_branch_likely(&have_crc32))
- return crc32c_base(crc, p, len);
-
- if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
- static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
- kernel_fpu_begin();
- crc = crc32c_x86_3way(crc, p, len);
- kernel_fpu_end();
- return crc;
- }
-
- for (num_longs = len / sizeof(unsigned long);
- num_longs != 0; num_longs--, p += sizeof(unsigned long))
- asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
-
- if (sizeof(unsigned long) > 4 && (len & 4)) {
- asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
- p += 4;
- }
- if (len & 2) {
- asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
- p += 2;
- }
- if (len & 1)
- asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
-
- return crc;
-}
-EXPORT_SYMBOL(crc32c_arch);
-
-u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
-{
- return crc32_be_base(crc, p, len);
-}
-EXPORT_SYMBOL(crc32_be_arch);
-
-static int __init crc32_x86_init(void)
-{
- if (boot_cpu_has(X86_FEATURE_XMM4_2))
- static_branch_enable(&have_crc32);
- if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
- static_branch_enable(&have_pclmulqdq);
- INIT_CRC_PCLMUL(crc32_lsb);
- }
- return 0;
-}
-subsys_initcall(crc32_x86_init);
-
-static void __exit crc32_x86_exit(void)
-{
-}
-module_exit(crc32_x86_exit);
-
-u32 crc32_optimizations(void)
-{
- u32 optimizations = 0;
-
- if (static_key_enabled(&have_crc32))
- optimizations |= CRC32C_OPTIMIZATION;
- if (static_key_enabled(&have_pclmulqdq))
- optimizations |= CRC32_LE_OPTIMIZATION;
- return optimizations;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_DESCRIPTION("x86-optimized CRC32 functions");
-MODULE_LICENSE("GPL");
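
Putting the pieces together: crc32c_arch() above picks between three strategies based on length and CPU state: the table-based fallback when SSE4.2 is absent, the crc32q instruction loop for short buffers or non-SIMD contexts, and the 3-way PCLMUL path (crc32c_x86_3way) once the buffer reaches the 512-byte breakeven. Callers never invoke it directly; they go through the crc32c() library wrapper. A minimal usage sketch, assuming the customary all-ones seed and final inversion (the iSCSI convention) and a hypothetical helper name:

#include <linux/crc32.h>

/* Hypothetical helper: one-shot CRC-32C of a buffer. */
static u32 buf_crc32c(const void *buf, size_t len)
{
	/* Seed with all-ones, invert the result at the end. */
	return ~crc32c(~0, buf, len);
}
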
diff --git a/arch/x86/lib/crc32c-3way.S b/arch/x86/lib/crc32c-3way.S
deleted file mode 100644
index 9b8770503bbcdf..00000000000000
--- a/arch/x86/lib/crc32c-3way.S
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
- *
- * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
- * downloaded from:
- * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
- * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
- *
- * Copyright (C) 2012 Intel Corporation.
- * Copyright 2024 Google LLC
- *
- * Authors:
- * Wajdi Feghali <wajdi.k.feghali@intel.com>
- * James Guilford <james.guilford@intel.com>
- * David Cote <david.m.cote@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/linkage.h>
-
-## iSCSI CRC-32C implementation using the crc32 and pclmulqdq instructions
-
-# Define threshold below which buffers are considered "small" and routed to
-# regular CRC code that does not interleave the CRC instructions.
-#define SMALL_SIZE 200
-
-# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-
-.text
-SYM_FUNC_START(crc32c_x86_3way)
-#define crc0 %edi
-#define crc0_q %rdi
-#define bufp %rsi
-#define bufp_d %esi
-#define len %rdx
-#define len_dw %edx
-#define n_misaligned %ecx /* overlaps chunk_bytes! */
-#define n_misaligned_q %rcx
-#define chunk_bytes %ecx /* overlaps n_misaligned! */
-#define chunk_bytes_q %rcx
-#define crc1 %r8
-#define crc2 %r9
-
- cmp $SMALL_SIZE, len
- jb .Lsmall
-
- ################################################################
- ## 1) ALIGN:
- ################################################################
- mov bufp_d, n_misaligned
- neg n_misaligned
- and $7, n_misaligned # calculate the misalignment amount of
- # the address
- je .Laligned # Skip if aligned
-
- # Process 1 <= n_misaligned <= 7 bytes individually in order to align
- # the remaining data to an 8-byte boundary.
-.Ldo_align:
- movq (bufp), %rax
- add n_misaligned_q, bufp
- sub n_misaligned_q, len
-.Lalign_loop:
- crc32b %al, crc0 # compute crc32 of 1 byte
- shr $8, %rax # get next byte
- dec n_misaligned
- jne .Lalign_loop
-.Laligned:
-
- ################################################################
- ## 2) PROCESS BLOCK:
- ################################################################
-
- cmp $128*24, len
- jae .Lfull_block
-
-.Lpartial_block:
- # Compute floor(len / 24) to get num qwords to process from each lane.
- imul $2731, len_dw, %eax # 2731 = ceil(2^16 / 24)
- shr $16, %eax
- jmp .Lcrc_3lanes
-
-.Lfull_block:
- # Processing 128 qwords from each lane.
- mov $128, %eax
-
- ################################################################
- ## 3) CRC each of three lanes:
- ################################################################
-
-.Lcrc_3lanes:
- xor crc1,crc1
- xor crc2,crc2
- mov %eax, chunk_bytes
- shl $3, chunk_bytes # num bytes to process from each lane
- sub $5, %eax # 4 for 4x_loop, 1 for special last iter
- jl .Lcrc_3lanes_4x_done
-
- # Unroll the loop by a factor of 4 to reduce the overhead of the loop
- # bookkeeping instructions, which can compete with crc32q for the ALUs.
-.Lcrc_3lanes_4x_loop:
- crc32q (bufp), crc0_q
- crc32q (bufp,chunk_bytes_q), crc1
- crc32q (bufp,chunk_bytes_q,2), crc2
- crc32q 8(bufp), crc0_q
- crc32q 8(bufp,chunk_bytes_q), crc1
- crc32q 8(bufp,chunk_bytes_q,2), crc2
- crc32q 16(bufp), crc0_q
- crc32q 16(bufp,chunk_bytes_q), crc1
- crc32q 16(bufp,chunk_bytes_q,2), crc2
- crc32q 24(bufp), crc0_q
- crc32q 24(bufp,chunk_bytes_q), crc1
- crc32q 24(bufp,chunk_bytes_q,2), crc2
- add $32, bufp
- sub $4, %eax
- jge .Lcrc_3lanes_4x_loop
-
-.Lcrc_3lanes_4x_done:
- add $4, %eax
- jz .Lcrc_3lanes_last_qword
-
-.Lcrc_3lanes_1x_loop:
- crc32q (bufp), crc0_q
- crc32q (bufp,chunk_bytes_q), crc1
- crc32q (bufp,chunk_bytes_q,2), crc2
- add $8, bufp
- dec %eax
- jnz .Lcrc_3lanes_1x_loop
-
-.Lcrc_3lanes_last_qword:
- crc32q (bufp), crc0_q
- crc32q (bufp,chunk_bytes_q), crc1
-# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet
-
- ################################################################
- ## 4) Combine three results:
- ################################################################
-
- lea (K_table-8)(%rip), %rax # first entry is for idx 1
- pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
- lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
- sub %rax, len # len -= chunk_bytes * 3
-
- movq crc0_q, %xmm1 # CRC for block 1
- pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
-
- movq crc1, %xmm2 # CRC for block 2
- pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
-
- pxor %xmm2,%xmm1
- movq %xmm1, %rax
- xor (bufp,chunk_bytes_q,2), %rax
- mov crc2, crc0_q
- crc32 %rax, crc0_q
- lea 8(bufp,chunk_bytes_q,2), bufp
-
- ################################################################
- ## 5) If more blocks remain, goto (2):
- ################################################################
-
- cmp $128*24, len
- jae .Lfull_block
- cmp $SMALL_SIZE, len
- jae .Lpartial_block
-
- #######################################################################
- ## 6) Process any remainder without interleaving:
- #######################################################################
-.Lsmall:
- test len_dw, len_dw
- jz .Ldone
- mov len_dw, %eax
- shr $3, %eax
- jz .Ldo_dword
-.Ldo_qwords:
- crc32q (bufp), crc0_q
- add $8, bufp
- dec %eax
- jnz .Ldo_qwords
-.Ldo_dword:
- test $4, len_dw
- jz .Ldo_word
- crc32l (bufp), crc0
- add $4, bufp
-.Ldo_word:
- test $2, len_dw
- jz .Ldo_byte
- crc32w (bufp), crc0
- add $2, bufp
-.Ldo_byte:
- test $1, len_dw
- jz .Ldone
- crc32b (bufp), crc0
-.Ldone:
- mov crc0, %eax
- RET
-SYM_FUNC_END(crc32c_x86_3way)
-
-.section .rodata, "a", @progbits
- ################################################################
- ## PCLMULQDQ tables
- ## Table is 128 entries x 2 words (8 bytes) each
- ################################################################
-.align 8
-K_table:
- .long 0x493c7d27, 0x00000001
- .long 0xba4fc28e, 0x493c7d27
- .long 0xddc0152b, 0xf20c0dfe
- .long 0x9e4addf8, 0xba4fc28e
- .long 0x39d3b296, 0x3da6d0cb
- .long 0x0715ce53, 0xddc0152b
- .long 0x47db8317, 0x1c291d04
- .long 0x0d3b6092, 0x9e4addf8
- .long 0xc96cfdc0, 0x740eef02
- .long 0x878a92a7, 0x39d3b296
- .long 0xdaece73e, 0x083a6eec
- .long 0xab7aff2a, 0x0715ce53
- .long 0x2162d385, 0xc49f4f67
- .long 0x83348832, 0x47db8317
- .long 0x299847d5, 0x2ad91c30
- .long 0xb9e02b86, 0x0d3b6092
- .long 0x18b33a4e, 0x6992cea2
- .long 0xb6dd949b, 0xc96cfdc0
- .long 0x78d9ccb7, 0x7e908048
- .long 0xbac2fd7b, 0x878a92a7
- .long 0xa60ce07b, 0x1b3d8f29
- .long 0xce7f39f4, 0xdaece73e
- .long 0x61d82e56, 0xf1d0f55e
- .long 0xd270f1a2, 0xab7aff2a
- .long 0xc619809d, 0xa87ab8a8
- .long 0x2b3cac5d, 0x2162d385
- .long 0x65863b64, 0x8462d800
- .long 0x1b03397f, 0x83348832
- .long 0xebb883bd, 0x71d111a8
- .long 0xb3e32c28, 0x299847d5
- .long 0x064f7f26, 0xffd852c6
- .long 0xdd7e3b0c, 0xb9e02b86
- .long 0xf285651c, 0xdcb17aa4
- .long 0x10746f3c, 0x18b33a4e
- .long 0xc7a68855, 0xf37c5aee
- .long 0x271d9844, 0xb6dd949b
- .long 0x8e766a0c, 0x6051d5a2
- .long 0x93a5f730, 0x78d9ccb7
- .long 0x6cb08e5c, 0x18b0d4ff
- .long 0x6b749fb2, 0xbac2fd7b
- .long 0x1393e203, 0x21f3d99c
- .long 0xcec3662e, 0xa60ce07b
- .long 0x96c515bb, 0x8f158014
- .long 0xe6fc4e6a, 0xce7f39f4
- .long 0x8227bb8a, 0xa00457f7
- .long 0xb0cd4768, 0x61d82e56
- .long 0x39c7ff35, 0x8d6d2c43
- .long 0xd7a4825c, 0xd270f1a2
- .long 0x0ab3844b, 0x00ac29cf
- .long 0x0167d312, 0xc619809d
- .long 0xf6076544, 0xe9adf796
- .long 0x26f6a60a, 0x2b3cac5d
- .long 0xa741c1bf, 0x96638b34
- .long 0x98d8d9cb, 0x65863b64
- .long 0x49c3cc9c, 0xe0e9f351
- .long 0x68bce87a, 0x1b03397f
- .long 0x57a3d037, 0x9af01f2d
- .long 0x6956fc3b, 0xebb883bd
- .long 0x42d98888, 0x2cff42cf
- .long 0x3771e98f, 0xb3e32c28
- .long 0xb42ae3d9, 0x88f25a3a
- .long 0x2178513a, 0x064f7f26
- .long 0xe0ac139e, 0x4e36f0b0
- .long 0x170076fa, 0xdd7e3b0c
- .long 0x444dd413, 0xbd6f81f8
- .long 0x6f345e45, 0xf285651c
- .long 0x41d17b64, 0x91c9bd4b
- .long 0xff0dba97, 0x10746f3c
- .long 0xa2b73df1, 0x885f087b
- .long 0xf872e54c, 0xc7a68855
- .long 0x1e41e9fc, 0x4c144932
- .long 0x86d8e4d2, 0x271d9844
- .long 0x651bd98b, 0x52148f02
- .long 0x5bb8f1bc, 0x8e766a0c
- .long 0xa90fd27a, 0xa3c6f37a
- .long 0xb3af077a, 0x93a5f730
- .long 0x4984d782, 0xd7c0557f
- .long 0xca6ef3ac, 0x6cb08e5c
- .long 0x234e0b26, 0x63ded06a
- .long 0xdd66cbbb, 0x6b749fb2
- .long 0x4597456a, 0x4d56973c
- .long 0xe9e28eb4, 0x1393e203
- .long 0x7b3ff57a, 0x9669c9df
- .long 0xc9c8b782, 0xcec3662e
- .long 0x3f70cc6f, 0xe417f38a
- .long 0x93e106a4, 0x96c515bb
- .long 0x62ec6c6d, 0x4b9e0f71
- .long 0xd813b325, 0xe6fc4e6a
- .long 0x0df04680, 0xd104b8fc
- .long 0x2342001e, 0x8227bb8a
- .long 0x0a2a8d7e, 0x5b397730
- .long 0x6d9a4957, 0xb0cd4768
- .long 0xe8b6368b, 0xe78eb416
- .long 0xd2c3ed1a, 0x39c7ff35
- .long 0x995a5724, 0x61ff0e01
- .long 0x9ef68d35, 0xd7a4825c
- .long 0x0c139b31, 0x8d96551c
- .long 0xf2271e60, 0x0ab3844b
- .long 0x0b0bf8ca, 0x0bf80dd2
- .long 0x2664fd8b, 0x0167d312
- .long 0xed64812d, 0x8821abed
- .long 0x02ee03b2, 0xf6076544
- .long 0x8604ae0f, 0x6a45d2b2
- .long 0x363bd6b3, 0x26f6a60a
- .long 0x135c83fd, 0xd8d26619
- .long 0x5fabe670, 0xa741c1bf
- .long 0x35ec3279, 0xde87806c
- .long 0x00bcf5f6, 0x98d8d9cb
- .long 0x8ae00689, 0x14338754
- .long 0x17f27698, 0x49c3cc9c
- .long 0x58ca5f00, 0x5bd2011f
- .long 0xaa7c7ad5, 0x68bce87a
- .long 0xb5cfca28, 0xdd07448e
- .long 0xded288f8, 0x57a3d037
- .long 0x59f229bc, 0xdde8f5b9
- .long 0x6d390dec, 0x6956fc3b
- .long 0x37170390, 0xa3e3e02c
- .long 0x6353c1cc, 0x42d98888
- .long 0xc4584f5c, 0xd73c7bea
- .long 0xf48642e9, 0x3771e98f
- .long 0x531377e2, 0x80ff0093
- .long 0xdd35bc8d, 0xb42ae3d9
- .long 0xb25b29f2, 0x8fe4c34d
- .long 0x9a5ede41, 0x2178513a
- .long 0xa563905d, 0xdf99fc11
- .long 0x45cddf4e, 0xe0ac139e
- .long 0xacfa3103, 0x6c23e841
- .long 0xa51b6135, 0x170076fa
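
Step 4's lane combination rests on CRC linearity: with a zero seed, crc(A||B||C) = shift(crc(A), |B|+|C|) XOR shift(crc(B), |C|) XOR crc(C), where shift(c, n) advances a CRC state past n zero bytes, i.e. multiplies by x^(8n) mod G. The K_table above precomputes those shift constants per chunk size so a single pclmulqdq performs each shift; the assembly additionally folds the clmul products into the final qword of lane 3 with one crc32q instead of a separate reduction, a refinement not reproduced here. Below is a standalone C check of the identity, using a bit-serial shift in place of the table lookup. (Lane 1 in the assembly carries the caller's incoming CRC; linearity still holds because the seed rides along through the shift.)

#include <stdint.h>
#include <stdio.h>

#define CRC32C_POLY_REFLECTED 0x82F63B78u

/* Bit-serial CRC-32C (reflected), no final inversion. */
static uint32_t crc32c_serial(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^
			      ((crc & 1) ? CRC32C_POLY_REFLECTED : 0);
	}
	return crc;
}

/* Advance a CRC state past n zero bytes: what one pclmulqdq against a
 * K_table entry accomplishes in a single multiply. */
static uint32_t crc32c_shift(uint32_t crc, size_t n)
{
	while (n--)
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^
			      ((crc & 1) ? CRC32C_POLY_REFLECTED : 0);
	return crc;
}

int main(void)
{
	uint8_t buf[72];

	for (size_t i = 0; i < sizeof(buf); i++)
		buf[i] = (uint8_t)(i * 37 + 1);

	/* Three 24-byte lanes, each CRC'd independently with a zero seed
	 * (the asm likewise zeroes crc1 and crc2 before the lane loop). */
	uint32_t a = crc32c_serial(0, buf, 24);
	uint32_t b = crc32c_serial(0, buf + 24, 24);
	uint32_t c = crc32c_serial(0, buf + 48, 24);

	/* Linearity: crc(A||B||C) = shift(a, 48) ^ shift(b, 24) ^ c. */
	uint32_t combined = crc32c_shift(a, 48) ^ crc32c_shift(b, 24) ^ c;
	uint32_t direct   = crc32c_serial(0, buf, sizeof(buf));

	printf("%08x %08x %s\n", (unsigned)combined, (unsigned)direct,
	       combined == direct ? "match" : "MISMATCH");
	return 0;
}

As an aside, the imul $2731 in step 2 computes floor(len / 24) because 2731 = ceil(2^16 / 24): the approximation (len * 2731) >> 16 overshoots len/24 by only len/196608, which stays below 1/24 for all len < 8192, comfortably covering the partial-block range of len < 3072.
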
diff --git a/arch/x86/lib/crc64-pclmul.S b/arch/x86/lib/crc64-pclmul.S
deleted file mode 100644
index 4173051b5197cd..00000000000000
--- a/arch/x86/lib/crc64-pclmul.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-// Copyright 2025 Google LLC
-
-#include "crc-pclmul-template.S"
-
-DEFINE_CRC_PCLMUL_FUNCS(crc64_msb, /* bits= */ 64, /* lsb= */ 0)
-DEFINE_CRC_PCLMUL_FUNCS(crc64_lsb, /* bits= */ 64, /* lsb= */ 1)
diff --git a/arch/x86/lib/crc64.c b/arch/x86/lib/crc64.c
deleted file mode 100644
index 351a09f5813e2f..00000000000000
--- a/arch/x86/lib/crc64.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * CRC64 using [V]PCLMULQDQ instructions
- *
- * Copyright 2025 Google LLC
- */
-
-#include <linux/crc64.h>
-#include <linux/module.h>
-#include "crc-pclmul-template.h"
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
-
-DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64);
-DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64);
-
-u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
-{
- CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts,
- have_pclmulqdq);
- return crc64_be_generic(crc, p, len);
-}
-EXPORT_SYMBOL_GPL(crc64_be_arch);
-
-u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
-{
- CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts,
- have_pclmulqdq);
- return crc64_nvme_generic(crc, p, len);
-}
-EXPORT_SYMBOL_GPL(crc64_nvme_arch);
-
-static int __init crc64_x86_init(void)
-{
- if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
- static_branch_enable(&have_pclmulqdq);
- INIT_CRC_PCLMUL(crc64_msb);
- INIT_CRC_PCLMUL(crc64_lsb);
- }
- return 0;
-}
-subsys_initcall(crc64_x86_init);
-
-static void __exit crc64_x86_exit(void)
-{
-}
-module_exit(crc64_x86_exit);
-
-MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ instructions");
-MODULE_LICENSE("GPL");