diff options
Diffstat (limited to 'arch/arm64/lib/crc32.S')
-rw-r--r-- | arch/arm64/lib/crc32.S | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S new file mode 100644 index 0000000000..8340dccff4 --- /dev/null +++ b/arch/arm64/lib/crc32.S @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Accelerated CRC32(C) using AArch64 CRC instructions + * + * Copyright (C) 2016 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org> + */ + +#include <linux/linkage.h> +#include <asm/alternative.h> +#include <asm/assembler.h> + + .arch armv8-a+crc + + .macro byteorder, reg, be + .if \be +CPU_LE( rev \reg, \reg ) + .else +CPU_BE( rev \reg, \reg ) + .endif + .endm + + .macro byteorder16, reg, be + .if \be +CPU_LE( rev16 \reg, \reg ) + .else +CPU_BE( rev16 \reg, \reg ) + .endif + .endm + + .macro bitorder, reg, be + .if \be + rbit \reg, \reg + .endif + .endm + + .macro bitorder16, reg, be + .if \be + rbit \reg, \reg + lsr \reg, \reg, #16 + .endif + .endm + + .macro bitorder8, reg, be + .if \be + rbit \reg, \reg + lsr \reg, \reg, #24 + .endif + .endm + + .macro __crc32, c, be=0 + bitorder w0, \be + cmp x2, #16 + b.lt 8f // less than 16 bytes + + and x7, x2, #0x1f + and x2, x2, #~0x1f + cbz x7, 32f // multiple of 32 bytes + + and x8, x7, #0xf + ldp x3, x4, [x1] + add x8, x8, x1 + add x1, x1, x7 + ldp x5, x6, [x8] + byteorder x3, \be + byteorder x4, \be + byteorder x5, \be + byteorder x6, \be + bitorder x3, \be + bitorder x4, \be + bitorder x5, \be + bitorder x6, \be + + tst x7, #8 + crc32\c\()x w8, w0, x3 + csel x3, x3, x4, eq + csel w0, w0, w8, eq + tst x7, #4 + lsr x4, x3, #32 + crc32\c\()w w8, w0, w3 + csel x3, x3, x4, eq + csel w0, w0, w8, eq + tst x7, #2 + lsr w4, w3, #16 + crc32\c\()h w8, w0, w3 + csel w3, w3, w4, eq + csel w0, w0, w8, eq + tst x7, #1 + crc32\c\()b w8, w0, w3 + csel w0, w0, w8, eq + tst x7, #16 + crc32\c\()x w8, w0, x5 + crc32\c\()x w8, w8, x6 + csel w0, w0, w8, eq + cbz x2, 0f + +32: ldp x3, x4, [x1], #32 + sub x2, x2, #32 + ldp x5, x6, [x1, #-16] + byteorder x3, \be + byteorder x4, \be + byteorder x5, \be + byteorder x6, \be + bitorder x3, \be + bitorder x4, \be + bitorder x5, \be + bitorder x6, \be + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + crc32\c\()x w0, w0, x5 + crc32\c\()x w0, w0, x6 + cbnz x2, 32b +0: bitorder w0, \be + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 + byteorder x3, \be + bitorder x3, \be + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 + byteorder w3, \be + bitorder w3, \be + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 + byteorder16 w3, \be + bitorder16 w3, \be + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + bitorder8 w3, \be + crc32\c\()b w0, w0, w3 +0: bitorder w0, \be + ret + .endm + + .align 5 +SYM_FUNC_START(crc32_le) +alternative_if_not ARM64_HAS_CRC32 + b crc32_le_base +alternative_else_nop_endif + __crc32 +SYM_FUNC_END(crc32_le) + + .align 5 +SYM_FUNC_START(__crc32c_le) +alternative_if_not ARM64_HAS_CRC32 + b __crc32c_le_base +alternative_else_nop_endif + __crc32 c +SYM_FUNC_END(__crc32c_le) + + .align 5 +SYM_FUNC_START(crc32_be) +alternative_if_not ARM64_HAS_CRC32 + b crc32_be_base +alternative_else_nop_endif + __crc32 be=1 +SYM_FUNC_END(crc32_be) |