diff options
Diffstat (limited to 'arch/arm/lib')
50 files changed, 5018 insertions, 0 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile new file mode 100644 index 000000000..6d2ba454f --- /dev/null +++ b/arch/arm/lib/Makefile @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# linux/arch/arm/lib/Makefile +# +# Copyright (C) 1995-2000 Russell King +# + +lib-y := changebit.o csumipv6.o csumpartial.o \ + csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ + delay.o delay-loop.o findbit.o memchr.o memcpy.o \ + memmove.o memset.o setbit.o \ + strchr.o strrchr.o \ + testchangebit.o testclearbit.o testsetbit.o \ + ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ + ucmpdi2.o lib1funcs.o div64.o \ + io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ + call_with_stack.o bswapsdi2.o + +mmu-y := clear_user.o copy_page.o getuser.o putuser.o \ + copy_from_user.o copy_to_user.o + +ifdef CONFIG_CC_IS_CLANG + lib-y += backtrace-clang.o +else + lib-y += backtrace.o +endif + +# using lib_ here won't override already available weak symbols +obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o + +lib-$(CONFIG_MMU) += $(mmu-y) + +ifeq ($(CONFIG_CPU_32v3),y) + lib-y += io-readsw-armv3.o io-writesw-armv3.o +else + lib-y += io-readsw-armv4.o io-writesw-armv4.o +endif + +ifeq ($(CONFIG_ARCH_RPC),y) + AFLAGS_delay-loop.o += -march=armv4 +endif + +$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S +$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S + +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) + NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon + CFLAGS_xor-neon.o += $(NEON_FLAGS) + obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o +endif diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S new file mode 100644 index 000000000..b05e95840 --- /dev/null +++ b/arch/arm/lib/ashldi3.S @@ -0,0 +1,54 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__ashldi3) +ENTRY(__aeabi_llsl) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + ARM( orrmi ah, ah, al, lsr ip ) + THUMB( lsrmi r3, al, ip ) + THUMB( orrmi ah, ah, r3 ) + mov al, al, lsl r2 + ret lr + +ENDPROC(__ashldi3) +ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S new file mode 100644 index 000000000..275d7d234 --- /dev/null +++ b/arch/arm/lib/ashrdi3.S @@ -0,0 +1,54 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__ashrdi3) +ENTRY(__aeabi_lasr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) + mov ah, ah, asr r2 + ret lr + +ENDPROC(__ashrdi3) +ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S new file mode 100644 index 000000000..290c52a60 --- /dev/null +++ b/arch/arm/lib/backtrace-clang.S @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/backtrace-clang.S + * + * Copyright (C) 2019 Nathan Huckleberry + * + */ +#include <linux/kern_levels.h> +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* fp is 0 or stack frame */ + +#define frame r4 +#define sv_fp r5 +#define sv_pc r6 +#define mask r7 +#define sv_lr r8 +#define loglvl r9 + +ENTRY(c_backtrace) + +#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) + ret lr +ENDPROC(c_backtrace) +#else + + +/* + * Clang does not store pc or sp in function prologues so we don't know exactly + * where the function starts. + * + * We can treat the current frame's lr as the saved pc and the preceding + * frame's lr as the current frame's lr, but we can't trace the most recent + * call. Inserting a false stack frame allows us to reference the function + * called last in the stacktrace. + * + * If the call instruction was a bl we can look at the callers branch + * instruction to calculate the saved pc. We can recover the pc in most cases, + * but in cases such as calling function pointers we cannot. In this case, + * default to using the lr. This will be some address in the function, but will + * not be the function start. + * + * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these + * are less frequently saved. + * + * Stack frame layout: + * <larger addresses> + * saved lr + * frame=> saved fp + * optionally saved caller registers (r4 - r10) + * optionally saved arguments (r0 - r3) + * <top of stack frame> + * <smaller addresses> + * + * Functions start with the following code sequence: + * corrected pc => stmfd sp!, {..., fp, lr} + * add fp, sp, #x + * stmfd sp!, {r0 - r3} (optional) + * + * + * + * + * + * + * The diagram below shows an example stack setup for dump_stack. + * + * The frame for c_backtrace has pointers to the code of dump_stack. This is + * why the frame of c_backtrace is used to for the pc calculation of + * dump_stack. This is why we must move back a frame to print dump_stack. + * + * The stored locals for dump_stack are in dump_stack's frame. This means that + * to fully print dump_stack's frame we need both the frame for dump_stack (for + * locals) and the frame that was called by dump_stack (for pc). + * + * To print locals we must know where the function start is. If we read the + * function prologue opcodes we can determine which variables are stored in the + * stack frame. + * + * To find the function start of dump_stack we can look at the stored LR of + * show_stack. It points at the instruction directly after the bl dump_stack. + * We can then read the offset from the bl opcode to determine where the branch + * takes us. The address calculated must be the start of dump_stack. + * + * c_backtrace frame dump_stack: + * {[LR] } ============| ... + * {[FP] } =======| | bl c_backtrace + * | |=> ... + * {[R4-R10]} | + * {[R0-R3] } | show_stack: + * dump_stack frame | ... + * {[LR] } =============| bl dump_stack + * {[FP] } <=======| |=> ... + * {[R4-R10]} + * {[R0-R3] } + */ + + stmfd sp!, {r4 - r9, fp, lr} @ Save an extra register + @ to ensure 8 byte alignment + movs frame, r0 @ if frame pointer is zero + beq no_frame @ we have no stack frames + mov loglvl, r2 + tst r1, #0x10 @ 26 or 32-bit mode? + moveq mask, #0xfc000003 + movne mask, #0 @ mask for 32-bit + +/* + * Switches the current frame to be the frame for dump_stack. + */ + add frame, sp, #24 @ switch to false frame +for_each_frame: tst frame, mask @ Check for address exceptions + bne no_frame + +/* + * sv_fp is the stack frame with the locals for the current considered + * function. + * + * sv_pc is the saved lr frame the frame above. This is a pointer to a code + * address within the current considered function, but it is not the function + * start. This value gets updated to be the function start later if it is + * possible. + */ +1001: ldr sv_pc, [frame, #4] @ get saved 'pc' +1002: ldr sv_fp, [frame, #0] @ get saved fp + + teq sv_fp, mask @ make sure next frame exists + beq no_frame + +/* + * sv_lr is the lr from the function that called the current function. This is + * a pointer to a code address in the current function's caller. sv_lr-4 is + * the instruction used to call the current function. + * + * This sv_lr can be used to calculate the function start if the function was + * called using a bl instruction. If the function start can be recovered sv_pc + * is overwritten with the function start. + * + * If the current function was called using a function pointer we cannot + * recover the function start and instead continue with sv_pc as an arbitrary + * value within the current function. If this is the case we cannot print + * registers for the current function, but the stacktrace is still printed + * properly. + */ +1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame + +1004: ldr r0, [sv_lr, #-4] @ get call instruction + ldr r3, .Lopcode+4 + and r2, r3, r0 @ is this a bl call + teq r2, r3 + bne finished_setup @ give up if it's not + and r0, #0xffffff @ get call offset 24-bit int + lsl r0, r0, #8 @ sign extend offset + asr r0, r0, #8 + ldr sv_pc, [sv_fp, #4] @ get lr address + add sv_pc, sv_pc, #-4 @ get call instruction address + add sv_pc, sv_pc, #8 @ take care of prefetch + add sv_pc, sv_pc, r0, lsl #2@ find function start + +finished_setup: + + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode + +/* + * Print the function (sv_pc) and where it was called from (sv_lr). + */ + mov r0, sv_pc + + mov r1, sv_lr + mov r2, frame + bic r1, r1, mask @ mask PC/LR for the mode + mov r3, loglvl + bl dump_backtrace_entry + +/* + * Test if the function start is a stmfd instruction to determine which + * registers were stored in the function prologue. + * + * If we could not recover the sv_pc because we were called through a function + * pointer the comparison will fail and no registers will print. Unwinding will + * continue as if there had been no registers stored in this frame. + */ +1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, lr} + ldr r3, .Lopcode @ instruction exists, + teq r3, r1, lsr #11 + ldr r0, [frame] @ locals are stored in + @ the preceding frame + subeq r0, r0, #4 + mov r2, loglvl + bleq dump_backtrace_stm @ dump saved registers + +/* + * If we are out of frames or if the next frame is invalid. + */ + teq sv_fp, #0 @ zero saved fp means + beq no_frame @ no further frames + + cmp sv_fp, frame @ next frame must be + mov frame, sv_fp @ above the current frame +#ifdef CONFIG_IRQSTACKS + @ + @ Kernel stacks may be discontiguous in memory. If the next + @ frame is below the previous frame, accept it as long as it + @ lives in kernel memory. + @ + cmpls sv_fp, #PAGE_OFFSET +#endif + bhi for_each_frame + +1006: adr r0, .Lbad + mov r1, loglvl + mov r2, frame + bl _printk +no_frame: ldmfd sp!, {r4 - r9, fp, pc} +ENDPROC(c_backtrace) + .pushsection __ex_table,"a" + .align 3 + .long 1001b, 1006b + .long 1002b, 1006b + .long 1003b, 1006b + .long 1004b, finished_setup + .long 1005b, 1006b + .popsection + +.Lbad: .asciz "%sBacktrace aborted due to bad frame pointer <%p>\n" + .align +.Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr} + .word 0x0b000000 @ bl if these bits are set + +#endif diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S new file mode 100644 index 000000000..293a2716b --- /dev/null +++ b/arch/arm/lib/backtrace.S @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/backtrace.S + * + * Copyright (C) 1995, 1996 Russell King + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + */ +#include <linux/kern_levels.h> +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +@ fp is 0 or stack frame + +#define frame r4 +#define sv_fp r5 +#define sv_pc r6 +#define mask r7 +#define offset r8 +#define loglvl r9 + +ENTRY(c_backtrace) + +#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) + ret lr +ENDPROC(c_backtrace) +#else + stmfd sp!, {r4 - r9, lr} @ Save an extra register so we have a location... + movs frame, r0 @ if frame pointer is zero + beq no_frame @ we have no stack frames + mov loglvl, r2 + + tst r1, #0x10 @ 26 or 32-bit mode? + ARM( moveq mask, #0xfc000003 ) + THUMB( moveq mask, #0xfc000000 ) + THUMB( orreq mask, #0x03 ) + movne mask, #0 @ mask for 32-bit + +1: stmfd sp!, {pc} @ calculate offset of PC stored + ldr r0, [sp], #4 @ by stmfd for this CPU + adr r1, 1b + sub offset, r0, r1 + +/* + * Stack frame layout: + * optionally saved caller registers (r4 - r10) + * saved fp + * saved sp + * saved lr + * frame => saved pc + * optionally saved arguments (r0 - r3) + * saved sp => <next word> + * + * Functions start with the following code sequence: + * mov ip, sp + * stmfd sp!, {r0 - r3} (optional) + * corrected pc => stmfd sp!, {..., fp, ip, lr, pc} + */ +for_each_frame: tst frame, mask @ Check for address exceptions + bne no_frame + +1001: ldr sv_pc, [frame, #0] @ get saved pc +1002: ldr sv_fp, [frame, #-12] @ get saved fp + + sub sv_pc, sv_pc, offset @ Correct PC for prefetching + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode + +1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 @ adjust saved 'pc' back one + teq r3, r2, lsr #11 @ instruction + subne r0, sv_pc, #4 @ allow for mov + subeq r0, sv_pc, #8 @ allow for mov + stmia + + ldr r1, [frame, #-4] @ get saved lr + mov r2, frame + bic r1, r1, mask @ mask PC/LR for the mode + mov r3, loglvl + bl dump_backtrace_entry + + ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 + teq r3, r1, lsr #11 + ldreq r0, [frame, #-8] @ get sp + subeq r0, r0, #4 @ point at the last arg + mov r2, loglvl + bleq dump_backtrace_stm @ dump saved registers + +1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} + ldr r3, .Ldsi @ instruction exists, + teq r3, r1, lsr #11 + subeq r0, frame, #16 + mov r2, loglvl + bleq dump_backtrace_stm @ dump saved registers + + teq sv_fp, #0 @ zero saved fp means + beq no_frame @ no further frames + + cmp sv_fp, frame @ next frame must be + mov frame, sv_fp @ above the current frame +#ifdef CONFIG_IRQSTACKS + @ + @ Kernel stacks may be discontiguous in memory. If the next + @ frame is below the previous frame, accept it as long as it + @ lives in kernel memory. + @ + cmpls sv_fp, #PAGE_OFFSET +#endif + bhi for_each_frame + +1006: adr r0, .Lbad + mov r1, loglvl + mov r2, frame + bl _printk +no_frame: ldmfd sp!, {r4 - r9, pc} +ENDPROC(c_backtrace) + + .pushsection __ex_table,"a" + .align 3 + .long 1001b, 1006b + .long 1002b, 1006b + .long 1003b, 1006b + .long 1004b, 1006b + .popsection + +.Lbad: .asciz "%sBacktrace aborted due to bad frame pointer <%p>\n" + .align +.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} + .word 0xe92d0000 >> 11 @ stmfd sp!, {} + +#endif diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h new file mode 100644 index 000000000..f069d1b23 --- /dev/null +++ b/arch/arm/lib/bitops.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/assembler.h> +#include <asm/unwind.h> + +#if __LINUX_ARM_ARCH__ >= 6 + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strbne r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif + mov r3, r2, lsl r3 +1: ldrex r2, [r1] + \instr r2, r2, r3 + strex r0, r2, [r1] + cmp r0, #0 + bne 1b + bx lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm + + .macro __testop, name, instr, store, barrier +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strbne r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 @ create mask + \barrier +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif +1: ldrex r2, [r1] + ands r0, r2, r3 @ save old value of bit + \instr r2, r2, r3 @ toggle bit + strex ip, r2, [r1] + cmp ip, #0 + bne 1b + \barrier + cmp r0, #0 + movne r0, #1 +2: bx lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm + + .macro testop, name, instr, store + __testop \name, \instr, \store, smp_dmb + .endm + + .macro sync_testop, name, instr, store + __testop \name, \instr, \store, __smp_dmb + .endm +#else + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strbne r1, [ip] @ assert word-aligned + and r2, r0, #31 + mov r0, r0, lsr #5 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2] + \instr r2, r2, r3 + str r2, [r1, r0, lsl #2] + restore_irqs ip + ret lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm + +/** + * testop - implement a test_and_xxx_bit operation. + * @instr: operational instruction + * @store: store instruction + * + * Note: we can trivially conditionalise the store instruction + * to avoid dirtying the data cache. + */ + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strbne r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2]! + mov r0, #1 + tst r2, r0, lsl r3 + \instr r2, r2, r0, lsl r3 + \store r2, [r1] + moveq r0, #0 + restore_irqs ip + ret lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm +#endif diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S new file mode 100644 index 000000000..591ba077e --- /dev/null +++ b/arch/arm/lib/bswapsdi2.S @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +#if __LINUX_ARM_ARCH__ >= 6 +ENTRY(__bswapsi2) + rev r0, r0 + bx lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + rev r3, r0 + rev r0, r1 + mov r1, r3 + bx lr +ENDPROC(__bswapdi2) +#else +ENTRY(__bswapsi2) + eor r3, r0, r0, ror #16 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + eor r0, r3, r0, ror #8 + ret lr +ENDPROC(__bswapsi2) + +ENTRY(__bswapdi2) + mov ip, r1 + eor r3, ip, ip, ror #16 + eor r1, r0, r0, ror #16 + mov r1, r1, lsr #8 + mov r3, r3, lsr #8 + bic r3, r3, #0xff00 + bic r1, r1, #0xff00 + eor r1, r1, r0, ror #8 + eor r0, r3, ip, ror #8 + ret lr +ENDPROC(__bswapdi2) +#endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S new file mode 100644 index 000000000..5030d4e8d --- /dev/null +++ b/arch/arm/lib/call_with_stack.S @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * arch/arm/lib/call_with_stack.S + * + * Copyright (C) 2011 ARM Ltd. + * Written by Will Deacon <will.deacon@arm.com> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +/* + * void call_with_stack(void (*fn)(void *), void *arg, void *sp) + * + * Change the stack to that pointed at by sp, then invoke fn(arg) with + * the new stack. + * + * The sequence below follows the APCS frame convention for frame pointer + * unwinding, and implements the unwinder annotations needed by the EABI + * unwinder. + */ + +ENTRY(call_with_stack) +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC) + mov ip, sp + push {fp, ip, lr, pc} + sub fp, ip, #4 +#else +UNWIND( .fnstart ) +UNWIND( .save {fpreg, lr} ) + push {fpreg, lr} +UNWIND( .setfp fpreg, sp ) + mov fpreg, sp +#endif + mov sp, r2 + mov r2, r0 + mov r0, r1 + + bl_r r2 + +#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC) + ldmdb fp, {fp, sp, pc} +#else + mov sp, fpreg + pop {fpreg, pc} +UNWIND( .fnend ) +#endif + .globl call_with_stack_end +call_with_stack_end: +ENDPROC(call_with_stack) diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S new file mode 100644 index 000000000..02424765e --- /dev/null +++ b/arch/arm/lib/changebit.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/changebit.S + * + * Copyright (C) 1995-1996 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +bitop _change_bit, eor diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S new file mode 100644 index 000000000..8f2c4dbfc --- /dev/null +++ b/arch/arm/lib/clear_user.S @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/clear_user.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + + .text + +/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + */ +ENTRY(__clear_user_std) +WEAK(arm_clear_user) +UNWIND(.fnstart) +UNWIND(.save {r1, lr}) + stmfd sp!, {r1, lr} + mov r2, #0 + cmp r1, #4 + blt 2f + ands ip, r0, #3 + beq 1f + cmp ip, #2 + strusr r2, r0, 1 + strusr r2, r0, 1, le + strusr r2, r0, 1, lt + rsb ip, ip, #4 + sub r1, r1, ip @ 7 6 5 4 3 2 1 +1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 + strusr r2, r0, 4, pl, rept=2 + bpl 1b + adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 + strusr r2, r0, 4, pl +2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x + strusr r2, r0, 1, ne, rept=2 + tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 + it ne @ explicit IT needed for the label +USER( strbtne r2, [r0]) + mov r0, #0 + ldmfd sp!, {r1, pc} +UNWIND(.fnend) +ENDPROC(arm_clear_user) +ENDPROC(__clear_user_std) + + .pushsection .text.fixup,"ax" + .align 0 +9001: ldmfd sp!, {r0, pc} + .popsection + diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S new file mode 100644 index 000000000..4646dee8a --- /dev/null +++ b/arch/arm/lib/clearbit.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/clearbit.S + * + * Copyright (C) 1995-1996 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +bitop _clear_bit, bic diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S new file mode 100644 index 000000000..270de7deb --- /dev/null +++ b/arch/arm/lib/copy_from_user.S @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/copy_from_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +/* + * Prototype: + * + * size_t arm_copy_from_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to kernel memory from user memory + * + * Params: + * + * to = kernel memory + * from = user memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. + */ + +#ifdef CONFIG_CPU_USE_DOMAINS + +#ifndef CONFIG_THUMB2_KERNEL +#define LDR1W_SHIFT 0 +#else +#define LDR1W_SHIFT 1 +#endif + + .macro ldr1w ptr reg abort + ldrusr \reg, \ptr, 4, abort=\abort + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldr1w \ptr, \reg1, \abort + ldr1w \ptr, \reg2, \abort + ldr1w \ptr, \reg3, \abort + ldr1w \ptr, \reg4, \abort + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort + ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort + .endm + +#else + +#define LDR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + USERL(\abort, W(ldr) \reg, [\ptr], #4) + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}) + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) + .endm + +#endif /* CONFIG_CPU_USE_DOMAINS */ + + .macro ldr1b ptr reg cond=al abort + ldrusr \reg, \ptr, 1, \cond, abort=\abort + .endm + +#define STR1W_SHIFT 0 + + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + strb\cond \reg, [\ptr], #1 + .endm + + .macro enter regs:vararg + mov r3, #0 +UNWIND( .save {r0, r2, r3, \regs} ) + stmdb sp!, {r0, r2, r3, \regs} + .endm + + .macro exit regs:vararg + add sp, sp, #8 + ldmfd sp!, {r0, \regs} + .endm + + .text + +ENTRY(arm_copy_from_user) +#ifdef CONFIG_CPU_SPECTRE + ldr r3, =TASK_SIZE + uaccess_mask_range_ptr r1, r2, r3, ip +#endif + +#include "copy_template.S" + +ENDPROC(arm_copy_from_user) + + .pushsection .text.fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2, r3} + sub r0, r0, r1 + rsb r0, r0, r2 + copy_abort_end + .popsection + diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S new file mode 100644 index 000000000..5db1a8ee3 --- /dev/null +++ b/arch/arm/lib/copy_page.S @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/copypage.S + * + * Copyright (C) 1995-1999 Russell King + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) + + .text + .align 5 +/* + * StrongARM optimised copy_page routine + * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s) + * Note that we probably achieve closer to the 100MB/s target with + * the core clock switching. + */ +ENTRY(copy_page) + stmfd sp!, {r4, lr} @ 2 + PLD( pld [r1, #0] ) + PLD( pld [r1, #L1_CACHE_BYTES] ) + mov r2, #COPY_COUNT @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) + PLD( pld [r1, #3 * L1_CACHE_BYTES]) +2: + .rept (2 * L1_CACHE_BYTES / 16 - 1) + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + .endr + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmiagt r1!, {r3, r4, ip, lr} @ 4 + bgt 1b @ 1 + PLD( ldmiaeq r1!, {r3, r4, ip, lr} ) + PLD( beq 2b ) + ldmfd sp!, {r4, pc} @ 3 +ENDPROC(copy_page) diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S new file mode 100644 index 000000000..8fbafb074 --- /dev/null +++ b/arch/arm/lib/copy_template.S @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/copy_template.s + * + * Code template for optimized memory copy functions + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + */ + +/* + * Theory of operation + * ------------------- + * + * This file provides the core code for a forward memory copy used in + * the implementation of memcopy(), copy_to_user() and copy_from_user(). + * + * The including file must define the following accessor macros + * according to the need of the given function: + * + * ldr1w ptr reg abort + * + * This loads one word from 'ptr', stores it in 'reg' and increments + * 'ptr' to the next word. The 'abort' argument is used for fixup tables. + * + * ldr4w ptr reg1 reg2 reg3 reg4 abort + * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * + * This loads four or eight words starting from 'ptr', stores them + * in provided registers and increments 'ptr' past those words. + * The'abort' argument is used for fixup tables. + * + * ldr1b ptr reg cond abort + * + * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. + * It also must apply the condition code if provided, otherwise the + * "al" condition is assumed by default. + * + * str1w ptr reg abort + * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * str1b ptr reg cond abort + * + * Same as their ldr* counterparts, but data is stored to 'ptr' location + * rather than being loaded. + * + * enter reg1 reg2 + * + * Preserve the provided registers on the stack plus any additional + * data as needed by the implementation including this code. Called + * upon code entry. + * + * usave reg1 reg2 + * + * Unwind annotation macro is corresponding for 'enter' macro. + * It tell unwinder that preserved some provided registers on the stack + * and additional data by a prior 'enter' macro. + * + * exit reg1 reg2 + * + * Restore registers with the values previously saved with the + * 'preserv' macro. Called upon code termination. + * + * LDR1W_SHIFT + * STR1W_SHIFT + * + * Correction to be applied to the "ip" register when branching into + * the ldr1w or str1w instructions (some of these macros may expand to + * than one 32bit instruction in Thumb-2) + */ + + UNWIND( .fnstart ) + enter r4, UNWIND(fpreg,) lr + UNWIND( .setfp fpreg, sp ) + UNWIND( mov fpreg, sp ) + + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5, r6, r8, r9} + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcsne r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +3: PLD( pld [r1, #124] ) +4: ldr8w r1, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f + subs r2, r2, #32 + str8w r0, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 +#if LDR1W_SHIFT > 0 + lsl ip, ip, #LDR1W_SHIFT +#endif + addne pc, pc, ip @ C is always clear here + b 7f +6: + .rept (1 << LDR1W_SHIFT) + W(nop) + .endr + ldr1w r1, r3, abort=20f + ldr1w r1, r4, abort=20f + ldr1w r1, r5, abort=20f + ldr1w r1, r6, abort=20f + ldr1w r1, r8, abort=20f + ldr1w r1, r9, abort=20f + ldr1w r1, lr, abort=20f + +#if LDR1W_SHIFT < STR1W_SHIFT + lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT +#elif LDR1W_SHIFT > STR1W_SHIFT + lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT +#endif + add pc, pc, ip + nop + .rept (1 << STR1W_SHIFT) + W(nop) + .endr + str1w r0, r3, abort=20f + str1w r0, r4, abort=20f + str1w r0, r5, abort=20f + str1w r0, r6, abort=20f + str1w r0, r8, abort=20f + str1w r0, r9, abort=20f + str1w r0, lr, abort=20f + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5, r6, r8, r9} + +8: movs r2, r2, lsl #31 + ldr1b r1, r3, ne, abort=21f + ldr1b r1, r4, cs, abort=21f + ldr1b r1, ip, cs, abort=21f + str1b r0, r3, ne, abort=21f + str1b r0, r4, cs, abort=21f + str1b r0, ip, cs, abort=21f + + exit r4, UNWIND(fpreg,) pc + +9: rsb ip, ip, #4 + cmp ip, #2 + ldr1b r1, r3, gt, abort=21f + ldr1b r1, r4, ge, abort=21f + ldr1b r1, lr, abort=21f + str1b r0, r3, gt, abort=21f + str1b r0, r4, ge, abort=21f + subs r2, r2, ip + str1b r0, lr, abort=21f + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr1w r1, lr, abort=21f + beq 17f + bgt 18f + + + .macro forward_copy_shift pull push + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcsne r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5, r6, r8 - r10} + + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +12: PLD( pld [r1, #124] ) +13: ldr4w r1, r4, r5, r6, r8, abort=19f + mov r3, lr, lspull #\pull + subs r2, r2, #32 + ldr4w r1, r9, r10, ip, lr, abort=19f + orr r3, r3, r4, lspush #\push + mov r4, r4, lspull #\pull + orr r4, r4, r5, lspush #\push + mov r5, r5, lspull #\pull + orr r5, r5, r6, lspush #\push + mov r6, r6, lspull #\pull + orr r6, r6, r8, lspush #\push + mov r8, r8, lspull #\pull + orr r8, r8, r9, lspush #\push + mov r9, r9, lspull #\pull + orr r9, r9, r10, lspush #\push + mov r10, r10, lspull #\pull + orr r10, r10, ip, lspush #\push + mov ip, ip, lspull #\pull + orr ip, ip, lr, lspush #\push + str8w r0, r3, r4, r5, r6, r8, r9, r10, ip, abort=19f + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5, r6, r8 - r10} + +14: ands ip, r2, #28 + beq 16f + +15: mov r3, lr, lspull #\pull + ldr1w r1, lr, abort=21f + subs ip, ip, #4 + orr r3, r3, lr, lspush #\push + str1w r0, r3, abort=21f + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: sub r1, r1, #(\push / 8) + b 8b + + .endm + + + forward_copy_shift pull=8 push=24 + +17: forward_copy_shift pull=16 push=16 + +18: forward_copy_shift pull=24 push=8 + + UNWIND( .fnend ) + +/* + * Abort preamble and completion macros. + * If a fixup handler is required then those macros must surround it. + * It is assumed that the fixup code will handle the private part of + * the exit macro. + */ + + .macro copy_abort_preamble +19: ldmfd sp!, {r5, r6, r8 - r10} + b 21f +20: ldmfd sp!, {r5, r6, r8, r9} +21: + .endm + + .macro copy_abort_end + ldmfd sp!, {r4, UNWIND(fpreg,) pc} + .endm + diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S new file mode 100644 index 000000000..fac49e57c --- /dev/null +++ b/arch/arm/lib/copy_to_user.S @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/copy_to_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +/* + * Prototype: + * + * size_t arm_copy_to_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to user memory from kernel memory + * + * Params: + * + * to = user memory + * from = kernel memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. + */ + +#define LDR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldrb\cond \reg, [\ptr], #1 + .endm + +#ifdef CONFIG_CPU_USE_DOMAINS + +#ifndef CONFIG_THUMB2_KERNEL +#define STR1W_SHIFT 0 +#else +#define STR1W_SHIFT 1 +#endif + + .macro str1w ptr reg abort + strusr \reg, \ptr, 4, abort=\abort + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + str1w \ptr, \reg1, \abort + str1w \ptr, \reg2, \abort + str1w \ptr, \reg3, \abort + str1w \ptr, \reg4, \abort + str1w \ptr, \reg5, \abort + str1w \ptr, \reg6, \abort + str1w \ptr, \reg7, \abort + str1w \ptr, \reg8, \abort + .endm + +#else + +#define STR1W_SHIFT 0 + + .macro str1w ptr reg abort + USERL(\abort, W(str) \reg, [\ptr], #4) + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) + .endm + +#endif /* CONFIG_CPU_USE_DOMAINS */ + + .macro str1b ptr reg cond=al abort + strusr \reg, \ptr, 1, \cond, abort=\abort + .endm + + .macro enter regs:vararg + mov r3, #0 +UNWIND( .save {r0, r2, r3, \regs} ) + stmdb sp!, {r0, r2, r3, \regs} + .endm + + .macro exit regs:vararg + add sp, sp, #8 + ldmfd sp!, {r0, \regs} + .endm + + .text + +ENTRY(__copy_to_user_std) +WEAK(arm_copy_to_user) +#ifdef CONFIG_CPU_SPECTRE + ldr r3, =TASK_SIZE + uaccess_mask_range_ptr r0, r2, r3, ip +#endif + +#include "copy_template.S" + +ENDPROC(arm_copy_to_user) +ENDPROC(__copy_to_user_std) + + .pushsection .text.fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2, r3} + sub r0, r0, r1 + rsb r0, r0, r2 + copy_abort_end + .popsection diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S new file mode 100644 index 000000000..3559d5151 --- /dev/null +++ b/arch/arm/lib/csumipv6.S @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/csumipv6.S + * + * Copyright (C) 1995-1998 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +ENTRY(__csum_ipv6_magic) + str lr, [sp, #-4]! + adds ip, r2, r3 + ldmia r1, {r1 - r3, lr} + adcs ip, ip, r1 + adcs ip, ip, r2 + adcs ip, ip, r3 + adcs ip, ip, lr + ldmia r0, {r0 - r3} + adcs r0, ip, r0 + adcs r0, r0, r1 + adcs r0, r0, r2 + ldr r2, [sp, #4] + adcs r0, r0, r3 + adcs r0, r0, r2 + adcs r0, r0, #0 + ldmfd sp!, {pc} +ENDPROC(__csum_ipv6_magic) + diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S new file mode 100644 index 000000000..87c9471be --- /dev/null +++ b/arch/arm/lib/csumpartial.S @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/csumpartial.S + * + * Copyright (C) 1995-1998 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* + * Function: __u32 csum_partial(const char *src, int len, __u32 sum) + * Params : r0 = buffer, r1 = len, r2 = checksum + * Returns : r0 = new checksum + */ + +buf .req r0 +len .req r1 +sum .req r2 +td0 .req r3 +td1 .req r4 @ save before use +td2 .req r5 @ save before use +td3 .req lr + +.Lzero: mov r0, sum + add sp, sp, #4 + ldr pc, [sp], #4 + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: teq len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + tst buf, #1 @ odd address? + movne sum, sum, ror #8 + ldrbne td0, [buf], #1 + subne len, len, #1 + adcsne sum, sum, td0, put_byte_1 + +.Lless4: tst len, #6 + beq .Lless8_byte + + /* we are now half-word aligned */ + +.Lless8_wordlp: +#if __LINUX_ARM_ARCH__ >= 4 + ldrh td0, [buf], #2 + sub len, len, #2 +#else + ldrb td0, [buf], #1 + ldrb td3, [buf], #1 + sub len, len, #2 +#ifndef __ARMEB__ + orr td0, td0, td3, lsl #8 +#else + orr td0, td3, td0, lsl #8 +#endif +#endif + adcs sum, sum, td0 + tst len, #6 + bne .Lless8_wordlp + +.Lless8_byte: tst len, #1 @ odd number of bytes + ldrbne td0, [buf], #1 @ include last byte + adcsne sum, sum, td0, put_byte_0 @ update checksum + +.Ldone: adc r0, sum, #0 @ collect up the last carry + ldr td0, [sp], #4 + tst td0, #1 @ check buffer alignment + movne r0, r0, ror #8 @ rotate checksum by 8 bits + ldr pc, [sp], #4 @ return + +.Lnot_aligned: tst buf, #1 @ odd address + ldrbne td0, [buf], #1 @ make even + subne len, len, #1 + adcsne sum, sum, td0, put_byte_1 @ update checksum + + tst buf, #2 @ 32-bit aligned? +#if __LINUX_ARM_ARCH__ >= 4 + ldrhne td0, [buf], #2 @ make 32-bit aligned + subne len, len, #2 +#else + ldrbne td0, [buf], #1 + ldrbne ip, [buf], #1 + subne len, len, #2 +#ifndef __ARMEB__ + orrne td0, td0, ip, lsl #8 +#else + orrne td0, ip, td0, lsl #8 +#endif +#endif + adcsne sum, sum, td0 @ update checksum + ret lr + +ENTRY(csum_partial) + stmfd sp!, {buf, lr} + cmp len, #8 @ Ensure that we have at least + blo .Lless8 @ 8 bytes to copy. + + tst buf, #1 + movne sum, sum, ror #8 + + adds sum, sum, #0 @ C = 0 + tst buf, #3 @ Test destination alignment + blne .Lnot_aligned @ align destination, return here + +1: bics ip, len, #31 + beq 3f + + stmfd sp!, {r4 - r5} +2: ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + sub ip, ip, #32 + teq ip, #0 + bne 2b + ldmfd sp!, {r4 - r5} + +3: tst len, #0x1c @ should not change C + beq .Lless4 + +4: ldr td0, [buf], #4 + sub len, len, #4 + adcs sum, sum, td0 + tst len, #0x1c + bne 4b + b .Lless4 +ENDPROC(csum_partial) diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S new file mode 100644 index 000000000..1ca6aadd6 --- /dev/null +++ b/arch/arm/lib/csumpartialcopy.S @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/csumpartialcopy.S + * + * Copyright (C) 1995-1998 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len) + * Params : r0 = src, r1 = dst, r2 = len + * Returns : r0 = new checksum + */ + + .macro save_regs + stmfd sp!, {r1, r4 - r8, lr} + .endm + + .macro load_regs + ldmfd sp!, {r1, r4 - r8, pc} + .endm + + .macro load1b, reg1 + ldrb \reg1, [r0], #1 + .endm + + .macro load2b, reg1, reg2 + ldrb \reg1, [r0], #1 + ldrb \reg2, [r0], #1 + .endm + + .macro load1l, reg1 + ldr \reg1, [r0], #4 + .endm + + .macro load2l, reg1, reg2 + ldr \reg1, [r0], #4 + ldr \reg2, [r0], #4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldmia r0!, {\reg1, \reg2, \reg3, \reg4} + .endm + +#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) +#define FN_EXIT ENDPROC(csum_partial_copy_nocheck) + +#include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S new file mode 100644 index 000000000..0fd5c10e9 --- /dev/null +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/csumpartialcopygeneric.S + * + * Copyright (C) 1995-2001 Russell King + */ +#include <asm/assembler.h> + +/* + * unsigned int + * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) + * r0 = src, r1 = dst, r2 = len, r3 = sum + * Returns : r0 = checksum + * + * Note that 'tst' and 'teq' preserve the carry flag. + */ + +src .req r0 +dst .req r1 +len .req r2 +sum .req r3 + +.Lzero: mov r0, sum + load_regs + + /* + * Align an unaligned destination pointer. We know that + * we have >= 8 bytes here, so we don't need to check + * the length. Note that the source pointer hasn't been + * aligned yet. + */ +.Ldst_unaligned: + tst dst, #1 + beq .Ldst_16bit + + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst dst, #2 + reteq lr @ dst is now 32bit aligned + +.Ldst_16bit: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 + ret lr @ dst is now 32bit aligned + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: teq len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + tst dst, #1 @ dst 16-bit aligned + beq .Lless8_aligned + + /* Align dst */ + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst len, #6 + beq .Lless8_byteonly + +1: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 +.Lless8_aligned: + tst len, #6 + bne 1b +.Lless8_byteonly: + tst len, #1 + beq .Ldone + load1b r8 + adcs sum, sum, r8, put_byte_0 @ update checksum + strb r8, [dst], #1 + b .Ldone + +FN_ENTRY + save_regs + mov sum, #-1 + + cmp len, #8 @ Ensure that we have at least + blo .Lless8 @ 8 bytes to copy. + + adds sum, sum, #0 @ C = 0 + tst dst, #3 @ Test destination alignment + blne .Ldst_unaligned @ align destination, return here + + /* + * Ok, the dst pointer is now 32bit aligned, and we know + * that we must have more than 4 bytes to copy. Note + * that C contains the carry from the dst alignment above. + */ + + tst src, #3 @ Test source alignment + bne .Lsrc_not_aligned + + /* Routine for src & dst aligned */ + + bics ip, len, #15 + beq 2f + +1: load4l r4, r5, r6, r7 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + sub ip, ip, #16 + teq ip, #0 + bne 1b + +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r4, r5 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + tst ip, #4 + beq 4f + +3: load1l r4 + str r4, [dst], #4 + adcs sum, sum, r4 + +4: ands len, len, #3 + beq .Ldone + load1l r4 + tst len, #2 + mov r5, r4, get_byte_0 + beq .Lexit + adcs sum, sum, r4, lspush #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 +.Lexit: tst len, #1 + strbne r5, [dst], #1 + andne r5, r5, #255 + adcsne sum, sum, r5, put_byte_0 + + /* + * If the dst pointer was not 16-bit aligned, we + * need to rotate the checksum here to get around + * the inefficient byte manipulations in the + * architecture independent code. + */ +.Ldone: adc r0, sum, #0 + ldr sum, [sp, #0] @ dst + tst sum, #1 + movne r0, r0, ror #8 + load_regs + +.Lsrc_not_aligned: + adc sum, sum, #0 @ include C from dst alignment + and ip, src, #3 + bic src, src, #3 + load1l r5 + cmp ip, #2 + beq .Lsrc2_aligned + bhi .Lsrc3_aligned + mov r4, r5, lspull #8 @ C = 0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + mov r6, r6, lspull #8 + orr r6, r6, r7, lspush #24 + mov r7, r7, lspull #8 + orr r7, r7, r8, lspush #24 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, lspull #8 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, lspush #24 + mov r5, r5, lspull #8 + orr r5, r5, r6, lspush #24 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, lspull #8 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, lspush #24 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, lspull #8 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + adcs sum, sum, r4, lspush #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 + b .Lexit + +.Lsrc2_aligned: mov r4, r5, lspull #16 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + mov r6, r6, lspull #16 + orr r6, r6, r7, lspush #16 + mov r7, r7, lspull #16 + orr r7, r7, r8, lspush #16 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, lspull #16 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, lspush #16 + mov r5, r5, lspull #16 + orr r5, r5, r6, lspush #16 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, lspull #16 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, lspush #16 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, lspull #16 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + adcs sum, sum, r4 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + tst len, #1 + beq .Ldone + load1b r5 + b .Lexit + +.Lsrc3_aligned: mov r4, r5, lspull #24 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + mov r6, r6, lspull #24 + orr r6, r6, r7, lspush #8 + mov r7, r7, lspull #24 + orr r7, r7, r8, lspush #8 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, lspull #24 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, lspush #8 + mov r5, r5, lspull #24 + orr r5, r5, r6, lspush #8 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, lspull #24 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, lspush #8 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, lspull #24 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + strb r5, [dst], #1 + adcs sum, sum, r4 + load1l r4 + mov r5, r4, get_byte_0 + strb r5, [dst], #1 + adcs sum, sum, r4, lspush #24 + mov r5, r4, get_byte_1 + b .Lexit +FN_EXIT diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S new file mode 100644 index 000000000..6928781e6 --- /dev/null +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/csumpartialcopyuser.S + * + * Copyright (C) 1995-1998 Russell King + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/asm-offsets.h> + + .text + +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + .macro save_regs + mrc p15, 0, ip, c3, c0, 0 + stmfd sp!, {r1, r2, r4 - r8, ip, lr} + uaccess_enable ip + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, ip, lr} + mcr p15, 0, ip, c3, c0, 0 + ret lr + .endm +#else + .macro save_regs + stmfd sp!, {r1, r2, r4 - r8, lr} + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, pc} + .endm +#endif + + .macro load1b, reg1 + ldrusr \reg1, r0, 1 + .endm + + .macro load2b, reg1, reg2 + ldrusr \reg1, r0, 1 + ldrusr \reg2, r0, 1 + .endm + + .macro load1l, reg1 + ldrusr \reg1, r0, 4 + .endm + + .macro load2l, reg1, reg2 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + ldrusr \reg3, r0, 4 + ldrusr \reg4, r0, 4 + .endm + +/* + * unsigned int + * csum_partial_copy_from_user(const char *src, char *dst, int len) + * r0 = src, r1 = dst, r2 = len + * Returns : r0 = checksum or 0 + */ + +#define FN_ENTRY ENTRY(csum_partial_copy_from_user) +#define FN_EXIT ENDPROC(csum_partial_copy_from_user) + +#include "csumpartialcopygeneric.S" + +/* + * We report fault by returning 0 csum - impossible in normal case, since + * we start with 0xffffffff for initial sum. + */ + .pushsection .text.fixup,"ax" + .align 4 +9001: mov r0, #0 + load_regs + .popsection diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S new file mode 100644 index 000000000..3ccade0f8 --- /dev/null +++ b/arch/arm/lib/delay-loop.S @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/delay.h> + + .text + +.LC0: .word loops_per_jiffy +.LC1: .word UDELAY_MULT + +/* + * loops = r0 * HZ * loops_per_jiffy / 1000000 + * + * r0 <= 2000 + * HZ <= 1000 + */ + +ENTRY(__loop_udelay) + ldr r2, .LC1 + mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT +ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0 + ldr r2, .LC0 + ldr r2, [r2] + umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy + adds r1, r1, #0xffffffff @ rounding up ... + adcs r0, r0, r0 @ and right shift by 31 + reteq lr + + .align 3 + +@ Delay routine +ENTRY(__loop_delay) + subs r0, r0, #1 +#if 0 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 + retls lr + subs r0, r0, #1 +#endif + bhi __loop_delay + ret lr +ENDPROC(__loop_udelay) +ENDPROC(__loop_const_udelay) +ENDPROC(__loop_delay) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c new file mode 100644 index 000000000..b7fe84f68 --- /dev/null +++ b/arch/arm/lib/delay.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/clocksource.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/timex.h> + +/* + * Default to the loop-based delay implementation. + */ +struct arm_delay_ops arm_delay_ops __ro_after_init = { + .delay = __loop_delay, + .const_udelay = __loop_const_udelay, + .udelay = __loop_udelay, +}; + +static const struct delay_timer *delay_timer; +static bool delay_calibrated; +static u64 delay_res; + +int read_current_timer(unsigned long *timer_val) +{ + if (!delay_timer) + return -ENXIO; + + *timer_val = delay_timer->read_current_timer(); + return 0; +} +EXPORT_SYMBOL_GPL(read_current_timer); + +static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) +{ + return (cyc * mult) >> shift; +} + +static void __timer_delay(unsigned long cycles) +{ + cycles_t start = get_cycles(); + + while ((get_cycles() - start) < cycles) + cpu_relax(); +} + +static void __timer_const_udelay(unsigned long xloops) +{ + unsigned long long loops = xloops; + loops *= arm_delay_ops.ticks_per_jiffy; + __timer_delay(loops >> UDELAY_SHIFT); +} + +static void __timer_udelay(unsigned long usecs) +{ + __timer_const_udelay(usecs * UDELAY_MULT); +} + +void __init register_current_timer_delay(const struct delay_timer *timer) +{ + u32 new_mult, new_shift; + u64 res; + + clocks_calc_mult_shift(&new_mult, &new_shift, timer->freq, + NSEC_PER_SEC, 3600); + res = cyc_to_ns(1ULL, new_mult, new_shift); + + if (res > 1000) { + pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n", + timer, res); + return; + } + + if (!delay_calibrated && (!delay_res || (res < delay_res))) { + pr_info("Switching to timer-based delay loop, resolution %lluns\n", res); + delay_timer = timer; + lpj_fine = timer->freq / HZ; + delay_res = res; + + /* cpufreq may scale loops_per_jiffy, so keep a private copy */ + arm_delay_ops.ticks_per_jiffy = lpj_fine; + arm_delay_ops.delay = __timer_delay; + arm_delay_ops.const_udelay = __timer_const_udelay; + arm_delay_ops.udelay = __timer_udelay; + } else { + pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); + } +} + +unsigned long calibrate_delay_is_known(void) +{ + delay_calibrated = true; + return lpj_fine; +} + +void calibration_delay_done(void) +{ + delay_calibrated = true; +} diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 000000000..a87c02925 --- /dev/null +++ b/arch/arm/lib/div64.S @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +ENTRY(__do_div64) +UNWIND(.fnstart) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subscs xh, xh, yl + movsne ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + retlo lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + ret lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + reteq lr + + @ We still have remainer bits in the low part. Bring them up. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + ret lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + ret lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + reteq lr +UNWIND(.fnend) + +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 + + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 + +UNWIND(.fnend) +ENDPROC(__do_div64) diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S new file mode 100644 index 000000000..7fd3600db --- /dev/null +++ b/arch/arm/lib/findbit.S @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/findbit.S + * + * Copyright (C) 1995-2000 Russell King + * + * 16th March 2001 - John Ripley <jripley@sonicblue.com> + * Fixed so that "size" is an exclusive not an inclusive quantity. + * All users of these functions expect exclusive sizes, and may + * also call with zero size. + * Reworked by rmk. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Find a 'zero' bit + * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); + */ +ENTRY(_find_first_zero_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_zero_bit_le) + +/* + * Purpose : Find next 'zero' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_zero_bit_le) + cmp r2, r1 + bhs 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_le) + +/* + * Purpose : Find a 'one' bit + * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); + */ +ENTRY(_find_first_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_bit_le) + +/* + * Purpose : Find next 'one' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_bit_le) + cmp r2, r1 + bhs 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_le) + +#ifdef __ARMEB__ + +ENTRY(_find_first_zero_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_zero_bit_be) + +ENTRY(_find_next_zero_bit_be) + cmp r2, r1 + bhs 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_be) + +ENTRY(_find_first_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + ret lr +ENDPROC(_find_first_bit_be) + +ENTRY(_find_next_bit_be) + cmp r2, r1 + bhs 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_be) + +#endif + +/* + * One or more bits in the LSB of r3 are assumed to be set. + */ +.L_found: +#if __LINUX_ARM_ARCH__ >= 5 + rsb r0, r3, #0 + and r3, r3, r0 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 +#else + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 +#endif + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 + ret lr + diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S new file mode 100644 index 000000000..c5e420750 --- /dev/null +++ b/arch/arm/lib/getuser.S @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/getuser.S + * + * Copyright (C) 2001 Russell King + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make them more + * efficient, especially as they return an error value in addition to + * the "real" return value. + * + * __get_user_X + * + * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved + * Outputs: r0 is the error code + * r2, r3 contains the zero-extended value + * lr corrupted + * + * No other registers must be altered. (see <asm/uaccess.h> + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000. + * Note also that it is intended that __get_user_bad is not global. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/domain.h> + +ENTRY(__get_user_1) + check_uaccess r0, 1, r1, r2, __get_user_bad +1: TUSER(ldrb) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_1) +_ASM_NOKPROBE(__get_user_1) + +ENTRY(__get_user_2) + check_uaccess r0, 2, r1, r2, __get_user_bad +#if __LINUX_ARM_ARCH__ >= 6 + +2: TUSER(ldrh) r2, [r0] + +#else + +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +2: ldrbt r2, [r0], #1 +3: ldrbt rb, [r0], #0 +#else +rb .req r0 +2: ldrb r2, [r0] +3: ldrb rb, [r0, #1] +#endif +#ifndef __ARMEB__ + orr r2, r2, rb, lsl #8 +#else + orr r2, rb, r2, lsl #8 +#endif + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + + mov r0, #0 + ret lr +ENDPROC(__get_user_2) +_ASM_NOKPROBE(__get_user_2) + +ENTRY(__get_user_4) + check_uaccess r0, 4, r1, r2, __get_user_bad +4: TUSER(ldr) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_4) +_ASM_NOKPROBE(__get_user_4) + +ENTRY(__get_user_8) + check_uaccess r0, 8, r1, r2, __get_user_bad8 +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(ldr) r2, [r0] +6: TUSER(ldr) r3, [r0, #4] +#else +5: TUSER(ldr) r2, [r0], #4 +6: TUSER(ldr) r3, [r0] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_8) +_ASM_NOKPROBE(__get_user_8) + +#ifdef __ARMEB__ +ENTRY(__get_user_32t_8) + check_uaccess r0, 8, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS + add r0, r0, #4 +7: ldrt r2, [r0] +#else +7: ldr r2, [r0, #4] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_32t_8) +_ASM_NOKPROBE(__get_user_32t_8) + +ENTRY(__get_user_64t_1) + check_uaccess r0, 1, r1, r2, __get_user_bad8 +8: TUSER(ldrb) r3, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_1) +_ASM_NOKPROBE(__get_user_64t_1) + +ENTRY(__get_user_64t_2) + check_uaccess r0, 2, r1, r2, __get_user_bad8 +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +9: ldrbt r3, [r0], #1 +10: ldrbt rb, [r0], #0 +#else +rb .req r0 +9: ldrb r3, [r0] +10: ldrb rb, [r0, #1] +#endif + orr r3, rb, r3, lsl #8 + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_2) +_ASM_NOKPROBE(__get_user_64t_2) + +ENTRY(__get_user_64t_4) + check_uaccess r0, 4, r1, r2, __get_user_bad8 +11: TUSER(ldr) r3, [r0] + mov r0, #0 + ret lr +ENDPROC(__get_user_64t_4) +_ASM_NOKPROBE(__get_user_64t_4) +#endif + +__get_user_bad8: + mov r3, #0 +__get_user_bad: + mov r2, #0 + mov r0, #-EFAULT + ret lr +ENDPROC(__get_user_bad) +ENDPROC(__get_user_bad8) +_ASM_NOKPROBE(__get_user_bad) +_ASM_NOKPROBE(__get_user_bad8) + +.pushsection __ex_table, "a" + .long 1b, __get_user_bad + .long 2b, __get_user_bad +#if __LINUX_ARM_ARCH__ < 6 + .long 3b, __get_user_bad +#endif + .long 4b, __get_user_bad + .long 5b, __get_user_bad8 + .long 6b, __get_user_bad8 +#ifdef __ARMEB__ + .long 7b, __get_user_bad + .long 8b, __get_user_bad8 + .long 9b, __get_user_bad8 + .long 10b, __get_user_bad8 + .long 11b, __get_user_bad8 +#endif +.popsection diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S new file mode 100644 index 000000000..0def9388f --- /dev/null +++ b/arch/arm/lib/io-readsb.S @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-readsb.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +.Linsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrbge r3, [r0] + strbge r3, [r1], #1 + ldrbgt r3, [r0] + strbgt r3, [r1], #1 + subs r2, r2, ip + bne .Linsb_aligned + +ENTRY(__raw_readsb) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne .Linsb_align + +.Linsb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .Linsb_no_16 + +.Linsb_16_lp: ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + ldrb r5, [r0] + orr r4, r4, r6, put_byte_2 + ldrb r6, [r0] + orr r4, r4, ip, put_byte_3 + ldrb ip, [r0] + mov r5, r5, put_byte_0 + ldrb lr, [r0] + orr r5, r5, r6, put_byte_1 + ldrb r6, [r0] + orr r5, r5, ip, put_byte_2 + ldrb ip, [r0] + orr r5, r5, lr, put_byte_3 + ldrb lr, [r0] + mov r6, r6, put_byte_0 + orr r6, r6, ip, put_byte_1 + ldrb ip, [r0] + orr r6, r6, lr, put_byte_2 + orr r6, r6, ip, put_byte_3 + stmia r1!, {r3 - r6} + + subs r2, r2, #16 + bpl .Linsb_16_lp + + tst r2, #15 + ldmfdeq sp!, {r4 - r6, pc} + +.Linsb_no_16: tst r2, #8 + beq .Linsb_no_8 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + orr r4, r4, r6, put_byte_2 + orr r4, r4, ip, put_byte_3 + stmia r1!, {r3, r4} + +.Linsb_no_8: tst r2, #4 + beq .Linsb_no_4 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + ldrb r6, [r0] + mov r3, r3, put_byte_0 + orr r3, r3, r4, put_byte_1 + orr r3, r3, r5, put_byte_2 + orr r3, r3, r6, put_byte_3 + str r3, [r1], #4 + +.Linsb_no_4: ands r2, r2, #3 + ldmfdeq sp!, {r4 - r6, pc} + + cmp r2, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrbge r3, [r0] + strbge r3, [r1], #1 + ldrbgt r3, [r0] + strbgt r3, [r1] + + ldmfd sp!, {r4 - r6, pc} +ENDPROC(__raw_readsb) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S new file mode 100644 index 000000000..d9f6b372b --- /dev/null +++ b/arch/arm/lib/io-readsl.S @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-readsl.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +ENTRY(__raw_readsl) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldr r3, [r0, #0] + ldr r4, [r0, #0] + ldr ip, [r0, #0] + ldr lr, [r0, #0] + subs r2, r2, #4 + stmia r1!, {r3, r4, ip, lr} + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldrcs r3, [r0, #0] + ldrcs ip, [r0, #0] + stmiacs r1!, {r3, ip} + ldrne r3, [r0, #0] + strne r3, [r1, #0] + ret lr + +3: ldr r3, [r0] + cmp ip, #2 + mov ip, r3, get_byte_0 + strb ip, [r1], #1 + bgt 6f + mov ip, r3, get_byte_1 + strb ip, [r1], #1 + beq 5f + mov ip, r3, get_byte_2 + strb ip, [r1], #1 + +4: subs r2, r2, #1 + mov ip, r3, lspull #24 + ldrne r3, [r0] + orrne ip, ip, r3, lspush #8 + strne ip, [r1], #4 + bne 4b + b 8f + +5: subs r2, r2, #1 + mov ip, r3, lspull #16 + ldrne r3, [r0] + orrne ip, ip, r3, lspush #16 + strne ip, [r1], #4 + bne 5b + b 7f + +6: subs r2, r2, #1 + mov ip, r3, lspull #8 + ldrne r3, [r0] + orrne ip, ip, r3, lspush #24 + strne ip, [r1], #4 + bne 6b + + mov r3, ip, get_byte_2 + strb r3, [r1, #2] +7: mov r3, ip, get_byte_1 + strb r3, [r1, #1] +8: mov r3, ip, get_byte_0 + strb r3, [r1, #0] + ret lr +ENDPROC(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S new file mode 100644 index 000000000..266043610 --- /dev/null +++ b/arch/arm/lib/io-readsw-armv3.S @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-readsw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +.Linsw_bad_alignment: + adr r0, .Linsw_bad_align_msg + mov r2, lr + b panic +.Linsw_bad_align_msg: + .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Linsw_align: tst r1, #1 + bne .Linsw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + reteq lr + +ENTRY(__raw_readsw) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + tst r1, #3 + bne .Linsw_align + +.Linsw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_insw_8 + +.Linsw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + + subs r2, r2, #8 + bpl .Linsw_8_lp + + tst r2, #7 + ldmfdeq sp!, {r4, r5, r6, pc} + +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.Lno_insw_4: tst r2, #2 + beq .Lno_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.Lno_insw_2: tst r2, #1 + ldrne r3, [r0] + strbne r3, [r1], #1 + movne r3, r3, lsr #8 + strbne r3, [r1] + + ldmfd sp!, {r4, r5, r6, pc} + + diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S new file mode 100644 index 000000000..228c176a9 --- /dev/null +++ b/arch/arm/lib/io-readsw-armv4.S @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-readsw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro pack, rd, hw1, hw2 +#ifndef __ARMEB__ + orr \rd, \hw1, \hw2, lsl #16 +#else + orr \rd, \hw2, \hw1, lsl #16 +#endif + .endm + +.Linsw_align: movs ip, r1, lsl #31 + bne .Linsw_noalign + ldrh ip, [r0] + sub r2, r2, #1 + strh ip, [r1], #2 + +ENTRY(__raw_readsw) + teq r2, #0 + reteq lr + tst r1, #3 + bne .Linsw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .Lno_insw_8 + +.Linsw_8_lp: ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh r5, [r0] + pack r4, r4, r5 + + ldrh r5, [r0] + ldrh ip, [r0] + pack r5, r5, ip + + ldrh ip, [r0] + ldrh lr, [r0] + pack ip, ip, lr + + subs r2, r2, #8 + stmia r1!, {r3 - r5, ip} + bpl .Linsw_8_lp + +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 + + ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh ip, [r0] + pack r4, r4, ip + + stmia r1!, {r3, r4} + +.Lno_insw_4: movs r2, r2, lsl #31 + bcc .Lno_insw_2 + + ldrh r3, [r0] + ldrh ip, [r0] + pack r3, r3, ip + str r3, [r1], #4 + +.Lno_insw_2: ldrhne r3, [r0] + strhne r3, [r1] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define _BE_ONLY_(code...) code +#define _LE_ONLY_(code...) +#define push_hbyte0 lsr #8 +#define pull_hbyte1 lsl #24 +#else +#define _BE_ONLY_(code...) +#define _LE_ONLY_(code...) code +#define push_hbyte0 lsl #24 +#define pull_hbyte1 lsr #8 +#endif + +.Linsw_noalign: stmfd sp!, {r4, lr} + ldrbcc ip, [r1, #-1]! + bcc 1f + + ldrh ip, [r0] + sub r2, r2, #1 + _BE_ONLY_( mov ip, ip, ror #8 ) + strb ip, [r1], #1 + _LE_ONLY_( mov ip, ip, lsr #8 ) + _BE_ONLY_( mov ip, ip, lsr #24 ) + +1: subs r2, r2, #2 + bmi 3f + _BE_ONLY_( mov ip, ip, lsl #24 ) + +2: ldrh r3, [r0] + ldrh r4, [r0] + subs r2, r2, #2 + orr ip, ip, r3, lsl #8 + orr ip, ip, r4, push_hbyte0 + str ip, [r1], #4 + mov ip, r4, pull_hbyte1 + bpl 2b + + _BE_ONLY_( mov ip, ip, lsr #24 ) + +3: tst r2, #1 + strb ip, [r1], #1 + ldrhne ip, [r0] + _BE_ONLY_( movne ip, ip, ror #8 ) + strbne ip, [r1], #1 + _LE_ONLY_( movne ip, ip, lsr #8 ) + _BE_ONLY_( movne ip, ip, lsr #24 ) + strbne ip, [r1] + ldmfd sp!, {r4, pc} +ENDPROC(__raw_readsw) diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S new file mode 100644 index 000000000..e2ae312f0 --- /dev/null +++ b/arch/arm/lib/io-writesb.S @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-writesb.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro outword, rd +#ifndef __ARMEB__ + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] +#else + mov lr, \rd, lsr #24 + strb lr, [r0] + mov lr, \rd, lsr #16 + strb lr, [r0] + mov lr, \rd, lsr #8 + strb lr, [r0] + strb \rd, [r0] +#endif + .endm + +.Loutsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrbge r3, [r1], #1 + strbge r3, [r0] + ldrbgt r3, [r1], #1 + strbgt r3, [r0] + subs r2, r2, ip + bne .Loutsb_aligned + +ENTRY(__raw_writesb) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne .Loutsb_align + +.Loutsb_aligned: + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #16 + bmi .Loutsb_no_16 + +.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip} + outword r3 + outword r4 + outword r5 + outword ip + subs r2, r2, #16 + bpl .Loutsb_16_lp + + tst r2, #15 + ldmfdeq sp!, {r4, r5, pc} + +.Loutsb_no_16: tst r2, #8 + beq .Loutsb_no_8 + + ldmia r1!, {r3, r4} + outword r3 + outword r4 + +.Loutsb_no_8: tst r2, #4 + beq .Loutsb_no_4 + + ldr r3, [r1], #4 + outword r3 + +.Loutsb_no_4: ands r2, r2, #3 + ldmfdeq sp!, {r4, r5, pc} + + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrbge r3, [r1], #1 + strbge r3, [r0] + ldrbgt r3, [r1] + strbgt r3, [r0] + + ldmfd sp!, {r4, r5, pc} +ENDPROC(__raw_writesb) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S new file mode 100644 index 000000000..89ef7be61 --- /dev/null +++ b/arch/arm/lib/io-writesl.S @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-writesl.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +ENTRY(__raw_writesl) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldmia r1!, {r3, r4, ip, lr} + subs r2, r2, #4 + str r3, [r0, #0] + str r4, [r0, #0] + str ip, [r0, #0] + str lr, [r0, #0] + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldmiacs r1!, {r3, ip} + strcs r3, [r0, #0] + ldrne r3, [r1, #0] + strcs ip, [r0, #0] + strne r3, [r0, #0] + ret lr + +3: bic r1, r1, #3 + ldr r3, [r1], #4 + cmp ip, #2 + blt 5f + bgt 6f + +4: mov ip, r3, lspull #16 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, lspush #16 + str ip, [r0] + bne 4b + ret lr + +5: mov ip, r3, lspull #8 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, lspush #24 + str ip, [r0] + bne 5b + ret lr + +6: mov ip, r3, lspull #24 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, lspush #8 + str ip, [r0] + bne 6b + ret lr +ENDPROC(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S new file mode 100644 index 000000000..4cabbee7f --- /dev/null +++ b/arch/arm/lib/io-writesw-armv3.S @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-writesw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +.Loutsw_bad_alignment: + adr r0, .Loutsw_bad_align_msg + mov r2, lr + b panic +.Loutsw_bad_align_msg: + .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Loutsw_align: tst r1, #1 + bne .Loutsw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + reteq lr + +ENTRY(__raw_writesw) + teq r2, #0 @ do we have to check for the zero len? + reteq lr + tst r1, #3 + bne .Loutsw_align + + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_outsw_8 + +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .Loutsw_8_lp + + tst r2, #7 + ldmfdeq sp!, {r4, r5, r6, pc} + +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_4: tst r2, #2 + beq .Lno_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S new file mode 100644 index 000000000..12eec5326 --- /dev/null +++ b/arch/arm/lib/io-writesw-armv4.S @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/io-writesw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro outword, rd +#ifndef __ARMEB__ + strh \rd, [r0] + mov \rd, \rd, lsr #16 + strh \rd, [r0] +#else + mov lr, \rd, lsr #16 + strh lr, [r0] + strh \rd, [r0] +#endif + .endm + +.Loutsw_align: movs ip, r1, lsl #31 + bne .Loutsw_noalign + + ldrh r3, [r1], #2 + sub r2, r2, #1 + strh r3, [r0] + +ENTRY(__raw_writesw) + teq r2, #0 + reteq lr + ands r3, r1, #3 + bne .Loutsw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .Lno_outsw_8 + +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip} + subs r2, r2, #8 + outword r3 + outword r4 + outword r5 + outword ip + bpl .Loutsw_8_lp + +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 + + ldmia r1!, {r3, ip} + outword r3 + outword ip + +.Lno_outsw_4: movs r2, r2, lsl #31 + bcc .Lno_outsw_2 + + ldr r3, [r1], #4 + outword r3 + +.Lno_outsw_2: ldrhne r3, [r1] + strhne r3, [r0] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define pull_hbyte0 lsl #8 +#define push_hbyte1 lsr #24 +#else +#define pull_hbyte0 lsr #24 +#define push_hbyte1 lsl #8 +#endif + +.Loutsw_noalign: + ARM( ldr r3, [r1, -r3]! ) + THUMB( rsb r3, r3, #0 ) + THUMB( ldr r3, [r1, r3] ) + THUMB( sub r1, r3 ) + subcs r2, r2, #1 + bcs 2f + subs r2, r2, #2 + bmi 3f + +1: mov ip, r3, lsr #8 + strh ip, [r0] +2: mov ip, r3, pull_hbyte0 + ldr r3, [r1, #4]! + subs r2, r2, #2 + orr ip, ip, r3, push_hbyte1 + strh ip, [r0] + bpl 1b + + tst r2, #1 +3: movne ip, r3, lsr #8 + strhne ip, [r0] + ret lr +ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S new file mode 100644 index 000000000..c23f9d9e2 --- /dev/null +++ b/arch/arm/lib/lib1funcs.S @@ -0,0 +1,371 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre <nico@fluxnic.net> + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed subtractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subsge \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/subtractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +#ifdef CONFIG_ARM_PATCH_IDIV + .align 3 +#endif + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) + + subs r2, r1, #1 + reteq lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + ret lr + +11: moveq r0, #1 + movne r0, #0 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + ret lr + +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) +UNWIND(.fnstart) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + retls lr + + ARM_MOD_BODY r0, r1, r2, r3 + + ret lr + +UNWIND(.fnend) +ENDPROC(__umodsi3) + +#ifdef CONFIG_ARM_PATCH_IDIV + .align 3 +#endif + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + ret lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) +UNWIND(.fnstart) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S new file mode 100644 index 000000000..922dcd88b --- /dev/null +++ b/arch/arm/lib/lshrdi3.S @@ -0,0 +1,54 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__lshrdi3) +ENTRY(__aeabi_llsr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) + mov ah, ah, lsr r2 + ret lr + +ENDPROC(__lshrdi3) +ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S new file mode 100644 index 000000000..95bedafd0 --- /dev/null +++ b/arch/arm/lib/memchr.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/memchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(memchr) +1: subs r2, r2, #1 + bmi 2f + ldrb r3, [r0], #1 + teq r3, r1 + bne 1b + sub r0, r0, #1 +2: movne r0, #0 + ret lr +ENDPROC(memchr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S new file mode 100644 index 000000000..90f2b645a --- /dev/null +++ b/arch/arm/lib/memcpy.S @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/memcpy.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +#define LDR1W_SHIFT 0 +#define STR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldrb\cond \reg, [\ptr], #1 + .endm + + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + strb\cond \reg, [\ptr], #1 + .endm + + .macro enter regs:vararg +UNWIND( .save {r0, \regs} ) + stmdb sp!, {r0, \regs} + .endm + + .macro exit regs:vararg + ldmfd sp!, {r0, \regs} + .endm + + .text + +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ + +ENTRY(__memcpy) +ENTRY(mmiocpy) +WEAK(memcpy) + +#include "copy_template.S" + +ENDPROC(memcpy) +ENDPROC(mmiocpy) +ENDPROC(__memcpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S new file mode 100644 index 000000000..641055403 --- /dev/null +++ b/arch/arm/lib/memmove.S @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/memmove.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: (C) MontaVista Software Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + + .text + +/* + * Prototype: void *memmove(void *dest, const void *src, size_t n); + * + * Note: + * + * If the memory regions don't overlap, we simply branch to memcpy which is + * normally a bit faster. Otherwise the copy is done going downwards. This + * is a transposition of the code from copy_template.S but with the copy + * occurring in the opposite direction. + */ + +ENTRY(__memmove) +WEAK(memmove) + UNWIND( .fnstart ) + + subs ip, r0, r1 + cmphi r2, ip + bls __memcpy + UNWIND( .fnend ) + + UNWIND( .fnstart ) + UNWIND( .save {r0, r4, fpreg, lr} ) + stmfd sp!, {r0, r4, UNWIND(fpreg,) lr} + UNWIND( .setfp fpreg, sp ) + UNWIND( mov fpreg, sp ) + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5, r6, r8, r9} + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcsne r4, ip, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #-4] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r8, r9, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r8, r9, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: W(nop) + W(ldr) r3, [r1, #-4]! + W(ldr) r4, [r1, #-4]! + W(ldr) r5, [r1, #-4]! + W(ldr) r6, [r1, #-4]! + W(ldr) r8, [r1, #-4]! + W(ldr) r9, [r1, #-4]! + W(ldr) lr, [r1, #-4]! + + add pc, pc, ip + nop + W(nop) + W(str) r3, [r0, #-4]! + W(str) r4, [r0, #-4]! + W(str) r5, [r0, #-4]! + W(str) r6, [r0, #-4]! + W(str) r8, [r0, #-4]! + W(str) r9, [r0, #-4]! + W(str) lr, [r0, #-4]! + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5, r6, r8, r9} + +8: movs r2, r2, lsl #31 + ldrbne r3, [r1, #-1]! + ldrbcs r4, [r1, #-1]! + ldrbcs ip, [r1, #-1] + strbne r3, [r0, #-1]! + strbcs r4, [r0, #-1]! + strbcs ip, [r0, #-1] + ldmfd sp!, {r0, r4, UNWIND(fpreg,) pc} + +9: cmp ip, #2 + ldrbgt r3, [r1, #-1]! + ldrbge r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strbgt r3, [r0, #-1]! + strbge r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f + + + .macro backward_copy_shift push pull + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcsne r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5, r6, r8 - r10} + + PLD( pld [r1, #-4] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r8, r9, r10, ip} + mov lr, r3, lspush #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, lspull #\pull + mov ip, ip, lspush #\push + orr ip, ip, r10, lspull #\pull + mov r10, r10, lspush #\push + orr r10, r10, r9, lspull #\pull + mov r9, r9, lspush #\push + orr r9, r9, r8, lspull #\pull + mov r8, r8, lspush #\push + orr r8, r8, r6, lspull #\pull + mov r6, r6, lspush #\push + orr r6, r6, r5, lspull #\pull + mov r5, r5, lspush #\push + orr r5, r5, r4, lspull #\pull + mov r4, r4, lspush #\push + orr r4, r4, r3, lspull #\pull + stmdb r0!, {r4 - r6, r8 - r10, ip, lr} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5, r6, r8 - r10} + +14: ands ip, r2, #28 + beq 16f + +15: mov lr, r3, lspush #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, lspull #\pull + str lr, [r0, #-4]! + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: add r1, r1, #(\pull / 8) + b 8b + + .endm + + + backward_copy_shift push=8 pull=24 + +17: backward_copy_shift push=16 pull=16 + +18: backward_copy_shift push=24 pull=8 + + UNWIND( .fnend ) +ENDPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S new file mode 100644 index 000000000..de75ae4d5 --- /dev/null +++ b/arch/arm/lib/memset.S @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/memset.S + * + * Copyright (C) 1995-2000 Russell King + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + + .text + .align 5 + +ENTRY(__memset) +ENTRY(mmioset) +WEAK(memset) +UNWIND( .fnstart ) + and r1, r1, #255 @ cast to unsigned char + ands r3, r0, #3 @ 1 unaligned? + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 +/* + * we know that the pointer in ip is aligned to a word boundary. + */ +1: orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 +7: cmp r2, #16 + blt 4f +UNWIND( .fnend ) + +#if ! CALGN(1)+0 + +/* + * We need 2 extra registers for this loop - use r8 and the LR + */ +UNWIND( .fnstart ) +UNWIND( .save {r8, lr} ) + stmfd sp!, {r8, lr} + mov r8, r1 + mov lr, r3 + +2: subs r2, r2, #64 + stmiage ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmiage ip!, {r1, r3, r8, lr} + stmiage ip!, {r1, r3, r8, lr} + stmiage ip!, {r1, r3, r8, lr} + bgt 2b + ldmfdeq sp!, {r8, pc} @ Now <64 bytes to go. +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r2, #32 + stmiane ip!, {r1, r3, r8, lr} + stmiane ip!, {r1, r3, r8, lr} + tst r2, #16 + stmiane ip!, {r1, r3, r8, lr} + ldmfd sp!, {r8, lr} +UNWIND( .fnend ) + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + +UNWIND( .fnstart ) +UNWIND( .save {r4-r8, lr} ) + stmfd sp!, {r4-r8, lr} + mov r4, r1 + mov r5, r3 + mov r6, r1 + mov r7, r3 + mov r8, r1 + mov lr, r3 + + cmp r2, #96 + tstgt ip, #31 + ble 3f + + and r8, ip, #31 + rsb r8, r8, #32 + sub r2, r2, r8 + movs r8, r8, lsl #(32 - 4) + stmiacs ip!, {r4, r5, r6, r7} + stmiami ip!, {r4, r5} + tst r8, #(1 << 30) + mov r8, r1 + strne r1, [ip], #4 + +3: subs r2, r2, #64 + stmiage ip!, {r1, r3-r8, lr} + stmiage ip!, {r1, r3-r8, lr} + bgt 3b + ldmfdeq sp!, {r4-r8, pc} + + tst r2, #32 + stmiane ip!, {r1, r3-r8, lr} + tst r2, #16 + stmiane ip!, {r4-r7} + ldmfd sp!, {r4-r8, lr} +UNWIND( .fnend ) + +#endif + +UNWIND( .fnstart ) +4: tst r2, #8 + stmiane ip!, {r1, r3} + tst r2, #4 + strne r1, [ip], #4 +/* + * When we get here, we've got less than 4 bytes to set. We + * may have an unaligned pointer as well. + */ +5: tst r2, #2 + strbne r1, [ip], #1 + strbne r1, [ip], #1 + tst r2, #1 + strbne r1, [ip], #1 + ret lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? + cmp r3, #2 @ 1 + strblt r1, [ip], #1 @ 1 + strble r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b +UNWIND( .fnend ) +ENDPROC(memset) +ENDPROC(mmioset) +ENDPROC(__memset) + +ENTRY(__memset32) +UNWIND( .fnstart ) + mov r3, r1 @ copy r1 to r3 and fall into memset64 +UNWIND( .fnend ) +ENDPROC(__memset32) +ENTRY(__memset64) +UNWIND( .fnstart ) + mov ip, r0 @ preserve r0 as return value + b 7b @ jump into the middle of memset +UNWIND( .fnend ) +ENDPROC(__memset64) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S new file mode 100644 index 000000000..8362fe6c0 --- /dev/null +++ b/arch/arm/lib/muldi3.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/muldi3.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +ENTRY(__muldi3) +ENTRY(__aeabi_lmul) + + mul xh, yl, xh + mla xh, xl, yh, xh + mov ip, xl, lsr #16 + mov yh, yl, lsr #16 + bic xl, xl, ip, lsl #16 + bic yl, yl, yh, lsl #16 + mla xh, yh, ip, xh + mul yh, xl, yh + mul xl, yl, xl + mul ip, yl, ip + adds xl, xl, yh, lsl #16 + adc xh, xh, yh, lsr #16 + adds xl, xl, ip, lsl #16 + adc xh, xh, ip, lsr #16 + ret lr + +ENDPROC(__muldi3) +ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S new file mode 100644 index 000000000..bdd8836dc --- /dev/null +++ b/arch/arm/lib/putuser.S @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/putuser.S + * + * Copyright (C) 2001 Russell King + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make + * them more efficient, especially as they return an error + * value in addition to the "real" return value. + * + * __put_user_X + * + * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved + * r2, r3 contains the value + * Outputs: r0 is the error code + * lr corrupted + * + * No other registers must be altered. (see <asm/uaccess.h> + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000 + * Note also that it is intended that __put_user_bad is not global. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/domain.h> + +ENTRY(__put_user_1) + check_uaccess r0, 1, r1, ip, __put_user_bad +1: TUSER(strb) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__put_user_1) + +ENTRY(__put_user_2) + check_uaccess r0, 2, r1, ip, __put_user_bad +#if __LINUX_ARM_ARCH__ >= 6 + +2: TUSER(strh) r2, [r0] + +#else + + mov ip, r2, lsr #8 +#ifndef __ARMEB__ +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] +#else +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] +#endif + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + mov r0, #0 + ret lr +ENDPROC(__put_user_2) + +ENTRY(__put_user_4) + check_uaccess r0, 4, r1, ip, __put_user_bad +4: TUSER(str) r2, [r0] + mov r0, #0 + ret lr +ENDPROC(__put_user_4) + +ENTRY(__put_user_8) + check_uaccess r0, 8, r1, ip, __put_user_bad +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] +#else +5: TUSER(str) r2, [r0], #4 +6: TUSER(str) r3, [r0] +#endif + mov r0, #0 + ret lr +ENDPROC(__put_user_8) + +__put_user_bad: + mov r0, #-EFAULT + ret lr +ENDPROC(__put_user_bad) + +.pushsection __ex_table, "a" + .long 1b, __put_user_bad + .long 2b, __put_user_bad +#if __LINUX_ARM_ARCH__ < 6 + .long 3b, __put_user_bad +#endif + .long 4b, __put_user_bad + .long 5b, __put_user_bad + .long 6b, __put_user_bad +.popsection diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S new file mode 100644 index 000000000..19a96f43f --- /dev/null +++ b/arch/arm/lib/setbit.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/setbit.S + * + * Copyright (C) 1995-1996 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +bitop _set_bit, orr diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S new file mode 100644 index 000000000..09e2cc8a8 --- /dev/null +++ b/arch/arm/lib/strchr.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/strchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strchr) + and r1, r1, #0xff +1: ldrb r2, [r0], #1 + teq r2, r1 + teqne r2, #0 + bne 1b + teq r2, r1 + movne r0, #0 + subeq r0, r0, #1 + ret lr +ENDPROC(strchr) diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S new file mode 100644 index 000000000..5e87247d1 --- /dev/null +++ b/arch/arm/lib/strrchr.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/strrchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strrchr) + mov r3, #0 +1: ldrb r2, [r0], #1 + teq r2, r1 + subeq r3, r0, #1 + teq r2, #0 + bne 1b + mov r0, r3 + ret lr +ENDPROC(strrchr) diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S new file mode 100644 index 000000000..f13fe9bc2 --- /dev/null +++ b/arch/arm/lib/testchangebit.S @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/testchangebit.S + * + * Copyright (C) 1995-1996 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +testop _test_and_change_bit, eor, str + +#if __LINUX_ARM_ARCH__ >= 6 +sync_testop _sync_test_and_change_bit, eor, str +#endif diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S new file mode 100644 index 000000000..4d2c5ca62 --- /dev/null +++ b/arch/arm/lib/testclearbit.S @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/testclearbit.S + * + * Copyright (C) 1995-1996 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +testop _test_and_clear_bit, bicne, strne + +#if __LINUX_ARM_ARCH__ >= 6 +sync_testop _sync_test_and_clear_bit, bicne, strne +#endif diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S new file mode 100644 index 000000000..649dbab65 --- /dev/null +++ b/arch/arm/lib/testsetbit.S @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/testsetbit.S + * + * Copyright (C) 1995-1996 Russell King + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +testop _test_and_set_bit, orreq, streq + +#if __LINUX_ARM_ARCH__ >= 6 +sync_testop _sync_test_and_set_bit, orreq, streq +#endif diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c new file mode 100644 index 000000000..e4c2677cc --- /dev/null +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/lib/uaccess_with_memcpy.c + * + * Written by: Lennert Buytenhek and Nicolas Pitre + * Copyright (C) 2009 Marvell Semiconductor + */ + +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/uaccess.h> +#include <linux/rwsem.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/hardirq.h> /* for in_atomic() */ +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/hugetlb.h> +#include <asm/current.h> +#include <asm/page.h> + +static int +pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) +{ + unsigned long addr = (unsigned long)_addr; + pgd_t *pgd; + p4d_t *p4d; + pmd_t *pmd; + pte_t *pte; + pud_t *pud; + spinlock_t *ptl; + + pgd = pgd_offset(current->mm, addr); + if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd))) + return 0; + + p4d = p4d_offset(pgd, addr); + if (unlikely(p4d_none(*p4d) || p4d_bad(*p4d))) + return 0; + + pud = pud_offset(p4d, addr); + if (unlikely(pud_none(*pud) || pud_bad(*pud))) + return 0; + + pmd = pmd_offset(pud, addr); + if (unlikely(pmd_none(*pmd))) + return 0; + + /* + * A pmd can be bad if it refers to a HugeTLB or THP page. + * + * Both THP and HugeTLB pages have the same pmd layout + * and should not be manipulated by the pte functions. + * + * Lock the page table for the destination and check + * to see that it's still huge and whether or not we will + * need to fault on write. + */ + if (unlikely(pmd_thp_or_huge(*pmd))) { + ptl = ¤t->mm->page_table_lock; + spin_lock(ptl); + if (unlikely(!pmd_thp_or_huge(*pmd) + || pmd_hugewillfault(*pmd))) { + spin_unlock(ptl); + return 0; + } + + *ptep = NULL; + *ptlp = ptl; + return 1; + } + + if (unlikely(pmd_bad(*pmd))) + return 0; + + pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); + if (unlikely(!pte_present(*pte) || !pte_young(*pte) || + !pte_write(*pte) || !pte_dirty(*pte))) { + pte_unmap_unlock(pte, ptl); + return 0; + } + + *ptep = pte; + *ptlp = ptl; + + return 1; +} + +static unsigned long noinline +__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) +{ + unsigned long ua_flags; + int atomic; + + /* the mmap semaphore is taken only if not in an atomic context */ + atomic = faulthandler_disabled(); + + if (!atomic) + mmap_read_lock(current->mm); + while (n) { + pte_t *pte; + spinlock_t *ptl; + int tocopy; + + while (!pin_page_for_write(to, &pte, &ptl)) { + if (!atomic) + mmap_read_unlock(current->mm); + if (__put_user(0, (char __user *)to)) + goto out; + if (!atomic) + mmap_read_lock(current->mm); + } + + tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1; + if (tocopy > n) + tocopy = n; + + ua_flags = uaccess_save_and_enable(); + __memcpy((void *)to, from, tocopy); + uaccess_restore(ua_flags); + to += tocopy; + from += tocopy; + n -= tocopy; + + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); + } + if (!atomic) + mmap_read_unlock(current->mm); + +out: + return n; +} + +unsigned long +arm_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + /* + * This test is stubbed out of the main function above to keep + * the overhead for small copies low by avoiding a large + * register dump on the stack just to reload them right away. + * With frame pointer disabled, tail call optimization kicks in + * as well making this test almost invisible. + */ + if (n < 64) { + unsigned long ua_flags = uaccess_save_and_enable(); + n = __copy_to_user_std(to, from, n); + uaccess_restore(ua_flags); + } else { + n = __copy_to_user_memcpy(uaccess_mask_range_ptr(to, n), + from, n); + } + return n; +} + +static unsigned long noinline +__clear_user_memset(void __user *addr, unsigned long n) +{ + unsigned long ua_flags; + + mmap_read_lock(current->mm); + while (n) { + pte_t *pte; + spinlock_t *ptl; + int tocopy; + + while (!pin_page_for_write(addr, &pte, &ptl)) { + mmap_read_unlock(current->mm); + if (__put_user(0, (char __user *)addr)) + goto out; + mmap_read_lock(current->mm); + } + + tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1; + if (tocopy > n) + tocopy = n; + + ua_flags = uaccess_save_and_enable(); + __memset((void *)addr, 0, tocopy); + uaccess_restore(ua_flags); + addr += tocopy; + n -= tocopy; + + if (pte) + pte_unmap_unlock(pte, ptl); + else + spin_unlock(ptl); + } + mmap_read_unlock(current->mm); + +out: + return n; +} + +unsigned long arm_clear_user(void __user *addr, unsigned long n) +{ + /* See rational for this in __copy_to_user() above. */ + if (n < 64) { + unsigned long ua_flags = uaccess_save_and_enable(); + n = __clear_user_std(addr, n); + uaccess_restore(ua_flags); + } else { + n = __clear_user_memset(addr, n); + } + return n; +} + +#if 0 + +/* + * This code is disabled by default, but kept around in case the chosen + * thresholds need to be revalidated. Some overhead (small but still) + * would be implied by a runtime determined variable threshold, and + * so far the measurement on concerned targets didn't show a worthwhile + * variation. + * + * Note that a fairly precise sched_clock() implementation is needed + * for results to make some sense. + */ + +#include <linux/vmalloc.h> + +static int __init test_size_treshold(void) +{ + struct page *src_page, *dst_page; + void *user_ptr, *kernel_ptr; + unsigned long long t0, t1, t2; + int size, ret; + + ret = -ENOMEM; + src_page = alloc_page(GFP_KERNEL); + if (!src_page) + goto no_src; + dst_page = alloc_page(GFP_KERNEL); + if (!dst_page) + goto no_dst; + kernel_ptr = page_address(src_page); + user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__PAGE_COPY)); + if (!user_ptr) + goto no_vmap; + + /* warm up the src page dcache */ + ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE); + + for (size = PAGE_SIZE; size >= 4; size /= 2) { + t0 = sched_clock(); + ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size); + t1 = sched_clock(); + ret |= __copy_to_user_std(user_ptr, kernel_ptr, size); + t2 = sched_clock(); + printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); + } + + for (size = PAGE_SIZE; size >= 4; size /= 2) { + t0 = sched_clock(); + ret |= __clear_user_memset(user_ptr, size); + t1 = sched_clock(); + ret |= __clear_user_std(user_ptr, size); + t2 = sched_clock(); + printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); + } + + if (ret) + ret = -EFAULT; + + vunmap(user_ptr); +no_vmap: + put_page(dst_page); +no_dst: + put_page(src_page); +no_src: + return ret; +} + +subsys_initcall(test_size_treshold); + +#endif diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S new file mode 100644 index 000000000..679e16a21 --- /dev/null +++ b/arch/arm/lib/ucmpdi2.S @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/ucmpdi2.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +ENTRY(__ucmpdi2) + + cmp xh, yh + cmpeq xl, yl + movlo r0, #0 + moveq r0, #1 + movhi r0, #2 + ret lr + +ENDPROC(__ucmpdi2) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_ulcmp) + + cmp xh, yh + cmpeq xl, yl + movlo r0, #-1 + moveq r0, #0 + movhi r0, #1 + ret lr + +ENDPROC(__aeabi_ulcmp) + +#endif + diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c new file mode 100644 index 000000000..522510bae --- /dev/null +++ b/arch/arm/lib/xor-neon.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/lib/xor-neon.c + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + */ + +#include <linux/raid/xor.h> +#include <linux/module.h> + +MODULE_LICENSE("GPL"); + +#ifndef __ARM_NEON__ +#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' +#endif + +/* + * Pull in the reference implementations while instructing GCC (through + * -ftree-vectorize) to attempt to exploit implicit parallelism and emit + * NEON instructions. Clang does this by default at O2 so no pragma is + * needed. + */ +#ifdef CONFIG_CC_IS_GCC +#pragma GCC optimize "tree-vectorize" +#endif + +#pragma GCC diagnostic ignored "-Wunused-variable" +#include <asm-generic/xor.h> + +struct xor_block_template const xor_block_neon_inner = { + .name = "__inner_neon__", + .do_2 = xor_8regs_2, + .do_3 = xor_8regs_3, + .do_4 = xor_8regs_4, + .do_5 = xor_8regs_5, +}; +EXPORT_SYMBOL(xor_block_neon_inner); |