diff options
Diffstat (limited to '')
-rw-r--r-- | src/VBox/Devices/EFI/Firmware/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/src/VBox/Devices/EFI/Firmware/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm b/src/VBox/Devices/EFI/Firmware/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm new file mode 100644 index 00000000..13dbebc7 --- /dev/null +++ b/src/VBox/Devices/EFI/Firmware/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm @@ -0,0 +1,141 @@ +;------------------------------------------------------------------------------ +; +; CopyMem() worker for ARM +; +; This file started out as C code that did 64 bit moves if the buffer was +; 32-bit aligned, else it does a byte copy. It also does a byte copy for +; any trailing bytes. It was updated to do 32-byte copies using stm/ldm. +; +; Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR> +; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR> +; SPDX-License-Identifier: BSD-2-Clause-Patent +; +;------------------------------------------------------------------------------ + + EXPORT InternalMemCopyMem + AREA SetMem, CODE, READONLY + THUMB + +InternalMemCopyMem + stmfd sp!, {r4-r11, lr} + // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length) + mov r11, r0 + mov r10, r0 + mov r12, r2 + mov r14, r1 + +memcopy_check_overlapped + cmp r11, r1 + // If (dest < source) + bcc memcopy_check_optim_default + + // If (source + length < dest) + rsb r3, r1, r11 + cmp r12, r3 + bcc memcopy_check_optim_default + b memcopy_check_optim_overlap + +memcopy_check_optim_default + // Check if we can use an optimized path ((length >= 32) && destination word-aligned && source word-aligned) for the memcopy (optimized path if r0 == 1) + tst r0, #0xF + movne r0, #0 + bne memcopy_default + tst r1, #0xF + movne r3, #0 + moveq r3, #1 + cmp r2, #31 + movls r0, #0 + andhi r0, r3, #1 + b memcopy_default + +memcopy_check_optim_overlap + // r10 = dest_end, r14 = source_end + add r10, r11, r12 + add r14, r12, r1 + + // Are we in the optimized case ((length >= 32) && dest_end word-aligned && source_end word-aligned) + cmp r2, #31 + movls r0, #0 + movhi r0, #1 + tst r10, #0xF + movne r0, #0 + tst r14, #0xF + movne r0, #0 + b memcopy_overlapped + +memcopy_overlapped_non_optim + // We read 1 byte from the end of the source buffer + sub r3, r14, #1 + sub r12, r12, #1 + ldrb r3, [r3, #0] + sub r2, r10, #1 + cmp r12, #0 + // We write 1 byte at the end of the dest buffer + sub r10, r10, #1 + sub r14, r14, #1 + strb r3, [r2, #0] + bne memcopy_overlapped_non_optim + b memcopy_end + +// r10 = dest_end, r14 = source_end +memcopy_overlapped + // Are we in the optimized case ? + cmp r0, #0 + beq memcopy_overlapped_non_optim + + // Optimized Overlapped - Read 32 bytes + sub r14, r14, #32 + sub r12, r12, #32 + cmp r12, #31 + ldmia r14, {r2-r9} + + // If length is less than 32 then disable optim + movls r0, #0 + + cmp r12, #0 + + // Optimized Overlapped - Write 32 bytes + sub r10, r10, #32 + stmia r10, {r2-r9} + + // while (length != 0) + bne memcopy_overlapped + b memcopy_end + +memcopy_default_non_optim + // Byte copy + ldrb r3, [r14], #1 + sub r12, r12, #1 + strb r3, [r10], #1 + +memcopy_default + cmp r12, #0 + beq memcopy_end + +// r10 = dest, r14 = source +memcopy_default_loop + cmp r0, #0 + beq memcopy_default_non_optim + + // Optimized memcopy - Read 32 Bytes + sub r12, r12, #32 + cmp r12, #31 + ldmia r14!, {r2-r9} + + // If length is less than 32 then disable optim + movls r0, #0 + + cmp r12, #0 + + // Optimized memcopy - Write 32 Bytes + stmia r10!, {r2-r9} + + // while (length != 0) + bne memcopy_default_loop + +memcopy_end + mov r0, r11 + ldmfd sp!, {r4-r11, pc} + + END + |