diff options
Diffstat (limited to 'fluent-bit/lib/luajit-3065c9/src/vm_arm.dasc')
-rw-r--r-- | fluent-bit/lib/luajit-3065c9/src/vm_arm.dasc | 4663 |
1 files changed, 4663 insertions, 0 deletions
diff --git a/fluent-bit/lib/luajit-3065c9/src/vm_arm.dasc b/fluent-bit/lib/luajit-3065c9/src/vm_arm.dasc new file mode 100644 index 00000000..770c1602 --- /dev/null +++ b/fluent-bit/lib/luajit-3065c9/src/vm_arm.dasc @@ -0,0 +1,4663 @@ +|// Low-level VM code for ARM CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +| +|.arch arm +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter. +| +|// The following must be C callee-save. +|.define MASKR8, r4 // 255*8 constant for fast bytecode decoding. +|.define KBASE, r5 // Constants of current Lua function. +|.define PC, r6 // Next PC. +|.define DISPATCH, r7 // Opcode dispatch table. +|.define LREG, r8 // Register holding lua_State (also in SAVE_L). +| +|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+. +|.define BASE, r9 // Base of current Lua stack frame. +| +|// The following temporaries are not saved across C calls, except for RA/RC. +|.define RA, r10 // Callee-save. +|.define RC, r11 // Callee-save. +|.define RB, r12 +|.define OP, r12 // Overlaps RB, must not be lr. +|.define INS, lr // Holds the instruction word being dispatched (loaded by ins_NEXT2/ins_callt). +| +|// Calling conventions. Also used as temporaries. +|.define CARG1, r0 +|.define CARG2, r1 +|.define CARG3, r2 +|.define CARG4, r3 +|.define CARG12, r0 // For 1st soft-fp double. +|.define CARG34, r2 // For 2nd soft-fp double. +| +|.define CRET1, r0 +|.define CRET2, r1 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|// All slots below are sp-relative. ARG5, TMPD and TMPDlo all name [sp], and SAVE_MULTRES shares [sp, #4] with TMPDhi. +|// With FPU, saveregs reserves CFRAME_SPACE+4 bytes and keeps r4 in the extra SAVE_R4 slot instead of pushing it together with r5-r11. 
+|.define SAVE_R4, [sp, #28] +|.define CFRAME_SPACE, #28 +|.define SAVE_ERRF, [sp, #24] +|.define SAVE_NRES, [sp, #20] +|.define SAVE_CFRAME, [sp, #16] +|.define SAVE_L, [sp, #12] +|.define SAVE_PC, [sp, #8] +|.define SAVE_MULTRES, [sp, #4] +|.define ARG5, [sp] +| +|.define TMPDhi, [sp, #4] +|.define TMPDlo, [sp] +|.define TMPD, [sp] +|.define TMPDp, sp +| +|.if FPU +|.macro saveregs +| push {r5, r6, r7, r8, r9, r10, r11, lr} +| vpush {d8-d15} +| sub sp, sp, CFRAME_SPACE+4 +| str r4, SAVE_R4 +|.endmacro +|.macro restoreregs_ret +| ldr r4, SAVE_R4 +| add sp, sp, CFRAME_SPACE+4 +| vpop {d8-d15} +| pop {r5, r6, r7, r8, r9, r10, r11, pc} +|.endmacro +|.else +|.macro saveregs +| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +| sub sp, sp, CFRAME_SPACE +|.endmacro +|.macro restoreregs_ret +| add sp, sp, CFRAME_SPACE +| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +|.endmacro +|.endif +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; ud; .endmacro +| +|//----------------------------------------------------------------------- +| +|// Access to frame relative to BASE. +|// FRAME_FUNC and FRAME_PC are the two words directly below BASE (byte offsets -8 and -4). +|.define FRAME_FUNC, #-8 +|.define FRAME_PC, #-4 +| +|// Operand decoding. The *8 variants shift by 3 bits less than the field position and mask with MASKR8 (255*8), +|// so dst receives the 8-bit operand already multiplied by 8 (A = bits 8-15, C = bits 16-23, B = bits 24-31 of ins). +|// decode_RD yields the upper 16 bits of ins unscaled; decode_OP yields the lowest byte. +|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro +|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro +|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro +|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro +|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro +| +|// Instruction fetch. +|// ins_NEXT1 loads the opcode byte at PC into OP; ins_NEXT2 loads the full instruction word into INS and post-increments PC by 4. 
+|.macro ins_NEXT1 +| ldrb OP, [PC] +|.endmacro +|.macro ins_NEXT2 +| ldr INS, [PC], #4 +|.endmacro +|// Instruction decode+dispatch. +|// ins_NEXT3 replaces OP with the handler address read from DISPATCH + OP*4, sets RA = A*8 and RC = D, then jumps to the handler. +|.macro ins_NEXT3 +| ldr OP, [DISPATCH, OP, lsl #2] +| decode_RA8 RA, INS +| decode_RD RC, INS +| bx OP +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +| ins_NEXT3 +|.endmacro +| +|// Instruction footer. +|// The lower-case ins_next* names are the ones used by the code below; the constant '.if 1' selects the replicated variant. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +| .define ins_next3, ins_NEXT3 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| b ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| .endmacro +| .macro ins_next3 +| b ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Avoid register name substitution for field name. +#define field_pc pc +| +|// Call decode and dispatch. +|// ins_callt loads PC from the pc field of the function in CARG3 and dispatches the instruction found there with RA = BASE + A*8. +|// Unlike ins_NEXT3 it does not decode D, so RC still holds nargs*8 when the handler is entered. +|// ins_call first stores the caller PC into the FRAME_PC slot of the new frame and then does the same. +|.macro ins_callt +| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ldr PC, LFUNC:CARG3->field_pc +| ldrb OP, [PC] // STALL: load PC. early PC. +| ldr INS, [PC], #4 +| ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP. +| decode_RA8 RA, INS +| add RA, RA, BASE +| bx OP +|.endmacro +| +|.macro ins_call +| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| str PC, [BASE, FRAME_PC] +| ins_callt // STALL: locked PC. +|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Macros to test operand types. +|// checktp compares reg against the tag tp by adding the negated tag (cmn reg, #-tp); checktpeq/checktpne are the conditionally executed forms. +|// checkstr/checktab/checkfunc branch to target when reg does not hold the respective tag. 
+|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro +|.macro checktpeq, reg, tp; cmneq reg, #-tp; .endmacro +|.macro checktpne, reg, tp; cmnne reg, #-tp; .endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro +| +|// Assumes DISPATCH is relative to GL. +|// DISPATCH_GL/DISPATCH_J give the byte offset of a global_State/jit_State field for use as [DISPATCH, #offset]. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +|// PC2PROTO(field) is the offset of a GCproto field measured back from the end of the GCproto struct. +|// It is applied to a function's pc pointer (see the KBASE load in cont_dispatch). +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|// hotcheck decrements the 16-bit counter at DISPATCH + GG_DISP2HOT + ((PC >> 1) & 126) by delta and writes it back. +|// Clobbers CARG1 and CARG2. hotloop/hotcall branch to vm_hotloop/vm_hotcall when the subtraction borrows (blo). +|.macro hotcheck, delta +| lsr CARG1, PC, #1 +| and CARG1, CARG1, #126 +| sub CARG1, CARG1, #-GG_DISP2HOT +| ldrh CARG2, [DISPATCH, CARG1] +| subs CARG2, CARG2, #delta +| strh CARG2, [DISPATCH, CARG1] +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP +| blo ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL +| blo ->vm_hotcall +|.endmacro +| +|// Set current VM state. +|// mv_vmstate loads the bitwise complement of LJ_VMST_<st> into reg (mvn); st_vmstate stores reg to the vmstate field of global_State. +|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro +|.macro st_vmstate, reg; str reg, [DISPATCH, #DISPATCH_GL(vmstate)]; .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|// Clears LJ_GC_BLACK in the mark byte, makes tab the new head of gc.grayagain and stores the old head in tab->gclist. +|// mark must already hold tab->marked on entry (the macro does not load it). +|.macro barrierback, tab, mark, tmp +| ldr tmp, [DISPATCH, #DISPATCH_GL(gc.grayagain)] +| bic mark, mark, #LJ_GC_BLACK // black2gray(tab) +| str tab, [DISPATCH, #DISPATCH_GL(gc.grayagain)] +| strb mark, tab->marked +| str tmp, tab->gclist +|.endmacro +| +|// Emits the instruction 'a, b' only when IOS is set; expands to nothing otherwise. +|.macro .IOS, a, b +|.if IOS +| a, b +|.endif +|.endmacro +| +|//----------------------------------------------------------------------- + +#if !LJ_DUALNUM +#error "Only dual-number mode supported for ARM target" +#endif + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. 
*/ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: RB = previous base. + | tst PC, #FRAME_P + | beq ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. + | mvn CARG2, #~LJ_TTRUE + | mov BASE, RB + | // Prepending may overwrite the pcall frame, so do it at the end. + | str CARG2, [RA, FRAME_PC] // Prepend true to results. + | sub RA, RA, #8 + | + |->vm_returnc: + | adds RC, RC, #8 // RC = (nresults+1)*8. + | mov CRET1, #LUA_YIELD + | beq ->vm_unwind_c_eh + | str RC, SAVE_MULTRES + | ands CARG1, PC, #FRAME_TYPE + | beq ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return + | // CARG1 = PC & FRAME_TYPE + | bic RB, PC, #FRAME_TYPEP + | cmp CARG1, #FRAME_C + | sub RB, BASE, RB // RB = previous base. + | bne ->vm_returnp + | + | str RB, L->base + | ldr KBASE, SAVE_NRES + | mv_vmstate CARG4, C + | sub BASE, BASE, #8 + | subs CARG3, RC, #8 + | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8 + | st_vmstate CARG4 + | beq >2 + |1: + | subs CARG3, CARG3, #8 + | ldrd CARG12, [RA], #8 + | strd CARG12, [BASE], #8 + | bne <1 + |2: + | cmp KBASE, RC // More/less results wanted? + | bne >6 + |3: + | str BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ldr RC, SAVE_CFRAME // Restore previous C frame. + | mov CRET1, #0 // Ok return status for vm_pcall. + | str RC, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | blt >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. 
+ | ldr CARG3, L->maxstack + | mvn CARG2, #~LJ_TNIL + | cmp BASE, CARG3 + | bhs >8 + | str CARG2, [BASE, #4] + | add RC, RC, #8 + | add BASE, BASE, #8 + | b <2 + | + |7: // Less results wanted. + | sub CARG1, RC, KBASE + | cmp KBASE, #0 // LUA_MULTRET+1 case? + | subne BASE, BASE, CARG1 // Either keep top or shrink it. + | b <3 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | str BASE, L->top // Save current top held in BASE (yes). + | lsr CARG2, KBASE, #3 + | mov CARG1, L + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->top // Need the (realloced) L->top in BASE. + | b <2 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mov sp, CARG1 + | mov CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ldr L, SAVE_L + | mv_vmstate CARG4, C + | ldr GL:CARG3, L->glref + | str CARG4, GL:CARG3->vmstate + | b ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | bic CARG1, CARG1, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. + | mov sp, CARG1 + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ldr L, SAVE_L + | mov MASKR8, #255 + | mov RC, #16 // 2 results: false + error message. + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | ldr BASE, L->base + | ldr DISPATCH, L->glref // Setup pointer to dispatch table. + | mvn CARG1, #~LJ_TFALSE + | sub RA, BASE, #8 // Results start at BASE-8. + | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. + | add DISPATCH, DISPATCH, #GG_G2DISP + | mv_vmstate CARG2, INTERP + | str CARG1, [BASE, #-4] // Prepend false to error message. + | st_vmstate CARG2 + | b ->vm_returnc + | + |->vm_unwind_ext: // Complete external unwind. 
+#if !LJ_NO_UNWIND + | push {r0, r1, r2, lr} + | bl extern _Unwind_Complete + | ldr r0, [sp] + | bl extern _Unwind_DeleteException + | pop {r0, r1, r2, lr} + | mov r0, r1 + | bx r2 +#endif + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | // CARG1 = L + | mov CARG2, #LUA_MINSTACK + | b >2 + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | add RC, BASE, RC + | sub RA, RA, BASE + | mov CARG1, L + | str BASE, L->base + | add PC, PC, #4 // Must point after first instruction. + | str RC, L->top + | lsr CARG2, RA, #3 + |2: + | // L->base = new base, L->top = top + | str PC, SAVE_PC + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->base + | ldr RC, L->top + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | sub NARGS8:RC, RC, BASE + | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mov L, CARG1 + | ldr DISPATCH, L:CARG1->glref // Setup pointer to dispatch table. + | mov BASE, CARG2 + | add DISPATCH, DISPATCH, #GG_G2DISP + | str L, SAVE_L + | mov PC, #FRAME_CP + | str CARG3, SAVE_NRES + | add CARG2, sp, #CFRAME_RESUME + | ldrb CARG1, L->status + | str CARG3, SAVE_ERRF + | str L, SAVE_PC // Any value outside of bytecode is ok. 
+ | str CARG3, SAVE_CFRAME + | cmp CARG1, #0 + | str CARG2, L->cframe + | beq >3 + | + | // Resume after yield (like a return). + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | mov RA, BASE + | ldr BASE, L->base + | ldr CARG1, L->top + | mov MASKR8, #255 + | strb CARG3, L->status + | sub RC, CARG1, BASE + | ldr PC, [BASE, FRAME_PC] + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | mv_vmstate CARG2, INTERP + | add RC, RC, #8 + | ands CARG1, PC, #FRAME_TYPE + | st_vmstate CARG2 + | str RC, SAVE_MULTRES + | beq ->BC_RET_Z + | b ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | mov PC, #FRAME_CP + | str CARG4, SAVE_ERRF + | b >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | mov PC, #FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ldr RC, L:CARG1->cframe + | str CARG3, SAVE_NRES + | mov L, CARG1 + | str CARG1, SAVE_L + | ldr DISPATCH, L->glref // Setup pointer to dispatch table. + | mov BASE, CARG2 + | str CARG1, SAVE_PC // Any value outside of bytecode is ok. + | str RC, SAVE_CFRAME + | add DISPATCH, DISPATCH, #GG_G2DISP + | str sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | ldr RB, L->base // RB = old base (for vmeta_call). + | ldr CARG1, L->top + | mov MASKR8, #255 + | add PC, PC, BASE + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 
+ | sub PC, PC, RB // PC = frame delta + frame type + | mv_vmstate CARG2, INTERP + | sub NARGS8:RC, CARG1, BASE + | st_vmstate CARG2 + | + |->vm_call_dispatch: + | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ldrd CARG34, [BASE, FRAME_FUNC] + | checkfunc CARG4, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mov L, CARG1 + | ldr RA, L:CARG1->stack + | str CARG1, SAVE_L + | ldr DISPATCH, L->glref // Setup pointer to dispatch table. + | ldr RB, L->top + | str CARG1, SAVE_PC // Any value outside of bytecode is ok. + | ldr RC, L->cframe + | add DISPATCH, DISPATCH, #GG_G2DISP + | sub RA, RA, RB // Compute -savestack(L, L->top). + | mov RB, #0 + | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. + | str RB, SAVE_ERRF // No error function. + | str RC, SAVE_CFRAME + | str sp, L->cframe // Add our C frame to cframe chain. + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | movs BASE, CRET1 + | mov PC, #FRAME_CP + | bne <3 // Else continue with the call. + | b ->vm_leave_cp // No base? Just remove C frame. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 + | ldr LFUNC:CARG3, [RB, FRAME_FUNC] + | ldr CARG1, [BASE, #-16] // Get continuation. + | mov CARG4, BASE + | mov BASE, RB // Restore caller BASE. + |.if FFI + | cmp CARG1, #1 + |.endif + | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. 
+ | mvn INS, #~LJ_TNIL + | add CARG2, RA, RC + | str INS, [CARG2, #-4] // Ensure one valid arg. + |.if FFI + | bls >1 + |.endif + | ldr CARG3, LFUNC:CARG3->field_pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | // BASE = base, RA = resultptr, CARG4 = meta base + | bx CARG1 + | + |.if FFI + |1: + | beq ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + | sub CARG4, CARG4, #16 + | sub RC, CARG4, BASE + | b ->vm_call_tail + |.endif + | + |->cont_cat: // RA = resultptr, CARG4 = meta base + | ldr INS, [PC, #-4] + | sub CARG2, CARG4, #16 + | ldrd CARG34, [RA] + | str BASE, L->base + | decode_RB8 RC, INS + | decode_RA8 RA, INS + | add CARG1, BASE, RC + | subs CARG1, CARG2, CARG1 + | strdne CARG34, [CARG2] + | movne CARG3, CARG1 + | bne ->BC_CAT_Z + | strd CARG34, [BASE, RA] + | b ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | add CARG2, BASE, RB + | b >2 + | + |->vmeta_tgets: + | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) + | mvn CARG4, #~LJ_TTAB + | str TAB:RB, [CARG2] + | str CARG4, [CARG2, #4] + |2: + | mvn CARG4, #~LJ_TSTR + | str STR:RC, TMPDlo + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tgetb: // RC = index + | decode_RB8 RB, INS + | str RC, TMPDlo + | mvn CARG4, #~LJ_TISNUM + | add CARG2, BASE, RB + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tgetv: + | add CARG2, BASE, RB + | add CARG3, BASE, RC + |1: + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | beq >3 + | ldrd CARG34, [CRET1] + | ins_next1 + | ins_next2 + | strd CARG34, [BASE, RA] + | ins_next3 + | + |3: // Call __index metamethod. 
+ | // BASE = base, L->top = new base, stack = cont/func/t/k + | rsb CARG1, BASE, #FRAME_CONT + | ldr BASE, L->top + | mov NARGS8:RC, #16 // 2 args for func(t, k). + | str PC, [BASE, #-12] // [cont|PC] + | add PC, CARG1, BASE + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | b ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | .IOS mov RC, BASE + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | .IOS mov BASE, RC + | cmp CRET1, #0 + | ldrdne CARG12, [CRET1] + | mvneq CARG2, #~LJ_TNIL + | b ->BC_TGETR_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | add CARG2, BASE, RB + | b >2 + | + |->vmeta_tsets: + | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) + | mvn CARG4, #~LJ_TTAB + | str TAB:RB, [CARG2] + | str CARG4, [CARG2, #4] + |2: + | mvn CARG4, #~LJ_TSTR + | str STR:RC, TMPDlo + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tsetb: // RC = index + | decode_RB8 RB, INS + | str RC, TMPDlo + | mvn CARG4, #~LJ_TISNUM + | add CARG2, BASE, RB + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tsetv: + | add CARG2, BASE, RB + | add CARG3, BASE, RC + |1: + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | ldrd CARG34, [BASE, RA] + | beq >3 + | ins_next1 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | strd CARG34, [CRET1] + | ins_next2 + | ins_next3 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | rsb CARG1, BASE, #FRAME_CONT + | ldr BASE, L->top + | mov NARGS8:RC, #24 // 3 args for func(t, k, v). + | strd CARG34, [BASE, #16] // Copy value to third argument. 
+ | str PC, [BASE, #-12] // [cont|PC] + | add PC, CARG1, BASE + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | b ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | str BASE, L->base + | .IOS mov RC, BASE + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. + | .IOS mov BASE, RC + | b ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | mov CARG1, L + | sub PC, PC, #4 + | mov CARG2, RA + | str BASE, L->base + | mov CARG3, RC + | str PC, SAVE_PC + | decode_OP CARG4, INS + | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // Returns 0/1 or TValue * (metamethod). + |3: + | .IOS ldr BASE, L->base + | cmp CRET1, #1 + | bhi ->vmeta_binop + |4: + | ldrh RB, [PC, #2] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | subhs PC, RB, #0x20000 + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | ldr INS, [PC, #-4] + | ldrd CARG12, [RA] + | decode_RA8 CARG3, INS + | strd CARG12, [BASE, CARG3] + | b ->cont_nop + | + |->cont_condt: // RA = resultptr + | ldr CARG2, [RA, #4] + | mvn CARG1, #~LJ_TTRUE + | cmp CARG1, CARG2 // Branch if result is true. + | b <4 + | + |->cont_condf: // RA = resultptr + | ldr CARG2, [RA, #4] + | checktp CARG2, LJ_TFALSE // Branch if result is false. + | b <4 + | + |->vmeta_equal: + | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // Returns 0/1 or TValue * (metamethod). + | b <3 + | + |->vmeta_equal_cd: + |.if FFI + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | mov CARG2, INS + | str PC, SAVE_PC + | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). 
+ | b <3 + |.endif + | + |->vmeta_istype: + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | lsr CARG2, RA, #3 + | mov CARG3, RC + | str PC, SAVE_PC + | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | .IOS ldr BASE, L->base + | b ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vn: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | add CARG3, BASE, RB + | add CARG4, KBASE, RC + | b >1 + | + |->vmeta_arith_nv: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | add CARG4, BASE, RB + | add CARG3, KBASE, RC + | b >1 + | + |->vmeta_unm: + | ldr INS, [PC, #-8] + | sub PC, PC, #4 + | add CARG3, BASE, RC + | add CARG4, BASE, RC + | b >1 + | + |->vmeta_arith_vv: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | add CARG3, BASE, RB + | add CARG4, BASE, RC + |1: + | decode_OP OP, INS + | add CARG2, BASE, RA + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | str OP, ARG5 + | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // Returns NULL (finished) or TValue * (metamethod). + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | beq ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | sub CARG2, CRET1, BASE + | str PC, [CRET1, #-12] // [cont|PC] + | add PC, CARG2, #FRAME_CONT + | mov BASE, CRET1 + | mov NARGS8:RC, #16 // 2 args for func(o1, o2). + | b ->vm_call_dispatch + | + |->vmeta_len: + | add CARG2, BASE, RC + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). + | .IOS ldr BASE, L->base +#if LJ_52 + | cmp CRET1, #0 + | bne ->vmeta_binop // Binop call for compatibility. + | ldr TAB:CARG1, [BASE, RC] + | b ->BC_LEN_Z +#else + | b ->vmeta_binop // Binop call for compatibility. 
+#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // RB = old base, BASE = new base, RC = nargs*8 + | mov CARG1, L + | str RB, L->base // This is the callers base! + | sub CARG2, BASE, #8 + | str PC, SAVE_PC + | add CARG3, BASE, NARGS8:RC + | .IOS mov RA, BASE + | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | .IOS mov BASE, RA + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | mov CARG1, L + | str BASE, L->base + | sub CARG2, RA, #8 + | str PC, SAVE_PC + | add CARG3, RA, NARGS8:RC + | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | .IOS ldr BASE, L->base + | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here. + | ldr PC, [BASE, FRAME_PC] + | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. + | b ->BC_CALLT2_Z + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mov CARG1, L + | str BASE, L->base + | mov CARG2, RA + | str PC, SAVE_PC + | bl extern lj_meta_for // (lua_State *L, TValue *base) + | .IOS ldr BASE, L->base + |.if JIT + | ldrb OP, [PC, #-4] + |.endif + | ldr INS, [PC, #-4] + |.if JIT + | cmp OP, #BC_JFORI + |.endif + | decode_RA8 RA, INS + | decode_RD RC, INS + |.if JIT + | beq =>BC_JFORI + |.endif + | b =>BC_FORI + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. 
name: + | ldrd CARG12, [BASE] + | cmp NARGS8:RC, #8 + | blo ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | ldrd CARG12, [BASE] + | ldrd CARG34, [BASE, #8] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_1 name + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | checktp CARG2, LJ_TISNUM + | cmnlo CARG4, #-LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |.macro .ffunc_d, name + | .ffunc name + | ldr CARG2, [BASE, #4] + | cmp NARGS8:RC, #8 + | vldr d0, [BASE] + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |.macro .ffunc_dd, name + | .ffunc name + | ldr CARG2, [BASE, #4] + | ldr CARG4, [BASE, #12] + | cmp NARGS8:RC, #16 + | vldr d0, [BASE] + | vldr d1, [BASE, #8] + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | cmnlo CARG4, #-LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. + |.macro ffgccheck + | ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)] + | ldr CARG2, [DISPATCH, #DISPATCH_GL(gc.threshold)] + | cmp CARG1, CARG2 + | blge ->fff_gcstep + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | checktp CARG2, LJ_TTRUE + | bhi ->fff_fallback + | ldr PC, [BASE, FRAME_PC] + | strd CARG12, [BASE, #-8] + | mov RB, BASE + | subs RA, NARGS8:RC, #8 + | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. + | beq ->fff_res // Done if exactly 1 argument. 
+ |1: + | ldrd CARG12, [RB, #8] + | subs RA, RA, #8 + | strd CARG12, [RB], #8 + | bne <1 + | b ->fff_res + | + |.ffunc type + | ldr CARG2, [BASE, #4] + | cmp NARGS8:RC, #8 + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | mvnlo CARG2, #~LJ_TISNUM + | rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1 + | lsl CARG4, CARG4, #3 + | ldrd CARG12, [CFUNC:CARG3, CARG4] + | b ->fff_restv + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | checktp CARG2, LJ_TTAB + | cmnne CARG2, #-LJ_TUDATA + | bne >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ldr TAB:RB, TAB:CARG1->metatable + |2: + | mvn CARG2, #~LJ_TNIL + | ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])] + | cmp TAB:RB, #0 + | beq ->fff_restv + | ldr CARG3, TAB:RB->hmask + | ldr CARG4, STR:RC->sid + | ldr NODE:INS, TAB:RB->node + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask + | add CARG3, CARG3, CARG3, lsl #1 + | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 + |3: // Rearranged logic, because we expect _not_ to find the key. + | ldrd CARG34, NODE:INS->key // STALL: early NODE:INS. + | ldrd CARG12, NODE:INS->val + | ldr NODE:INS, NODE:INS->next + | checktp CARG4, LJ_TSTR + | cmpeq CARG3, STR:RC + | beq >5 + | cmp NODE:INS, #0 + | bne <3 + |4: + | mov CARG1, RB // Use metatable as default result. + | mvn CARG2, #~LJ_TTAB + | b ->fff_restv + |5: + | checktp CARG2, LJ_TNIL + | bne ->fff_restv + | b <4 + | + |6: + | checktp CARG2, LJ_TISNUM + | mvnhs CARG2, CARG2 + | movlo CARG2, #~LJ_TISNUM + | add CARG4, DISPATCH, CARG2, lsl #2 + | ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])] + | b <2 + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. 
+ | checktp CARG2, LJ_TTAB + | ldreq TAB:RB, TAB:CARG1->metatable + | checktpeq CARG4, LJ_TTAB + | ldrbeq CARG4, TAB:CARG1->marked + | cmpeq TAB:RB, #0 + | bne ->fff_fallback + | tst CARG4, #LJ_GC_BLACK // isblack(table) + | str TAB:CARG3, TAB:CARG1->metatable + | beq ->fff_restv + | barrierback TAB:CARG1, CARG4, CARG3 + | b ->fff_restv + | + |.ffunc rawget + | ldrd CARG34, [BASE] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + | mov CARG2, CARG3 + | checktab CARG4, ->fff_fallback + | mov CARG1, L + | add CARG3, BASE, #8 + | .IOS mov RA, BASE + | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | .IOS mov BASE, RA + | ldrd CARG12, [CRET1] + | b ->fff_restv + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ldrd CARG12, [BASE] + | cmp NARGS8:RC, #8 + | bne ->fff_fallback + | checktp CARG2, LJ_TISNUM + | bls ->fff_restv + | b ->fff_fallback + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | checktp CARG2, LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | beq ->fff_restv + | // Handle numbers inline, unless a number base metatable is present. + | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])] + | str BASE, L->base + | checktp CARG2, LJ_TISNUM + | cmpls CARG4, #0 + | str PC, SAVE_PC // Redundant (but a defined value). + | bhi ->fff_fallback + | ffgccheck + | mov CARG1, L + | mov CARG2, BASE + | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | ldr BASE, L->base + | mvn CARG2, #~LJ_TSTR + | b ->fff_restv + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | mvn CARG4, #~LJ_TNIL + | checktab CARG2, ->fff_fallback + | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. 
+ | ldr PC, [BASE, FRAME_PC] + | add CARG2, BASE, #8 + | sub CARG3, BASE, #8 + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | mov RC, #(2+1)*8 + | bgt ->fff_res // Found key/value. + | bmi ->fff_fallback // Invalid key. + | // End of traversal: return nil. + | mvn CRET2, #~LJ_TNIL + | b ->fff_restv + | + |.ffunc_1 pairs + | checktab CARG2, ->fff_fallback +#if LJ_52 + | ldr TAB:RB, TAB:CARG1->metatable +#endif + | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] + | ldr PC, [BASE, FRAME_PC] +#if LJ_52 + | cmp TAB:RB, #0 + | bne ->fff_fallback +#endif + | mvn CARG2, #~LJ_TNIL + | mov RC, #(3+1)*8 + | strd CFUNC:CARG34, [BASE, #-8] + | str CARG2, [BASE, #12] + | b ->fff_res + | + |.ffunc_2 ipairs_aux + | checktp CARG2, LJ_TTAB + | checktpeq CARG4, LJ_TISNUM + | bne ->fff_fallback + | ldr RB, TAB:CARG1->asize + | ldr RC, TAB:CARG1->array + | add CARG3, CARG3, #1 + | ldr PC, [BASE, FRAME_PC] + | cmp CARG3, RB + | add RC, RC, CARG3, lsl #3 + | strd CARG34, [BASE, #-8] + | ldrdlo CARG12, [RC] + | mov RC, #(0+1)*8 + | bhs >2 // Not in array part? + |1: + | checktp CARG2, LJ_TNIL + | movne RC, #(2+1)*8 + | strdne CARG12, [BASE] + | b ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | ldr RB, TAB:CARG1->hmask + | mov CARG2, CARG3 + | cmp RB, #0 + | beq ->fff_res + | .IOS mov RA, BASE + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. 
+ | .IOS mov BASE, RA + | cmp CRET1, #0 + | beq ->fff_res // NULL: key not found, return no results. + | ldrd CARG12, [CRET1] + | b <1 // Check the loaded value for nil. + | + |.ffunc_1 ipairs // Returns upvalue[0], the table argument and 0. + | checktab CARG2, ->fff_fallback +#if LJ_52 + | ldr TAB:RB, TAB:CARG1->metatable +#endif + | ldrd CFUNC:CARG34, CFUNC:CARG3->upvalue[0] + | ldr PC, [BASE, FRAME_PC] +#if LJ_52 + | cmp TAB:RB, #0 + | bne ->fff_fallback // Table has a metatable: use the fallback. +#endif + | mov CARG1, #0 + | mvn CARG2, #~LJ_TISNUM + | mov RC, #(3+1)*8 + | strd CFUNC:CARG34, [BASE, #-8] // 1st result: upvalue[0]. + | strd CARG12, [BASE, #8] // 3rd result: value 0 tagged LJ_TISNUM. + | b ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] + | cmp NARGS8:RC, #8 + | blo ->fff_fallback // Need at least 1 argument. + | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. + | mov RB, BASE + | add BASE, BASE, #8 + | moveq PC, #8+FRAME_PCALL // Hook not active. + | movne PC, #8+FRAME_PCALLH // Hook active. + | sub NARGS8:RC, NARGS8:RC, #8 // One argument less after advancing BASE. + | b ->vm_call_dispatch + | + |.ffunc_2 xpcall + | ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)] + | checkfunc CARG4, ->fff_fallback // Traceback must be a function. + | mov RB, BASE + | strd CARG12, [BASE, #8] // Swap function and traceback. + | strd CARG34, [BASE] + | tst RA, #HOOK_ACTIVE // Remember active hook before pcall.
+ | add BASE, BASE, #16 + | moveq PC, #16+FRAME_PCALL // Hook not active. + | movne PC, #16+FRAME_PCALLH // Hook active. + | sub NARGS8:RC, NARGS8:RC, #16 // Two arguments less after advancing BASE. + | b ->vm_call_dispatch + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume // resume=1: coroutine.resume, resume=0: coroutine.wrap. + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG2, LJ_TTHREAD + | bne ->fff_fallback // 1st arg must be a thread. + |.else + |.ffunc coroutine_wrap_aux + | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr // The coroutine is taken from upvalue[0]. + |.endif + | ldr PC, [BASE, FRAME_PC] + | str BASE, L->base + | ldr CARG2, L:CARG1->top + | ldrb RA, L:CARG1->status + | ldr RB, L:CARG1->base + | add CARG3, CARG2, NARGS8:RC // CARG3 = co->top + nargs*8. + | add CARG4, CARG2, RA + | str PC, SAVE_PC + | cmp CARG4, RB + | beq ->fff_fallback // co->top + co->status == co->base. + | ldr CARG4, L:CARG1->maxstack + | ldr RB, L:CARG1->cframe // Must be 0 to continue; RB then doubles as the copy index below. + | cmp RA, #LUA_YIELD + | cmpls CARG3, CARG4 + | cmpls RB, #0 + | bhi ->fff_fallback // status > LUA_YIELD, args exceed maxstack or cframe != 0. + |1: + |.if resume + | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. + | add BASE, BASE, #8 + | sub NARGS8:RC, NARGS8:RC, #8 + |.endif + | str CARG3, L:CARG1->top + | str BASE, L->top + |2: // Move args to coroutine. + | ldrd CARG34, [BASE, RB] + | cmp RB, NARGS8:RC + | strdne CARG34, [CARG2, RB] // Not stored once RB reaches nargs*8. + | add RB, RB, #8 + | bne <2 + | + | mov CARG3, #0 + | mov L:RA, L:CARG1 // Keep the coroutine in RA across the call. + | mov CARG4, #0 + | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | // Returns thread status. + |4: + | ldr CARG3, L:RA->base + | mv_vmstate CARG2, INTERP + | ldr CARG4, L:RA->top + | cmp CRET1, #LUA_YIELD + | ldr BASE, L->base + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | st_vmstate CARG2 + | bhi >8 // Status > LUA_YIELD: error. + | subs RC, CARG4, CARG3 // RC = co->top - co->base = size of the results in bytes. + | ldr CARG1, L->maxstack + | add CARG2, BASE, RC + | beq >6 // No results? + | cmp CARG2, CARG1 + | mov RB, #0 + | bhi >9 // Need to grow stack? + | + | sub CARG4, RC, #8 // Offset of the last result. + | str CARG3, L:RA->top // Clear coroutine stack. + |5: // Move results from coroutine.
+ | ldrd CARG12, [CARG3, RB] + | cmp RB, CARG4 + | strd CARG12, [BASE, RB] + | add RB, RB, #8 + | bne <5 // Loop until the last result has been copied. + |6: + |.if resume + | mvn CARG3, #~LJ_TTRUE + | add RC, RC, #16 // Add 8 for the prepended status and 8 for the +1 bias. + |7: + | str CARG3, [BASE, #-4] // Prepend true/false to results. + | sub RA, BASE, #8 + |.else + | mov RA, BASE + | add RC, RC, #8 // Add 8 for the +1 bias. + |.endif + | ands CARG1, PC, #FRAME_TYPE + | str PC, SAVE_PC + | str RC, SAVE_MULTRES + | beq ->BC_RET_Z // No frame type bits set in PC. + | b ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | ldrd CARG12, [CARG4, #-8]! + | mvn CARG3, #~LJ_TFALSE + | mov RC, #(2+1)*8 + | str CARG4, L:RA->top // Remove error from coroutine stack. + | strd CARG12, [BASE] // Copy error message. + | b <7 + |.else + | mov CARG1, L + | mov CARG2, L:RA + | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Never returns. + |.endif + | + |9: // Handle stack expansion on return from yield. + | mov CARG1, L + | lsr CARG2, RC, #3 // n = size of the results in slots. + | bl extern lj_state_growstack // (lua_State *L, int n) + | mov CRET1, #0 // 0 never compares higher than LUA_YIELD, so the retry skips the error branch. + | b <4 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ldr CARG1, L->cframe + | add CARG2, BASE, NARGS8:RC + | str BASE, L->base + | tst CARG1, #CFRAME_RESUME + | str CARG2, L->top + | mov CRET1, #LUA_YIELD + | mov CARG3, #0 + | beq ->fff_fallback // CFRAME_RESUME bit not set in L->cframe. + | str CARG3, L->cframe // L->cframe = 0. + | strb CRET1, L->status // L->status = LUA_YIELD. + | b ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.macro math_round, func + | .ffunc_1 math_ .. func + | checktp CARG2, LJ_TISNUM + | beq ->fff_restv // Integer: return the argument unchanged. + | bhi ->fff_fallback // Not a number. + | // Round FP value and normalize result. + | lsl CARG3, CARG2, #1 + | adds RB, CARG3, #0x00200000 + | bpl >2 // |x| < 1?
+ | mvn CARG4, #0x3e0 + | subs RB, CARG4, RB, asr #21 + | lsl CARG4, CARG2, #11 + | lsl CARG3, CARG1, #11 + | orr CARG4, CARG4, #0x80000000 + | rsb INS, RB, #32 + | orr CARG4, CARG4, CARG1, lsr #21 // CARG4 = (hi << 11) | 0x80000000 | (lo >> 21). + | bls >3 // |x| >= 2^31? + | orr CARG3, CARG3, CARG4, lsl INS // CARG3 = bits shifted out below; non-zero if x has a fraction. + | lsr CARG1, CARG4, RB // CARG1 = integer part of |x|. + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 + | addne CARG1, CARG1, #1 // Negative with fraction: round the magnitude up. + |.else + | bics CARG3, CARG3, CARG2, asr #31 + | addsne CARG1, CARG1, #1 // Positive with fraction: round up. + | ldrdvs CARG12, >9 // Increment overflowed: return 2^31 as a double. + | bvs ->fff_restv + |.endif + | cmp CARG2, #0 + | rsblt CARG1, CARG1, #0 // Negate the result for negative x. + |1: + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |2: // |x| < 1 + | bcs ->fff_restv // |x| is not finite. + | orr CARG3, CARG3, CARG1 // ztest = abs(hi) | lo + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 // return (ztest & sign) == 0 ? 0 : -1 + | moveq CARG1, #0 + | mvnne CARG1, #0 + |.else + | bics CARG3, CARG3, CARG2, asr #31 // return (ztest & ~sign) == 0 ? 0 : 1 + | moveq CARG1, #0 + | movne CARG1, #1 + |.endif + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |3: // |x| >= 2^31. Check for x == -(2^31). + | cmpeq CARG4, #0x80000000 + |.if "func" == "floor" + | cmpeq CARG3, #0 + |.endif + | bne >4 + | cmp CARG2, #0 + | movmi CARG1, #0x80000000 + | bmi <1 + |4: + | bl ->vm_..func.._sf // Otherwise call vm_floor_sf or vm_ceil_sf and return its result. + | b ->fff_restv + |.endmacro + | + | math_round floor + | math_round ceil + | + |.align 8 + |9: + | .long 0x00000000, 0x41e00000 // 2^31. + | + |.ffunc_1 math_abs + | checktp CARG2, LJ_TISNUM + | bhi ->fff_fallback // Not a number. + | bicne CARG2, CARG2, #0x80000000 // Non-integer number: clear the sign bit. + | bne ->fff_restv + | cmp CARG1, #0 + | rsbslt CARG1, CARG1, #0 // Integer: negate if negative. + | ldrdvs CARG12, <9 // Negation overflowed: return 2^31 as a double. + | // Fallthrough. + | + |->fff_restv: + | // CARG12 = TValue result. + | ldr PC, [BASE, FRAME_PC] + | strd CARG12, [BASE, #-8] + |->fff_res1: + | // PC = return. + | mov RC, #(1+1)*8 + |->fff_res: + | // RC = (nresults+1)*8, PC = return.
+ | ands CARG1, PC, #FRAME_TYPE + | ldreq INS, [PC, #-4] // No frame type bits set: load the instruction preceding PC. + | str RC, SAVE_MULTRES + | sub RA, BASE, #8 // Results start at BASE-8. + | bne ->vm_return // Frame type bits set in PC: return via vm_return. + | decode_RB8 RB, INS + |5: + | cmp RB, RC // More results expected? + | bhi >6 + | decode_RA8 CARG1, INS + | ins_next1 + | ins_next2 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | sub BASE, RA, CARG1 + | ins_next3 + | + |6: // Fill up results with nil. + | add CARG2, RA, RC + | mvn CARG1, #~LJ_TNIL + | add RC, RC, #8 + | str CARG1, [CARG2, #-4] // Type word of the next result slot = nil. + | b <5 + | + |.macro math_extern, func // Defines math_<func> as a call to the external C function func. + |.if HFABI + | .ffunc_d math_ .. func + |.else + | .ffunc_n math_ .. func + |.endif + | .IOS mov RA, BASE + | bl extern func + | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else + | b ->fff_restv + |.endif + |.endmacro + | + |.macro math_extern2, func // Same as math_extern, but built on the .ffunc_dd/.ffunc_nn prologues. + |.if HFABI + | .ffunc_dd math_ .. func + |.else + | .ffunc_nn math_ .. func + |.endif + | .IOS mov RA, BASE + | bl extern func + | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else + | b ->fff_restv + |.endif + |.endmacro + | + |.if FPU + | .ffunc_d math_sqrt + | vsqrt.f64 d0, d0 + |->fff_resd: // d0 = double result. + | ldr PC, [BASE, FRAME_PC] + | vstr d0, [BASE, #-8] + | b ->fff_res1 + |.else + | math_extern sqrt + |.endif + | + |.ffunc math_log + |.if HFABI + | ldr CARG2, [BASE, #4] // Type word of the 1st arg. + | cmp NARGS8:RC, #8 // Need exactly 1 argument. + | vldr d0, [BASE] + | bne ->fff_fallback + |.else + | ldrd CARG12, [BASE] + | cmp NARGS8:RC, #8 // Need exactly 1 argument.
+ | bne ->fff_fallback + |.endif + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback // Only a non-integer number is handled inline. + | .IOS mov RA, BASE + | bl extern log + | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else + | b ->fff_restv + |.endif + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.if HFABI + | .ffunc math_ldexp + | ldr CARG4, [BASE, #4] // Type word of the 1st arg. + | ldrd CARG12, [BASE, #8] // 2nd arg (exponent). + | cmp NARGS8:RC, #16 + | blo ->fff_fallback // Fewer than 2 args. + | vldr d0, [BASE] + | checktp CARG4, LJ_TISNUM + | bhs ->fff_fallback // 1st arg must be a non-integer number. + | checktp CARG2, LJ_TISNUM + | bne ->fff_fallback // 2nd arg must be an integer. + | .IOS mov RA, BASE + | bl extern ldexp // (double x, int exp) + | .IOS mov BASE, RA + | b ->fff_resd + |.else + |.ffunc_2 math_ldexp + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback // 1st arg must be a non-integer number. + | checktp CARG4, LJ_TISNUM + | bne ->fff_fallback // 2nd arg must be an integer. + | .IOS mov RA, BASE + | bl extern ldexp // (double x, int exp) + | .IOS mov BASE, RA + | b ->fff_restv + |.endif + | + |.if HFABI + |.ffunc_d math_frexp + | mov CARG1, sp // The exponent is written to the word at [sp]. + | .IOS mov RA, BASE + | bl extern frexp + | .IOS mov BASE, RA + | ldr CARG3, [sp] + | mvn CARG4, #~LJ_TISNUM + | ldr PC, [BASE, FRAME_PC] + | vstr d0, [BASE, #-8] // 1st result: value returned in d0. + | mov RC, #(2+1)*8 + | strd CARG34, [BASE] // 2nd result: the exponent tagged LJ_TISNUM. + | b ->fff_res + |.else + |.ffunc_n math_frexp + | mov CARG3, sp // The exponent is written to the word at [sp]. + | .IOS mov RA, BASE + | bl extern frexp + | .IOS mov BASE, RA + | ldr CARG3, [sp] + | mvn CARG4, #~LJ_TISNUM + | ldr PC, [BASE, FRAME_PC] + | strd CARG12, [BASE, #-8] // 1st result: value returned in CARG1/CARG2. + | mov RC, #(2+1)*8 + | strd CARG34, [BASE] // 2nd result: the exponent tagged LJ_TISNUM. + | b ->fff_res + |.endif + | + |.if HFABI + |.ffunc_d math_modf + | sub CARG1, BASE, #8 // modf stores through this pointer, i.e. into the 1st result slot. + | ldr PC, [BASE, FRAME_PC] + | .IOS mov RA, BASE + | bl extern modf + | .IOS mov BASE, RA + | mov RC, #(2+1)*8 + | vstr d0, [BASE] // 2nd result: the value returned by modf. + | b ->fff_res + |.else + |.ffunc_n math_modf + | sub CARG3, BASE, #8 // modf stores through this pointer, i.e. into the 1st result slot. + | ldr PC, [BASE, FRAME_PC]
+ | .IOS mov RA, BASE + | bl extern modf + | .IOS mov BASE, RA + | mov RC, #(2+1)*8 + | strd CARG12, [BASE] // 2nd result: the value returned by modf. + | b ->fff_res + |.endif + | + |.macro math_minmax, name, cond, fcond // cond/fcond: condition under which the next argument replaces the running result (integer/FP compare). + |.if FPU + | .ffunc_1 name + | add RB, BASE, RC // RB = end of the arguments. + | checktp CARG2, LJ_TISNUM + | add RA, BASE, #8 // RA = address of the 2nd arg. + | bne >4 + |1: // Handle integers. + | ldrd CARG34, [RA] + | cmp RA, RB + | bhs ->fff_restv // All arguments processed: return the integer result. + | checktp CARG4, LJ_TISNUM + | bne >3 + | cmp CARG1, CARG3 + | add RA, RA, #8 + | mov..cond CARG1, CARG3 // Take the new argument if cond holds. + | b <1 + |3: // Convert intermediate result to number and continue below. + | vmov s4, CARG1 + | bhi ->fff_fallback // Argument is not a number. + | vldr d1, [RA] + | vcvt.f64.s32 d0, s4 + | b >6 + | + |4: + | vldr d0, [BASE] + | bhi ->fff_fallback // 1st arg is not a number. + |5: // Handle numbers. + | ldrd CARG34, [RA] + | vldr d1, [RA] + | cmp RA, RB + | bhs ->fff_resd // All arguments processed: return d0. + | checktp CARG4, LJ_TISNUM + | bhs >7 + |6: + | vcmp.f64 d0, d1 + | vmrs + | add RA, RA, #8 + | vmov..fcond.f64 d0, d1 // Take the new argument if fcond holds. + | b <5 + |7: // Convert integer to number and continue above. + | vmov s4, CARG3 + | bhi ->fff_fallback // Argument is not a number. + | vcvt.f64.s32 d1, s4 + | b <6 + | + |.else + | + | .ffunc_1 name + | checktp CARG2, LJ_TISNUM + | mov RA, #8 // RA = byte offset of the 2nd arg. + | bne >4 + |1: // Handle integers. + | ldrd CARG34, [BASE, RA] + | cmp RA, RC + | bhs ->fff_restv // All arguments processed: return the integer result. + | checktp CARG4, LJ_TISNUM + | bne >3 + | cmp CARG1, CARG3 + | add RA, RA, #8 + | mov..cond CARG1, CARG3 // Take the new argument if cond holds. + | b <1 + |3: // Convert intermediate result to number and continue below. + | bhi ->fff_fallback // Argument is not a number. + | bl extern __aeabi_i2d + | ldrd CARG34, [BASE, RA] // Reload the argument after the call. + | b >6 + | + |4: + | bhi ->fff_fallback // 1st arg is not a number. + |5: // Handle numbers. + | ldrd CARG34, [BASE, RA] + | cmp RA, RC + | bhs ->fff_restv // All arguments processed: return CARG12. + | checktp CARG4, LJ_TISNUM + | bhs >7 + |6: + | bl extern __aeabi_cdcmple + | add RA, RA, #8 + | mov..fcond CARG1, CARG3 // Take both words of the new argument if fcond holds. + | mov..fcond CARG2, CARG4 + | b <5 + |7: // Convert integer to number and continue above.
+ | bhi ->fff_fallback // Argument is not a number. + | strd CARG12, TMPD // Save the running result across the call. + | mov CARG1, CARG3 + | bl extern __aeabi_i2d + | ldrd CARG34, TMPD // The saved result becomes the 2nd operand; the converted argument stays in CARG12. + | b <6 + |.endif + |.endmacro + | + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ldrd CARG12, [BASE] + | ldr PC, [BASE, FRAME_PC] + | cmp NARGS8:RC, #8 + | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument. + | bne ->fff_fallback + | ldr CARG3, STR:CARG1->len + | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end). + | mvn CARG2, #~LJ_TISNUM + | cmp CARG3, #0 + | moveq RC, #(0+1)*8 // Empty string: no results. + | movne RC, #(1+1)*8 // Otherwise return the byte tagged LJ_TISNUM. + | strd CARG12, [BASE, #-8] + | b ->fff_res + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | ldrd CARG12, [BASE] + | ldr PC, [BASE, FRAME_PC] + | cmp NARGS8:RC, #8 // Need exactly 1 argument. + | checktpeq CARG2, LJ_TISNUM + | bicseq CARG4, CARG1, #255 // The integer must have no bits set above the low 8. + | mov CARG3, #1 // len = 1. + | bne ->fff_fallback + | str CARG1, TMPD + | mov CARG2, TMPDp // Points to stack. Little-endian. + |->fff_newstr: + | // CARG2 = str, CARG3 = len. + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: + | // Returns GCstr *.
+ | ldr BASE, L->base + | mvn CARG2, #~LJ_TSTR // Tag the GCstr * in CARG1 as a string. + | b ->fff_restv + | + |.ffunc string_sub + | ffgccheck + | ldrd CARG12, [BASE] + | ldrd CARG34, [BASE, #16] + | cmp NARGS8:RC, #16 + | mvn RB, #0 // Default end = -1. + | beq >1 // Exactly 2 args: keep the default end. + | blo ->fff_fallback // Fewer than 2 args. + | checktp CARG4, LJ_TISNUM + | mov RB, CARG3 // end = 3rd arg. + | bne ->fff_fallback // 3rd arg must be an integer. + |1: + | ldrd CARG34, [BASE, #8] + | checktp CARG2, LJ_TSTR + | ldreq CARG2, STR:CARG1->len + | checktpeq CARG4, LJ_TISNUM + | bne ->fff_fallback // Need a string and an integer start. + | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end + | add CARG4, CARG2, #1 + | cmp CARG3, #0 // if (start < 0) start += len+1 + | addlt CARG3, CARG3, CARG4 + | cmp CARG3, #1 // if (start < 1) start = 1 + | movlt CARG3, #1 + | cmp RB, #0 // if (end < 0) end += len+1 + | addlt RB, RB, CARG4 + | bic RB, RB, RB, asr #31 // if (end < 0) end = 0 + | cmp RB, CARG2 // if (end > len) end = len + | add CARG1, STR:CARG1, #sizeof(GCstr)-1 + | movgt RB, CARG2 + | add CARG2, CARG1, CARG3 // CARG2 = str + sizeof(GCstr) + (start-1). + | subs CARG3, RB, CARG3 // len = end - start + | add CARG3, CARG3, #1 // len += 1 + | bge ->fff_newstr // end >= start (flags from the subs above). + |->fff_emptystr: + | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty) + | mvn CARG2, #~LJ_TSTR + | b ->fff_restv + | + |.macro ffstring_op, name // Defines string_<name> via lj_buf_putstr_<name> and lj_buf_tostr. + | .ffunc string_ .. name + | ffgccheck + | ldr CARG3, [BASE, #4] // Type word of the 1st arg. + | cmp NARGS8:RC, #8 + | ldr STR:CARG2, [BASE] + | blo ->fff_fallback // Need at least 1 argument. + | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) + | checkstr CARG3, ->fff_fallback + | ldr CARG4, SBUF:CARG1->b + | str BASE, L->base + | str PC, SAVE_PC + | str L, SBUF:CARG1->L + | str CARG4, SBUF:CARG1->w // sb->w = sb->b. + | bl extern lj_buf_putstr_ .. name + | bl extern lj_buf_tostr // NOTE(review): no argument setup here; presumably takes the value returned in CRET1 -- confirm. + | b ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |// FP number to bit conversion for soft-float. Clobbers r0-r3. + |->vm_tobit_fb: + | bhi ->fff_fallback + |->vm_tobit: + | lsl RB, CARG2, #1 + | adds RB, RB, #0x00200000 + | movpl CARG1, #0 // |x| < 1?
+ | bxpl lr + | mvn CARG4, #0x3e0 + | subs RB, CARG4, RB, asr #21 + | bmi >1 // |x| >= 2^32? + | lsl CARG4, CARG2, #11 + | orr CARG4, CARG4, #0x80000000 + | orr CARG4, CARG4, CARG1, lsr #21 // CARG4 = (hi << 11) | 0x80000000 | (lo >> 21). + | cmp CARG2, #0 + | lsr CARG1, CARG4, RB + | rsblt CARG1, CARG1, #0 // Negate the result for negative x. + | bx lr + |1: + | add RB, RB, #21 + | lsr CARG4, CARG1, RB + | rsb RB, RB, #20 + | lsl CARG1, CARG2, #12 + | cmp CARG2, #0 + | orr CARG1, CARG4, CARG1, lsl RB + | rsblt CARG1, CARG1, #0 // Negate the result for negative x. + | bx lr + | + |.macro .ffunc_bit, name // Prologue for bit.* functions: leaves the 1st arg as an integer in CARG1. + | .ffunc_1 bit_..name + | checktp CARG2, LJ_TISNUM + | blne ->vm_tobit_fb // Not an integer: convert a number or fall back for other types. + |.endmacro + | + |.ffunc_bit tobit + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name + | mov CARG3, CARG1 // CARG3 accumulates the result. + | mov RA, #8 // RA = byte offset of the 2nd arg. + |1: + | ldrd CARG12, [BASE, RA] + | cmp RA, NARGS8:RC + | add RA, RA, #8 + | bge >2 // All arguments processed. + | checktp CARG2, LJ_TISNUM + | blne ->vm_tobit_fb // Not an integer: convert a number or fall back for other types. + | ins CARG3, CARG3, CARG1 + | b <1 + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, orr + |.ffunc_bit_op bxor, eor + | + |2: // Shared exit of band/bor/bxor: return CARG3 tagged LJ_TISNUM. + | mvn CARG4, #~LJ_TISNUM + | ldr PC, [BASE, FRAME_PC] + | strd CARG34, [BASE, #-8] + | b ->fff_res1 + | + |.ffunc_bit bswap + | eor CARG3, CARG1, CARG1, ror #16 + | bic CARG3, CARG3, #0x00ff0000 + | ror CARG1, CARG1, #8 + | mvn CARG2, #~LJ_TISNUM + | eor CARG1, CARG1, CARG3, lsr #8 + | b ->fff_restv + | + |.ffunc_bit bnot + | mvn CARG1, CARG1 + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |.macro .ffunc_bit_sh, name, ins, shmod // shmod == 0: mask the count with 31, otherwise negate it. + | .ffunc bit_..name + | ldrd CARG12, [BASE, #8] // 2nd arg: the shift count. + | cmp NARGS8:RC, #16 + | blo ->fff_fallback // Fewer than 2 args. + | checktp CARG2, LJ_TISNUM + | blne ->vm_tobit_fb + |.if shmod == 0 + | and RA, CARG1, #31 + |.else + | rsb RA, CARG1, #0 // Used for rol, which is emitted as ror by the negated count. + |.endif + | ldrd CARG12, [BASE] // 1st arg: the value to shift. + | checktp CARG2, LJ_TISNUM + | blne ->vm_tobit_fb + | ins CARG1, CARG1, RA + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + |.endmacro + | + |.ffunc_bit_sh lshift, lsl, 0 + |.ffunc_bit_sh rshift, lsr, 0 + |.ffunc_bit_sh arshift, asr, 0 + |.ffunc_bit_sh rol, ror, 1 + |.ffunc_bit_sh
ror, ror, 0 + | + |//----------------------------------------------------------------------- + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RC = nargs*8 + | ldr CARG3, [BASE, FRAME_FUNC] + | ldr CARG2, L->maxstack + | add CARG1, BASE, NARGS8:RC + | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC. + | str CARG1, L->top + | ldr CARG3, CFUNC:CARG3->f // The fallback handler is the function's f field. + | str BASE, L->base + | add CARG1, CARG1, #8*LUA_MINSTACK // top + LUA_MINSTACK slots must not exceed maxstack. + | str PC, SAVE_PC // Redundant (but a defined value). + | cmp CARG1, CARG2 + | mov CARG1, L + | bhi >5 // Need to grow stack. + | blx CARG3 // (lua_State *L) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ldr BASE, L->base + | cmp CRET1, #0 + | lsl RC, CRET1, #3 // RC = (nresults+1)*8. + | sub RA, BASE, #8 + | bgt ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. + | ldr CARG1, L->top + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | sub NARGS8:RC, CARG1, BASE // nargs*8 = L->top - BASE. + | bne ->vm_call_tail // Returned -1? + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | ands CARG1, PC, #FRAME_TYPE + | bic CARG2, PC, #FRAME_TYPEP // Frame type bits set: delta = PC without the FRAME_TYPEP bits. + | ldreq INS, [PC, #-4] + | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8. + | addeq CARG2, CARG2, #8 + | sub RB, BASE, CARG2 // RB = previous base. + | b ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | mov CARG2, #LUA_MINSTACK + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->base + | cmp CARG1, CARG1 // Set zero-flag to force retry. + | b <1 + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | mov RA, lr // Save the return address across the call. + | str BASE, L->base + | add CARG2, BASE, NARGS8:RC + | str PC, SAVE_PC // Redundant (but a defined value). + | str CARG2, L->top + | mov CARG1, L + | bl extern lj_gc_step // (lua_State *L) + | ldr BASE, L->base + | mov lr, RA // Help return address predictor.
+ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] // Reload CARG3 with the function of the current frame. + | bx lr + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] + | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. + | bne >5 + | // Decrement the hookcount for consistency, but always do the call. + | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] + | tst CARG1, #HOOK_ACTIVE + | bne >1 // Hook active: skip the hookcount update. + | sub CARG2, CARG2, #1 + | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT + | strne CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] // Only stored if a line or count hook is set. + | b >1 + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] + | tst CARG1, #HOOK_ACTIVE // Hook already active? + | beq >1 // No: call lj_dispatch_ins below. + |5: // Re-dispatch to static ins. + | decode_OP OP, INS + | add OP, DISPATCH, OP, lsl #2 + | ldr pc, [OP, #GG_DISP2STATIC] + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | ldrb CARG1, [DISPATCH, #DISPATCH_GL(hookmask)] + | ldr CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] + | tst CARG1, #HOOK_ACTIVE // Hook already active? + | bne <5 + | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT + | beq <5 // Neither line nor count hook set. + | subs CARG2, CARG2, #1 + | str CARG2, [DISPATCH, #DISPATCH_GL(hookcount)] + | beq >1 // hookcount reached zero: call the dispatcher. + | tst CARG1, #LUA_MASKLINE + | beq <5 // No line hook set. + |1: + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ldr BASE, L->base + |4: // Re-dispatch to static ins. + | ldrb OP, [PC, #-4] + | ldr INS, [PC, #-4] + | add OP, DISPATCH, OP, lsl #2 + | ldr OP, [OP, #GG_DISP2STATIC] + | decode_RA8 RA, INS + | decode_RD RC, INS + | bx OP + | + |->cont_hook: // Continue from hook yield.
+ | ldr CARG1, [CARG4, #-24] + | add PC, PC, #4 + | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins. + | b <4 + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). + | sub CARG1, DISPATCH, #-GG_DISP2J // CARG1 = jit_State *J. + | str PC, SAVE_PC + | ldr CARG3, LFUNC:CARG3->field_pc + | mov CARG2, PC + | str L, [DISPATCH, #DISPATCH_J(L)] + | ldrb CARG3, [CARG3, #PC2PROTO(framesize)] + | str BASE, L->base + | add CARG3, BASE, CARG3, lsl #3 // L->top = BASE + framesize*8. + | str CARG3, L->top + | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | b <3 + |.endif + | + |->vm_callhook: // Dispatch target for call hooks. + | mov CARG2, PC + |.if JIT + | b >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | orr CARG2, PC, #1 // Passes PC with bit 0 set, unlike vm_callhook which passes plain PC. + |1: + |.endif + | add CARG4, BASE, RC + | str PC, SAVE_PC + | mov CARG1, L + | str BASE, L->base + | sub RA, RA, BASE // Keep RA relative to BASE across the call. + | str CARG4, L->top + | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction. + | ldr BASE, L->base + | ldr CARG4, L->top + | mov CARG2, #0 + | add RA, BASE, RA // Rebase RA on the reloaded BASE. + | sub NARGS8:RC, CARG4, BASE + | str CARG2, SAVE_PC // Invalidate for subsequent line hook. + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | ldr INS, [PC, #-4] + | bx CRET1 // Jump to the returned ASMFunction. + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, CARG4 = meta base + | ldr RB, SAVE_MULTRES + | ldr INS, [PC, #-4] + | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. + | subs RB, RB, #8 + | decode_RA8 RC, INS // Call base. + | beq >2 // No results to move. + |1: // Move results down. + | ldrd CARG12, [RA] + | add RA, RA, #8 + | subs RB, RB, #8 + | strd CARG12, [BASE, RC] + | add RC, RC, #8 + | bne <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + |3: + | cmp RA, RC + | mvn CARG2, #~LJ_TNIL + | bhi >9 // More results wanted? + | + | ldrh RA, TRACE:CARG3->traceno + | ldrh RC, TRACE:CARG3->link + | cmp RC, RA + | beq ->cont_nop // Blacklisted.
+ | cmp RC, #0 + | bne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | str RA, [DISPATCH, #DISPATCH_J(exitno)] + | str L, [DISPATCH, #DISPATCH_J(L)] + | str BASE, L->base + | sub CARG1, DISPATCH, #-GG_DISP2J // CARG1 = jit_State *J. + | mov CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldr BASE, L->base + | b ->cont_nop + | + |9: // Fill up results with nil. + | strd CARG12, [BASE, RC] + | add RC, RC, #8 + | b <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | ldr BASE, L->base + | sub PC, PC, #4 + | b ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_exit_handler: + |.if JIT + | sub sp, sp, #12 // Reserve the three words filled in below (RID_SP, RID_LR, RID_PC). + | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12} + | ldr CARG1, [sp, #64] // Load original value of lr. + | ldr DISPATCH, [lr] // Load DISPATCH. + | add CARG3, sp, #64 // Recompute original value of sp. + | mv_vmstate CARG4, EXIT + | str CARG3, [sp, #52] // Store sp in RID_SP + | st_vmstate CARG4 + | ldr CARG2, [CARG1, #-4]! // Get exit instruction. + | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. + | str CARG1, [sp, #60] + |.if FPU + | vpush {d0-d15} + |.endif + | lsl CARG2, CARG2, #8 // Drop the top 8 bits of the exit instruction ... + | add CARG1, CARG1, CARG2, asr #6 // ... and add its sign-extended low 24 bits times 4 to the exit pc. + | ldr CARG2, [lr, #4] // Load exit stub group offset. + | sub CARG1, CARG1, lr + | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] + | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
+ | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] + | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] + | mov CARG4, #0 + | str BASE, L->base + | str L, [DISPATCH, #DISPATCH_J(L)] + | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] // jit_base = 0. + | sub CARG1, DISPATCH, #-GG_DISP2J // CARG1 = jit_State *J. + | mov CARG2, sp // ex = the registers saved on the stack above. + | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ldr CARG2, L->cframe + | ldr BASE, L->base + | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. + | mov sp, CARG2 + | ldr PC, SAVE_PC // Get SAVE_PC. + | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). + | b >1 + |.endif + |->vm_exit_interp: + | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set. + |.if JIT + | ldr L, SAVE_L + |1: + | cmp CARG1, #0 + | blt >9 // Check for error from exit. + | lsl RC, CARG1, #3 // Scale MULTRES by 8. + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | str RC, SAVE_MULTRES + | mov CARG3, #0 + | str BASE, L->base + | ldr CARG2, LFUNC:CARG2->field_pc + | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] // jit_base = 0. + | mv_vmstate CARG4, INTERP + | ldr KBASE, [CARG2, #PC2PROTO(k)] // KBASE = constants of the function in the current frame. + | // Modified copy of ins_next which handles function header dispatch, too. + | ldrb OP, [PC] + | mov MASKR8, #255 + | ldr INS, [PC], #4 + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | st_vmstate CARG4 + | cmp OP, #BC_FUNCC+2 // Fast function? + | bhs >4 + |2: + | cmp OP, #BC_FUNCF // Function header? + | ldr OP, [DISPATCH, OP, lsl #2] + | decode_RA8 RA, INS + | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. + | subhs RC, RC, #8 + | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 + | ldrhs CARG3, [BASE, FRAME_FUNC] + | bx OP + | + |4: // Check frame below fast function.
+ | ldr CARG3, [CARG1, #-4] // Instruction preceding the frame's PC. + | decode_RA8 CARG1, CARG3 + | sub CARG2, BASE, CARG1 + | ldr LFUNC:CARG3, [CARG2, #-16] + | ldr CARG3, LFUNC:CARG3->field_pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | b <2 + | + |9: // Rethrow error from the right C frame. + | rsb CARG2, CARG1, #0 // errcode = -CARG1 (undo the negation). + | mov CARG1, L + | bl extern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// FP value rounding. Called from JIT code. + |// + |// double lj_vm_floor/ceil/trunc(double x); + |.macro vm_round, func, hf // hf == 1: argument and result in d0, otherwise in CARG1 (lo) and CARG2 (hi). + |.if hf == 1 + | vmov CARG1, CARG2, d0 + |.endif + | lsl CARG3, CARG2, #1 + | adds RB, CARG3, #0x00200000 + | bpl >2 // |x| < 1? + | mvn CARG4, #0x3cc + | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. + | bxlo lr // |x| >= 2^52: done. + | mvn CARG4, #1 + | bic CARG3, CARG1, CARG4, lsl RB // ztest = lo & ~lomask + | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask + | subs RB, RB, #32 + | bicpl CARG4, CARG2, CARG4, lsl RB // |x| <= 2^20: ztest |= hi & ~himask + | orrpl CARG3, CARG3, CARG4 + | mvnpl CARG4, #1 + | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) + |.else + | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) + |.endif + |.if hf == 1 + | vmoveq d0, CARG1, CARG2 + |.endif + | bxeq lr // iszero: done. + | mvn CARG4, #1 + | cmp RB, #0 + | lslpl CARG3, CARG4, RB + | mvnmi CARG3, #0 + | add RB, RB, #32 + | subs CARG1, CARG1, CARG4, lsl RB // lo = lo-lomask + | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry + |.if hf == 1 + | vmov d0, CARG1, CARG2 + |.endif + | bx lr + | + |2: // |x| < 1: + | bxcs lr // |x| is not finite.
+ | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) + |.else + | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) + |.endif + | mov CARG1, #0 // lo = 0 + | and CARG2, CARG2, #0x80000000 // Keep only the sign bit of hi. + | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) + | orrne CARG2, CARG2, CARG4 + |.if hf == 1 + | vmov d0, CARG1, CARG2 + |.endif + | bx lr + |.endmacro + | + |9: + | .long 0x3ff00000 // hiword(+1.0) + | + |->vm_floor: // Without HFABI no code is emitted here, so this label aliases vm_floor_sf. + |.if HFABI + | vm_round floor, 1 + |.endif + |->vm_floor_sf: + | vm_round floor, 0 + | + |->vm_ceil: // Without HFABI no code is emitted here, so this label aliases vm_ceil_sf. + |.if HFABI + | vm_round ceil, 1 + |.endif + |->vm_ceil_sf: + | vm_round ceil, 0 + | + |.macro vm_trunc, hf // Emits code only if JIT is enabled; hf as for vm_round. + |.if JIT + |.if hf == 1 + | vmov CARG1, CARG2, d0 + |.endif + | lsl CARG3, CARG2, #1 + | adds RB, CARG3, #0x00200000 + | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. + | movpl CARG1, #0 + |.if hf == 1 + | vmovpl d0, CARG1, CARG2 + |.endif + | bxpl lr + | mvn CARG4, #0x3cc + | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. + | bxlo lr // |x| >= 2^52: already done. + | mvn CARG4, #1 + | and CARG1, CARG1, CARG4, lsl RB // lo &= lomask + | subs RB, RB, #32 + | andpl CARG2, CARG2, CARG4, lsl RB // |x| <= 2^20: hi &= himask + |.if hf == 1 + | vmov d0, CARG1, CARG2 + |.endif + | bx lr + |.endif + |.endmacro + | + |->vm_trunc: + |.if HFABI + | vm_trunc 1 + |.endif + |->vm_trunc_sf: + | vm_trunc 0 + | + | // double lj_vm_mod(double dividend, double divisor); + |->vm_mod: + |.if FPU + | // Special calling convention. Also, RC (r11) is not preserved.
+ | vdiv.f64 d0, d6, d7 // d0 = d6 / d7. + | mov RC, lr // Save lr in RC across the call below. + | vmov CARG1, CARG2, d0 + | bl ->vm_floor_sf + | vmov d0, CARG1, CARG2 + | vmul.f64 d0, d0, d7 + | mov lr, RC + | vsub.f64 d6, d6, d0 // d6 = d6 - floor(d6/d7)*d7. + | bx lr + |.else + | push {r0, r1, r2, r3, r4, lr} // Dividend at [sp], divisor at [sp, #8]. + | bl extern __aeabi_ddiv + | bl ->vm_floor_sf + | ldrd CARG34, [sp, #8] // Reload the divisor. + | bl extern __aeabi_dmul + | ldrd CARG34, [sp] // Reload the dividend. + | eor CARG2, CARG2, #0x80000000 // Flip the sign of the product. + | bl extern __aeabi_dadd + | add sp, sp, #20 // Drop the five saved words below lr. + | pop {pc} + |.endif + | + | // int lj_vm_modi(int dividend, int divisor); + |->vm_modi: + | ands RB, CARG1, #0x80000000 + | rsbmi CARG1, CARG1, #0 // a = |dividend| + | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor). + | cmp CARG2, #0 + | rsbmi CARG2, CARG2, #0 // b = |divisor| + | subs CARG4, CARG2, #1 + | cmpne CARG1, CARG2 + | moveq CARG1, #0 // if (b == 1 || a == b) a = 0 + | tsthi CARG2, CARG4 + | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1 + | bls >1 + | // Use repeated subtraction to get the remainder. + | clz CARG3, CARG1 + | clz CARG4, CARG2 + | sub CARG4, CARG4, CARG3 + | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*8 + | addne pc, pc, CARG3, lsl #3 // Duff's device.
+ | nop // Filler: pc reads as two instructions ahead, so entry 0 lands on the first pair below. + { + int i; + for (i = 31; i >= 0; i--) { /* Emits 32 unrolled compare/subtract pairs, from shift 31 down to 0. */ + | cmp CARG1, CARG2, lsl #i + | subhs CARG1, CARG1, CARG2, lsl #i + } + } + |1: + | cmp CARG1, #0 + | cmpne RB, #0 + | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b + | eors CARG2, CARG1, RB, lsl #1 + | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y + | bx lr + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_TMP0, CARG3 + |.define NEXT_TMP1, CARG4 + |.define NEXT_LIM, r12 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY_I, [sp, #8] + |.define NEXT_RES_KEY_IT, [sp, #12] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | ldr NEXT_TMP0, NEXT_TAB->array + | ldr NEXT_LIM, NEXT_TAB->asize + | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 // NEXT_TMP0 = &array[idx]. + |1: // Traverse array part. + | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM + | bhs >5 // idx >= asize: continue with the hash part (NEXT_TMP1 = idx - asize). + | ldr NEXT_TMP1, [NEXT_TMP0, #4] // Type word of the array slot. + | str NEXT_IDX, NEXT_RES_KEY_I // Key value = current array index. + | add NEXT_TMP0, NEXT_TMP0, #8 + | add NEXT_IDX, NEXT_IDX, #1 + | checktp NEXT_TMP1, LJ_TNIL + | beq <1 // Skip holes in array part. + | ldr NEXT_TMP0, [NEXT_TMP0, #-8] // Value word of the slot just checked. + | mov NEXT_RES, NEXT_RES_PTR + | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too. + | mvn NEXT_TMP0, #~LJ_TISNUM + | str NEXT_TMP0, NEXT_RES_KEY_IT // Key type word = LJ_TISNUM. + | bx lr + | + |5: // Traverse hash part.
+ | ldr NEXT_TMP0, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP1, NODE:NEXT_RES->val.it + | checktp NEXT_TMP1, LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | bxne lr + | // Skip holes in hash part. + | add NEXT_RES, NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | mvn NEXT_TMP0, #0 + | mov NEXT_RES, NEXT_RES_PTR + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. + |// Saveregs already performed. Callback slot number in [sp], g in r12. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | ldr CTSTATE, GL:r12->ctype_state + | add DISPATCH, r12, #GG_G2DISP + |.if FPU + | str r4, SAVE_R4 + | add r4, sp, CFRAME_SPACE+4+8*8 + | vstmdb r4!, {d8-d15} + |.endif + |.if HFABI + | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8]) + |.endif + | strd CARG34, CTSTATE->cb.gpr[2] + | strd CARG12, CTSTATE->cb.gpr[0] + |.if HFABI + | vstmdb r12!, {d0-d7} + |.endif + | ldr CARG4, [sp] + | add CARG3, sp, #CFRAME_SIZE + | mov CARG1, CTSTATE + | lsr CARG4, CARG4, #3 + | str CARG3, CTSTATE->cb.stack + | mov CARG2, sp + | str CARG4, CTSTATE->cb.slot + | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. + | bl extern lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ldr BASE, L:CRET1->base + | mv_vmstate CARG2, INTERP + | ldr RC, L:CRET1->top + | mov MASKR8, #255 + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | mov L, CRET1 + | sub RC, RC, BASE + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 
+ | st_vmstate CARG2 + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)] + | str BASE, L->base + | str CARG4, L->top + | str L, CTSTATE->L + | mov CARG1, CTSTATE + | mov CARG2, RA + | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) + | ldrd CARG12, CTSTATE->cb.gpr[0] + |.if HFABI + | vldr d0, CTSTATE->cb.fpr[0] + |.endif + | b ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, r4 + | push {CCSTATE, r5, r11, lr} + | mov CCSTATE, CARG1 + | ldr CARG1, CCSTATE:CARG1->spadj + | ldrb CARG2, CCSTATE->nsp + | add CARG3, CCSTATE, #offsetof(CCallState, stack) + |.if HFABI + | add RB, CCSTATE, #offsetof(CCallState, fpr[0]) + |.endif + | mov r11, sp + | sub sp, sp, CARG1 // Readjust stack. + | subs CARG2, CARG2, #1 + |.if HFABI + | vldm RB, {d0-d7} + |.endif + | ldr RB, CCSTATE->func + | bmi >2 + |1: // Copy stack slots. + | ldr CARG4, [CARG3, CARG2, lsl #2] + | str CARG4, [sp, CARG2, lsl #2] + | subs CARG2, CARG2, #1 + | bpl <1 + |2: + | ldrd CARG12, CCSTATE->gpr[0] + | ldrd CARG34, CCSTATE->gpr[2] + | blx RB + | mov sp, r11 + |.if HFABI + | add r12, CCSTATE, #offsetof(CCallState, fpr[4]) + |.endif + | strd CRET1, CCSTATE->gpr[0] + |.if HFABI + | vstmdb r12!, {d0-d3} + |.endif + | pop {CCSTATE, r5, r11, pc} + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RC = src2, JMP with RC = target + | lsl RC, RC, #3 + | ldrd CARG12, [RA, BASE]! + | ldrh RB, [PC, #2] + | ldrd CARG34, [RC, BASE]! + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | checktp CARG2, LJ_TISNUM + | bne >3 + | checktp CARG4, LJ_TISNUM + | bne >4 + | cmp CARG1, CARG3 + if (op == BC_ISLT) { + | sublt PC, RB, #0x20000 + } else if (op == BC_ISGE) { + | subge PC, RB, #0x20000 + } else if (op == BC_ISLE) { + | suble PC, RB, #0x20000 + } else { + | subgt PC, RB, #0x20000 + } + |1: + | ins_next + | + |3: // CARG12 is not an integer. + |.if FPU + | vldr d0, [RA] + | bhi ->vmeta_comp + | // d0 is a number. + | checktp CARG4, LJ_TISNUM + | vldr d1, [RC] + | blo >5 + | bhi ->vmeta_comp + | // d0 is a number, CARG3 is an integer. + | vmov s4, CARG3 + | vcvt.f64.s32 d1, s4 + | b >5 + |4: // CARG1 is an integer, CARG34 is not an integer. + | vldr d1, [RC] + | bhi ->vmeta_comp + | // CARG1 is an integer, d1 is a number. + | vmov s4, CARG1 + | vcvt.f64.s32 d0, s4 + |5: // d0 and d1 are numbers. + | vcmp.f64 d0, d1 + | vmrs + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + if (op == BC_ISLT) { + | sublo PC, RB, #0x20000 + } else if (op == BC_ISGE) { + | subhs PC, RB, #0x20000 + } else if (op == BC_ISLE) { + | subls PC, RB, #0x20000 + } else { + | subhi PC, RB, #0x20000 + } + | b <1 + |.else + | bhi ->vmeta_comp + | // CARG12 is a number. + | checktp CARG4, LJ_TISNUM + | movlo RA, RB // Save RB. + | blo >5 + | bhi ->vmeta_comp + | // CARG12 is a number, CARG3 is an integer. + | mov CARG1, CARG3 + | mov RC, RA + | mov RA, RB // Save RB. + | bl extern __aeabi_i2d + | mov CARG3, CARG1 + | mov CARG4, CARG2 + | ldrd CARG12, [RC] // Restore first operand. + | b >5 + |4: // CARG1 is an integer, CARG34 is not an integer. + | bhi ->vmeta_comp + | // CARG1 is an integer, CARG34 is a number. + | mov RA, RB // Save RB. 
+ | bl extern __aeabi_i2d + | ldrd CARG34, [RC] // Restore second operand. + |5: // CARG12 and CARG34 are numbers. + | bl extern __aeabi_cdcmple + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + if (op == BC_ISLT) { + | sublo PC, RA, #0x20000 + } else if (op == BC_ISGE) { + | subhs PC, RA, #0x20000 + } else if (op == BC_ISLE) { + | subls PC, RA, #0x20000 + } else { + | subhi PC, RA, #0x20000 + } + | b <1 + |.endif + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RC = src2, JMP with RC = target + | lsl RC, RC, #3 + | ldrd CARG12, [RA, BASE]! + | ldrh RB, [PC, #2] + | ldrd CARG34, [RC, BASE]! + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | checktp CARG2, LJ_TISNUM + | cmnls CARG4, #-LJ_TISNUM + if (vk) { + | bls ->BC_ISEQN_Z + } else { + | bls ->BC_ISNEN_Z + } + | // Either or both types are not numbers. + |.if FFI + | checktp CARG2, LJ_TCDATA + | checktpne CARG4, LJ_TCDATA + | beq ->vmeta_equal_cd + |.endif + | cmp CARG2, CARG4 // Compare types. + | bne >2 // Not the same type? + | checktp CARG2, LJ_TISPRI + | bhs >1 // Same type and primitive type? + | + | // Same types and not a primitive type. Compare GCobj or pvalue. + | cmp CARG1, CARG3 + if (vk) { + | bne >3 // Different GCobjs or pvalues? + |1: // Branch if same. + | sub PC, RB, #0x20000 + |2: // Different. + | ins_next + |3: + | checktp CARG2, LJ_TISTABUD + | bhi <2 // Different objects and not table/ud? + } else { + | beq >1 // Same GCobjs or pvalues? + | checktp CARG2, LJ_TISTABUD + | bhi >2 // Different objects and not table/ud? + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | ldr TAB:RA, TAB:CARG1->metatable + | cmp TAB:RA, #0 + if (vk) { + | beq <2 // No metatable? + } else { + | beq >2 // No metatable? + } + | ldrb RA, TAB:RA->nomm + | mov CARG4, #1-vk // ne = 0 or 1. 
+ | mov CARG2, CARG1 + | tst RA, #1<<MM_eq + | beq ->vmeta_equal // 'no __eq' flag not set? + if (vk) { + | b <2 + } else { + |2: // Branch if different. + | sub PC, RB, #0x20000 + |1: // Same. + | ins_next + } + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RC = str_const (~), JMP with RC = target + | mvn RC, RC + | ldrd CARG12, [BASE, RA] + | ldrh RB, [PC, #2] + | ldr STR:CARG3, [KBASE, RC, lsl #2] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | checktp CARG2, LJ_TSTR + |.if FFI + | bne >7 + | cmp CARG1, CARG3 + |.else + | cmpeq CARG1, CARG3 + |.endif + if (vk) { + | subeq PC, RB, #0x20000 + |1: + } else { + |1: + | subne PC, RB, #0x20000 + } + | ins_next + | + |.if FFI + |7: + | checktp CARG2, LJ_TCDATA + | bne <1 + | b ->vmeta_equal_cd + |.endif + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RC = num_const (~), JMP with RC = target + | lsl RC, RC, #3 + | ldrd CARG12, [RA, BASE]! + | ldrh RB, [PC, #2] + | ldrd CARG34, [RC, KBASE]! + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | checktp CARG2, LJ_TISNUM + | bne >3 + | checktp CARG4, LJ_TISNUM + | bne >4 + | cmp CARG1, CARG3 + if (vk) { + | subeq PC, RB, #0x20000 + |1: + } else { + |1: + | subne PC, RB, #0x20000 + } + |2: + | ins_next + | + |3: // CARG12 is not an integer. + |.if FFI + | bhi >7 + |.else + if (!vk) { + | subhi PC, RB, #0x20000 + } + | bhi <2 + |.endif + |.if FPU + | checktp CARG4, LJ_TISNUM + | vmov s4, CARG3 + | vldr d0, [RA] + | vldrlo d1, [RC] + | vcvths.f64.s32 d1, s4 + | b >5 + |4: // CARG1 is an integer, d1 is a number. + | vmov s4, CARG1 + | vldr d1, [RC] + | vcvt.f64.s32 d0, s4 + |5: // d0 and d1 are numbers. + | vcmp.f64 d0, d1 + | vmrs + if (vk) { + | subeq PC, RB, #0x20000 + } else { + | subne PC, RB, #0x20000 + } + | b <2 + |.else + | // CARG12 is a number. + | checktp CARG4, LJ_TISNUM + | movlo RA, RB // Save RB. 
+ | blo >5 + | // CARG12 is a number, CARG3 is an integer. + | mov CARG1, CARG3 + | mov RC, RA + |4: // CARG1 is an integer, CARG34 is a number. + | mov RA, RB // Save RB. + | bl extern __aeabi_i2d + | ldrd CARG34, [RC] // Restore other operand. + |5: // CARG12 and CARG34 are numbers. + | bl extern __aeabi_cdcmpeq + if (vk) { + | subeq PC, RA, #0x20000 + } else { + | subne PC, RA, #0x20000 + } + | b <2 + |.endif + | + |.if FFI + |7: + | checktp CARG2, LJ_TCDATA + | bne <1 + | b ->vmeta_equal_cd + |.endif + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RC = primitive_type (~), JMP with RC = target + | ldrd CARG12, [BASE, RA] + | ldrh RB, [PC, #2] + | add PC, PC, #4 + | mvn RC, RC + | add RB, PC, RB, lsl #2 + |.if FFI + | checktp CARG2, LJ_TCDATA + | beq ->vmeta_equal_cd + |.endif + | cmp CARG2, RC + if (vk) { + | subeq PC, RB, #0x20000 + } else { + | subne PC, RB, #0x20000 + } + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RC = src, JMP with RC = target + | add RC, BASE, RC, lsl #3 + | ldrh RB, [PC, #2] + | ldrd CARG12, [RC] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | checktp CARG2, LJ_TTRUE + if (op == BC_ISTC || op == BC_IST) { + | subls PC, RB, #0x20000 + if (op == BC_ISTC) { + | strdls CARG12, [BASE, RA] + } + } else { + | subhi PC, RB, #0x20000 + if (op == BC_ISFC) { + | strdhi CARG12, [BASE, RA] + } + } + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RC = -type + | ldrd CARG12, [BASE, RA] + | ins_next1 + | cmn CARG2, RC + | ins_next2 + | bne ->vmeta_istype + | ins_next3 + break; + case BC_ISNUM: + | // RA = src*8, RC = -(TISNUM-1) + | ldrd CARG12, [BASE, RA] + | ins_next1 + | checktp CARG2, LJ_TISNUM + | ins_next2 + | bhs ->vmeta_istype + | ins_next3 + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA 
= dst*8, RC = src + | lsl RC, RC, #3 + | ins_next1 + | ldrd CARG12, [BASE, RC] + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + break; + case BC_NOT: + | // RA = dst*8, RC = src + | add RC, BASE, RC, lsl #3 + | ins_next1 + | ldr CARG1, [RC, #4] + | add RA, BASE, RA + | ins_next2 + | checktp CARG1, LJ_TTRUE + | mvnls CARG2, #~LJ_TFALSE + | mvnhi CARG2, #~LJ_TTRUE + | str CARG2, [RA, #4] + | ins_next3 + break; + case BC_UNM: + | // RA = dst*8, RC = src + | lsl RC, RC, #3 + | ldrd CARG12, [BASE, RC] + | ins_next1 + | ins_next2 + | checktp CARG2, LJ_TISNUM + | bhi ->vmeta_unm + | eorne CARG2, CARG2, #0x80000000 + | bne >5 + | rsbseq CARG1, CARG1, #0 + | ldrdvs CARG12, >9 + |5: + | strd CARG12, [BASE, RA] + | ins_next3 + | + |.align 8 + |9: + | .long 0x00000000, 0x41e00000 // 2^31. + break; + case BC_LEN: + | // RA = dst*8, RC = src + | lsl RC, RC, #3 + | ldrd CARG12, [BASE, RC] + | checkstr CARG2, >2 + | ldr CARG1, STR:CARG1->len + |1: + | mvn CARG2, #~LJ_TISNUM + | ins_next1 + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + |2: + | checktab CARG2, ->vmeta_len +#if LJ_52 + | ldr TAB:CARG3, TAB:CARG1->metatable + | cmp TAB:CARG3, #0 + | bne >9 + |3: +#endif + |->BC_LEN_Z: + | .IOS mov RC, BASE + | bl extern lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | .IOS mov BASE, RC + | b <1 +#if LJ_52 + |9: + | ldrb CARG4, TAB:CARG3->nomm + | tst CARG4, #1<<MM_len + | bne <3 // 'no __len' flag set: done. 
+ | b ->vmeta_len +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro ins_arithcheck, cond, ncond, target + ||if (vk == 1) { + | cmn CARG4, #-LJ_TISNUM + | cmn..cond CARG2, #-LJ_TISNUM + ||} else { + | cmn CARG2, #-LJ_TISNUM + | cmn..cond CARG4, #-LJ_TISNUM + ||} + | b..ncond target + |.endmacro + |.macro ins_arithcheck_int, target + | ins_arithcheck eq, ne, target + |.endmacro + |.macro ins_arithcheck_num, target + | ins_arithcheck lo, hs, target + |.endmacro + | + |.macro ins_arithpre + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | .if FPU + | ldrd CARG12, [RB, BASE]! + | ldrd CARG34, [RC, KBASE]! + | .else + | ldrd CARG12, [BASE, RB] + | ldrd CARG34, [KBASE, RC] + | .endif + || break; + ||case 1: + | .if FPU + | ldrd CARG34, [RB, BASE]! + | ldrd CARG12, [RC, KBASE]! + | .else + | ldrd CARG34, [BASE, RB] + | ldrd CARG12, [KBASE, RC] + | .endif + || break; + ||default: + | .if FPU + | ldrd CARG12, [RB, BASE]! + | ldrd CARG34, [RC, BASE]! 
+ | .else + | ldrd CARG12, [BASE, RB] + | ldrd CARG34, [BASE, RC] + | .endif + || break; + ||} + |.endmacro + | + |.macro ins_arithpre_fpu, reg1, reg2 + |.if FPU + ||if (vk == 1) { + | vldr reg2, [RB] + | vldr reg1, [RC] + ||} else { + | vldr reg1, [RB] + | vldr reg2, [RC] + ||} + |.endif + |.endmacro + | + |.macro ins_arithpost_fpu, reg + | ins_next1 + | add RA, BASE, RA + | ins_next2 + | vstr reg, [RA] + | ins_next3 + |.endmacro + | + |.macro ins_arithfallback, ins + ||switch (vk) { + ||case 0: + | ins ->vmeta_arith_vn + || break; + ||case 1: + | ins ->vmeta_arith_nv + || break; + ||default: + | ins ->vmeta_arith_vv + || break; + ||} + |.endmacro + | + |.macro ins_arithdn, intins, fpins, fpcall + | ins_arithpre + |.if "intins" ~= "vm_modi" and not FPU + | ins_next1 + |.endif + | ins_arithcheck_int >5 + |.if "intins" == "smull" + | smull CARG1, RC, CARG3, CARG1 + | cmp RC, CARG1, asr #31 + | ins_arithfallback bne + |.elif "intins" == "vm_modi" + | movs CARG2, CARG3 + | ins_arithfallback beq + | bl ->vm_modi + | mvn CARG2, #~LJ_TISNUM + |.else + | intins CARG1, CARG1, CARG3 + | ins_arithfallback bvs + |.endif + |4: + |.if "intins" == "vm_modi" or FPU + | ins_next1 + |.endif + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + |5: // FP variant. 
+ | ins_arithpre_fpu d6, d7 + | ins_arithfallback ins_arithcheck_num + |.if FPU + |.if "intins" == "vm_modi" + | bl fpcall + |.else + | fpins d6, d6, d7 + |.endif + | ins_arithpost_fpu d6 + |.else + | bl fpcall + |.if "intins" ~= "vm_modi" + | ins_next1 + |.endif + | b <4 + |.endif + |.endmacro + | + |.macro ins_arithfp, fpins, fpcall + | ins_arithpre + |.if "fpins" ~= "extern" or HFABI + | ins_arithpre_fpu d0, d1 + |.endif + | ins_arithfallback ins_arithcheck_num + |.if "fpins" == "extern" + | .IOS mov RC, BASE + | bl fpcall + | .IOS mov BASE, RC + |.elif FPU + | fpins d0, d0, d1 + |.else + | bl fpcall + |.endif + |.if ("fpins" ~= "extern" or HFABI) and FPU + | ins_arithpost_fpu d0 + |.else + | ins_next1 + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + |.endif + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn adds, vadd.f64, extern __aeabi_dadd + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arithdn subs, vsub.f64, extern __aeabi_dsub + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithdn smull, vmul.f64, extern __aeabi_dmul + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp vdiv.f64, extern __aeabi_ddiv + break; + case BC_MODVN: case BC_MODNV: case BC_MODVV: + | ins_arithdn vm_modi, vm_mod, ->vm_mod + break; + case BC_POW: + | // NYI: (partial) integer arithmetic. + | ins_arithfp extern, extern pow + break; + + case BC_CAT: + | decode_RB8 RC, INS + | decode_RC8 RB, INS + | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!) + | sub CARG3, RB, RC + | str BASE, L->base + | add CARG2, BASE, RB + |->BC_CAT_Z: + | // RA = dst*8, RC = src_start*8, CARG2 = top-1 + | mov CARG1, L + | str PC, SAVE_PC + | lsr CARG3, CARG3, #3 + | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). 
+ | ldr BASE, L->base + | cmp CRET1, #0 + | bne ->vmeta_binop + | ldrd CARG34, [BASE, RC] + | ins_next1 + | ins_next2 + | strd CARG34, [BASE, RA] // Copy result to RA. + | ins_next3 + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RC = str_const (~) + | mvn RC, RC + | ins_next1 + | ldr CARG1, [KBASE, RC, lsl #2] + | mvn CARG2, #~LJ_TSTR + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + break; + case BC_KCDATA: + |.if FFI + | // RA = dst*8, RC = cdata_const (~) + | mvn RC, RC + | ins_next1 + | ldr CARG1, [KBASE, RC, lsl #2] + | mvn CARG2, #~LJ_TCDATA + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + |.endif + break; + case BC_KSHORT: + | // RA = dst*8, (RC = int16_literal) + | mov CARG1, INS, asr #16 // Refetch sign-extended reg. + | mvn CARG2, #~LJ_TISNUM + | ins_next1 + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + break; + case BC_KNUM: + | // RA = dst*8, RC = num_const + | lsl RC, RC, #3 + | ins_next1 + | ldrd CARG12, [KBASE, RC] + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + break; + case BC_KPRI: + | // RA = dst*8, RC = primitive_type (~) + | add RA, BASE, RA + | mvn RC, RC + | ins_next1 + | ins_next2 + | str RC, [RA, #4] + | ins_next3 + break; + case BC_KNIL: + | // RA = base*8, RC = end + | add RA, BASE, RA + | add RC, BASE, RC, lsl #3 + | mvn CARG1, #~LJ_TNIL + | str CARG1, [RA, #4] + | add RA, RA, #8 + |1: + | str CARG1, [RA, #4] + | cmp RA, RC + | add RA, RA, #8 + | blt <1 + | ins_next_ + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RC = uvnum + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsl RC, RC, #2 + | add RC, RC, #offsetof(GCfuncL, uvptr) + | ldr UPVAL:CARG2, [LFUNC:CARG2, RC] + | ldr CARG2, UPVAL:CARG2->v + | ldrd CARG34, [CARG2] + | ins_next1 + | ins_next2 + | strd CARG34, [BASE, RA] + | ins_next3 + break; + case BC_USETV: + | // RA = uvnum*8, RC = src + | 
ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | lsl RC, RC, #3 + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | ldrd CARG34, [BASE, RC] + | ldrb RB, UPVAL:CARG2->marked + | ldrb RC, UPVAL:CARG2->closed + | ldr CARG2, UPVAL:CARG2->v + | tst RB, #LJ_GC_BLACK // isblack(uv) + | add RB, CARG4, #-LJ_TISGCV + | cmpne RC, #0 + | strd CARG34, [CARG2] + | bne >2 // Upvalue is closed and black? + |1: + | ins_next + | + |2: // Check if new value is collectable. + | cmn RB, #-(LJ_TNUMX - LJ_TISGCV) + | ldrbhi RC, GCOBJ:CARG3->gch.marked + | bls <1 // tvisgcv(v) + | sub CARG1, DISPATCH, #-GG_DISP2G + | tst RC, #LJ_GC_WHITES + | // Crossed a write barrier. Move the barrier forward. + |.if IOS + | beq <1 + | mov RC, BASE + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | mov BASE, RC + |.else + | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) + |.endif + | b <1 + break; + case BC_USETS: + | // RA = uvnum*8, RC = str_const (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | mvn RC, RC + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | ldr STR:CARG3, [KBASE, RC, lsl #2] + | ldrb RB, UPVAL:CARG2->marked + | ldrb RC, UPVAL:CARG2->closed + | ldr CARG2, UPVAL:CARG2->v + | mvn CARG4, #~LJ_TSTR + | tst RB, #LJ_GC_BLACK // isblack(uv) + | ldrb RB, STR:CARG3->marked + | strd CARG34, [CARG2] + | bne >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | tst RB, #LJ_GC_WHITES // iswhite(str) + | cmpne RC, #0 + | sub CARG1, DISPATCH, #-GG_DISP2G + | // Crossed a write barrier. Move the barrier forward. 
+ |.if IOS + | beq <1 + | mov RC, BASE + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | mov BASE, RC + |.else + | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) + |.endif + | b <1 + break; + case BC_USETN: + | // RA = uvnum*8, RC = num_const + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | lsl RC, RC, #3 + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | ldrd CARG34, [KBASE, RC] + | ldr CARG2, UPVAL:CARG2->v + | ins_next1 + | ins_next2 + | strd CARG34, [CARG2] + | ins_next3 + break; + case BC_USETP: + | // RA = uvnum*8, RC = primitive_type (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | mvn RC, RC + | ldr CARG2, UPVAL:CARG2->v + | ins_next1 + | ins_next2 + | str RC, [CARG2, #4] + | ins_next3 + break; + + case BC_UCLO: + | // RA = level*8, RC = target + | ldr CARG3, L->openupval + | add RC, PC, RC, lsl #2 + | str BASE, L->base + | cmp CARG3, #0 + | sub PC, RC, #0x20000 + | beq >1 + | mov CARG1, L + | add CARG2, BASE, RA + | bl extern lj_func_closeuv // (lua_State *L, TValue *level) + | ldr BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RC = proto_const (~) (holding function prototype) + | mvn RC, RC + | str BASE, L->base + | ldr CARG2, [KBASE, RC, lsl #2] + | str PC, SAVE_PC + | ldr CARG3, [BASE, FRAME_FUNC] + | mov CARG1, L + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | bl extern lj_func_newL_gc + | // Returns GCfuncL *. 
+ | ldr BASE, L->base + | mvn CARG2, #~LJ_TFUNC + | ins_next1 + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RC = (hbits|asize) | tab_const (~) + if (op == BC_TDUP) { + | mvn RC, RC + } + | ldr CARG3, [DISPATCH, #DISPATCH_GL(gc.total)] + | ldr CARG4, [DISPATCH, #DISPATCH_GL(gc.threshold)] + | str BASE, L->base + | str PC, SAVE_PC + | cmp CARG3, CARG4 + | mov CARG1, L + | bhs >5 + |1: + if (op == BC_TNEW) { + | lsl CARG2, RC, #21 + | lsr CARG3, RC, #11 + | asr RC, CARG2, #21 + | lsr CARG2, CARG2, #21 + | cmn RC, #1 + | addeq CARG2, CARG2, #2 + | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) + | // Returns GCtab *. + } else { + | ldr CARG2, [KBASE, RC, lsl #2] + | bl extern lj_tab_dup // (lua_State *L, Table *kt) + | // Returns GCtab *. + } + | ldr BASE, L->base + | mvn CARG2, #~LJ_TTAB + | ins_next1 + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + |5: + | bl extern lj_gc_step_fixtop // (lua_State *L) + | mov CARG1, L + | b <1 + break; + + case BC_GGET: + | // RA = dst*8, RC = str_const (~) + case BC_GSET: + | // RA = src*8, RC = str_const (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | mvn RC, RC + | ldr TAB:CARG1, LFUNC:CARG2->env + | ldr STR:RC, [KBASE, RC, lsl #2] + if (op == BC_GGET) { + | b ->BC_TGETS_Z + } else { + | b ->BC_TSETS_Z + } + break; + + case BC_TGETV: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = dst*8, RB = table*8, RC = key*8 + | ldrd TAB:CARG12, [BASE, RB] + | ldrd CARG34, [BASE, RC] + | checktab CARG2, ->vmeta_tgetv // STALL: load CARG12. + | checktp CARG4, LJ_TISNUM // Integer key? + | ldreq CARG4, TAB:CARG1->array + | ldreq CARG2, TAB:CARG1->asize + | bne >9 + | + | add CARG4, CARG4, CARG3, lsl #3 + | cmp CARG3, CARG2 // In array part? + | ldrdlo CARG34, [CARG4] + | bhs ->vmeta_tgetv + | ins_next1 // Overwrites RB! 
+ | checktp CARG4, LJ_TNIL + | beq >5 + |1: + | ins_next2 + | strd CARG34, [BASE, RA] + | ins_next3 + | + |5: // Check for __index if table value is nil. + | ldr TAB:CARG2, TAB:CARG1->metatable + | cmp TAB:CARG2, #0 + | beq <1 // No metatable: done. + | ldrb CARG2, TAB:CARG2->nomm + | tst CARG2, #1<<MM_index + | bne <1 // 'no __index' flag set: done. + | decode_RB8 RB, INS // Restore RB. + | b ->vmeta_tgetv + | + |9: + | checktp CARG4, LJ_TSTR // String key? + | moveq STR:RC, CARG3 + | beq ->BC_TGETS_Z + | b ->vmeta_tgetv + break; + case BC_TGETS: + | decode_RB8 RB, INS + | and RC, RC, #255 + | // RA = dst*8, RB = table*8, RC = str_const (~) + | ldrd CARG12, [BASE, RB] + | mvn RC, RC + | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. + | checktab CARG2, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 + | ldr CARG3, TAB:CARG1->hmask + | ldr CARG4, STR:RC->sid + | ldr NODE:INS, TAB:CARG1->node + | mov TAB:RB, TAB:CARG1 + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask + | add CARG3, CARG3, CARG3, lsl #1 + | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 + |1: + | ldrd CARG12, NODE:INS->key // STALL: early NODE:INS. + | ldrd CARG34, NODE:INS->val + | ldr NODE:INS, NODE:INS->next + | checktp CARG2, LJ_TSTR + | cmpeq CARG1, STR:RC + | bne >4 + | checktp CARG4, LJ_TNIL + | beq >5 + |3: + | ins_next1 + | ins_next2 + | strd CARG34, [BASE, RA] + | ins_next3 + | + |4: // Follow hash chain. + | cmp NODE:INS, #0 + | bne <1 + | // End of hash chain: key not found, nil result. + | + |5: // Check for __index if table value is nil. + | ldr TAB:CARG1, TAB:RB->metatable + | mov CARG3, #0 // Optional clear of undef. value (during load stall). + | mvn CARG4, #~LJ_TNIL + | cmp TAB:CARG1, #0 + | beq <3 // No metatable: done. + | ldrb CARG2, TAB:CARG1->nomm + | tst CARG2, #1<<MM_index + | bne <3 // 'no __index' flag set: done. 
+ | b ->vmeta_tgets + break; + case BC_TGETB: + | decode_RB8 RB, INS + | and RC, RC, #255 + | // RA = dst*8, RB = table*8, RC = index + | ldrd CARG12, [BASE, RB] + | checktab CARG2, ->vmeta_tgetb // STALL: load CARG12. + | ldr CARG3, TAB:CARG1->asize + | ldr CARG4, TAB:CARG1->array + | lsl CARG2, RC, #3 + | cmp RC, CARG3 + | ldrdlo CARG34, [CARG4, CARG2] + | bhs ->vmeta_tgetb + | ins_next1 // Overwrites RB! + | checktp CARG4, LJ_TNIL + | beq >5 + |1: + | ins_next2 + | strd CARG34, [BASE, RA] + | ins_next3 + | + |5: // Check for __index if table value is nil. + | ldr TAB:CARG2, TAB:CARG1->metatable + | cmp TAB:CARG2, #0 + | beq <1 // No metatable: done. + | ldrb CARG2, TAB:CARG2->nomm + | tst CARG2, #1<<MM_index + | bne <1 // 'no __index' flag set: done. + | b ->vmeta_tgetb + break; + case BC_TGETR: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = dst*8, RB = table*8, RC = key*8 + | ldr TAB:CARG1, [BASE, RB] + | ldr CARG2, [BASE, RC] + | ldr CARG4, TAB:CARG1->array + | ldr CARG3, TAB:CARG1->asize + | add CARG4, CARG4, CARG2, lsl #3 + | cmp CARG2, CARG3 // In array part? + | bhs ->vmeta_tgetr + | ldrd CARG12, [CARG4] + |->BC_TGETR_Z: + | ins_next1 + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + break; + + case BC_TSETV: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = src*8, RB = table*8, RC = key*8 + | ldrd TAB:CARG12, [BASE, RB] + | ldrd CARG34, [BASE, RC] + | checktab CARG2, ->vmeta_tsetv // STALL: load CARG12. + | checktp CARG4, LJ_TISNUM // Integer key? + | ldreq CARG2, TAB:CARG1->array + | ldreq CARG4, TAB:CARG1->asize + | bne >9 + | + | add CARG2, CARG2, CARG3, lsl #3 + | cmp CARG3, CARG4 // In array part? + | ldrlo INS, [CARG2, #4] + | bhs ->vmeta_tsetv + | ins_next1 // Overwrites RB! 
+ | checktp INS, LJ_TNIL + | ldrb INS, TAB:CARG1->marked + | ldrd CARG34, [BASE, RA] + | beq >5 + |1: + | tst INS, #LJ_GC_BLACK // isblack(table) + | strd CARG34, [CARG2] + | bne >7 + |2: + | ins_next2 + | ins_next3 + | + |5: // Check for __newindex if previous value is nil. + | ldr TAB:RA, TAB:CARG1->metatable + | cmp TAB:RA, #0 + | beq <1 // No metatable: done. + | ldrb RA, TAB:RA->nomm + | tst RA, #1<<MM_newindex + | bne <1 // 'no __newindex' flag set: done. + | ldr INS, [PC, #-4] // Restore RA and RB. + | decode_RB8 RB, INS + | decode_RA8 RA, INS + | b ->vmeta_tsetv + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG1, INS, CARG3 + | b <2 + | + |9: + | checktp CARG4, LJ_TSTR // String key? + | moveq STR:RC, CARG3 + | beq ->BC_TSETS_Z + | b ->vmeta_tsetv + break; + case BC_TSETS: + | decode_RB8 RB, INS + | and RC, RC, #255 + | // RA = src*8, RB = table*8, RC = str_const (~) + | ldrd CARG12, [BASE, RB] + | mvn RC, RC + | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC. + | checktab CARG2, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = src*8 + | ldr CARG3, TAB:CARG1->hmask + | ldr CARG4, STR:RC->sid + | ldr NODE:INS, TAB:CARG1->node + | mov TAB:RB, TAB:CARG1 + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask + | add CARG3, CARG3, CARG3, lsl #1 + | mov CARG4, #0 + | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 + | strb CARG4, TAB:RB->nomm // Clear metamethod cache. + |1: + | ldrd CARG12, NODE:INS->key + | ldr CARG4, NODE:INS->val.it + | ldr NODE:CARG3, NODE:INS->next + | checktp CARG2, LJ_TSTR + | cmpeq CARG1, STR:RC + | bne >5 + | ldrb CARG2, TAB:RB->marked + | checktp CARG4, LJ_TNIL // Key found, but nil value? + | ldrd CARG34, [BASE, RA] + | beq >4 + |2: + | tst CARG2, #LJ_GC_BLACK // isblack(table) + | strd CARG34, NODE:INS->val + | bne >7 + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. 
+ | ldr TAB:CARG1, TAB:RB->metatable + | cmp TAB:CARG1, #0 + | beq <2 // No metatable: done. + | ldrb CARG1, TAB:CARG1->nomm + | tst CARG1, #1<<MM_newindex + | bne <2 // 'no __newindex' flag set: done. + | b ->vmeta_tsets + | + |5: // Follow hash chain. + | movs NODE:INS, NODE:CARG3 + | bne <1 + | // End of hash chain: key not found, add a new one. + | + | // But check for __newindex first. + | ldr TAB:CARG1, TAB:RB->metatable + | mov CARG3, TMPDp + | str PC, SAVE_PC + | cmp TAB:CARG1, #0 // No metatable: continue. + | str BASE, L->base + | ldrbne CARG2, TAB:CARG1->nomm + | mov CARG1, L + | beq >6 + | tst CARG2, #1<<MM_newindex + | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | mvn CARG4, #~LJ_TSTR + | str STR:RC, TMPDlo + | mov CARG2, TAB:RB + | str CARG4, TMPDhi + | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Returns TValue *. + | ldr BASE, L->base + | ldrd CARG34, [BASE, RA] + | strd CARG34, [CRET1] + | b <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, CARG2, CARG3 + | b <3 + break; + case BC_TSETB: + | decode_RB8 RB, INS + | and RC, RC, #255 + | // RA = src*8, RB = table*8, RC = index + | ldrd CARG12, [BASE, RB] + | checktab CARG2, ->vmeta_tsetb // STALL: load CARG12. + | ldr CARG3, TAB:CARG1->asize + | ldr RB, TAB:CARG1->array + | lsl CARG2, RC, #3 + | cmp RC, CARG3 + | ldrdlo CARG34, [CARG2, RB]! + | bhs ->vmeta_tsetb + | ins_next1 // Overwrites RB! + | checktp CARG4, LJ_TNIL + | ldrb INS, TAB:CARG1->marked + | ldrd CARG34, [BASE, RA] + | beq >5 + |1: + | tst INS, #LJ_GC_BLACK // isblack(table) + | strd CARG34, [CARG2] + | bne >7 + |2: + | ins_next2 + | ins_next3 + | + |5: // Check for __newindex if previous value is nil. + | ldr TAB:RA, TAB:CARG1->metatable + | cmp TAB:RA, #0 + | beq <1 // No metatable: done. + | ldrb RA, TAB:RA->nomm + | tst RA, #1<<MM_newindex + | bne <1 // 'no __newindex' flag set: done. 
+ | ldr INS, [PC, #-4] // Restore INS. + | decode_RA8 RA, INS + | b ->vmeta_tsetb + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG1, INS, CARG3 + | b <2 + break; + case BC_TSETR: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = src*8, RB = table*8, RC = key*8 + | ldr TAB:CARG2, [BASE, RB] + | ldr CARG3, [BASE, RC] + | ldrb INS, TAB:CARG2->marked + | ldr CARG1, TAB:CARG2->array + | ldr CARG4, TAB:CARG2->asize + | tst INS, #LJ_GC_BLACK // isblack(table) + | add CARG1, CARG1, CARG3, lsl #3 + | bne >7 + |2: + | cmp CARG3, CARG4 // In array part? + | bhs ->vmeta_tsetr + |->BC_TSETR_Z: + | ldrd CARG34, [BASE, RA] + | ins_next1 + | ins_next2 + | strd CARG34, [CARG1] + | ins_next3 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, INS, RB + | b <2 + break; + + case BC_TSETM: + | // RA = base*8 (table at base-1), RC = num_const (start index) + | add RA, BASE, RA + |1: + | ldr RB, SAVE_MULTRES + | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. + | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. + | subs RB, RB, #8 + | ldr CARG4, TAB:CARG2->asize + | beq >4 // Nothing to copy? + | add CARG3, CARG1, RB, lsr #3 + | cmp CARG3, CARG4 + | ldr CARG4, TAB:CARG2->array + | add RB, RA, RB + | bhi >5 + | add INS, CARG4, CARG1, lsl #3 + | ldrb CARG1, TAB:CARG2->marked + |3: // Copy result slots to table. + | ldrd CARG34, [RA], #8 + | strd CARG34, [INS], #8 + | cmp RA, RB + | blo <3 + | tst CARG1, #LJ_GC_BLACK // isblack(table) + | bne >7 + |4: + | ins_next + | + |5: // Need to resize array part. + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + | // Must not reallocate the stack. + | .IOS ldr BASE, L->base + | b <1 + | + |7: // Possible table write barrier for any value. Skip valiswhite check. 
+ | barrierback TAB:CARG2, CARG1, CARG3 + | b <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = nresults+1,) RC = extra_nargs + | ldr CARG1, SAVE_MULTRES + | decode_RC8 NARGS8:RC, INS + | add NARGS8:RC, NARGS8:RC, CARG1 + | b ->BC_CALL_Z + break; + case BC_CALL: + | decode_RC8 NARGS8:RC, INS + | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8 + |->BC_CALL_Z: + | mov RB, BASE // Save old BASE for vmeta_call. + | ldrd CARG34, [BASE, RA]! + | sub NARGS8:RC, NARGS8:RC, #8 + | add BASE, BASE, #8 + | checkfunc CARG4, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs + | ldr CARG1, SAVE_MULTRES + | add NARGS8:RC, CARG1, RC, lsl #3 + | b ->BC_CALLT1_Z + break; + case BC_CALLT: + | lsl NARGS8:RC, RC, #3 + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + |->BC_CALLT1_Z: + | ldrd LFUNC:CARG34, [RA, BASE]! + | sub NARGS8:RC, NARGS8:RC, #8 + | add RA, RA, #8 + | checkfunc CARG4, ->vmeta_callt + | ldr PC, [BASE, FRAME_PC] + |->BC_CALLT2_Z: + | mov RB, #0 + | ldrb CARG4, LFUNC:CARG3->ffid + | tst PC, #FRAME_TYPE + | bne >7 + |1: + | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC. + | cmp NARGS8:RC, #0 + | beq >3 + |2: + | ldrd CARG12, [RA, RB] + | add INS, RB, #8 + | cmp INS, NARGS8:RC + | strd CARG12, [BASE, RB] + | mov RB, INS + | bne <2 + |3: + | cmp CARG4, #1 // (> FF_C) Calling a fast function? + | bhi >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | ldr INS, [PC, #-4] + | decode_RA8 RA, INS + | sub CARG1, BASE, RA + | ldr LFUNC:CARG1, [CARG1, #-16] + | ldr CARG1, LFUNC:CARG1->field_pc + | ldr KBASE, [CARG1, #PC2PROTO(k)] + | b <4 + | + |7: // Tailcall from a vararg function. + | eor PC, PC, #FRAME_VARG + | tst PC, #FRAME_TYPEP // Vararg frame below? + | movne CARG4, #0 // Clear ffid if no Lua function below. 
+ | bne <1 + | sub BASE, BASE, PC + | ldr PC, [BASE, FRAME_PC] + | tst PC, #FRAME_TYPE + | movne CARG4, #0 // Clear ffid if no Lua function below. + | b <1 + break; + + case BC_ITERC: + | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA + | mov RB, BASE // Save old BASE for vmeta_call. + | ldrd CARG34, [RA, #-16] + | ldrd CARG12, [RA, #-8] + | add BASE, RA, #8 + | strd CARG34, [RA, #8] // Copy state. + | strd CARG12, [RA, #16] // Copy control var. + | // STALL: locked CARG34. + | ldrd LFUNC:CARG34, [RA, #-24] + | mov NARGS8:RC, #16 // Iterators get 2 arguments. + | // STALL: load CARG34. + | strd LFUNC:CARG34, [RA] // Copy callable. + | checkfunc CARG4, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + |.if JIT + | hotloop + |.endif + |->vm_IITERN: + | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA + | ldr TAB:RB, [RA, #-16] + | ldr CARG1, [RA, #-8] // Get index from control var. + | ldr INS, TAB:RB->asize + | ldr CARG2, TAB:RB->array + | add PC, PC, #4 + |1: // Traverse array part. + | subs RC, CARG1, INS + | add CARG3, CARG2, CARG1, lsl #3 + | bhs >5 // Index points after array part? + | ldrd CARG34, [CARG3] + | checktp CARG4, LJ_TNIL + | addeq CARG1, CARG1, #1 // Skip holes in array part. + | beq <1 + | ldrh RC, [PC, #-2] + | mvn CARG2, #~LJ_TISNUM + | strd CARG34, [RA, #8] + | add RC, PC, RC, lsl #2 + | add RB, CARG1, #1 + | strd CARG12, [RA] + | sub PC, RC, #0x20000 + | str RB, [RA, #-8] // Update control var. + |3: + | ins_next + | + |5: // Traverse hash part. + | ldr CARG4, TAB:RB->hmask + | ldr NODE:RB, TAB:RB->node + |6: + | add CARG1, RC, RC, lsl #1 + | cmp RC, CARG4 // End of iteration? Branch to ITERL+1. + | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 + | bhi <3 + | ldrd CARG12, NODE:CARG3->val + | checktp CARG2, LJ_TNIL + | add RC, RC, #1 + | beq <6 // Skip holes in hash part. 
+ | ldrh RB, [PC, #-2] + | add RC, RC, INS + | ldrd CARG34, NODE:CARG3->key + | str RC, [RA, #-8] // Update control var. + | strd CARG12, [RA, #8] + | add RC, PC, RB, lsl #2 + | sub PC, RC, #0x20000 + | strd CARG34, [RA] + | b <3 + break; + + case BC_ISNEXT: + | // RA = base*8, RC = target (points to ITERN) + | add RA, BASE, RA + | add RC, PC, RC, lsl #2 + | ldrd CFUNC:CARG12, [RA, #-24] + | ldr CARG3, [RA, #-12] + | ldr CARG4, [RA, #-4] + | checktp CARG2, LJ_TFUNC + | ldrbeq CARG1, CFUNC:CARG1->ffid + | checktpeq CARG3, LJ_TTAB + | checktpeq CARG4, LJ_TNIL + | cmpeq CARG1, #FF_next_N + | subeq PC, RC, #0x20000 + | bne >5 + | ins_next1 + | ins_next2 + | mov CARG1, #0 + | mvn CARG2, #~LJ_KEYINDEX + | strd CARG1, [RA, #-8] // Initialize control var. + |1: + | ins_next3 + |5: // Despecialize bytecode if any of the checks fail. + | mov CARG1, #BC_JMP + | mov OP, #BC_ITERC + | strb CARG1, [PC, #-4] + | sub PC, RC, #0x20000 + |.if JIT + | ldrb CARG1, [PC] + | cmp CARG1, #BC_ITERN + | bne >6 + |.endif + | strb OP, [PC] // Subsumes ins_next1. + | ins_next2 + | b <1 + |.if JIT + |6: // Unpatch JLOOP. + | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + | ldrh CARG2, [PC, #2] + | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] + | // Subsumes ins_next1 and ins_next2. + | ldr INS, TRACE:CARG1->startins + | bfi INS, OP, #0, #8 + | str INS, [PC], #4 + | b <1 + |.endif + break; + + case BC_VARG: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ldr CARG1, [BASE, FRAME_PC] + | add RC, BASE, RC + | add RA, BASE, RA + | add RC, RC, #FRAME_VARG + | add CARG4, RA, RB + | sub CARG3, BASE, #8 // CARG3 = vtop + | sub RC, RC, CARG1 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | cmp RB, #0 + | sub CARG1, CARG3, RC + | beq >5 // Copy all varargs? + | sub CARG4, CARG4, #16 + |1: // Copy vararg slots to destination slots. 
+ | cmp RC, CARG3 + | ldrdlo CARG12, [RC], #8 + | mvnhs CARG2, #~LJ_TNIL + | cmp RA, CARG4 + | strd CARG12, [RA], #8 + | blo <1 + |2: + | ins_next + | + |5: // Copy all varargs. + | ldr CARG4, L->maxstack + | cmp CARG1, #0 + | movle RB, #8 // MULTRES = (0+1)*8 + | addgt RB, CARG1, #8 + | add CARG2, RA, CARG1 + | str RB, SAVE_MULTRES + | ble <2 + | cmp CARG2, CARG4 + | bhi >7 + |6: + | ldrd CARG12, [RC], #8 + | strd CARG12, [RA], #8 + | cmp RC, CARG3 + | blo <6 + | b <2 + | + |7: // Grow stack for varargs. + | lsr CARG2, CARG1, #3 + | str RA, L->top + | mov CARG1, L + | str BASE, L->base + | sub RC, RC, BASE // Need delta, because BASE may change. + | str PC, SAVE_PC + | sub RA, RA, BASE + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->base + | add RA, BASE, RA + | add RC, BASE, RC + | sub CARG3, BASE, #8 + | b <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RC = extra results + | ldr CARG1, SAVE_MULTRES + | ldr PC, [BASE, FRAME_PC] + | add RA, BASE, RA + | add RC, CARG1, RC, lsl #3 + | b ->BC_RETM_Z + break; + + case BC_RET: + | // RA = results*8, RC = nresults+1 + | ldr PC, [BASE, FRAME_PC] + | lsl RC, RC, #3 + | add RA, BASE, RA + |->BC_RETM_Z: + | str RC, SAVE_MULTRES + |1: + | ands CARG1, PC, #FRAME_TYPE + | eor CARG2, PC, #FRAME_VARG + | bne ->BC_RETV2_Z + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return + | ldr INS, [PC, #-4] + | subs CARG4, RC, #8 + | sub CARG3, BASE, #8 + | beq >3 + |2: + | ldrd CARG12, [RA], #8 + | add BASE, BASE, #8 + | subs CARG4, CARG4, #8 + | strd CARG12, [BASE, #-16] + | bne <2 + |3: + | decode_RA8 RA, INS + | sub CARG4, CARG3, RA + | decode_RB8 RB, INS + | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] + |5: + | cmp RB, RC // More results expected? 
+ | bhi >6 + | mov BASE, CARG4 + | ldr CARG2, LFUNC:CARG1->field_pc + | ins_next1 + | ins_next2 + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | ins_next3 + | + |6: // Fill up results with nil. + | mvn CARG2, #~LJ_TNIL + | add BASE, BASE, #8 + | add RC, RC, #8 + | str CARG2, [BASE, #-12] + | b <5 + | + |->BC_RETV1_Z: // Non-standard return case. + | add RA, BASE, RA + |->BC_RETV2_Z: + | tst CARG2, #FRAME_TYPEP + | bne ->vm_return + | // Return from vararg function: relocate BASE down. + | sub BASE, BASE, CARG2 + | ldr PC, [BASE, FRAME_PC] + | b <1 + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RC = nresults+1 + | ldr PC, [BASE, FRAME_PC] + | lsl RC, RC, #3 + | str RC, SAVE_MULTRES + | ands CARG1, PC, #FRAME_TYPE + | eor CARG2, PC, #FRAME_VARG + | ldreq INS, [PC, #-4] + | bne ->BC_RETV1_Z + if (op == BC_RET1) { + | ldrd CARG12, [BASE, RA] + } + | sub CARG4, BASE, #8 + | decode_RA8 RA, INS + if (op == BC_RET1) { + | strd CARG12, [CARG4] + } + | sub BASE, CARG4, RA + | decode_RB8 RB, INS + | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] + |5: + | cmp RB, RC + | bhi >6 + | ldr CARG2, LFUNC:CARG1->field_pc + | ins_next1 + | ins_next2 + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | ins_next3 + | + |6: // Fill up results with nil. + | sub CARG2, CARG4, #4 + | mvn CARG3, #~LJ_TNIL + | str CARG3, [CARG2, RC] + | add RC, RC, #8 + | b <5 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] + |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] + |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] + |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. 
+ break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RC = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | ldrd CARG12, [RA, BASE]! + if (op != BC_JFORL) { + | add RC, PC, RC, lsl #2 + } + if (!vk) { + | ldrd CARG34, FOR_STOP + | checktp CARG2, LJ_TISNUM + | ldr RB, FOR_TSTEP + | bne >5 + | checktp CARG4, LJ_TISNUM + | ldr CARG4, FOR_STEP + | checktpeq RB, LJ_TISNUM + | bne ->vmeta_for + | cmp CARG4, #0 + | blt >4 + | cmp CARG1, CARG3 + } else { + | ldrd CARG34, FOR_STEP + | checktp CARG2, LJ_TISNUM + | bne >5 + | adds CARG1, CARG1, CARG3 + | ldr CARG4, FOR_STOP + if (op == BC_IFORL) { + | addvs RC, PC, #0x20000 // Overflow: prevent branch. + } else { + | bvs >2 // Overflow: do not enter mcode. + } + | cmp CARG3, #0 + | blt >4 + | cmp CARG1, CARG4 + } + |1: + if (op == BC_FORI) { + | subgt PC, RC, #0x20000 + } else if (op == BC_JFORI) { + | sub PC, RC, #0x20000 + | ldrhle RC, [PC, #-2] + } else if (op == BC_IFORL) { + | suble PC, RC, #0x20000 + } + if (vk) { + | strd CARG12, FOR_IDX + } + |2: + | ins_next1 + | ins_next2 + | strd CARG12, FOR_EXT + if (op == BC_JFORI || op == BC_JFORL) { + | ble =>BC_JLOOP + } + |3: + | ins_next3 + | + |4: // Invert check for negative step. + if (!vk) { + | cmp CARG3, CARG1 + } else { + | cmp CARG4, CARG1 + } + | b <1 + | + |5: // FP loop. 
+ if (!vk) { + | cmnlo CARG4, #-LJ_TISNUM + | cmnlo RB, #-LJ_TISNUM + | bhs ->vmeta_for + |.if FPU + | vldr d0, FOR_IDX + | vldr d1, FOR_STOP + | cmp RB, #0 + | vstr d0, FOR_EXT + |.else + | cmp RB, #0 + | strd CARG12, FOR_EXT + | blt >8 + |.endif + } else { + |.if FPU + | vldr d0, FOR_IDX + | vldr d2, FOR_STEP + | vldr d1, FOR_STOP + | cmp CARG4, #0 + | vadd.f64 d0, d0, d2 + |.else + | cmp CARG4, #0 + | blt >8 + | bl extern __aeabi_dadd + | strd CARG12, FOR_IDX + | ldrd CARG34, FOR_STOP + | strd CARG12, FOR_EXT + |.endif + } + |6: + |.if FPU + | vcmpge.f64 d0, d1 + | vcmplt.f64 d1, d0 + | vmrs + |.else + | bl extern __aeabi_cdcmple + |.endif + if (vk) { + |.if FPU + | vstr d0, FOR_IDX + | vstr d0, FOR_EXT + |.endif + } + if (op == BC_FORI) { + | subhi PC, RC, #0x20000 + } else if (op == BC_JFORI) { + | sub PC, RC, #0x20000 + | ldrhls RC, [PC, #-2] + | bls =>BC_JLOOP + } else if (op == BC_IFORL) { + | subls PC, RC, #0x20000 + } else { + | bls =>BC_JLOOP + } + | ins_next1 + | ins_next2 + | b <3 + | + |.if not FPU + |8: // Invert check for negative step. + if (vk) { + | bl extern __aeabi_dadd + | strd CARG12, FOR_IDX + | strd CARG12, FOR_EXT + } + | mov CARG3, CARG1 + | mov CARG4, CARG2 + | ldrd CARG12, FOR_STOP + | b <6 + |.endif + break; + + case BC_ITERL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RC = target + | ldrd CARG12, [RA, BASE]! + if (op == BC_JITERL) { + | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. + | strdne CARG12, [RA, #-8] + | bne =>BC_JLOOP + } else { + | add RC, PC, RC, lsl #2 + | // STALL: load CARG12. + | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. + | subne PC, RC, #0x20000 // Otherwise save control var + branch. 
+ | strdne CARG12, [RA, #-8] + } + | ins_next + break; + + case BC_LOOP: + | // RA = base*8, RC = target (loop extent) + | // Note: RA/RC is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, its only purpose is to detect a hot loop. + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_ILOOP follows. + break; + + case BC_ILOOP: + | // RA = base*8, RC = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + |.if JIT + | // RA = base (ignored), RC = traceno + | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0. + | ldr TRACE:RC, [CARG1, RC, lsl #2] + | st_vmstate CARG2 + | ldr RA, TRACE:RC->mcode + | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] + | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)] + | bx RA + |.endif + break; + + case BC_JMP: + | // RA = base*8 (only used by trace recorder), RC = target + | add RC, PC, RC, lsl #2 + | sub PC, RC, #0x20000 + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + |.if JIT + | hotcall + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 + | ldr CARG1, L->maxstack + | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)] + | ldr KBASE, [PC, #-4+PC2PROTO(k)] + | cmp RA, CARG1 + | bhi ->vm_growstack_l + if (op != BC_JFUNCF) { + | ins_next1 + | ins_next2 + } + |2: + | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters. + | mvn CARG4, #~LJ_TNIL + | blo >3 + if (op == BC_JFUNCF) { + | decode_RD RC, INS + | b =>BC_JLOOP + } else { + | ins_next3 + } + | + |3: // Clear missing parameters. 
+ | strd CARG34, [BASE, NARGS8:RC] + | add NARGS8:RC, NARGS8:RC, #8 + | b <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 + | ldr CARG1, L->maxstack + | add CARG4, BASE, RC + | add RA, RA, RC + | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC. + | add CARG2, RC, #8+FRAME_VARG + | ldr KBASE, [PC, #-4+PC2PROTO(k)] + | cmp RA, CARG1 + | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG. + | bhs ->vm_growstack_l + | ldrb RB, [PC, #-4+PC2PROTO(numparams)] + | mov RA, BASE + | mov RC, CARG4 + | cmp RB, #0 + | add BASE, CARG4, #8 + | beq >3 + | mvn CARG3, #~LJ_TNIL + |1: + | cmp RA, RC // Less args than parameters? + | ldrdlo CARG12, [RA], #8 + | movhs CARG2, CARG3 + | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC). + |2: + | subs RB, RB, #1 + | strd CARG12, [CARG4, #8]! + | bne <1 + |3: + | ins_next + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ldr CARG4, CFUNC:CARG3->f + } else { + | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)] + } + | add CARG2, RA, NARGS8:RC + | ldr CARG1, L->maxstack + | add RC, BASE, NARGS8:RC + | str BASE, L->base + | cmp CARG2, CARG1 + | str RC, L->top + if (op == BC_FUNCCW) { + | ldr CARG2, CFUNC:CARG3->f + } + | mv_vmstate CARG3, C + | mov CARG1, L + | bhi ->vm_growstack_c // Need to grow stack. + | st_vmstate CARG3 + | blx CARG4 // (lua_State *L [, lua_CFunction f]) + | // Returns nresults. 
+ | ldr BASE, L->base + | mv_vmstate CARG3, INTERP + | ldr CRET2, L->top + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | lsl RC, CRET1, #3 + | st_vmstate CARG3 + | ldr PC, [BASE, FRAME_PC] + | sub RA, CRET2, RC // RA = L->top - nresults*8 + | b ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +/* Emit the whole VM: the subroutines first, then one build_ins() expansion per bytecode opcode (0 .. BC__MAX-1) in the code_op section. Returns BC__MAX. */ static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); /* Room for BC__MAX DynASM pc labels; build_ins() branches to them by opcode, e.g. =>BC_JLOOP. */ + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); /* The opcode is passed twice: typed as BCOp and as a plain int. */ + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. Only BUILD_elfasm mode writes anything: a .debug_frame section with one CIE, an FDE for the code from .Lbegin up to lj_vm_ffi_call and, when LJ_HASFFI is set, a second FDE from lj_vm_ffi_call to the end of the code. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of the vm_ffi_call global from ctx->code; it is the length of FDE 0 and the start of FDE 1. */ + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); + fprintf(ctx->fp, /* CIE: id 0xffffffff, version 1, empty augmentation string, code alignment 1, data alignment -4. */ + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 0xe\n" /* Return address is in lr. */ + "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */ + "\t.align 2\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, /* FDE 0: fcofs bytes starting at .Lbegin. The CFA offset is CFRAME_SIZE; the uleb128 save-slot numbers below are scaled by the CIE data alignment of -4. */ + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.long .Lbegin\n" + "\t.long %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ + fcofs, CFRAME_SIZE); + for (i = 11; i >= (LJ_ARCH_HASFPU ? 
5 : 4); i--) /* offset r4-r11: opcode 0x80+i, slot 2 for r11 counting up to slot 9 for r4. With LJ_ARCH_HASFPU the loop stops at r5 (slot 8) and r4 is emitted after d8-d15 below. */ + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); +#if LJ_ARCH_HASFPU + for (i = 15; i >= 8; i--) /* offset d8-d15: opcode 5 (DW_CFA_offset_extended in DWARF) takes an explicit register number, here 64+2*i; slots run from 10 (d15) to 24 (d8), two slots per register. NOTE(review): 64+2*i presumably follows the legacy VFP numbering -- confirm against the ARM DWARF ABI. */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", + 64+2*i, 10+2*(15-i)); + fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ +#endif + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE0:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, /* FDE 1: lj_vm_ffi_call through the end of the code (codesz - fcofs bytes); after the register saves the CFA is re-based on r11. */ + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.long lj_vm_ffi_call\n" + "\t.long %d\n" + "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ + "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */ + "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */ + "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */ + "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */ + "\t.align 2\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif + break; + default: /* No frame info is emitted for any other build mode. */ + break; + } +} + |