summaryrefslogtreecommitdiffstats
path: root/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp')
-rw-r--r--fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp9508
1 files changed, 9508 insertions, 0 deletions
diff --git a/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp b/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
new file mode 100644
index 000000000..e28acf98a
--- /dev/null
+++ b/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/fast-jit/cg/x86-64/jit_codegen_x86_64.cpp
@@ -0,0 +1,9508 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "jit_codegen.h"
+#include "jit_codecache.h"
+#include "jit_compiler.h"
+#include "jit_frontend.h"
+#include "jit_dump.h"
+
+#include <asmjit/core.h>
+#include <asmjit/x86.h>
+#if WASM_ENABLE_FAST_JIT_DUMP != 0
+#include <Zydis/Zydis.h>
+#endif
+
+#define CODEGEN_CHECK_ARGS 1
+#define CODEGEN_DUMP 0
+
+using namespace asmjit;
+
+/* Addresses of glue code blocks, all NULL until assigned (the assignment is
+   not in this part of the file). code_block_switch_to_jitted_from_interp is
+   the one invoked by jit_codegen_interp_jitted_glue(); the purpose of the
+   other two is presumably what their names say -- TODO confirm. */
+static char *code_block_switch_to_jitted_from_interp = NULL;
+static char *code_block_return_to_interp_from_jitted = NULL;
+#if WASM_ENABLE_LAZY_JIT != 0
+static char *code_block_compile_fast_jit_and_then_call = NULL;
+#endif
+
+/* Indexes into regs_i8[]; the enumerator order matches the first seven
+   entries of that table (bpl, al, bl, cl, dl, dil, sil). */
+typedef enum {
+ REG_BPL_IDX = 0,
+ REG_AXL_IDX,
+ REG_BXL_IDX,
+ REG_CXL_IDX,
+ REG_DXL_IDX,
+ REG_DIL_IDX,
+ REG_SIL_IDX,
+ /* Alias of REG_SIL_IDX: the 8-bit register used as scratch by
+    mov_imm_to_free_reg() */
+ REG_I8_FREE_IDX = REG_SIL_IDX
+} RegIndexI8;
+
+/* Indexes into regs_i16[]; the enumerator order matches the first seven
+   entries of that table (bp, ax, bx, cx, dx, di, si). */
+typedef enum {
+ REG_BP_IDX = 0,
+ REG_AX_IDX,
+ REG_BX_IDX,
+ REG_CX_IDX,
+ REG_DX_IDX,
+ REG_DI_IDX,
+ REG_SI_IDX,
+ /* Alias of REG_SI_IDX: the 16-bit register used as scratch by
+    mov_imm_to_free_reg() */
+ REG_I16_FREE_IDX = REG_SI_IDX
+} RegIndexI16;
+
+/* Indexes into regs_i32[]; the enumerator order matches the first seven
+   entries of that table (ebp, eax, ebx, ecx, edx, edi, esi). */
+typedef enum {
+ REG_EBP_IDX = 0,
+ REG_EAX_IDX,
+ REG_EBX_IDX,
+ REG_ECX_IDX,
+ REG_EDX_IDX,
+ REG_EDI_IDX,
+ REG_ESI_IDX,
+ /* Alias of REG_ESI_IDX: the 32-bit register used as scratch by
+    mov_imm_to_free_reg() */
+ REG_I32_FREE_IDX = REG_ESI_IDX
+} RegIndexI32;
+
+/* Indexes into regs_i64[]; the enumerator order matches that table entry
+   for entry (rbp, rax, rbx, rcx, rdx, rdi, rsi, rsp, r8 .. r15). */
+typedef enum {
+ REG_RBP_IDX = 0,
+ REG_RAX_IDX,
+ REG_RBX_IDX,
+ REG_RCX_IDX,
+ REG_RDX_IDX,
+ REG_RDI_IDX,
+ REG_RSI_IDX,
+ REG_RSP_IDX,
+ REG_R8_IDX,
+ REG_R9_IDX,
+ REG_R10_IDX,
+ REG_R11_IDX,
+ REG_R12_IDX,
+ REG_R13_IDX,
+ REG_R14_IDX,
+ REG_R15_IDX,
+ /* Alias of REG_RSI_IDX: the 64-bit register used as scratch, e.g. by
+    mov_imm_to_m() when a 64-bit immediate does not fit in 32 bits */
+ REG_I64_FREE_IDX = REG_RSI_IDX
+} RegIndexI64;
+
+/* clang-format off */
+/* 8-bit views of the general-purpose registers. All four integer tables
+   below list the same 16 registers in the same order, so one register
+   number selects the same physical register at any width. */
+x86::Gp regs_i8[] = {
+ x86::bpl, x86::al, x86::bl, x86::cl,
+ x86::dl, x86::dil, x86::sil, x86::spl,
+ x86::r8b, x86::r9b, x86::r10b, x86::r11b,
+ x86::r12b, x86::r13b, x86::r14b, x86::r15b
+};
+
+/* 16-bit views, same order as regs_i8[] */
+x86::Gp regs_i16[] = {
+ x86::bp, x86::ax, x86::bx, x86::cx,
+ x86::dx, x86::di, x86::si, x86::sp,
+ x86::r8w, x86::r9w, x86::r10w, x86::r11w,
+ x86::r12w, x86::r13w, x86::r14w, x86::r15w
+};
+
+/* 32-bit views, same order as regs_i8[] */
+x86::Gp regs_i32[] = {
+ x86::ebp, x86::eax, x86::ebx, x86::ecx,
+ x86::edx, x86::edi, x86::esi, x86::esp,
+ x86::r8d, x86::r9d, x86::r10d, x86::r11d,
+ x86::r12d, x86::r13d, x86::r14d, x86::r15d
+};
+
+/* 64-bit views, same order as regs_i8[]; indexed by RegIndexI64 */
+x86::Gp regs_i64[] = {
+ x86::rbp, x86::rax, x86::rbx, x86::rcx,
+ x86::rdx, x86::rdi, x86::rsi, x86::rsp,
+ x86::r8, x86::r9, x86::r10, x86::r11,
+ x86::r12, x86::r13, x86::r14, x86::r15,
+};
+
+/* Index 15 of regs_float[] (xmm15); presumably the scratch register for
+   F32/F64 operations -- the uses are outside this part of the file */
+#define REG_F32_FREE_IDX 15
+#define REG_F64_FREE_IDX 15
+
+/* XMM registers, shared by the F32 and F64 kinds (movss/movsd both index
+   this table); entry i is xmm<i> */
+x86::Xmm regs_float[] = {
+ x86::xmm0,
+ x86::xmm1,
+ x86::xmm2,
+ x86::xmm3,
+ x86::xmm4,
+ x86::xmm5,
+ x86::xmm6,
+ x86::xmm7,
+ x86::xmm8,
+ x86::xmm9,
+ x86::xmm10,
+ x86::xmm11,
+ x86::xmm12,
+ x86::xmm13,
+ x86::xmm14,
+ x86::xmm15,
+};
+/* clang-format on */
+
+/**
+ * Call the code block stored in code_block_switch_to_jitted_from_interp
+ * as a function, forwarding all four arguments unchanged.
+ *
+ * @param exec_env forwarded as the first argument
+ * @param info forwarded as the second argument
+ * @param func_idx forwarded as the third argument
+ * @param target forwarded as the fourth argument
+ *
+ * @return the value returned by the called code block
+ */
+int
+jit_codegen_interp_jitted_glue(void *exec_env, JitInterpSwitchInfo *info,
+                               uint32 func_idx, void *target)
+{
+    typedef int32 (*GlueEntry)(const void *, void *, uint32, const void *);
+    /* Convert the data pointer to a function pointer through a union */
+    union {
+        GlueEntry entry;
+        void *raw;
+    } conv;
+
+    conv.raw = code_block_switch_to_jitted_from_interp;
+
+    int32 result = conv.entry(exec_env, info, func_idx, target);
+    return result;
+}
+
+/* Log the current source line number at verbose level */
+#define PRINT_LINE() LOG_VERBOSE("<Line:%d>\n", __LINE__)
+
+/* Jump to the enclosing function's `fail` label; when CODEGEN_DUMP is
+   enabled the source line is logged first. The using function must
+   define a `fail` label. */
+#if CODEGEN_DUMP != 0
+#define GOTO_FAIL \
+ do { \
+ PRINT_LINE(); \
+ goto fail; \
+ } while (0)
+#else
+#define GOTO_FAIL goto fail
+#endif
+
+/*
+ * Argument checking helpers. When CODEGEN_CHECK_ARGS is 0 every check
+ * expands to a no-op; otherwise a failed check jumps to the enclosing
+ * function's `fail` label through GOTO_FAIL. The register-dumping checks
+ * also expect a variable named `cc` to be in scope, since they pass it to
+ * jit_dump_reg().
+ */
+#if CODEGEN_CHECK_ARGS == 0
+
+#define CHECK_EQKIND(reg0, reg1) (void)0
+#define CHECK_CONST(reg0) (void)0
+#define CHECK_NCONST(reg0) (void)0
+#define CHECK_KIND(reg0, type) (void)0
+#define CHECK_I32_REG_NO(no) (void)0
+#define CHECK_I64_REG_NO(no) (void)0
+#define CHECK_F32_REG_NO(no) (void)0
+#define CHECK_F64_REG_NO(no) (void)0
+#define CHECK_REG_NO(no, kind) (void)0
+#else
+
+/* Check if two registers' kinds are equal */
+#define CHECK_EQKIND(reg0, reg1)                        \
+    do {                                                \
+        if (jit_reg_kind(reg0) != jit_reg_kind(reg1)) { \
+            PRINT_LINE();                               \
+            LOG_VERBOSE("reg type not equal:\n");       \
+            jit_dump_reg(cc, reg0);                     \
+            jit_dump_reg(cc, reg1);                     \
+            GOTO_FAIL;                                  \
+        }                                               \
+    } while (0)
+
+/* Check if a register is a const */
+#define CHECK_CONST(reg0)                       \
+    do {                                        \
+        if (!jit_reg_is_const(reg0)) {          \
+            PRINT_LINE();                       \
+            LOG_VERBOSE("reg is not const:\n"); \
+            jit_dump_reg(cc, reg0);             \
+            GOTO_FAIL;                          \
+        }                                       \
+    } while (0)
+
+/* Check if a register is not a const */
+#define CHECK_NCONST(reg0)                  \
+    do {                                    \
+        if (jit_reg_is_const(reg0)) {       \
+            PRINT_LINE();                   \
+            LOG_VERBOSE("reg is const:\n"); \
+            jit_dump_reg(cc, reg0);         \
+            GOTO_FAIL;                      \
+        }                                   \
+    } while (0)
+
+/* Check if a register is of the given kind */
+#define CHECK_KIND(reg0, type)                                       \
+    do {                                                             \
+        if (jit_reg_kind(reg0) != (type)) {                          \
+            PRINT_LINE();                                            \
+            LOG_VERBOSE("invalid reg type %d, expected is: %d",      \
+                        jit_reg_kind(reg0), (type));                 \
+            jit_dump_reg(cc, reg0);                                  \
+            GOTO_FAIL;                                               \
+        }                                                            \
+    } while (0)
+
+/* The register-number checks below parenthesize `no` so that the cast
+   applies to the whole argument expression, not just its first operand */
+
+/* Check if a register number is a valid index of regs_i32[] */
+#define CHECK_I32_REG_NO(no)                                        \
+    do {                                                            \
+        if ((uint32)(no) >= sizeof(regs_i32) / sizeof(regs_i32[0])) \
+            GOTO_FAIL;                                              \
+    } while (0)
+
+/* Check if a register number is a valid index of regs_i64[] */
+#define CHECK_I64_REG_NO(no)                                        \
+    do {                                                            \
+        if ((uint32)(no) >= sizeof(regs_i64) / sizeof(regs_i64[0])) \
+            GOTO_FAIL;                                              \
+    } while (0)
+
+/* Check if a register number is a valid index of regs_float[] */
+#define CHECK_F32_REG_NO(no)                                            \
+    do {                                                                \
+        if ((uint32)(no) >= sizeof(regs_float) / sizeof(regs_float[0])) \
+            GOTO_FAIL;                                                  \
+    } while (0)
+
+/* Check if a register number is a valid index of regs_float[] */
+#define CHECK_F64_REG_NO(no)                                            \
+    do {                                                                \
+        if ((uint32)(no) >= sizeof(regs_float) / sizeof(regs_float[0])) \
+            GOTO_FAIL;                                                  \
+    } while (0)
+
+/* Check if a register number is valid for the given kind; any kind other
+   than I32/I64/F32/F64 fails the check */
+#define CHECK_REG_NO(no, kind)                                             \
+    do {                                                                   \
+        if ((kind) == JIT_REG_KIND_I32 || (kind) == JIT_REG_KIND_I64) {    \
+            CHECK_I32_REG_NO(no);                                          \
+            CHECK_I64_REG_NO(no);                                          \
+        }                                                                  \
+        else if ((kind) == JIT_REG_KIND_F32                                \
+                 || (kind) == JIT_REG_KIND_F64) {                          \
+            CHECK_F32_REG_NO(no);                                          \
+            CHECK_F64_REG_NO(no);                                          \
+        }                                                                  \
+        else                                                               \
+            GOTO_FAIL;                                                     \
+    } while (0)
+
+#endif /* end of CODEGEN_CHECK_ARGS == 0 */
+
+/* The LOAD_* macros below copy operands of `insn` into the caller's local
+   variables r0..r3. Variants that check r0 use CHECK_NCONST, which jumps
+   to the caller's `fail` label when argument checking is enabled. */
+
+/* Load one operand from insn and check none */
+#define LOAD_1ARG() r0 = *jit_insn_opnd(insn, 0)
+
+/* Load two operands from insn and check if r0 is non-const */
+#define LOAD_2ARGS() \
+ r0 = *jit_insn_opnd(insn, 0); \
+ r1 = *jit_insn_opnd(insn, 1); \
+ CHECK_NCONST(r0)
+
+/* Load three operands from insn and check if r0 is non-const */
+#define LOAD_3ARGS() \
+ r0 = *jit_insn_opnd(insn, 0); \
+ r1 = *jit_insn_opnd(insn, 1); \
+ r2 = *jit_insn_opnd(insn, 2); \
+ CHECK_NCONST(r0)
+
+/* Load three operands from insn and check none */
+#define LOAD_3ARGS_NO_ASSIGN() \
+ r0 = *jit_insn_opnd(insn, 0); \
+ r1 = *jit_insn_opnd(insn, 1); \
+ r2 = *jit_insn_opnd(insn, 2);
+
+/* Load four operands from insn and check if r0 is non-const */
+#define LOAD_4ARGS() \
+ r0 = *jit_insn_opnd(insn, 0); \
+ r1 = *jit_insn_opnd(insn, 1); \
+ r2 = *jit_insn_opnd(insn, 2); \
+ r3 = *jit_insn_opnd(insn, 3); \
+ CHECK_NCONST(r0)
+
+/* Load four operands from insn and check none */
+#define LOAD_4ARGS_NO_ASSIGN() \
+ r0 = *jit_insn_opnd(insn, 0); \
+ r1 = *jit_insn_opnd(insn, 1); \
+ r2 = *jit_insn_opnd(insn, 2); \
+ r3 = *jit_insn_opnd(insn, 3);
+
+class JitErrorHandler : public ErrorHandler
+{
+ public:
+ Error err;
+
+ JitErrorHandler()
+ : err(kErrorOk)
+ {}
+
+ void handleError(Error e, const char *msg, BaseEmitter *base) override
+ {
+ (void)msg;
+ (void)base;
+ this->err = e;
+ }
+};
+
+/* Arithmetic opcodes: add, subtract, multiply, signed/unsigned divide and
+   remainder, minimum and maximum */
+typedef enum { ADD, SUB, MUL, DIV_S, REM_S, DIV_U, REM_U, MIN, MAX } ALU_OP;
+/* Bitwise opcodes */
+typedef enum { OR, XOR, AND } BIT_OP;
+/* Shift and rotate opcodes (SHRS/SHRU: presumably signed/unsigned shift
+   right) */
+typedef enum { SHL, SHRS, SHRU, ROTL, ROTR } SHIFT_OP;
+/* Bit-counting opcodes: count leading zeros, count trailing zeros,
+   population count */
+typedef enum { CLZ, CTZ, POPCNT } BITCOUNT_OP;
+/* Condition opcodes; the S/U suffix selects the signed or unsigned form of
+   the comparison (see the jumps chosen in cmp_r_and_jmp_label()) */
+typedef enum { EQ, NE, GTS, GES, LTS, LES, GTU, GEU, LTU, LEU } COND_OP;
+
+/* Overlays a float with a 32-bit unsigned integer so that the same bytes
+   can be accessed as either type */
+typedef union _cast_float_to_integer {
+ float f;
+ uint32 i;
+} cast_float_to_integer;
+
+/* Overlays a double with a 64-bit unsigned integer so that the same bytes
+   can be accessed as either type */
+typedef union _cast_double_to_integer {
+ double d;
+ uint64 i;
+} cast_double_to_integer;
+
+/**
+ * Compute the floor of the base-2 logarithm of a 32-bit value, i.e. the
+ * index of its highest set bit.
+ *
+ * @param data the input value
+ *
+ * @return floor(log2(data)); 0 when data is 0 or 1
+ */
+static uint32
+local_log2(uint32 data)
+{
+    uint32 bit_index;
+
+    for (bit_index = 0; data > 1; data >>= 1)
+        bit_index++;
+
+    return bit_index;
+}
+
+/**
+ * Compute the floor of the base-2 logarithm of a 64-bit value, i.e. the
+ * index of its highest set bit.
+ *
+ * @param data the input value
+ *
+ * @return floor(log2(data)); 0 when data is 0 or 1
+ */
+static uint64
+local_log2l(uint64 data)
+{
+    uint64 bit_index;
+
+    for (bit_index = 0; data > 1; data >>= 1)
+        bit_index++;
+
+    return bit_index;
+}
+
+/* Kind of a recorded jump (stored in JmpInfo::type); the emitters visible
+   in this part of the file only create JMP_DST_LABEL_REL records */
+typedef enum JmpType {
+ JMP_DST_LABEL_REL, /* jmp to dst label with relative addr */
+ JMP_DST_LABEL_ABS, /* jmp to dst label with absolute addr */
+ JMP_END_OF_CALLBC, /* jmp to end of CALLBC */
+ JMP_LOOKUPSWITCH_BASE, /* LookupSwitch table base addr */
+} JmpType;
+
+/**
+ * Jmp info, save the info on first encoding pass,
+ * and replace the offset with exact offset when the code cache
+ * has been allocated actually.
+ */
+typedef struct JmpInfo {
+ /* List linkage; nodes are added with bh_list_insert() */
+ bh_list_link link;
+ /* What kind of jump this record describes */
+ JmpType type;
+ /* Index of the label that contains the jump */
+ uint32 label_src;
+ /* Position in the code buffer recorded when the jump was emitted; the
+    emitters in this file store the buffer size before the jump plus 2 */
+ uint32 offset;
+ union {
+ /* Index of the label jumped to */
+ uint32 label_dst;
+ } dst_info;
+} JmpInfo;
+
+/**
+ * Check whether label_succ immediately follows label_prev in the label
+ * order encoded by these tests: 0, 2, 3, ..., N-1, 1, where N is
+ * jit_cc_label_num(cc).
+ *
+ * @param cc the compiler context
+ * @param label_prev the index of the earlier label
+ * @param label_succ the index of the label expected to come right after
+ *
+ * @return true if the two labels are adjacent in that order
+ */
+static bool
+label_is_neighboring(JitCompContext *cc, int32 label_prev, int32 label_succ)
+{
+    /* Label 0 is directly followed by label 2 */
+    if (label_prev == 0 && label_succ == 2)
+        return true;
+
+    /* From label 2 on, consecutive indexes are adjacent */
+    if (label_prev >= 2 && label_succ == label_prev + 1)
+        return true;
+
+    /* The highest-numbered label is directly followed by label 1 */
+    return label_prev == (int32)jit_cc_label_num(cc) - 1 && label_succ == 1;
+}
+
+/**
+ * Check whether label_dst comes before label_src in the label order
+ * encoded by these tests: 0, 2, 3, ..., N-1, 1, where N is
+ * jit_cc_label_num(cc).
+ *
+ * @param cc the compiler context
+ * @param label_dst the index of the destination label
+ * @param label_src the index of the source label
+ *
+ * @return true if label_dst is ahead of label_src
+ */
+static bool
+label_is_ahead(JitCompContext *cc, int32 label_dst, int32 label_src)
+{
+    /* Label 0 is ahead of every other label */
+    if (label_dst == 0 && label_src != 0)
+        return true;
+
+    /* Every other label is ahead of label 1 */
+    if (label_dst != 1 && label_src == 1)
+        return true;
+
+    /* Otherwise compare indexes within the range [2, N-1] */
+    return 2 <= label_dst && label_dst < label_src
+           && label_src <= (int32)jit_cc_label_num(cc) - 1;
+}
+
+/**
+ * Emit a jump from one label to another. The jump is encoded with a
+ * placeholder target (INT32_MAX) and a JmpInfo record describing it is
+ * added to the jmp info list.
+ *
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the list receiving the new jmp info record
+ * @param label_dst the index of dst label
+ * @param label_src the index of src label
+ *
+ * @return true if success, false if the record cannot be allocated
+ */
+static bool
+jmp_from_label_to_label(x86::Assembler &a, bh_list *jmp_info_list,
+                        int32 label_dst, int32 label_src)
+{
+    JmpInfo *jmp_info = (JmpInfo *)jit_calloc(sizeof(JmpInfo));
+
+    if (!jmp_info)
+        return false;
+
+    /* Record the buffer position before the jump is emitted, plus 2 */
+    jmp_info->offset = a.code()->sectionById(0)->buffer().size() + 2;
+    jmp_info->dst_info.label_dst = label_dst;
+    jmp_info->label_src = label_src;
+    jmp_info->type = JMP_DST_LABEL_REL;
+    bh_list_insert(jmp_info_list, jmp_info);
+
+    Imm placeholder(INT32_MAX);
+    a.jmp(placeholder);
+    return true;
+}
+
+/**
+ * Emit a conditional jump that tests the result of the preceding compare
+ * according to the condition code and jumps to the suitable label when the
+ * condition is met. The jump is encoded with a placeholder target
+ * (INT32_MAX) and a JmpInfo record describing it is added to the jmp info
+ * list.
+ *
+ * @param cc the compiler context; cc->last_cmp_on_fp selects the jump
+ *        instructions used for GTS and GES
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_src the index of src label
+ * @param op the opcode of condition operation
+ * @param r1 the label info when condition is met
+ * @param r2 the label info when condition is unmet, do nothing if VOID
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+cmp_r_and_jmp_label(JitCompContext *cc, x86::Assembler &a,
+                    bh_list *jmp_info_list, int32 label_src, COND_OP op,
+                    JitReg r1, JitReg r2, bool is_last_insn)
+{
+    Imm imm(INT32_MAX);
+    JmpInfo *node;
+
+    /* Zero-initialized allocation, same as jmp_from_label_to_label() */
+    node = (JmpInfo *)jit_calloc(sizeof(JmpInfo));
+    if (!node)
+        return false;
+
+    node->type = JMP_DST_LABEL_REL;
+    node->label_src = label_src;
+    node->dst_info.label_dst = jit_reg_no(r1);
+    /* Record the buffer position before the jump is emitted, plus 2 */
+    node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+    bh_list_insert(jmp_info_list, node);
+
+    bool fp_cmp = cc->last_cmp_on_fp;
+
+    /* After a floating-point compare only GTS and GES are expected */
+    bh_assert(!fp_cmp || op == GTS || op == GES);
+
+    switch (op) {
+        case EQ:
+            a.je(imm);
+            break;
+        case NE:
+            a.jne(imm);
+            break;
+        case GTS:
+            /* Floating-point compares use the above/below jumps */
+            if (fp_cmp)
+                a.ja(imm);
+            else
+                a.jg(imm);
+            break;
+        case LES:
+            a.jng(imm);
+            break;
+        case GES:
+            if (fp_cmp)
+                a.jae(imm);
+            else
+                a.jnl(imm);
+            break;
+        case LTS:
+            a.jl(imm);
+            break;
+        case GTU:
+            a.ja(imm);
+            break;
+        case LEU:
+            a.jna(imm);
+            break;
+        case GEU:
+            a.jnb(imm);
+            break;
+        case LTU:
+            a.jb(imm);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    if (r2) {
+        int32 label_dst = jit_reg_no(r2);
+        /* The jump for the unmet case is omitted when this is the last
+           insn of the block and the target is the neighboring label */
+        if (!(is_last_insn && label_is_neighboring(cc, label_src, label_dst)))
+            if (!jmp_from_label_to_label(a, jmp_info_list, label_dst,
+                                         label_src))
+                return false;
+    }
+
+    return true;
+}
+
+#if WASM_ENABLE_FAST_JIT_DUMP != 0
+/**
+ * Disassemble a buffer of 64-bit x86 machine code with Zydis and print
+ * each instruction, prefixed by its address, in Intel syntax. Decoding
+ * stops at the first position that cannot be decoded.
+ *
+ * @param data the start of the machine code
+ * @param length the size of the machine code in bytes
+ */
+static void
+dump_native(char *data, uint32 length)
+{
+    ZydisDecoder decoder;
+    ZydisFormatter formatter;
+    ZydisDecodedInstruction instruction;
+    ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE];
+    ZyanU64 runtime_address = (ZyanU64)(uintptr_t)data;
+    ZyanUSize offset = 0;
+    char text[256];
+
+    /* Decoder for 64-bit long mode, formatter for Intel syntax */
+    ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64,
+                     ZYDIS_STACK_WIDTH_64);
+    ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL);
+
+    for (;;) {
+        ZyanStatus status = ZydisDecoderDecodeFull(
+            &decoder, data + offset, length - offset, &instruction, operands,
+            ZYDIS_MAX_OPERAND_COUNT_VISIBLE,
+            ZYDIS_DFLAG_VISIBLE_OPERANDS_ONLY);
+        if (!ZYAN_SUCCESS(status))
+            break;
+
+        /* Address of the instruction first, then its text */
+        os_printf("%012" PRIX64 " ", runtime_address);
+        ZydisFormatterFormatInstruction(&formatter, &instruction, operands,
+                                        instruction.operand_count_visible,
+                                        text, sizeof(text), runtime_address);
+        puts(text);
+
+        /* Advance to the next instruction */
+        offset += instruction.length;
+        runtime_address += instruction.length;
+    }
+}
+#endif
+
+/**
+ * Emit the extension of an 8-bit register into a 32-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed true to sign-extend (movsx), false to zero-extend (movzx)
+ *
+ * @return always true
+ */
+static bool
+extend_r8_to_r32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                 bool is_signed)
+{
+    if (!is_signed) {
+        a.movzx(regs_i32[reg_no_dst], regs_i8[reg_no_src]);
+        return true;
+    }
+
+    a.movsx(regs_i32[reg_no_dst], regs_i8[reg_no_src]);
+    return true;
+}
+/**
+ * Emit the extension of a 16-bit register into a 32-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed true to sign-extend (movsx), false to zero-extend (movzx)
+ *
+ * @return always true
+ */
+static bool
+extend_r16_to_r32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                  bool is_signed)
+{
+    if (!is_signed) {
+        a.movzx(regs_i32[reg_no_dst], regs_i16[reg_no_src]);
+        return true;
+    }
+
+    a.movsx(regs_i32[reg_no_dst], regs_i16[reg_no_src]);
+    return true;
+}
+
+/**
+ * Emit the extension of an 8-bit register into a 64-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed true to sign-extend (movsx), false to zero-extend (movzx)
+ *
+ * @return always true
+ */
+static bool
+extend_r8_to_r64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                 bool is_signed)
+{
+    if (!is_signed) {
+        a.movzx(regs_i64[reg_no_dst], regs_i8[reg_no_src]);
+        return true;
+    }
+
+    a.movsx(regs_i64[reg_no_dst], regs_i8[reg_no_src]);
+    return true;
+}
+
+/**
+ * Emit the extension of a 16-bit register into a 64-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed true to sign-extend (movsx), false to zero-extend (movzx)
+ *
+ * @return always true
+ */
+static bool
+extend_r16_to_r64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                  bool is_signed)
+{
+    if (!is_signed) {
+        a.movzx(regs_i64[reg_no_dst], regs_i16[reg_no_src]);
+        return true;
+    }
+
+    a.movsx(regs_i64[reg_no_dst], regs_i16[reg_no_src]);
+    return true;
+}
+
+/**
+ * Emit the extension of a 32-bit register into a 64-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ * @param is_signed true to sign-extend (movsxd), false to zero-extend
+ *
+ * @return always true
+ */
+static bool
+extend_r32_to_r64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src,
+                  bool is_signed)
+{
+    if (!is_signed) {
+        /*
+         * A plain 32-bit move is enough for the unsigned case: per the
+         * Intel document, 3.4.1.1 General-Purpose Registers, 32-bit
+         * operands generate a 32-bit result, zero-extended to a 64-bit
+         * result in the destination general-purpose register
+         */
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+        return true;
+    }
+
+    a.movsxd(regs_i64[reg_no_dst], regs_i32[reg_no_src]);
+    return true;
+}
+
+/* Defined later in this file */
+static bool
+mov_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src);
+
+static bool
+mov_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src);
+
+/**
+ * Emit a register to register move by dispatching on the register kind.
+ * Only the I32 and I64 kinds are implemented; any other kind hits an
+ * assertion. The result of the kind-specific helper is not propagated.
+ *
+ * @param a the assembler to emit the code
+ * @param kind_dst the kind of the registers
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ */
+static void
+mov_r_to_r(x86::Assembler &a, uint32 kind_dst, int32 reg_no_dst,
+           int32 reg_no_src)
+{
+    switch (kind_dst) {
+        case JIT_REG_KIND_I32:
+            mov_r_to_r_i32(a, reg_no_dst, reg_no_src);
+            break;
+        case JIT_REG_KIND_I64:
+            mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+            break;
+        case JIT_REG_KIND_F32:
+        case JIT_REG_KIND_F64:
+            /* TODO */
+            bh_assert(0);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+}
+
+/**
+ * Encode moving memory to a register
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64),
+ *        skipped by float and double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed whether the data is signed or unsigned; only used for
+ *        the integer kinds when the data is narrower than the register
+ * @param reg_no_dst the index of dest register
+ * @param m_src the memory operand which contains the source data
+ *
+ * @return true if success, false if bytes_dst or kind_dst is not supported
+ */
+static bool
+mov_m_to_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, bool is_signed,
+           int32 reg_no_dst, x86::Mem &m_src)
+{
+    if (kind_dst == JIT_REG_KIND_I32) {
+        switch (bytes_dst) {
+            case 1:
+            case 2:
+                if (is_signed)
+                    a.movsx(regs_i32[reg_no_dst], m_src);
+                else
+                    a.movzx(regs_i32[reg_no_dst], m_src);
+                break;
+            case 4:
+                a.mov(regs_i32[reg_no_dst], m_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_I64) {
+        switch (bytes_dst) {
+            case 1:
+            case 2:
+                if (is_signed)
+                    a.movsx(regs_i64[reg_no_dst], m_src);
+                else
+                    a.movzx(regs_i64[reg_no_dst], m_src);
+                break;
+            case 4:
+                if (is_signed)
+                    a.movsxd(regs_i64[reg_no_dst], m_src);
+                else
+                    /*
+                     * The upper 32-bit will be zero-extended, ref to Intel
+                     * document, 3.4.1.1 General-Purpose Registers: 32-bit
+                     * operands generate a 32-bit result, zero-extended to
+                     * a 64-bit result in the destination general-purpose
+                     * register
+                     */
+                    a.mov(regs_i32[reg_no_dst], m_src);
+                break;
+            case 8:
+                a.mov(regs_i64[reg_no_dst], m_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_F32) {
+        a.movss(regs_float[reg_no_dst], m_src);
+    }
+    else if (kind_dst == JIT_REG_KIND_F64) {
+        a.movsd(regs_float[reg_no_dst], m_src);
+    }
+    else {
+        /* Unknown kind: nothing was emitted, so do not report success */
+        bh_assert(0);
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Encode moving register to memory
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64),
+ *        skipped by float and double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param m_dst the dest memory operand
+ * @param reg_no_src the index of src register
+ *
+ * @return true if success, false if bytes_dst or kind_dst is not supported
+ */
+static bool
+mov_r_to_m(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+           x86::Mem &m_dst, int32 reg_no_src)
+{
+    if (kind_dst == JIT_REG_KIND_I32) {
+        bh_assert(reg_no_src < 16);
+        switch (bytes_dst) {
+            case 1:
+                a.mov(m_dst, regs_i8[reg_no_src]);
+                break;
+            case 2:
+                a.mov(m_dst, regs_i16[reg_no_src]);
+                break;
+            case 4:
+                a.mov(m_dst, regs_i32[reg_no_src]);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_I64) {
+        bh_assert(reg_no_src < 16);
+        switch (bytes_dst) {
+            case 1:
+                a.mov(m_dst, regs_i8[reg_no_src]);
+                break;
+            case 2:
+                a.mov(m_dst, regs_i16[reg_no_src]);
+                break;
+            case 4:
+                a.mov(m_dst, regs_i32[reg_no_src]);
+                break;
+            case 8:
+                a.mov(m_dst, regs_i64[reg_no_src]);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_F32) {
+        a.movss(m_dst, regs_float[reg_no_src]);
+    }
+    else if (kind_dst == JIT_REG_KIND_F64) {
+        a.movsd(m_dst, regs_float[reg_no_src]);
+    }
+    else {
+        /* Unknown kind: nothing was emitted, so do not report success */
+        bh_assert(0);
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Encode moving immediate data to memory
+ *
+ * @param a the assembler to emit the code
+ * @param m_dst the dest memory operand
+ * @param imm_src the src immediate data
+ * @param bytes_dst the bytes number of the data; 8 gets special handling
+ *        since a 64-bit immediate may not fit in a single move
+ *
+ * @return always true
+ */
+static bool
+mov_imm_to_m(x86::Assembler &a, x86::Mem &m_dst, Imm imm_src, uint32 bytes_dst)
+{
+    if (bytes_dst != 8) {
+        a.mov(m_dst, imm_src);
+        return true;
+    }
+
+    int64 imm_value = imm_src.value();
+
+    if (imm_value < INT32_MIN || imm_value > INT32_MAX) {
+        /* There is no instruction `MOV m64, imm64`, so go through the
+           free register with two instructions */
+        a.mov(regs_i64[REG_I64_FREE_IDX], imm_src);
+        a.mov(m_dst, regs_i64[REG_I64_FREE_IDX]);
+        return true;
+    }
+
+    /* The value fits in 32 bits: store it with a single move */
+    imm_src.setValue((int32)imm_value);
+    a.mov(m_dst, imm_src);
+    return true;
+}
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+/**
+ * Encode exchanging a register with memory
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to exchange, could only be I32 or I64
+ * @param m_dst the dest memory operand
+ * @param reg_no_src the index of src register
+ *
+ * @return true if success, false if bytes_dst is not supported
+ */
+static bool
+xchg_r_to_m(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+            x86::Mem &m_dst, int32 reg_no_src)
+{
+    x86::Gp *reg_table;
+
+    bh_assert((kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4)
+              || kind_dst == JIT_REG_KIND_I64);
+    bh_assert(reg_no_src < 16);
+
+    /* Pick the register table matching the operand width */
+    switch (bytes_dst) {
+        case 1:
+            reg_table = regs_i8;
+            break;
+        case 2:
+            reg_table = regs_i16;
+            break;
+        case 4:
+            reg_table = regs_i32;
+            break;
+        case 8:
+            reg_table = regs_i64;
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    a.xchg(m_dst, reg_table[reg_no_src]);
+    return true;
+}
+#endif
+/**
+ * Encode loading a register from memory addressed by an immediate base
+ * plus an immediate offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param base the base address of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return the result of mov_m_to_r()
+ */
+static bool
+ld_r_from_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, bool is_signed, int32 reg_no_dst,
+                              int32 base, int32 offset)
+{
+    /* The address is the sum of the two immediates */
+    x86::Mem src_mem((uintptr_t)(base + offset), bytes_dst);
+
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, src_mem);
+}
+
+/**
+ * Encode loading a register from memory addressed by an immediate base
+ * plus a register offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param base the base address of the memory
+ * @param reg_no_offset the no of register which stores the offset of the memory
+ *
+ * @return the result of mov_m_to_r()
+ */
+static bool
+ld_r_from_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, bool is_signed, int32 reg_no_dst,
+                            int32 base, int32 reg_no_offset)
+{
+    /* The offset register is the base of the operand and the immediate
+       base its displacement */
+    x86::Mem src_mem(regs_i64[reg_no_offset], base, bytes_dst);
+
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, src_mem);
+}
+
+/**
+ * Encode loading a register from memory addressed by a register base
+ * plus an immediate offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param reg_no_base the no of register which stores the base of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return the result of mov_m_to_r()
+ */
+static bool
+ld_r_from_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, bool is_signed, int32 reg_no_dst,
+                            int32 reg_no_base, int32 offset)
+{
+    x86::Mem src_mem(regs_i64[reg_no_base], offset, bytes_dst);
+
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, src_mem);
+}
+
+/**
+ * Encode loading a register from memory addressed by a register base
+ * plus a register offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param is_signed the data is signed or unsigned
+ * @param reg_no_dst the index of dest register
+ * @param reg_no_base the no of register which stores the base of the memory
+ * @param reg_no_offset the no of register which stores the offset of the memory
+ *
+ * @return the result of mov_m_to_r()
+ */
+static bool
+ld_r_from_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                          bool is_signed, int32 reg_no_dst, int32 reg_no_base,
+                          int32 reg_no_offset)
+{
+    /* Base register plus index register, shift 0 and displacement 0 */
+    x86::Mem src_mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                     bytes_dst);
+
+    return mov_m_to_r(a, bytes_dst, kind_dst, is_signed, reg_no_dst, src_mem);
+}
+
+/**
+ * Encode storing a register to memory addressed by an immediate base
+ * plus an immediate offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param base the base address of the dst memory
+ * @param offset the offset address of the dst memory
+ * @param atomic if true and shared memory support is compiled in, store
+ *        with xchg instead of mov; ignored otherwise
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, int32 reg_no_src, int32 base,
+                            int32 offset, bool atomic)
+{
+    x86::Mem dst_mem((uintptr_t)(base + offset), bytes_dst);
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    return atomic ? xchg_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src)
+                  : mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#else
+    return mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#endif
+}
+
+/**
+ * Encode storing a register to memory addressed by an immediate base
+ * plus a register offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param base the base address of the dst memory
+ * @param reg_no_offset the no of register which stores the offset of the dst
+ *        memory
+ * @param atomic if true and shared memory support is compiled in, store
+ *        with xchg instead of mov; ignored otherwise
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                          int32 reg_no_src, int32 base, int32 reg_no_offset,
+                          bool atomic)
+{
+    x86::Mem dst_mem(regs_i64[reg_no_offset], base, bytes_dst);
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    return atomic ? xchg_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src)
+                  : mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#else
+    return mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#endif
+}
+
+/**
+ * Encode storing a register to memory addressed by a register base
+ * plus an immediate offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param reg_no_base the no of register which stores the base of the dst memory
+ * @param offset the offset address of the dst memory
+ * @param atomic if true and shared memory support is compiled in, store
+ *        with xchg instead of mov; ignored otherwise
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                          int32 reg_no_src, int32 reg_no_base, int32 offset,
+                          bool atomic)
+{
+    x86::Mem dst_mem(regs_i64[reg_no_base], offset, bytes_dst);
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    return atomic ? xchg_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src)
+                  : mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#else
+    return mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#endif
+}
+
+/**
+ * Encode storing a register to memory addressed by a register base
+ * plus a register offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64), skipped by
+ *        float/double
+ * @param kind_dst the kind of data to move, could be I32, I64, F32 or F64
+ * @param reg_no_src the index of src register
+ * @param reg_no_base the no of register which stores the base of the dst memory
+ * @param reg_no_offset the no of register which stores the offset of the dst
+ *        memory
+ * @param atomic if true and shared memory support is compiled in, store
+ *        with xchg instead of mov; ignored otherwise
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_r_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+                        int32 reg_no_src, int32 reg_no_base,
+                        int32 reg_no_offset, bool atomic)
+{
+    /* Base register plus index register, shift 0 and displacement 0 */
+    x86::Mem dst_mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                     bytes_dst);
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    return atomic ? xchg_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src)
+                  : mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#else
+    return mov_r_to_m(a, bytes_dst, kind_dst, dst_mem, reg_no_src);
+#endif
+}
+
+/**
+ * Set an immediate from a value of 1, 2, 4 or 8 bytes read through a
+ * pointer; the value is read as the unsigned type of that width.
+ *
+ * @param imm the immediate to set
+ * @param data the address of the value
+ * @param bytes the size of the value in bytes; any other size hits an
+ *        assertion and leaves imm unchanged
+ */
+static void
+imm_set_value(Imm &imm, void *data, uint32 bytes)
+{
+    if (bytes == 1) {
+        imm.setValue(*(uint8 *)data);
+    }
+    else if (bytes == 2) {
+        imm.setValue(*(uint16 *)data);
+    }
+    else if (bytes == 4) {
+        imm.setValue(*(uint32 *)data);
+    }
+    else if (bytes == 8) {
+        imm.setValue(*(uint64 *)data);
+    }
+    else {
+        bh_assert(0);
+    }
+}
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+/**
+ * Encode moving immediate data to the free (scratch) register of the
+ * given width
+ *
+ * @param a the assembler to emit the code
+ * @param imm the src immediate data
+ * @param bytes the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ *
+ * @return the index of the register that received the value; for an
+ *         unsupported size nothing is emitted and the index of the 64-bit
+ *         free register is returned
+ */
+static uint32
+mov_imm_to_free_reg(x86::Assembler &a, Imm &imm, uint32 bytes)
+{
+    /* Initialized so that the unsupported-size path does not return an
+       indeterminate value when bh_assert is compiled out */
+    uint32 reg_no = REG_I64_FREE_IDX;
+
+    switch (bytes) {
+        case 1:
+            reg_no = REG_I8_FREE_IDX;
+            a.mov(regs_i8[reg_no], imm);
+            break;
+        case 2:
+            reg_no = REG_I16_FREE_IDX;
+            a.mov(regs_i16[reg_no], imm);
+            break;
+        case 4:
+            reg_no = REG_I32_FREE_IDX;
+            a.mov(regs_i32[reg_no], imm);
+            break;
+        case 8:
+            reg_no = REG_I64_FREE_IDX;
+            a.mov(regs_i64[reg_no], imm);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    return reg_no;
+}
+#endif
+
+/**
+ * Encode storing imm data to memory with imm base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src the address of the src immediate data
+ * @param base the base address of dst memory
+ * @param offset the offset address of dst memory
+ * @param atomic if true and shared memory support is compiled in, store
+ *        with xchg through the free register; ignored otherwise
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_imm_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                              void *data_src, int32 base, int32 offset,
+                              bool atomic)
+{
+    x86::Mem m((uintptr_t)(base + offset), bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (atomic) {
+        /* xchg has no immediate form, so the value goes through the free
+           register; only done on this path to avoid emitting a dead move
+           for non-atomic stores */
+        uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+        return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src);
+    }
+#endif
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode storing imm data to memory with imm base and reg offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src pointer to the src immediate data, bytes_dst bytes
+ *        are read from it
+ * @param base the base address of dst memory
+ * @param reg_no_offset the no of register that stores the offset address
+ *        of dst memory
+ * @param atomic whether to store with xchg instead of a plain mov;
+ *        only honoured when WASM_ENABLE_SHARED_MEMORY != 0
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_imm_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src,
+                            int32 base, int32 reg_no_offset, bool atomic)
+{
+    x86::Mem m(regs_i64[reg_no_offset], base, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (atomic) {
+        /* xchg_r_to_m takes its source from a register; load the free
+           register only on this path so that a plain store does not
+           carry a dead mov */
+        uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+        return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src);
+    }
+#endif
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode storing imm data to memory with reg base and imm offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src pointer to the src immediate data, bytes_dst bytes
+ *        are read from it
+ * @param reg_no_base the no of register that stores the base address
+ *        of dst memory
+ * @param offset the offset address of dst memory
+ * @param atomic whether to store with xchg instead of a plain mov;
+ *        only honoured when WASM_ENABLE_SHARED_MEMORY != 0
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst, void *data_src,
+                            int32 reg_no_base, int32 offset, bool atomic)
+{
+    x86::Mem m(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (atomic) {
+        /* xchg_r_to_m takes its source from a register; load the free
+           register only on this path so that a plain store does not
+           carry a dead mov */
+        uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+        return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src);
+    }
+#endif
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode storing imm data to memory with reg base and reg offset
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param data_src pointer to the src immediate data, bytes_dst bytes
+ *        are read from it
+ * @param reg_no_base the no of register that stores the base address
+ *        of dst memory
+ * @param reg_no_offset the no of register that stores the offset address
+ *        of dst memory
+ * @param atomic whether to store with xchg instead of a plain mov;
+ *        only honoured when WASM_ENABLE_SHARED_MEMORY != 0
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+st_imm_to_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst, void *data_src,
+                          int32 reg_no_base, int32 reg_no_offset, bool atomic)
+{
+    x86::Mem m(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0, bytes_dst);
+    Imm imm;
+    imm_set_value(imm, data_src, bytes_dst);
+#if WASM_ENABLE_SHARED_MEMORY != 0
+    if (atomic) {
+        /* xchg_r_to_m takes its source from a register; load the free
+           register only on this path so that a plain store does not
+           carry a dead mov */
+        uint32 reg_no_src = mov_imm_to_free_reg(a, imm, bytes_dst);
+        return xchg_r_to_m(a, bytes_dst, JIT_REG_KIND_I64, m, reg_no_src);
+    }
+#endif
+    return mov_imm_to_m(a, m, imm, bytes_dst);
+}
+
+/**
+ * Encode loading an int32 constant into a 32-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, an index into regs_i32
+ * @param data the int32 constant to load
+ *
+ * @return always true
+ */
+static bool
+mov_imm_to_r_i32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    a.mov(regs_i32[reg_no], Imm(data));
+    return true;
+}
+
+/**
+ * Encode copying a 32-bit register to another 32-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return always true
+ */
+static bool
+mov_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Nothing to emit when src and dst are the same register */
+    if (reg_no_dst == reg_no_src) {
+        return true;
+    }
+
+    a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode loading an int64 constant into a 64-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, an index into regs_i64
+ * @param data the int64 constant to load
+ *
+ * @return always true
+ */
+static bool
+mov_imm_to_r_i64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    a.mov(regs_i64[reg_no], Imm(data));
+    return true;
+}
+
+/**
+ * Encode copying a 64-bit register to another 64-bit register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return always true
+ */
+static bool
+mov_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Nothing to emit when src and dst are the same register */
+    if (reg_no_dst == reg_no_src) {
+        return true;
+    }
+
+    a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode loading a float constant into a float register
+ *
+ * The bit pattern of the constant is first moved into the free 32-bit
+ * general register and then transferred to the float register with movd.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the float constant to load
+ *
+ * @return always true
+ */
+static bool
+mov_imm_to_r_f32(x86::Assembler &a, int32 reg_no, float data)
+{
+    /* Reinterpret the float as its integer bit pattern */
+    cast_float_to_integer bits = { .f = data };
+
+    a.mov(regs_i32[REG_I32_FREE_IDX], Imm(bits.i));
+    a.movd(regs_float[reg_no], regs_i32[REG_I32_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode copying float data between float registers with movss
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src float register
+ *
+ * @return always true
+ */
+static bool
+mov_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Nothing to emit when src and dst are the same register */
+    if (reg_no_dst == reg_no_src)
+        return true;
+
+    a.movss(regs_float[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/**
+ * Encode moving immediate double data to register
+ *
+ * The bit pattern of the constant is first moved into the free 64-bit
+ * general register and then transferred to the float register with movq.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the immediate data to move
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+mov_imm_to_r_f64(x86::Assembler &a, int32 reg_no, double data)
+{
+    /* imm -> gp -> xmm */
+    cast_double_to_integer v = { .d = data };
+    Imm imm(v.i);
+    /* Index regs_i64 with the 64-bit free register index for both
+       instructions instead of relying on
+       REG_I32_FREE_IDX == REG_I64_FREE_IDX */
+    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+    a.movq(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode copying double data between float registers with movsd
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src double register
+ *
+ * @return always true
+ */
+static bool
+mov_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Nothing to emit when src and dst are the same register */
+    if (reg_no_dst == reg_no_src)
+        return true;
+
+    a.movsd(regs_float[reg_no_dst], regs_float[reg_no_src]);
+    return true;
+}
+
+/* Let compiler do the conversion job as much as possible */
+
+/**
+ * Encode loading an int8 constant, widened to int32, into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src int8 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i8_to_r_i32(x86::Assembler &a, int32 reg_no, int8 data)
+{
+    /* The widening is done by the C++ compiler at codegen time */
+    const int32 widened = (int32)data;
+    return mov_imm_to_r_i32(a, reg_no, widened);
+}
+
+/**
+ * Encode converting int8 register data to int32 register data
+ *
+ * Delegates to extend_r8_to_r32 with its last argument set to true
+ * (presumably selecting sign extension - confirm against that helper).
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i8_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const bool is_signed = true;
+    return extend_r8_to_r32(a, reg_no_dst, reg_no_src, is_signed);
+}
+
+/**
+ * Encode loading an int8 constant, widened to int64, into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src int8 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i8_to_r_i64(x86::Assembler &a, int32 reg_no, int8 data)
+{
+    /* The widening is done by the C++ compiler at codegen time */
+    const int64 widened = (int64)data;
+    return mov_imm_to_r_i64(a, reg_no, widened);
+}
+
+/**
+ * Encode converting int8 register data to int64 register data
+ *
+ * Delegates to extend_r8_to_r64 with its last argument set to true
+ * (presumably selecting sign extension - confirm against that helper).
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i8_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const bool is_signed = true;
+    return extend_r8_to_r64(a, reg_no_dst, reg_no_src, is_signed);
+}
+
+/**
+ * Encode loading an int16 constant, widened to int32, into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src int16 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i16_to_r_i32(x86::Assembler &a, int32 reg_no, int16 data)
+{
+    /* The widening is done by the C++ compiler at codegen time */
+    const int32 widened = (int32)data;
+    return mov_imm_to_r_i32(a, reg_no, widened);
+}
+
+/**
+ * Encode converting int16 register data to int32 register data
+ *
+ * Delegates to extend_r16_to_r32 with its last argument set to true
+ * (presumably selecting sign extension - confirm against that helper).
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i16_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const bool is_signed = true;
+    return extend_r16_to_r32(a, reg_no_dst, reg_no_src, is_signed);
+}
+
+/**
+ * Encode loading an int16 constant, widened to int64, into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src int16 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i16_to_r_i64(x86::Assembler &a, int32 reg_no, int16 data)
+{
+    /* The widening is done by the C++ compiler at codegen time */
+    const int64 widened = (int64)data;
+    return mov_imm_to_r_i64(a, reg_no, widened);
+}
+
+/**
+ * Encode converting int16 register data to int64 register data
+ *
+ * Delegates to extend_r16_to_r64 with its last argument set to true
+ * (presumably selecting sign extension - confirm against that helper).
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i16_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const bool is_signed = true;
+    return extend_r16_to_r64(a, reg_no_dst, reg_no_src, is_signed);
+}
+
+/**
+ * Encode loading the low 8 bits of an int32 constant into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, written as a 32-bit register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_i8(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* Mask instead of casting through int8: (int32)(int8)data would
+       sign-extend, whereas only the low byte is kept here */
+    const int32 low_byte = data & 0xFF;
+    return mov_imm_to_r_i32(a, reg_no, low_byte);
+}
+
+/**
+ * Encode converting int32 register data to int8 register data
+ *
+ * Copies src to dst (skipped when they are the same register) and
+ * masks dst with 0xFF, keeping only the low 8 bits.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i32_to_r_i8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    }
+    a.and_(regs_i32[reg_no_dst], 0xFF);
+    return true;
+}
+
+/**
+ * Encode loading an int32 constant truncated to uint8 into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, written as a 32-bit register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_u8(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* Truncate to 8 bits, then widen back without sign extension */
+    const int32 low_byte = (uint8)data;
+    return mov_imm_to_r_i32(a, reg_no, low_byte);
+}
+
+/**
+ * Encode converting int32 register data to uint8 register data
+ *
+ * Emits the same code as convert_r_i32_to_r_i8: copy src to dst
+ * (skipped when they are the same register), then mask dst with 0xFF.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i32_to_r_u8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    }
+    a.and_(regs_i32[reg_no_dst], 0xFF);
+    return true;
+}
+
+/**
+ * Encode loading the low 16 bits of an int32 constant into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, written as a 32-bit register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_i16(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* Mask instead of casting through int16: (int32)(int16)data would
+       sign-extend, whereas only the low 16 bits are kept here */
+    const int32 low_word = data & 0xFFFF;
+    return mov_imm_to_r_i32(a, reg_no, low_word);
+}
+
+/**
+ * Encode converting int32 register data to int16 register data
+ *
+ * Copies src to dst (skipped when they are the same register) and
+ * masks dst with 0xFFFF, keeping only the low 16 bits.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i32_to_r_i16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    }
+    a.and_(regs_i32[reg_no_dst], 0xFFFF);
+    return true;
+}
+
+/**
+ * Encode loading an int32 constant truncated to uint16 into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, written as a 32-bit register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_u16(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* Truncate to 16 bits, then widen back without sign extension */
+    const int32 low_word = (uint16)data;
+    return mov_imm_to_r_i32(a, reg_no, low_word);
+}
+
+/**
+ * Encode converting int32 register data to uint16 register data
+ *
+ * Emits the same code as convert_r_i32_to_r_i16: copy src to dst
+ * (skipped when they are the same register), then mask dst with 0xFFFF.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i32_to_r_u16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    }
+    a.and_(regs_i32[reg_no_dst], 0xFFFF);
+    return true;
+}
+
+/**
+ * Encode loading an int32 constant, sign-extended to int64, into a
+ * register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i32_to_r_i64(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* The widening is done by the C++ compiler at codegen time */
+    const int64 widened = (int64)data;
+    return mov_imm_to_r_i64(a, reg_no, widened);
+}
+
+/**
+ * Encode converting int32 register data to int64 register data with
+ * signed extension
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_i32_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const bool is_signed = true;
+    return extend_r32_to_r64(a, reg_no_dst, reg_no_src, is_signed);
+}
+
+/**
+ * Encode converting int32 register data to float register data
+ * with cvtsi2ss
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i32_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 32-bit source operand form */
+    a.cvtsi2ss(regs_float[reg_no_dst], regs_i32[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode converting int32 immediate data to float register data
+ *
+ * The constant is loaded into the free 32-bit register and converted
+ * from there with cvtsi2ss.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate int32 data
+ *
+ * @return always true
+ */
+static bool
+convert_imm_i32_to_r_f32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    a.mov(regs_i32[REG_I32_FREE_IDX], Imm(data));
+    a.cvtsi2ss(regs_float[reg_no], regs_i32[REG_I32_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting int32 register data to double register data
+ * with cvtsi2sd
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i32_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 32-bit source operand form */
+    a.cvtsi2sd(regs_float[reg_no_dst], regs_i32[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode converting int32 immediate data to double register data
+ *
+ * The constant is loaded into the free 32-bit register and converted
+ * from there with cvtsi2sd.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate int32 data
+ *
+ * @return always true
+ */
+static bool
+convert_imm_i32_to_r_f64(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    a.mov(regs_i32[REG_I32_FREE_IDX], Imm(data));
+    a.cvtsi2sd(regs_float[reg_no], regs_i32[REG_I32_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to int32 register data
+ *
+ * The low 32 bits of the constant are loaded with a 32-bit move, which
+ * leaves the upper half of the 64-bit register zero. This agrees with
+ * convert_r_i64_to_r_i32, which masks the register with 0xFFFFFFFF;
+ * loading the value through the 64-bit move would instead fill the
+ * upper half with copies of the sign bit.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_i32(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    return mov_imm_to_r_i32(a, reg_no, (int32)data);
+}
+
+/**
+ * Encode converting int64 register data to int32 register data
+ *
+ * Copies src to dst as a 64-bit register (skipped when they are the
+ * same register) and masks dst with 0xFFFFFFFF, clearing the upper
+ * 32 bits.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i64_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    }
+    a.and_(regs_i64[reg_no_dst], 0xFFFFFFFFLL);
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to int8 register data
+ *
+ * Only the low 8 bits of the constant are kept and the rest of the
+ * register is zero, which agrees with convert_r_i64_to_r_i8 (mask with
+ * 0xFF) and with convert_imm_i32_to_r_i8. Casting through int8 would
+ * sign-extend the constant into the full 64-bit register instead.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int8 register, written as a 64-bit register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_i8(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, data & 0xFF);
+}
+
+/**
+ * Encode converting int64 register data to int8 register data
+ *
+ * Copies src to dst as a 64-bit register (skipped when they are the
+ * same register) and masks dst with 0xFF, keeping only the low 8 bits.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int8 register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i64_to_r_i8(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    }
+    a.and_(regs_i64[reg_no_dst], 0xFFLL);
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to int16 register data
+ *
+ * Only the low 16 bits of the constant are kept and the rest of the
+ * register is zero, which agrees with convert_r_i64_to_r_i16 (mask with
+ * 0xFFFF) and with convert_imm_i32_to_r_i16. Casting through int16 would
+ * sign-extend the constant into the full 64-bit register instead.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int16 register, written as a 64-bit register
+ * @param data the src immediate int64 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_i64_to_r_i16(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    return mov_imm_to_r_i64(a, reg_no, data & 0xFFFF);
+}
+
+/**
+ * Encode converting int64 register data to int16 register data
+ *
+ * Copies src to dst as a 64-bit register (skipped when they are the
+ * same register) and masks dst with 0xFFFF, keeping only the low
+ * 16 bits.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int16 register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i64_to_r_i16(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    }
+    a.and_(regs_i64[reg_no_dst], 0xFFFFLL);
+    return true;
+}
+
+/**
+ * Encode loading a uint32 constant, zero-extended to 64 bits, into an
+ * int64 register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate uint32 data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_u32_to_r_i64(x86::Assembler &a, int32 reg_no, uint32 data)
+{
+    /* Widen as unsigned first so that the upper 32 bits are zero */
+    const uint64 zero_extended = (uint64)data;
+    return mov_imm_to_r_i64(a, reg_no, (int64)zero_extended);
+}
+
+/**
+ * Encode converting uint32 register data to int64 register data
+ *
+ * Delegates to extend_r32_to_r64 with its last argument set to false,
+ * i.e. without the signed extension used by convert_r_i32_to_r_i64.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src uint32 register
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_r_u32_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const bool is_signed = false;
+    return extend_r32_to_r64(a, reg_no_dst, reg_no_src, is_signed);
+}
+
+/**
+ * Encode converting uint32 immediate data to float register data
+ *
+ * The constant is zero-extended into the free 64-bit register and
+ * converted from there with the 64-bit source form of cvtsi2ss.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate uint32 data
+ *
+ * @return always true
+ */
+static bool
+convert_imm_u32_to_r_f32(x86::Assembler &a, int32 reg_no, uint32 data)
+{
+    const int64 zero_extended = (int64)(uint64)data;
+
+    a.mov(regs_i64[REG_I64_FREE_IDX], Imm(zero_extended));
+    a.cvtsi2ss(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting uint32 register data to float register data
+ *
+ * The source is first extended into the free 64-bit register (via
+ * convert_r_u32_to_r_i64) and converted from there with the 64-bit
+ * source form of cvtsi2ss.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src uint32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_u32_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* The result of the extension is ignored, as in the direct call */
+    convert_r_u32_to_r_i64(a, REG_I64_FREE_IDX, reg_no_src);
+    a.cvtsi2ss(regs_float[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting uint32 immediate data to double register data
+ *
+ * The constant is zero-extended into the free 64-bit register and
+ * converted from there with the 64-bit source form of cvtsi2sd.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate uint32 data
+ *
+ * @return always true
+ */
+static bool
+convert_imm_u32_to_r_f64(x86::Assembler &a, int32 reg_no, uint32 data)
+{
+    const int64 zero_extended = (int64)(uint64)data;
+
+    a.mov(regs_i64[REG_I64_FREE_IDX], Imm(zero_extended));
+    a.cvtsi2sd(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting uint32 register data to double register data
+ *
+ * The source is first extended into the free 64-bit register (via
+ * convert_r_u32_to_r_i64) and converted from there with the 64-bit
+ * source form of cvtsi2sd.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src uint32 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_u32_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* The result of the extension is ignored, as in the direct call */
+    convert_r_u32_to_r_i64(a, REG_I64_FREE_IDX, reg_no_src);
+    a.cvtsi2sd(regs_float[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting int64 register data to float register data
+ * with cvtsi2ss
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i64_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 64-bit source operand form */
+    a.cvtsi2ss(regs_float[reg_no_dst], regs_i64[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to float register data
+ *
+ * The constant is loaded into the free 64-bit register and converted
+ * from there with cvtsi2ss.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate int64 data
+ *
+ * @return always true
+ */
+static bool
+convert_imm_i64_to_r_f32(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    a.mov(regs_i64[REG_I64_FREE_IDX], Imm(data));
+    a.cvtsi2ss(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode converting int64 register data to double register data
+ * with cvtsi2sd
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return always true
+ */
+static bool
+convert_r_i64_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 64-bit source operand form */
+    a.cvtsi2sd(regs_float[reg_no_dst], regs_i64[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode converting int64 immediate data to double register data
+ *
+ * The constant is loaded into the free 64-bit register and converted
+ * from there with cvtsi2sd.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate int64 data
+ *
+ * @return always true
+ */
+static bool
+convert_imm_i64_to_r_f64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    a.mov(regs_i64[REG_I64_FREE_IDX], Imm(data));
+    a.cvtsi2sd(regs_float[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode loading a float constant, truncated to int32, into a register
+ *
+ * The truncation is evaluated by the C++ compiler at codegen time.
+ * NOTE(review): the C++ cast is undefined for NaN and for values outside
+ * the int32 range - presumably callers never pass those; confirm.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_i32(x86::Assembler &a, int32 reg_no, float data)
+{
+    const int32 truncated = (int32)data;
+    return mov_imm_to_r_i32(a, reg_no, truncated);
+}
+
+/**
+ * Encode converting float register data to int32 register data
+ * with cvttss2si (conversion with truncation)
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src float register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f32_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 32-bit destination operand form */
+    a.cvttss2si(regs_i32[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode loading a float constant, truncated to uint32, into a register
+ *
+ * The truncation is evaluated by the C++ compiler at codegen time.
+ * NOTE(review): the C++ cast is undefined for NaN and for values outside
+ * the uint32 range - presumably callers never pass those; confirm.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, written as a 32-bit register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_u32(x86::Assembler &a, int32 reg_no, float data)
+{
+    const uint32 truncated = (uint32)data;
+    return mov_imm_to_r_i32(a, reg_no, truncated);
+}
+
+/**
+ * Encode converting float register data to uint32 register data
+ *
+ * Uses cvttss2si with a 64-bit destination register, unlike
+ * convert_r_f32_to_r_i32 which uses a 32-bit destination (presumably so
+ * that values above INT32_MAX still convert - confirm).
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register, written as a 64-bit register
+ * @param reg_no_src the no of src float register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f32_to_r_u32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 64-bit destination operand form */
+    a.cvttss2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode loading a float constant, truncated to int64, into a register
+ *
+ * The truncation is evaluated by the C++ compiler at codegen time.
+ * NOTE(review): the C++ cast is undefined for NaN and for values outside
+ * the int64 range - presumably callers never pass those; confirm.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_i64(x86::Assembler &a, int32 reg_no, float data)
+{
+    const int64 truncated = (int64)data;
+    return mov_imm_to_r_i64(a, reg_no, truncated);
+}
+
+/**
+ * Encode converting float register data to int64 register data
+ * with cvttss2si (conversion with truncation)
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src float register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f32_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 64-bit destination operand form */
+    a.cvttss2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode loading a float constant, widened to double, into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate float data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f32_to_r_f64(x86::Assembler &a, int32 reg_no, float data)
+{
+    /* The widening is done by the C++ compiler at codegen time */
+    const double widened = (double)data;
+    return mov_imm_to_r_f64(a, reg_no, widened);
+}
+
+/**
+ * Encode converting float register data to double register data
+ * with cvtss2sd
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src float register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f32_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Both operands are indexes into regs_float */
+    a.cvtss2sd(regs_float[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode loading a double constant, truncated to int32, into a register
+ *
+ * The truncation is evaluated by the C++ compiler at codegen time.
+ * NOTE(review): the C++ cast is undefined for NaN and for values outside
+ * the int32 range - presumably callers never pass those; confirm.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_i32(x86::Assembler &a, int32 reg_no, double data)
+{
+    const int32 truncated = (int32)data;
+    return mov_imm_to_r_i32(a, reg_no, truncated);
+}
+
+/**
+ * Encode converting double register data to int32 register data
+ * with cvttsd2si (conversion with truncation)
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src double register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f64_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 32-bit destination operand form */
+    a.cvttsd2si(regs_i32[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode loading a double constant, truncated to int64, into a register
+ *
+ * The truncation is evaluated by the C++ compiler at codegen time.
+ * NOTE(review): the C++ cast is undefined for NaN and for values outside
+ * the int64 range - presumably callers never pass those; confirm.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_i64(x86::Assembler &a, int32 reg_no, double data)
+{
+    const int64 truncated = (int64)data;
+    return mov_imm_to_r_i64(a, reg_no, truncated);
+}
+
+/**
+ * Encode converting double register data to int64 register data
+ * with cvttsd2si (conversion with truncation)
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src double register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f64_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 64-bit destination operand form */
+    a.cvttsd2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode loading a double constant, narrowed to float, into a register
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_f32(x86::Assembler &a, int32 reg_no, double data)
+{
+    /* The narrowing is done by the C++ compiler at codegen time */
+    const float narrowed = (float)data;
+    return mov_imm_to_r_f32(a, reg_no, narrowed);
+}
+
+/**
+ * Encode converting double register data to float register data
+ * with cvtsd2ss
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src double register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f64_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Both operands are indexes into regs_float */
+    a.cvtsd2ss(regs_float[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode loading a double constant, truncated to uint32, into a register
+ *
+ * The truncation is evaluated by the C++ compiler at codegen time.
+ * NOTE(review): the C++ cast is undefined for NaN and for values outside
+ * the uint32 range - presumably callers never pass those; confirm.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register, written as a 32-bit register
+ * @param data the src immediate double data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+convert_imm_f64_to_r_u32(x86::Assembler &a, int32 reg_no, double data)
+{
+    const uint32 truncated = (uint32)data;
+    return mov_imm_to_r_i32(a, reg_no, truncated);
+}
+
+/**
+ * Encode converting double register data to uint32 register data
+ *
+ * Uses cvttsd2si with a 64-bit destination register, unlike
+ * convert_r_f64_to_r_i32 which uses a 32-bit destination (presumably so
+ * that values above INT32_MAX still convert - confirm).
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register, written as a 64-bit register
+ * @param reg_no_src the no of src double register
+ *
+ * @return always true
+ */
+static bool
+convert_r_f64_to_r_u32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* 64-bit destination operand form */
+    a.cvttsd2si(regs_i64[reg_no_dst], regs_float[reg_no_src]);
+
+    return true;
+}
+
+/**
+ * Encode making negative from int32 immediate data to int32 register
+ *
+ * The negation is evaluated at codegen time with wrap-around (two's
+ * complement) semantics, so negating INT32_MIN yields INT32_MIN.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the src int32 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_imm_to_r_i32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    /* Negate in unsigned arithmetic: -data on the signed type overflows
+       (undefined behaviour) when data is INT32_MIN */
+    int32 negated = (int32)(0u - (uint32)data);
+    Imm imm(negated);
+    a.mov(regs_i32[reg_no], imm);
+    return true;
+}
+
+/**
+ * Encode negating int32 register data into an int32 register
+ *
+ * Copies src to dst (skipped when they are the same register) and
+ * negates dst in place.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return always true
+ */
+static bool
+neg_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    }
+    a.neg(regs_i32[reg_no_dst]);
+    return true;
+}
+
+/**
+ * Encode making negative from int64 immediate data to int64 register
+ *
+ * The negation is evaluated at codegen time with wrap-around (two's
+ * complement) semantics, so negating INT64_MIN yields INT64_MIN.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst register
+ * @param data the src int64 immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+neg_imm_to_r_i64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    /* Negate in unsigned arithmetic: -data on the signed type overflows
+       (undefined behaviour) when data is INT64_MIN */
+    int64 negated = (int64)((uint64)0 - (uint64)data);
+    Imm imm(negated);
+    a.mov(regs_i64[reg_no], imm);
+    return true;
+}
+
+/**
+ * Encode negating int64 register data into an int64 register
+ *
+ * Copies src to dst (skipped when they are the same register) and
+ * negates dst in place.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return always true
+ */
+static bool
+neg_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (reg_no_dst != reg_no_src) {
+        a.mov(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    }
+    a.neg(regs_i64[reg_no_dst]);
+    return true;
+}
+
+/**
+ * Encode making negative from float immediate data to float register
+ *
+ * Not implemented: the function asserts and reports failure.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param reg_no the no of dst float register (unused)
+ * @param data the src float immediate data (unused)
+ *
+ * @return always false
+ */
+static bool
+neg_imm_to_r_f32(x86::Assembler &a, int32 reg_no, float data)
+{
+    /* Silence unused-parameter warnings */
+    (void)a;
+    (void)reg_no;
+    (void)data;
+
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode making negative from float register to float register
+ *
+ * Not implemented: the function asserts and reports failure.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param reg_no_dst the no of dst register (unused)
+ * @param reg_no_src the no of src register (unused)
+ *
+ * @return always false
+ */
+static bool
+neg_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Silence unused-parameter warnings */
+    (void)a;
+    (void)reg_no_dst;
+    (void)reg_no_src;
+
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode making negative from double immediate data to double register
+ *
+ * Not implemented: the function asserts and reports failure.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param reg_no the no of dst double register (unused)
+ * @param data the src double immediate data (unused)
+ *
+ * @return always false
+ */
+static bool
+neg_imm_to_r_f64(x86::Assembler &a, int32 reg_no, double data)
+{
+    /* Silence unused-parameter warnings */
+    (void)a;
+    (void)reg_no;
+    (void)data;
+
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode making negative from double register to double register
+ *
+ * Not implemented: the function asserts and reports failure.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param reg_no_dst the no of dst double register (unused)
+ * @param reg_no_src the no of src double register (unused)
+ *
+ * @return always false
+ */
+static bool
+neg_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    /* Silence unused-parameter warnings */
+    (void)a;
+    (void)reg_no_dst;
+    (void)reg_no_src;
+
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Map a condition opcode to another condition opcode through a lookup
+ * table indexed by the opcode value
+ *
+ * NOTE(review): the table yields the logical negation of each condition
+ * only if COND_OP is declared in the order EQ, NE, GTS, GES, LTS, LES,
+ * GTU, GEU, LTU, LEU - confirm against the enum definition.
+ *
+ * @param op the condition opcode, must not be greater than LEU
+ *
+ * @return the table entry for op
+ */
+static COND_OP
+not_cond(COND_OP op)
+{
+    static const COND_OP opposite[] = { NE,  EQ,  LES, LTS, GES,
+                                        GTS, LEU, LTU, GEU, GTU };
+
+    bh_assert(op <= LEU);
+    return opposite[op];
+}
+
+/**
+ * Encode int32 alu operation of reg and data, and save result to reg
+ *
+ * For DIV_S/DIV_U/REM_S/REM_U the dividend must be in eax and the
+ * destination must be eax (quotient) or edx (remainder); the divisor
+ * is loaded into the free int32 register.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register to save the result
+ * @param reg_no_src the no of register, as the first operand
+ * @param data the immediate data, as the second operand
+ *
+ * @return true if success, false if the opcode is not supported
+ */
+static bool
+alu_r_r_imm_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                int32 reg_no_src, int32 data)
+{
+    Imm imm(data);
+
+    switch (op) {
+        case ADD:
+            mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+            /* +1/-1 use inc/dec, adding 0 needs no instruction */
+            if (data == 1)
+                a.inc(regs_i32[reg_no_dst]);
+            else if (data == -1)
+                a.dec(regs_i32[reg_no_dst]);
+            else if (data != 0)
+                a.add(regs_i32[reg_no_dst], imm);
+            break;
+        case SUB:
+            mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+            /* -(-1)/-(+1) use inc/dec, subtracting 0 needs no instruction */
+            if (data == -1)
+                a.inc(regs_i32[reg_no_dst]);
+            else if (data == 1)
+                a.dec(regs_i32[reg_no_dst]);
+            else if (data != 0)
+                a.sub(regs_i32[reg_no_dst], imm);
+            break;
+        case MUL:
+            if (data == 0)
+                /* x * 0 == 0 */
+                a.xor_(regs_i32[reg_no_dst], regs_i32[reg_no_dst]);
+            else if (data == -1) {
+                /* x * -1 == -x */
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+                a.neg(regs_i32[reg_no_dst]);
+            }
+            else if (data == 1) {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+            }
+            else if (data > 0 && (data & (data - 1)) == 0x0) {
+                /* positive power of two: shift left instead */
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no_src);
+                data = (int32)local_log2(data);
+                imm.setValue(data);
+                a.shl(regs_i32[reg_no_dst], imm);
+            }
+            else {
+                a.imul(regs_i32[reg_no_dst], regs_i32[reg_no_src], imm);
+            }
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no_src == REG_EAX_IDX);
+            if (op == DIV_S) {
+                bh_assert(reg_no_dst == REG_EAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_EDX_IDX);
+            }
+            a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+            /* signed extend eax to edx:eax */
+            a.cdq();
+            a.idiv(regs_i32[REG_I32_FREE_IDX]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no_src == REG_EAX_IDX);
+            if (op == DIV_U) {
+                bh_assert(reg_no_dst == REG_EAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_EDX_IDX);
+            }
+            a.mov(regs_i32[REG_I32_FREE_IDX], imm);
+            /* unsigned extend eax to edx:eax */
+            a.xor_(regs_i32[REG_EDX_IDX], regs_i32[REG_EDX_IDX]);
+            a.div(regs_i32[REG_I32_FREE_IDX]);
+            break;
+        default:
+            /* Report the unsupported opcode instead of claiming success
+               with nothing emitted, as alu_r_r_r_i32 does */
+            bh_assert(0);
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Encode int32 alu operation of reg and reg, and save result to reg
+ *
+ * For DIV_S/DIV_U/REM_S/REM_U the first operand must be in eax and the
+ * destination must be eax (quotient) or edx (remainder).
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register to save the result
+ * @param reg_no1_src the no of register, as the first operand
+ * @param reg_no2_src the no of register, as the second operand
+ *
+ * @return true if success, false if the opcode is not supported
+ */
+static bool
+alu_r_r_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src,
+              int32 reg_no2_src)
+{
+    switch (op) {
+        case ADD:
+            if (reg_no_dst == reg_no2_src) {
+                /* dst already holds the second operand: dst += src1 */
+                a.add(regs_i32[reg_no2_src], regs_i32[reg_no1_src]);
+            }
+            else {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src);
+                a.add(regs_i32[reg_no_dst], regs_i32[reg_no2_src]);
+            }
+            break;
+        case SUB:
+            if (reg_no_dst == reg_no2_src) {
+                /* dst holds the second operand: compute src2 - src1 and
+                   negate it to get src1 - src2 */
+                a.sub(regs_i32[reg_no2_src], regs_i32[reg_no1_src]);
+                a.neg(regs_i32[reg_no2_src]);
+            }
+            else {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src);
+                a.sub(regs_i32[reg_no_dst], regs_i32[reg_no2_src]);
+            }
+            break;
+        case MUL:
+            if (reg_no_dst == reg_no2_src) {
+                /* dst already holds the second operand: dst *= src1 */
+                a.imul(regs_i32[reg_no2_src], regs_i32[reg_no1_src]);
+            }
+            else {
+                mov_r_to_r(a, JIT_REG_KIND_I32, reg_no_dst, reg_no1_src);
+                a.imul(regs_i32[reg_no_dst], regs_i32[reg_no2_src]);
+            }
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no1_src == REG_EAX_IDX);
+            bh_assert(reg_no_dst
+                      == (op == DIV_S ? REG_EAX_IDX : REG_EDX_IDX));
+            if (op == REM_S && reg_no2_src == REG_EDX_IDX) {
+                /* `REM_S edx, eax, edx`: cdq below overwrites edx, so
+                   move the divisor to the free register first */
+                a.mov(regs_i32[REG_I32_FREE_IDX], regs_i32[REG_EDX_IDX]);
+                reg_no2_src = REG_I32_FREE_IDX;
+            }
+            /* signed extend eax to edx:eax */
+            a.cdq();
+            a.idiv(regs_i32[reg_no2_src]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no1_src == REG_EAX_IDX);
+            bh_assert(reg_no_dst
+                      == (op == DIV_U ? REG_EAX_IDX : REG_EDX_IDX));
+            if (op == REM_U && reg_no2_src == REG_EDX_IDX) {
+                /* `REM_U edx, eax, edx`: clearing edx below overwrites
+                   the divisor, so move it to the free register first */
+                a.mov(regs_i32[REG_I32_FREE_IDX], regs_i32[REG_EDX_IDX]);
+                reg_no2_src = REG_I32_FREE_IDX;
+            }
+            /* unsigned extend eax to edx:eax */
+            a.xor_(regs_i32[REG_EDX_IDX], regs_i32[REG_EDX_IDX]);
+            a.div(regs_i32[reg_no2_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Encode int32 alu operation of imm and imm, and save result to reg
+ *
+ * The operation is folded at code generation time and the result is
+ * loaded as an immediate. ADD/SUB/MUL wrap modulo 2^32. Division or
+ * remainder by zero and the signed division INT32_MIN / -1 cannot be
+ * folded into a value and are reported as failure; INT32_MIN % -1
+ * folds to 0.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false if the opcode is not supported or the
+ *         operation has no foldable result
+ */
+static bool
+alu_imm_imm_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     int32 data1_src, int32 data2_src)
+{
+    Imm imm;
+    int32 data = 0;
+
+    switch (op) {
+        /* Signed overflow is undefined behavior in the host language,
+           so the wrapping operations are done on uint32 */
+        case ADD:
+            data = (int32)((uint32)data1_src + (uint32)data2_src);
+            break;
+        case SUB:
+            data = (int32)((uint32)data1_src - (uint32)data2_src);
+            break;
+        case MUL:
+            data = (int32)((uint32)data1_src * (uint32)data2_src);
+            break;
+        case DIV_S:
+        case REM_S:
+            if (data2_src == 0)
+                return false;
+            if (data1_src == INT32_MIN && data2_src == -1) {
+                /* The quotient is not representable; evaluating either
+                   `/` or `%` here would be undefined behavior */
+                if (op == DIV_S)
+                    return false;
+                data = 0;
+            }
+            else if (op == DIV_S)
+                data = data1_src / data2_src;
+            else
+                data = data1_src % data2_src;
+            break;
+        case DIV_U:
+            if (data2_src == 0)
+                return false;
+            data = (uint32)data1_src / (uint32)data2_src;
+            break;
+        case REM_U:
+            if (data2_src == 0)
+                return false;
+            data = (uint32)data1_src % (uint32)data2_src;
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    imm.setValue(data);
+    a.mov(regs_i32[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Encode int32 alu operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 data1_src, int32 reg_no2_src)
+{
+    int32 reg_no1_src;
+
+    /* Operand order does not matter for ADD and MUL: reuse reg-imm form */
+    if (op == ADD || op == MUL)
+        return alu_r_r_imm_i32(a, op, reg_no_dst, reg_no2_src, data1_src);
+
+    if (op == SUB) {
+        /* imm - reg is computed as -(reg - imm) */
+        if (!alu_r_r_imm_i32(a, op, reg_no_dst, reg_no2_src, data1_src))
+            return false;
+        a.neg(regs_i32[reg_no_dst]);
+        return true;
+    }
+
+    /* Remaining opcodes: materialize the immediate in a register and use
+       the reg-reg form. When dst is also the second operand the free
+       register holds the immediate so that the operand is not clobbered.
+       NOTE(review): alu_r_r_r_i32 asserts that the first operand of
+       DIV and REM is REG_EAX_IDX, while the register passed here is
+       reg_no_dst or REG_I32_FREE_IDX - confirm which combinations can
+       actually reach this path */
+    reg_no1_src = (reg_no_dst != reg_no2_src) ? reg_no_dst : REG_I32_FREE_IDX;
+
+    return mov_imm_to_r_i32(a, reg_no1_src, data1_src)
+           && alu_r_r_r_i32(a, op, reg_no_dst, reg_no1_src, reg_no2_src);
+}
+
+/**
+ * Encode int32 alu operation of reg and imm, and save result to reg
+ *
+ * Thin wrapper that forwards to alu_r_r_imm_i32.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 data2_src)
+{
+    bool ok = alu_r_r_imm_i32(a, op, reg_no_dst, reg_no1_src, data2_src);
+
+    return ok;
+}
+
+/**
+ * Encode int32 alu operation of reg and reg, and save result to reg
+ *
+ * Thin wrapper that forwards to alu_r_r_r_i32.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_i32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    bool ok = alu_r_r_r_i32(a, op, reg_no_dst, reg_no1_src, reg_no2_src);
+
+    return ok;
+}
+
+/**
+ * Encode int64 alu operation of reg and reg, and save result to reg
+ *
+ * For DIV_S/DIV_U/REM_S/REM_U the first operand must be in rax and the
+ * destination must be rax (quotient) or rdx (remainder).
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register to save the result
+ * @param reg_no1_src the no of register, as the first operand
+ * @param reg_no2_src the no of register, as the second operand
+ *
+ * @return true if success, false if the opcode is not supported
+ */
+static bool
+alu_r_r_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst, int32 reg_no1_src,
+              int32 reg_no2_src)
+{
+    switch (op) {
+        case ADD:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src);
+                a.add(regs_i64[reg_no_dst], regs_i64[reg_no2_src]);
+            }
+            else
+                a.add(regs_i64[reg_no2_src], regs_i64[reg_no1_src]);
+            break;
+        case SUB:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src);
+                a.sub(regs_i64[reg_no_dst], regs_i64[reg_no2_src]);
+            }
+            else {
+                /* dst holds the second operand: compute src2 - src1 and
+                   negate it to get src1 - src2 */
+                a.sub(regs_i64[reg_no2_src], regs_i64[reg_no1_src]);
+                a.neg(regs_i64[reg_no2_src]);
+            }
+            break;
+        case MUL:
+            if (reg_no_dst != reg_no2_src) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no1_src);
+                a.imul(regs_i64[reg_no_dst], regs_i64[reg_no2_src]);
+            }
+            else
+                a.imul(regs_i64[reg_no2_src], regs_i64[reg_no1_src]);
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no1_src == REG_RAX_IDX);
+            if (op == DIV_S) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+                if (reg_no2_src == REG_RDX_IDX) {
+                    /* convert `REM_S rdx, rax, rdx` into
+                       `mov free, rdx` and `REM_S rdx, rax, free` to
+                       avoid overwriting the divisor when a.cqo(), as
+                       the int32 version does */
+                    a.mov(regs_i64[REG_I64_FREE_IDX], regs_i64[REG_RDX_IDX]);
+                    reg_no2_src = REG_I64_FREE_IDX;
+                }
+            }
+            /* signed extend rax to rdx:rax */
+            a.cqo();
+            a.idiv(regs_i64[reg_no2_src]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no1_src == REG_RAX_IDX);
+            if (op == DIV_U) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+                if (reg_no2_src == REG_RDX_IDX) {
+                    /* convert `REM_U rdx, rax, rdx` into
+                       `mov free, rdx` and `REM_U rdx, rax, free` to
+                       avoid overwriting the divisor when rdx is
+                       cleared below */
+                    a.mov(regs_i64[REG_I64_FREE_IDX], regs_i64[REG_RDX_IDX]);
+                    reg_no2_src = REG_I64_FREE_IDX;
+                }
+            }
+            /* unsigned extend rax to rdx:rax */
+            a.xor_(regs_i64[REG_RDX_IDX], regs_i64[REG_RDX_IDX]);
+            a.div(regs_i64[reg_no2_src]);
+            break;
+        default:
+            /* Report the unsupported opcode instead of claiming success
+               with nothing emitted, as alu_r_r_r_i32 does */
+            bh_assert(0);
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Encode int64 alu operation of reg and data, and save result to reg
+ *
+ * Immediates that fit in a signed 32-bit value are encoded directly;
+ * larger ones are first loaded into a register. For
+ * DIV_S/DIV_U/REM_S/REM_U the dividend must be in rax and the
+ * destination must be rax (quotient) or rdx (remainder); the divisor
+ * is loaded into the free int64 register.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register to save the result
+ * @param reg_no_src the no of register, as the first operand
+ * @param data the immediate data, as the second operand
+ *
+ * @return true if success, false if the opcode is not supported or a
+ *         helper failed
+ */
+static bool
+alu_r_r_imm_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                int32 reg_no_src, int64 data)
+{
+    Imm imm(data);
+
+    switch (op) {
+        case ADD:
+            mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+            if (data == 1)
+                a.inc(regs_i64[reg_no_dst]);
+            else if (data == -1)
+                a.dec(regs_i64[reg_no_dst]);
+            else if (data != 0) {
+                if (data >= INT32_MIN && data <= INT32_MAX) {
+                    imm.setValue((int32)data);
+                    a.add(regs_i64[reg_no_dst], imm);
+                }
+                else {
+                    /* too wide for an imm32: go through the free reg */
+                    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+                    a.add(regs_i64[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+                }
+            }
+            break;
+        case SUB:
+            mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+            if (data == -1)
+                a.inc(regs_i64[reg_no_dst]);
+            else if (data == 1)
+                a.dec(regs_i64[reg_no_dst]);
+            else if (data != 0) {
+                if (data >= INT32_MIN && data <= INT32_MAX) {
+                    imm.setValue((int32)data);
+                    a.sub(regs_i64[reg_no_dst], imm);
+                }
+                else {
+                    /* too wide for an imm32: go through the free reg */
+                    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+                    a.sub(regs_i64[reg_no_dst], regs_i64[REG_I64_FREE_IDX]);
+                }
+            }
+            break;
+        case MUL:
+            if (data == 0)
+                /* x * 0 == 0 */
+                a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_dst]);
+            else if (data == -1) {
+                /* x * -1 == -x */
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+                a.neg(regs_i64[reg_no_dst]);
+            }
+            else if (data == 1) {
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+            }
+            else if (data > 0 && (data & (data - 1)) == 0x0) {
+                /* positive power of two: shift left instead */
+                mov_r_to_r(a, JIT_REG_KIND_I64, reg_no_dst, reg_no_src);
+                data = (int64)local_log2l(data);
+                imm.setValue(data);
+                a.shl(regs_i64[reg_no_dst], imm);
+            }
+            else if (INT32_MIN <= data && data <= INT32_MAX) {
+                a.imul(regs_i64[reg_no_dst], regs_i64[reg_no_src], imm);
+            }
+            else {
+                /* The constant is too wide for an imm32: load it into a
+                   register (the free one when dst is also the source, so
+                   the source is not clobbered) and use the reg-reg form.
+                   Helper failures are propagated to the caller */
+                int32 reg_no_imm =
+                    reg_no_dst == reg_no_src ? REG_I64_FREE_IDX : reg_no_dst;
+
+                if (!mov_imm_to_r_i64(a, reg_no_imm, data)
+                    || !alu_r_r_r_i64(a, op, reg_no_dst, reg_no_imm,
+                                      reg_no_src))
+                    return false;
+            }
+            break;
+        case DIV_S:
+        case REM_S:
+            bh_assert(reg_no_src == REG_RAX_IDX);
+            if (op == DIV_S) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+            }
+            a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+            /* signed extend rax to rdx:rax */
+            a.cqo();
+            a.idiv(regs_i64[REG_I64_FREE_IDX]);
+            break;
+        case DIV_U:
+        case REM_U:
+            bh_assert(reg_no_src == REG_RAX_IDX);
+            if (op == DIV_U) {
+                bh_assert(reg_no_dst == REG_RAX_IDX);
+            }
+            else {
+                bh_assert(reg_no_dst == REG_RDX_IDX);
+            }
+            a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+            /* unsigned extend rax to rdx:rax */
+            a.xor_(regs_i64[REG_RDX_IDX], regs_i64[REG_RDX_IDX]);
+            a.div(regs_i64[REG_I64_FREE_IDX]);
+            break;
+        default:
+            /* Report the unsupported opcode instead of claiming success
+               with nothing emitted */
+            bh_assert(0);
+            return false;
+    }
+
+    return true;
+}
+
+/**
+ * Encode int64 alu operation of imm and imm, and save result to reg
+ *
+ * The operation is folded at code generation time and the result is
+ * loaded as an immediate. ADD/SUB/MUL wrap modulo 2^64. Division or
+ * remainder by zero and the signed division INT64_MIN / -1 cannot be
+ * folded into a value and are reported as failure; INT64_MIN % -1
+ * folds to 0.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false if the opcode is not supported or the
+ *         operation has no foldable result
+ */
+static bool
+alu_imm_imm_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     int64 data1_src, int64 data2_src)
+{
+    Imm imm;
+    int64 data = 0;
+
+    switch (op) {
+        /* Signed overflow is undefined behavior in the host language,
+           so the wrapping operations are done on uint64 */
+        case ADD:
+            data = (int64)((uint64)data1_src + (uint64)data2_src);
+            break;
+        case SUB:
+            data = (int64)((uint64)data1_src - (uint64)data2_src);
+            break;
+        case MUL:
+            data = (int64)((uint64)data1_src * (uint64)data2_src);
+            break;
+        case DIV_S:
+        case REM_S:
+            if (data2_src == 0)
+                return false;
+            if (data1_src == INT64_MIN && data2_src == -1) {
+                /* The quotient is not representable; evaluating either
+                   `/` or `%` here would be undefined behavior */
+                if (op == DIV_S)
+                    return false;
+                data = 0;
+            }
+            else if (op == DIV_S)
+                data = data1_src / data2_src;
+            else
+                data = data1_src % data2_src;
+            break;
+        case DIV_U:
+            if (data2_src == 0)
+                return false;
+            data = (uint64)data1_src / (uint64)data2_src;
+            break;
+        case REM_U:
+            if (data2_src == 0)
+                return false;
+            data = (uint64)data1_src % (uint64)data2_src;
+            break;
+        default:
+            /* Same as the int32 version: do not emit a bogus constant */
+            bh_assert(0);
+            return false;
+    }
+
+    imm.setValue(data);
+    a.mov(regs_i64[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Encode int64 alu operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int64 data1_src, int32 reg_no2_src)
+{
+    int32 reg_no1_src;
+
+    /* Operand order does not matter for ADD and MUL: reuse reg-imm form */
+    if (op == ADD || op == MUL)
+        return alu_r_r_imm_i64(a, op, reg_no_dst, reg_no2_src, data1_src);
+
+    if (op == SUB) {
+        /* imm - reg is computed as -(reg - imm) */
+        if (!alu_r_r_imm_i64(a, op, reg_no_dst, reg_no2_src, data1_src))
+            return false;
+        a.neg(regs_i64[reg_no_dst]);
+        return true;
+    }
+
+    /* Remaining opcodes: materialize the immediate in a register and use
+       the reg-reg form. When dst is also the second operand the free
+       register holds the immediate so that the operand is not clobbered.
+       NOTE(review): alu_r_r_r_i64 asserts that the first operand of
+       DIV and REM is REG_RAX_IDX, while the register passed here is
+       reg_no_dst or REG_I64_FREE_IDX - confirm which combinations can
+       actually reach this path */
+    reg_no1_src = (reg_no_dst != reg_no2_src) ? reg_no_dst : REG_I64_FREE_IDX;
+
+    return mov_imm_to_r_i64(a, reg_no1_src, data1_src)
+           && alu_r_r_r_i64(a, op, reg_no_dst, reg_no1_src, reg_no2_src);
+}
+
+/**
+ * Encode int64 alu operation of reg and imm, and save result to reg
+ *
+ * Thin wrapper that forwards to alu_r_r_imm_i64.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int64 data2_src)
+{
+    bool ok = alu_r_r_imm_i64(a, op, reg_no_dst, reg_no1_src, data2_src);
+
+    return ok;
+}
+
+/**
+ * Encode int64 alu operation of reg and reg, and save result to reg
+ *
+ * Thin wrapper that forwards to alu_r_r_r_i64.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_i64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    bool ok = alu_r_r_r_i64(a, op, reg_no_dst, reg_no1_src, reg_no2_src);
+
+    return ok;
+}
+
+/**
+ * Encode float alu operation of imm and imm, and save result to reg
+ *
+ * The operation is evaluated at code generation time and the result is
+ * loaded as a float immediate.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     float data1_src, float data2_src)
+{
+    float folded;
+
+    if (op == ADD)
+        folded = data1_src + data2_src;
+    else if (op == SUB)
+        folded = data1_src - data2_src;
+    else if (op == MUL)
+        folded = data1_src * data2_src;
+    else if (op == DIV_S)
+        folded = data1_src / data2_src;
+    else if (op == MAX)
+        folded = fmaxf(data1_src, data2_src);
+    else if (op == MIN)
+        folded = fminf(data1_src, data2_src);
+    else {
+        /* opcode without a float form */
+        bh_assert(0);
+        return false;
+    }
+
+    return mov_imm_to_r_f32(a, reg_no_dst, folded);
+}
+
+/**
+ * Encode float/double alu operation of reg and memory, and save result
+ * to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no the no of float register, as first operand, and save result
+ * @param m the memory operand, as the second operand
+ * @param is_f32 true to emit the scalar single (ss) instruction,
+ *        false to emit the scalar double (sd) instruction
+ *
+ * @return true if success, false if the opcode is not supported
+ */
+static bool
+alu_r_m_float(x86::Assembler &a, ALU_OP op, int32 reg_no, x86::Mem &m,
+              bool is_f32)
+{
+    if (is_f32) {
+        switch (op) {
+            case ADD:
+                a.addss(regs_float[reg_no], m);
+                return true;
+            case SUB:
+                a.subss(regs_float[reg_no], m);
+                return true;
+            case MUL:
+                a.mulss(regs_float[reg_no], m);
+                return true;
+            case DIV_S:
+                a.divss(regs_float[reg_no], m);
+                return true;
+            case MAX:
+                a.maxss(regs_float[reg_no], m);
+                return true;
+            case MIN:
+                a.minss(regs_float[reg_no], m);
+                return true;
+            default:
+                break;
+        }
+    }
+    else {
+        switch (op) {
+            case ADD:
+                a.addsd(regs_float[reg_no], m);
+                return true;
+            case SUB:
+                a.subsd(regs_float[reg_no], m);
+                return true;
+            case MUL:
+                a.mulsd(regs_float[reg_no], m);
+                return true;
+            case DIV_S:
+                a.divsd(regs_float[reg_no], m);
+                return true;
+            case MAX:
+                a.maxsd(regs_float[reg_no], m);
+                return true;
+            case MIN:
+                a.minsd(regs_float[reg_no], m);
+                return true;
+            default:
+                break;
+        }
+    }
+
+    /* opcode without a float form */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode float alu operation of imm and reg, and save result to reg
+ *
+ * The register operand is first stored to the jit_cache field of the
+ * exec env, so the destination may be the same register as the second
+ * operand.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   float data1_src, int32 reg_no2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    x86::Mem cache = x86::xmmword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                      offsetof(WASMExecEnv, jit_cache));
+
+    /* second operand: xmm -> m128 */
+    a.movups(cache, regs_float[reg_no2_src]);
+
+    /* first operand: imm -> dst register */
+    mov_imm_to_r_f32(a, reg_no_dst, data1_src);
+
+    /* dst = dst op [cache] */
+    return alu_r_m_float(a, op, reg_no_dst, cache, true);
+}
+
+/**
+ * Encode float alu operation of reg and imm, and save result to reg
+ *
+ * The immediate is written to the jit_cache field of the exec env and
+ * used from there as a memory operand.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, float data2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    x86::Mem cache = x86::dword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                    offsetof(WASMExecEnv, jit_cache));
+    /* reinterpret the float bits as an integer immediate */
+    cast_float_to_integer v = { .f = data2_src };
+    Imm imm(v.i);
+
+    /* second operand: imm -> m32 */
+    mov_imm_to_m(a, cache, imm, 4);
+
+    /* first operand: src register -> dst register */
+    mov_r_to_r_f32(a, reg_no_dst, reg_no1_src);
+
+    /* dst = dst op [cache] */
+    return alu_r_m_float(a, op, reg_no_dst, cache, true);
+}
+
+/**
+ * Encode float alu operation of reg and reg, and save result to reg
+ *
+ * The instructions used store their result in their first operand, so:
+ * - op r0,r0,r1: r0 is used as the accumulator directly
+ * - op r1,r0,r1: the free float register is the accumulator, and the
+ *   result is copied to r1 afterwards
+ * - op r0,r1,r2: r1 is copied to r0, which is then the accumulator
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_f32(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* dst aliasing the second operand would clobber it on the first move */
+    const bool dst_is_src2 = (reg_no_dst == reg_no2_src);
+    const int32 reg_no_acc = dst_is_src2 ? REG_F32_FREE_IDX : reg_no_dst;
+
+    mov_r_to_r_f32(a, reg_no_acc, reg_no1_src);
+
+    switch (op) {
+        case ADD:
+            a.addss(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case SUB:
+            a.subss(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case MUL:
+            a.mulss(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case DIV_S:
+            a.divss(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case MAX:
+            a.maxss(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case MIN:
+            a.minss(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    /* move the result from the free register to the real destination */
+    if (dst_is_src2)
+        mov_r_to_r_f32(a, reg_no2_src, REG_F32_FREE_IDX);
+
+    return true;
+}
+
+/**
+ * Encode double alu operation of imm and imm, and save result to reg
+ *
+ * The operation is evaluated at code generation time and the result is
+ * loaded as a double immediate.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_imm_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                     double data1_src, double data2_src)
+{
+    double folded;
+
+    if (op == ADD)
+        folded = data1_src + data2_src;
+    else if (op == SUB)
+        folded = data1_src - data2_src;
+    else if (op == MUL)
+        folded = data1_src * data2_src;
+    else if (op == DIV_S)
+        folded = data1_src / data2_src;
+    else if (op == MAX)
+        folded = fmax(data1_src, data2_src);
+    else if (op == MIN)
+        folded = fmin(data1_src, data2_src);
+    else {
+        /* opcode without a double form */
+        bh_assert(0);
+        return false;
+    }
+
+    return mov_imm_to_r_f64(a, reg_no_dst, folded);
+}
+
+/**
+ * Encode double alu operation of imm and reg, and save result to reg
+ *
+ * The register operand is first stored to the jit_cache field of the
+ * exec env, so the destination may be the same register as the second
+ * operand.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_imm_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   double data1_src, int32 reg_no2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    /* NOTE(review): the operand is declared with qword_ptr, while the
+       float version uses xmmword_ptr for the same kind of store -
+       confirm the intended operand size */
+    x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                    offsetof(WASMExecEnv, jit_cache));
+
+    /* second operand: xmm -> memory */
+    a.movupd(cache, regs_float[reg_no2_src]);
+
+    /* first operand: imm -> dst register */
+    mov_imm_to_r_f64(a, reg_no_dst, data1_src);
+
+    /* dst = dst op [cache] */
+    return alu_r_m_float(a, op, reg_no_dst, cache, false);
+}
+
+/**
+ * Encode double alu operation of reg and imm, and save result to reg
+ *
+ * The immediate is written to the jit_cache field of the exec env and
+ * used from there as a memory operand.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_imm_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, double data2_src)
+{
+    const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+    x86::Mem cache = x86::qword_ptr(regs_i64[hreg_info->exec_env_hreg_index],
+                                    offsetof(WASMExecEnv, jit_cache));
+    /* reinterpret the double bits as an integer immediate */
+    cast_double_to_integer v = { .d = data2_src };
+    Imm imm(v.i);
+
+    /* second operand: imm -> m64 */
+    mov_imm_to_m(a, cache, imm, 8);
+
+    /* first operand: src register -> dst register */
+    mov_r_to_r_f64(a, reg_no_dst, reg_no1_src);
+
+    /* dst = dst op [cache] */
+    return alu_r_m_float(a, op, reg_no_dst, cache, false);
+}
+
+/**
+ * Encode double alu operation of reg and reg, and save result to reg
+ *
+ * The instructions used store their result in their first operand, so:
+ * - op r0,r0,r1: r0 is used as the accumulator directly
+ * - op r1,r0,r1: the free double register is the accumulator, and the
+ *   result is copied to r1 afterwards
+ * - op r0,r1,r2: r1 is copied to r0, which is then the accumulator
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of ALU operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+alu_r_r_to_r_f64(x86::Assembler &a, ALU_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* dst aliasing the second operand would clobber it on the first move */
+    const bool dst_is_src2 = (reg_no_dst == reg_no2_src);
+    const int32 reg_no_acc = dst_is_src2 ? REG_F64_FREE_IDX : reg_no_dst;
+
+    mov_r_to_r_f64(a, reg_no_acc, reg_no1_src);
+
+    switch (op) {
+        case ADD:
+            a.addsd(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case SUB:
+            a.subsd(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case MUL:
+            a.mulsd(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case DIV_S:
+            a.divsd(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case MAX:
+            a.maxsd(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        case MIN:
+            a.minsd(regs_float[reg_no_acc], regs_float[reg_no2_src]);
+            break;
+        default:
+            bh_assert(0);
+            return false;
+    }
+
+    /* move the result from the free register to the real destination */
+    if (dst_is_src2)
+        mov_r_to_r_f64(a, reg_no2_src, REG_F64_FREE_IDX);
+
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of reg and data, and save result to reg
+ *
+ * No instruction is emitted when the immediate leaves the register
+ * unchanged (OR 0, XOR 0, AND -1).
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no the no of register, as first operand, and save result
+ * @param data the immediate data, as the second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_imm_i32(x86::Assembler &a, BIT_OP op, int32 reg_no, int32 data)
+{
+    Imm imm(data);
+
+    switch (op) {
+        case OR:
+            /* x | 0 == x */
+            if (data == 0)
+                break;
+            a.or_(regs_i32[reg_no], imm);
+            break;
+        case XOR:
+            /* x ^ 0 == x */
+            if (data == 0)
+                break;
+            if (data == -1)
+                /* x ^ -1 == ~x */
+                a.not_(regs_i32[reg_no]);
+            else
+                a.xor_(regs_i32[reg_no], imm);
+            break;
+        case AND:
+            /* x & -1 == x */
+            if (data == -1)
+                break;
+            a.and_(regs_i32[reg_no], imm);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of reg and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register, as first operand, and save result
+ * @param reg_no_src the no of register, as second operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (op == OR)
+        a.or_(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    else if (op == XOR)
+        a.xor_(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    else if (op == AND)
+        a.and_(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+    else
+        /* unknown opcode: nothing is emitted */
+        bh_assert(0);
+
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of imm and imm, and save result to reg
+ *
+ * The operation is evaluated at code generation time and the result is
+ * loaded as an immediate.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_imm_imm_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                     int32 data1_src, int32 data2_src)
+{
+    Imm imm;
+
+    if (op == OR)
+        imm.setValue(data1_src | data2_src);
+    else if (op == XOR)
+        imm.setValue(data1_src ^ data2_src);
+    else if (op == AND)
+        imm.setValue(data1_src & data2_src);
+    else
+        /* unknown opcode: imm keeps its default-constructed value */
+        bh_assert(0);
+
+    a.mov(regs_i32[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Encode int32 bit operation of imm and reg, and save result to reg
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_imm_r_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int32 data1_src, int32 reg_no2_src)
+{
+    /* 0 & x == 0: the register operand is not needed */
+    if (op == AND && data1_src == 0) {
+        a.xor_(regs_i32[reg_no_dst], regs_i32[reg_no_dst]);
+        return true;
+    }
+
+    /* -1 | x == -1: the register operand is not needed */
+    if (op == OR && data1_src == -1) {
+        Imm imm(-1);
+        a.mov(regs_i32[reg_no_dst], imm);
+        return true;
+    }
+
+    /* general case: dst = reg, then dst = dst op imm */
+    mov_r_to_r_i32(a, reg_no_dst, reg_no2_src);
+    return bit_r_imm_i32(a, op, reg_no_dst, data1_src);
+}
+
+/**
+ * Encode int32 bit operation of reg and imm, and save result to reg
+ *
+ * Forwards to bit_imm_r_to_r_i32 with the two operands swapped.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_imm_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 data2_src)
+{
+    bool ok = bit_imm_r_to_r_i32(a, op, reg_no_dst, data2_src, reg_no1_src);
+
+    return ok;
+}
+
+/**
+ * Emit an int32 bit operation of two registers, storing the result in a
+ * register
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of the destination register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_r_to_r_i32(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* When dst already is the second source, combine the first source
+       into it directly; copying src1 into dst first would overwrite the
+       second operand */
+    int32 reg_no_rhs = reg_no1_src;
+
+    if (reg_no_dst != reg_no2_src) {
+        mov_r_to_r_i32(a, reg_no_dst, reg_no1_src);
+        reg_no_rhs = reg_no2_src;
+    }
+
+    return bit_r_r_i32(a, op, reg_no_dst, reg_no_rhs);
+}
+
+/**
+ * Emit an int64 bit operation of a register and an immediate; the
+ * register is the first operand and also receives the result
+ *
+ * Nothing is emitted when the operation cannot change the register
+ * (OR/XOR with 0, AND with -1), and XOR with -1 is emitted as NOT.
+ * An immediate outside the int32 range is first moved into
+ * regs_i64[REG_I64_FREE_IDX] and then applied register to register.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no the no of register, as first operand, and save result
+ * @param data the immediate data, as the second operand
+ *
+ * @return always true; an unknown opcode only triggers bh_assert
+ */
+static bool
+bit_r_imm_i64(x86::Assembler &a, BIT_OP op, int32 reg_no, int64 data)
+{
+    Imm operand(data);
+    bool fits_imm32;
+
+    if (op != OR && op != XOR && op != AND) {
+        bh_assert(0);
+        return true;
+    }
+
+    /* Identity operand: the register would be left unchanged */
+    if ((op == AND) ? (data == -1LL) : (data == 0))
+        return true;
+
+    if (op == XOR && data == -1LL) {
+        a.not_(regs_i64[reg_no]);
+        return true;
+    }
+
+    fits_imm32 = (data >= INT32_MIN && data <= INT32_MAX);
+    if (fits_imm32)
+        operand.setValue((int32)data);
+    else
+        a.mov(regs_i64[REG_I64_FREE_IDX], operand);
+
+    if (op == OR) {
+        if (fits_imm32)
+            a.or_(regs_i64[reg_no], operand);
+        else
+            a.or_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    }
+    else if (op == XOR) {
+        if (fits_imm32)
+            a.xor_(regs_i64[reg_no], operand);
+        else
+            a.xor_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    }
+    else {
+        if (fits_imm32)
+            a.and_(regs_i64[reg_no], operand);
+        else
+            a.and_(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX]);
+    }
+
+    return true;
+}
+
+/**
+ * Emit an int64 bit operation of two registers; the first register also
+ * receives the result
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation (OR, XOR or AND)
+ * @param reg_no_dst the no of register, as first operand, and save result
+ * @param reg_no_src the no of register, as second operand
+ *
+ * @return always true; an unknown opcode only triggers bh_assert
+ */
+static bool
+bit_r_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst, int32 reg_no_src)
+{
+    if (op == OR) {
+        a.or_(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    }
+    else if (op == XOR) {
+        a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    }
+    else if (op == AND) {
+        a.and_(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+    }
+    else {
+        bh_assert(0);
+    }
+
+    return true;
+}
+
+/**
+ * Encode int64 bit operation of imm and imm, and save result to reg
+ *
+ * The operation is folded at compile time and the folded 64-bit value
+ * is moved into the destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation (OR, XOR or AND)
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return always true; an unknown opcode only triggers bh_assert
+ */
+static bool
+bit_imm_imm_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                     int64 data1_src, int64 data2_src)
+{
+    /* data1_src must be a full int64: declaring it as int32 would
+       truncate the upper 32 bits of the first operand before folding */
+    Imm imm;
+
+    switch (op) {
+        case OR:
+            imm.setValue(data1_src | data2_src);
+            break;
+        case XOR:
+            imm.setValue(data1_src ^ data2_src);
+            break;
+        case AND:
+            imm.setValue(data1_src & data2_src);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    a.mov(regs_i64[reg_no_dst], imm);
+    return true;
+}
+
+/**
+ * Emit an int64 bit operation whose first operand is an immediate and
+ * whose second operand is a register, storing the result in a register
+ *
+ * AND with 0 is emitted as a self-XOR of the destination, and OR with -1
+ * as a move of -1; every other case copies the source register to the
+ * destination and applies the immediate to it.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of the destination register
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_imm_r_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int64 data1_src, int32 reg_no2_src)
+{
+    if (op == AND && data1_src == 0) {
+        /* x & 0 is always 0 */
+        a.xor_(regs_i64[reg_no_dst], regs_i64[reg_no_dst]);
+        return true;
+    }
+
+    if (op == OR && data1_src == -1LL) {
+        /* x | -1 is always -1 */
+        Imm all_ones(-1LL);
+        a.mov(regs_i64[reg_no_dst], all_ones);
+        return true;
+    }
+
+    mov_r_to_r_i64(a, reg_no_dst, reg_no2_src);
+    return bit_r_imm_i64(a, op, reg_no_dst, data1_src);
+}
+
+/**
+ * Emit an int64 bit operation whose first operand is a register and
+ * whose second operand is an immediate, storing the result in a register
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of the destination register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_imm_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int64 data2_src)
+{
+    /* Delegate to the imm-reg variant with the two operands swapped */
+    const bool emitted =
+        bit_imm_r_to_r_i64(a, op, reg_no_dst, data2_src, reg_no1_src);
+
+    return emitted;
+}
+
+/**
+ * Emit an int64 bit operation of two registers, storing the result in a
+ * register
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BIT operation
+ * @param reg_no_dst the no of the destination register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+bit_r_r_to_r_i64(x86::Assembler &a, BIT_OP op, int32 reg_no_dst,
+                 int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* When dst already is the second source, combine the first source
+       into it directly; copying src1 into dst first would overwrite the
+       second operand */
+    int32 reg_no_rhs = reg_no1_src;
+
+    if (reg_no_dst != reg_no2_src) {
+        mov_r_to_r_i64(a, reg_no_dst, reg_no1_src);
+        reg_no_rhs = reg_no2_src;
+    }
+
+    return bit_r_r_i64(a, op, reg_no_dst, reg_no_rhs);
+}
+
+/**
+ * Encode int32 shift operation of imm and imm, and save result to reg
+ *
+ * The operation is folded at compile time. The shift count is reduced
+ * modulo 32, which is what the x86 shift/rotate instructions emitted by
+ * the register variants do for 32-bit operands, and it also keeps the
+ * C++ shifts below well defined (no shift by >= 32, no left shift of a
+ * negative value).
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data (value to shift)
+ * @param data2_src the second src immediate data (shift count)
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_imm_imm_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                       int32 data1_src, int32 data2_src)
+{
+    const uint32 value = (uint32)data1_src;
+    const uint32 count = (uint32)data2_src & 31;
+    /* Complementary count for rotates; masked so that count == 0 yields
+       a shift by 0 rather than by 32 */
+    const uint32 rcount = (32 - count) & 31;
+    int32 data;
+
+    switch (op) {
+        case SHL:
+        {
+            data = (int32)(value << count);
+            break;
+        }
+        case SHRS:
+        {
+            /* relies on arithmetic right shift of signed values, as the
+               original code did */
+            data = data1_src >> count;
+            break;
+        }
+        case SHRU:
+        {
+            data = (int32)(value >> count);
+            break;
+        }
+        case ROTL:
+        {
+            data = (int32)((value << count) | (value >> rcount));
+            break;
+        }
+        case ROTR:
+        {
+            data = (int32)((value >> count) | (value << rcount));
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            return false;
+        }
+    }
+
+    return mov_imm_to_r_i32(a, reg_no_dst, data);
+}
+
+/**
+ * Placeholder for the int32 shift of an immediate by a register count
+ *
+ * This form is not supported here: the function asserts and reports
+ * failure without emitting any code.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param op the opcode of SHIFT operation (unused)
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data (unused)
+ * @param reg_no2_src the reg no of second src register data (unused)
+ *
+ * @return always false
+ */
+static bool
+shift_imm_r_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int32 data1_src, int32 reg_no2_src)
+{
+    (void)reg_no2_src;
+    (void)data1_src;
+    (void)reg_no_dst;
+    (void)op;
+    (void)a;
+
+    /* Should have been optimized by previous lower */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Emit an int32 shift/rotate of a register by an immediate count,
+ * storing the result in a register
+ *
+ * The source register is copied to the destination first, then the
+ * destination is shifted in place by the count truncated to 8 bits.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of the destination register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data (shift count)
+ *
+ * @return true if success, false if the opcode is unknown
+ */
+static bool
+shift_r_imm_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int32 reg_no1_src, int32 data2_src)
+{
+    /* SHL/SHA/SHR r/m32, imm8 */
+    Imm count((uint8)data2_src);
+
+    mov_r_to_r_i32(a, reg_no_dst, reg_no1_src);
+
+    if (op == SHL) {
+        a.shl(regs_i32[reg_no_dst], count);
+    }
+    else if (op == SHRS) {
+        a.sar(regs_i32[reg_no_dst], count);
+    }
+    else if (op == SHRU) {
+        a.shr(regs_i32[reg_no_dst], count);
+    }
+    else if (op == ROTL) {
+        a.rol(regs_i32[reg_no_dst], count);
+    }
+    else if (op == ROTR) {
+        a.ror(regs_i32[reg_no_dst], count);
+    }
+    else {
+        bh_assert(0);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Emit an int32 shift/rotate of a register by a register count, storing
+ * the result in a register
+ *
+ * The count register must be the one at index REG_ECX_IDX, because the
+ * emitted instructions take their count from CL; any other count
+ * register is rejected before anything is emitted.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of shift operation
+ * @param reg_no_dst the no of the destination register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data (count)
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_r_r_to_r_i32(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* should be CL */
+    if (reg_no2_src != REG_ECX_IDX) {
+        return false;
+    }
+
+    mov_r_to_r_i32(a, reg_no_dst, reg_no1_src);
+
+    if (op == SHL) {
+        a.shl(regs_i32[reg_no_dst], x86::cl);
+    }
+    else if (op == SHRS) {
+        a.sar(regs_i32[reg_no_dst], x86::cl);
+    }
+    else if (op == SHRU) {
+        a.shr(regs_i32[reg_no_dst], x86::cl);
+    }
+    else if (op == ROTL) {
+        a.rol(regs_i32[reg_no_dst], x86::cl);
+    }
+    else if (op == ROTR) {
+        a.ror(regs_i32[reg_no_dst], x86::cl);
+    }
+    else {
+        bh_assert(0);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Encode int64 shift operation of imm and imm, and save result to reg
+ *
+ * The operation is folded at compile time. The shift count is reduced
+ * modulo 64, which is what the x86 shift/rotate instructions emitted by
+ * the register variants do for 64-bit operands, and it also keeps the
+ * C++ shifts below well defined (no shift by >= 64, no left shift of a
+ * negative value).
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of register
+ * @param data1_src the first src immediate data (value to shift)
+ * @param data2_src the second src immediate data (shift count)
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_imm_imm_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                       int64 data1_src, int64 data2_src)
+{
+    const uint64 value = (uint64)data1_src;
+    const uint32 count = (uint32)((uint64)data2_src & 63);
+    /* Complementary count for rotates; masked so that count == 0 yields
+       a shift by 0 rather than by 64 */
+    const uint32 rcount = (64 - count) & 63;
+    int64 data;
+
+    switch (op) {
+        case SHL:
+        {
+            data = (int64)(value << count);
+            break;
+        }
+        case SHRS:
+        {
+            /* relies on arithmetic right shift of signed values, as the
+               original code did */
+            data = data1_src >> count;
+            break;
+        }
+        case SHRU:
+        {
+            data = (int64)(value >> count);
+            break;
+        }
+        case ROTL:
+        {
+            data = (int64)((value << count) | (value >> rcount));
+            break;
+        }
+        case ROTR:
+        {
+            data = (int64)((value >> count) | (value << rcount));
+            break;
+        }
+        default:
+        {
+            bh_assert(0);
+            return false;
+        }
+    }
+
+    return mov_imm_to_r_i64(a, reg_no_dst, data);
+}
+
+/**
+ * Placeholder for the int64 shift of an immediate by a register count
+ *
+ * This form is not supported here: the function asserts and reports
+ * failure without emitting any code.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param op the opcode of SHIFT operation (unused)
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data (unused)
+ * @param reg_no2_src the reg no of second src register data (unused)
+ *
+ * @return always false
+ */
+static bool
+shift_imm_r_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int64 data1_src, int32 reg_no2_src)
+{
+    (void)reg_no2_src;
+    (void)data1_src;
+    (void)reg_no_dst;
+    (void)op;
+    (void)a;
+
+    /* Should have been optimized by previous lower */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Emit an int64 shift/rotate of a register by an immediate count,
+ * storing the result in a register
+ *
+ * The source register is copied to the destination first, then the
+ * destination is shifted in place by the count truncated to 8 bits.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of SHIFT operation
+ * @param reg_no_dst the no of the destination register
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data (shift count)
+ *
+ * @return true if success, false if the opcode is unknown
+ */
+static bool
+shift_r_imm_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                     int32 reg_no1_src, int64 data2_src)
+{
+    /* SHL/SHA/SHR r/m64, imm8 */
+    Imm count((uint8)data2_src);
+
+    mov_r_to_r_i64(a, reg_no_dst, reg_no1_src);
+
+    if (op == SHL) {
+        a.shl(regs_i64[reg_no_dst], count);
+    }
+    else if (op == SHRS) {
+        a.sar(regs_i64[reg_no_dst], count);
+    }
+    else if (op == SHRU) {
+        a.shr(regs_i64[reg_no_dst], count);
+    }
+    else if (op == ROTL) {
+        a.rol(regs_i64[reg_no_dst], count);
+    }
+    else if (op == ROTR) {
+        a.ror(regs_i64[reg_no_dst], count);
+    }
+    else {
+        bh_assert(0);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Emit an int64 shift/rotate of a register by a register count, storing
+ * the result in a register
+ *
+ * The count register must be the one at index REG_ECX_IDX, because the
+ * emitted instructions take their count from CL; any other count
+ * register is rejected before anything is emitted.
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of shift operation
+ * @param reg_no_dst the no of the destination register
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data (count)
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+shift_r_r_to_r_i64(x86::Assembler &a, SHIFT_OP op, int32 reg_no_dst,
+                   int32 reg_no1_src, int32 reg_no2_src)
+{
+    /* should be CL */
+    if (reg_no2_src != REG_ECX_IDX) {
+        return false;
+    }
+
+    mov_r_to_r_i64(a, reg_no_dst, reg_no1_src);
+
+    if (op == SHL) {
+        a.shl(regs_i64[reg_no_dst], x86::cl);
+    }
+    else if (op == SHRS) {
+        a.sar(regs_i64[reg_no_dst], x86::cl);
+    }
+    else if (op == SHRU) {
+        a.shr(regs_i64[reg_no_dst], x86::cl);
+    }
+    else if (op == ROTL) {
+        a.rol(regs_i64[reg_no_dst], x86::cl);
+    }
+    else if (op == ROTR) {
+        a.ror(regs_i64[reg_no_dst], x86::cl);
+    }
+    else {
+        bh_assert(0);
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Emit an int32 compare of two immediates
+ *
+ * The first immediate is loaded into regs_i32[REG_I32_FREE_IDX], which
+ * is then compared against the second immediate. Only the CMP is
+ * emitted; nothing is written to a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return always true
+ */
+static bool
+cmp_imm_imm_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 data1_src,
+                     int32 data2_src)
+{
+    Imm lhs(data1_src);
+    Imm rhs(data2_src);
+
+    (void)reg_no_dst;
+
+    a.mov(regs_i32[REG_I32_FREE_IDX], lhs);
+    a.cmp(regs_i32[REG_I32_FREE_IDX], rhs);
+    return true;
+}
+
+/**
+ * Emit an int32 compare of an immediate (first operand) and a register
+ * (second operand)
+ *
+ * The immediate is loaded into regs_i32[REG_I32_FREE_IDX], which is then
+ * compared against the register. Only the CMP is emitted; nothing is
+ * written to a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_imm_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 data1_src,
+                   int32 reg_no2_src)
+{
+    Imm lhs(data1_src);
+
+    (void)reg_no_dst;
+
+    a.mov(regs_i32[REG_I32_FREE_IDX], lhs);
+    a.cmp(regs_i32[REG_I32_FREE_IDX], regs_i32[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Emit an int32 compare of a register (first operand) and an immediate
+ * (second operand)
+ *
+ * Only the CMP is emitted; nothing is written to a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_imm_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   int32 data2_src)
+{
+    Imm rhs(data2_src);
+
+    (void)reg_no_dst;
+
+    a.cmp(regs_i32[reg_no1_src], rhs);
+    return true;
+}
+
+/**
+ * Emit an int32 compare of two registers
+ *
+ * Only the CMP is emitted; nothing is written to a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_r_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    (void)reg_no_dst;
+
+    a.cmp(regs_i32[reg_no1_src], regs_i32[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Encode int64 cmp operation of imm and imm
+ *
+ * The first immediate is loaded into regs_i64[REG_I64_FREE_IDX], which
+ * is then compared against the second immediate. Only the CMP is
+ * emitted; nothing is written to a destination register.
+ *
+ * CMP r/m64 can only encode a sign-extended 32-bit immediate. When the
+ * second immediate does not fit, both constants are replaced by a small
+ * pair that has the same equality, signed ordering and unsigned
+ * ordering, so the ZF, CF and SF^OF results match those of the real
+ * compare. SF, OF and the other flags taken individually are NOT
+ * preserved in that case.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data
+ * @param data2_src the second src immediate data
+ *
+ * @return always true
+ */
+static bool
+cmp_imm_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int64 data1_src,
+                     int64 data2_src)
+{
+    /* Both immediates are full int64 values: int32 parameters would
+       truncate them before the comparison */
+    if (data2_src < INT32_MIN || data2_src > INT32_MAX) {
+        const bool is_below = (uint64)data1_src < (uint64)data2_src;
+        const bool is_less = data1_src < data2_src;
+
+        if (data1_src == data2_src) {
+            data1_src = 0;
+            data2_src = 0;
+        }
+        else if (is_below) {
+            /* 0 vs 1: below and less; 0 vs -1: below but not less */
+            data1_src = 0;
+            data2_src = is_less ? 1 : -1;
+        }
+        else {
+            /* -1 vs 0: less but not below; 1 vs 0: neither */
+            data1_src = is_less ? -1 : 1;
+            data2_src = 0;
+        }
+    }
+
+    Imm imm(data1_src);
+    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+    imm.setValue((int32)data2_src);
+    a.cmp(regs_i64[REG_I64_FREE_IDX], imm);
+    (void)reg_no_dst;
+    return true;
+}
+
+/**
+ * Emit an int64 compare of an immediate (first operand) and a register
+ * (second operand)
+ *
+ * The immediate is loaded into regs_i64[REG_I64_FREE_IDX], which is then
+ * compared against the register. Only the CMP is emitted; nothing is
+ * written to a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_imm_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int64 data1_src,
+                   int32 reg_no2_src)
+{
+    Imm lhs(data1_src);
+
+    (void)reg_no_dst;
+
+    a.mov(regs_i64[REG_I64_FREE_IDX], lhs);
+    a.cmp(regs_i64[REG_I64_FREE_IDX], regs_i64[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Emit an int64 compare of a register (first operand) and an immediate
+ * (second operand)
+ *
+ * An immediate within the int32 range is compared directly; a wider one
+ * is first moved into regs_i64[REG_I64_FREE_IDX] and compared register
+ * to register. Only the CMP is emitted; nothing is written to a
+ * destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_imm_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   int64 data2_src)
+{
+    Imm rhs(data2_src);
+
+    (void)reg_no_dst;
+
+    if (data2_src < INT32_MIN || data2_src > INT32_MAX) {
+        a.mov(regs_i64[REG_I64_FREE_IDX], rhs);
+        a.cmp(regs_i64[reg_no1_src], regs_i64[REG_I64_FREE_IDX]);
+        return true;
+    }
+
+    rhs.setValue((int32)data2_src);
+    a.cmp(regs_i64[reg_no1_src], rhs);
+    return true;
+}
+
+/**
+ * Emit an int64 compare of two registers
+ *
+ * Only the CMP is emitted; nothing is written to a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_r_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    (void)reg_no_dst;
+
+    a.cmp(regs_i64[reg_no1_src], regs_i64[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Emit a float compare (COMISS) of two registers
+ *
+ * Only the compare is emitted; nothing is written to a destination
+ * register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    (void)reg_no_dst;
+
+    a.comiss(regs_float[reg_no1_src], regs_float[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Placeholder for the float compare of two immediates
+ *
+ * This form is not supported here: the function asserts and reports
+ * failure without emitting any code.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data (unused)
+ * @param data2_src the second src immediate data (unused)
+ *
+ * @return always false
+ */
+static bool
+cmp_imm_imm_to_r_f32(x86::Assembler &a, int32 reg_no_dst, float data1_src,
+                     float data2_src)
+{
+    (void)data2_src;
+    (void)data1_src;
+    (void)reg_no_dst;
+    (void)a;
+
+    /* should have been optimized in the frontend */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Emit a float compare (COMISS) of an immediate (first operand) and a
+ * register (second operand)
+ *
+ * The immediate is loaded into the register at REG_F32_FREE_IDX, which
+ * is then compared against the source register. Nothing is written to
+ * a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_imm_r_to_r_f32(x86::Assembler &a, int32 reg_no_dst, float data1_src,
+                   int32 reg_no2_src)
+{
+    (void)reg_no_dst;
+
+    /* Materialize the constant in the scratch float register first */
+    mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data1_src);
+    a.comiss(regs_float[REG_F32_FREE_IDX], regs_float[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Emit a float compare (COMISS) of a register (first operand) and an
+ * immediate (second operand)
+ *
+ * The immediate is loaded into the register at REG_F32_FREE_IDX, and the
+ * source register is then compared against it. Nothing is written to a
+ * destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_imm_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   float data2_src)
+{
+    (void)reg_no_dst;
+
+    /* Materialize the constant in the scratch float register first */
+    mov_imm_to_r_f32(a, REG_F32_FREE_IDX, data2_src);
+    a.comiss(regs_float[reg_no1_src], regs_float[REG_F32_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Emit a double compare (COMISD) of two registers
+ *
+ * Only the compare is emitted; nothing is written to a destination
+ * register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                 int32 reg_no2_src)
+{
+    (void)reg_no_dst;
+
+    a.comisd(regs_float[reg_no1_src], regs_float[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Placeholder for the double compare of two immediates
+ *
+ * This form is not supported here: the function asserts and reports
+ * failure without emitting any code.
+ *
+ * @param a the assembler to emit the code (unused)
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data (unused)
+ * @param data2_src the second src immediate data (unused)
+ *
+ * @return always false
+ */
+static bool
+cmp_imm_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, double data1_src,
+                     double data2_src)
+{
+    (void)data2_src;
+    (void)data1_src;
+    (void)reg_no_dst;
+    (void)a;
+
+    /* should have been optimized in the frontend */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Emit a double compare (COMISD) of an immediate (first operand) and a
+ * register (second operand)
+ *
+ * The immediate is loaded into the register at REG_F64_FREE_IDX, which
+ * is then compared against the source register. Nothing is written to
+ * a destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param data1_src the first src immediate data
+ * @param reg_no2_src the reg no of second src register data
+ *
+ * @return always true
+ */
+static bool
+cmp_imm_r_to_r_f64(x86::Assembler &a, int32 reg_no_dst, double data1_src,
+                   int32 reg_no2_src)
+{
+    (void)reg_no_dst;
+
+    /* Materialize the constant in the scratch float register first */
+    mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data1_src);
+    a.comisd(regs_float[REG_F64_FREE_IDX], regs_float[reg_no2_src]);
+    return true;
+}
+
+/**
+ * Emit a double compare (COMISD) of a register (first operand) and an
+ * immediate (second operand)
+ *
+ * The immediate is loaded into the register at REG_F64_FREE_IDX, and the
+ * source register is then compared against it. Nothing is written to a
+ * destination register.
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of register (unused)
+ * @param reg_no1_src the reg no of first src register data
+ * @param data2_src the second src immediate data
+ *
+ * @return always true
+ */
+static bool
+cmp_r_imm_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no1_src,
+                   double data2_src)
+{
+    (void)reg_no_dst;
+
+    /* Materialize the constant in the scratch float register first */
+    mov_imm_to_r_f64(a, REG_F64_FREE_IDX, data2_src);
+    a.comisd(regs_float[reg_no1_src], regs_float[REG_F64_FREE_IDX]);
+    return true;
+}
+
+/**
+ * Encode insn ld: LD_type r0, r1, r2
+ *
+ * r0 is the destination register, r1 the base and r2 the offset. The
+ * base and the offset may each be a constant (which must then be of
+ * kind I32) or a register (which must then be of kind I64); the macro
+ * picks the matching ld_r_from_base_{imm,r}_offset_{imm,r} helper.
+ *
+ * The expansion uses the names a, cc, r0, r1 and r2 from the enclosing
+ * scope and executes GOTO_FAIL when the selected helper returns false.
+ *
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param bytes_dst the byte number of dst data
+ * @param is_signed the data is signed or unsigned
+ */
+#define LD_R_R_R(kind, bytes_dst, is_signed) \
+ do { \
+ int32 reg_no_dst = 0, reg_no_base = 0, reg_no_offset = 0; \
+ int32 base = 0, offset = 0; \
+ bool _ret = false; \
+ \
+ if (jit_reg_is_const(r1)) { \
+ CHECK_KIND(r1, JIT_REG_KIND_I32); \
+ } \
+ else { \
+ CHECK_KIND(r1, JIT_REG_KIND_I64); \
+ } \
+ if (jit_reg_is_const(r2)) { \
+ CHECK_KIND(r2, JIT_REG_KIND_I32); \
+ } \
+ else { \
+ CHECK_KIND(r2, JIT_REG_KIND_I64); \
+ } \
+ \
+ reg_no_dst = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0)); \
+ if (jit_reg_is_const(r1)) \
+ base = jit_cc_get_const_I32(cc, r1); \
+ else { \
+ reg_no_base = jit_reg_no(r1); \
+ CHECK_REG_NO(reg_no_base, jit_reg_kind(r1)); \
+ } \
+ if (jit_reg_is_const(r2)) \
+ offset = jit_cc_get_const_I32(cc, r2); \
+ else { \
+ reg_no_offset = jit_reg_no(r2); \
+ CHECK_REG_NO(reg_no_offset, jit_reg_kind(r2)); \
+ } \
+ \
+ if (jit_reg_is_const(r1)) { \
+ if (jit_reg_is_const(r2)) \
+ _ret = ld_r_from_base_imm_offset_imm( \
+ a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst, \
+ base, offset); \
+ else \
+ _ret = ld_r_from_base_imm_offset_r( \
+ a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst, \
+ base, reg_no_offset); \
+ } \
+ else if (jit_reg_is_const(r2)) \
+ _ret = ld_r_from_base_r_offset_imm( \
+ a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst, \
+ reg_no_base, offset); \
+ else \
+ _ret = ld_r_from_base_r_offset_r( \
+ a, bytes_dst, JIT_REG_KIND_##kind, is_signed, reg_no_dst, \
+ reg_no_base, reg_no_offset); \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+/**
+ * Encode insn st: ST_type r0, r1, r2
+ *
+ * r0 is the value to store, r1 the base and r2 the offset. The value
+ * may be a constant or a register; the base and the offset may each be
+ * a constant (which must then be of kind I32) or a register (which must
+ * then be of kind I64). The macro picks the matching
+ * st_{imm,r}_to_base_{imm,r}_offset_{imm,r} helper; a constant value is
+ * passed to the helper by address.
+ *
+ * The expansion uses the names a, cc, r0, r1 and r2 from the enclosing
+ * scope and executes GOTO_FAIL when the selected helper returns false.
+ *
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param type the C type of the local that holds a constant src value
+ * @param bytes_dst the byte number of dst data
+ * @param atomic whether it's atomic store
+ */
+#define ST_R_R_R(kind, type, bytes_dst, atomic) \
+ do { \
+ type data_src = 0; \
+ int32 reg_no_src = 0, reg_no_base = 0, reg_no_offset = 0; \
+ int32 base = 0, offset = 0; \
+ bool _ret = false; \
+ \
+ if (jit_reg_is_const(r1)) { \
+ CHECK_KIND(r1, JIT_REG_KIND_I32); \
+ } \
+ else { \
+ CHECK_KIND(r1, JIT_REG_KIND_I64); \
+ } \
+ if (jit_reg_is_const(r2)) { \
+ CHECK_KIND(r2, JIT_REG_KIND_I32); \
+ } \
+ else { \
+ CHECK_KIND(r2, JIT_REG_KIND_I64); \
+ } \
+ \
+ if (jit_reg_is_const(r0)) \
+ data_src = jit_cc_get_const_##kind(cc, r0); \
+ else { \
+ reg_no_src = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_src, jit_reg_kind(r0)); \
+ } \
+ if (jit_reg_is_const(r1)) \
+ base = jit_cc_get_const_I32(cc, r1); \
+ else { \
+ reg_no_base = jit_reg_no(r1); \
+ CHECK_REG_NO(reg_no_base, jit_reg_kind(r1)); \
+ } \
+ if (jit_reg_is_const(r2)) \
+ offset = jit_cc_get_const_I32(cc, r2); \
+ else { \
+ reg_no_offset = jit_reg_no(r2); \
+ CHECK_REG_NO(reg_no_offset, jit_reg_kind(r2)); \
+ } \
+ \
+ if (jit_reg_is_const(r0)) { \
+ if (jit_reg_is_const(r1)) { \
+ if (jit_reg_is_const(r2)) \
+ _ret = st_imm_to_base_imm_offset_imm( \
+ a, bytes_dst, &data_src, base, offset, atomic); \
+ else \
+ _ret = st_imm_to_base_imm_offset_r( \
+ a, bytes_dst, &data_src, base, reg_no_offset, atomic); \
+ } \
+ else if (jit_reg_is_const(r2)) \
+ _ret = st_imm_to_base_r_offset_imm( \
+ a, bytes_dst, &data_src, reg_no_base, offset, atomic); \
+ else \
+ _ret = st_imm_to_base_r_offset_r(a, bytes_dst, &data_src, \
+ reg_no_base, reg_no_offset, \
+ atomic); \
+ } \
+ else if (jit_reg_is_const(r1)) { \
+ if (jit_reg_is_const(r2)) \
+ _ret = st_r_to_base_imm_offset_imm( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_src, base, \
+ offset, atomic); \
+ else \
+ _ret = st_r_to_base_imm_offset_r( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_src, base, \
+ reg_no_offset, atomic); \
+ } \
+ else if (jit_reg_is_const(r2)) \
+ _ret = st_r_to_base_r_offset_imm(a, bytes_dst, \
+ JIT_REG_KIND_##kind, reg_no_src, \
+ reg_no_base, offset, atomic); \
+ else \
+ _ret = st_r_to_base_r_offset_r(a, bytes_dst, JIT_REG_KIND_##kind, \
+ reg_no_src, reg_no_base, \
+ reg_no_offset, atomic); \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+/**
+ * Encode insn mov: MOV r0, r1
+ *
+ * r0 is the destination and must be a register of the same kind as r1;
+ * r1 may be a constant (handled by mov_imm_to_r_<type>) or a register
+ * (handled by mov_r_to_r_<type>).
+ *
+ * The expansion uses the names a, cc, r0 and r1 from the enclosing
+ * scope and executes GOTO_FAIL when the selected helper returns false.
+ *
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ */
+#define MOV_R_R(kind, Type, type) \
+ do { \
+ bool _ret = false; \
+ int32 reg_no_dst = 0, reg_no_src = 0; \
+ CHECK_EQKIND(r0, r1); \
+ \
+ CHECK_NCONST(r0); \
+ reg_no_dst = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0)); \
+ \
+ if (jit_reg_is_const(r1)) { \
+ Type data = jit_cc_get_const_##kind(cc, r1); \
+ _ret = mov_imm_to_r_##type(a, reg_no_dst, data); \
+ } \
+ else { \
+ reg_no_src = jit_reg_no(r1); \
+ CHECK_REG_NO(reg_no_src, jit_reg_kind(r1)); \
+ _ret = mov_r_to_r_##type(a, reg_no_dst, reg_no_src); \
+ } \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+/**
+ * Lower a MOV r0, r1 insn, dispatching on the kind of the destination
+ * register
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_mov(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1)
+{
+    const auto dst_kind = jit_reg_kind(r0);
+
+    if (dst_kind == JIT_REG_KIND_I32) {
+        MOV_R_R(I32, int32, i32);
+    }
+    else if (dst_kind == JIT_REG_KIND_I64) {
+        MOV_R_R(I64, int64, i64);
+    }
+    else if (dst_kind == JIT_REG_KIND_F32) {
+        MOV_R_R(F32, float32, f32);
+    }
+    else if (dst_kind == JIT_REG_KIND_F64) {
+        MOV_R_R(F64, float64, f64);
+    }
+    else {
+        LOG_VERBOSE("Invalid reg type of mov: %d\n", jit_reg_kind(r0));
+        GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn neg: NEG r0, r1
+ *
+ * r0 is the destination and must be a register of the same kind as r1;
+ * r1 may be a constant (handled by neg_imm_to_r_<type>) or a register
+ * (handled by neg_r_to_r_<type>).
+ *
+ * The expansion uses the names a, cc, r0 and r1 from the enclosing
+ * scope and executes GOTO_FAIL when the selected helper returns false.
+ *
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ */
+#define NEG_R_R(kind, Type, type) \
+ do { \
+ bool _ret = false; \
+ int32 reg_no_dst = 0, reg_no_src = 0; \
+ CHECK_EQKIND(r0, r1); \
+ \
+ CHECK_NCONST(r0); \
+ reg_no_dst = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0)); \
+ \
+ if (jit_reg_is_const(r1)) { \
+ Type data = jit_cc_get_const_##kind(cc, r1); \
+ _ret = neg_imm_to_r_##type(a, reg_no_dst, data); \
+ } \
+ else { \
+ reg_no_src = jit_reg_no(r1); \
+ CHECK_REG_NO(reg_no_src, jit_reg_kind(r1)); \
+ _ret = neg_r_to_r_##type(a, reg_no_dst, reg_no_src); \
+ } \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+/**
+ * Lower a NEG r0, r1 insn, dispatching on the kind of the destination
+ * register
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param r0 dst jit register that contains the dst operand info
+ * @param r1 src jit register that contains the src operand info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_neg(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1)
+{
+    const auto dst_kind = jit_reg_kind(r0);
+
+    if (dst_kind == JIT_REG_KIND_I32) {
+        NEG_R_R(I32, int32, i32);
+    }
+    else if (dst_kind == JIT_REG_KIND_I64) {
+        NEG_R_R(I64, int64, i64);
+    }
+    else if (dst_kind == JIT_REG_KIND_F32) {
+        NEG_R_R(F32, float32, f32);
+    }
+    else if (dst_kind == JIT_REG_KIND_F64) {
+        NEG_R_R(F64, float64, f64);
+    }
+    else {
+        LOG_VERBOSE("Invalid reg type of neg: %d\n", jit_reg_kind(r0));
+        GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn convert: I32TOI8 r0, r1, or I32TOI16, I32TOF32, F32TOF64, etc.
+ *
+ * r0 is the destination and must be a register of kind kind0; r1 must
+ * be of kind kind1 and may be a constant (handled by
+ * convert_imm_<type1>_to_r_<type0>) or a register (handled by
+ * convert_r_<type1>_to_r_<type0>).
+ *
+ * The expansion uses the names a, cc, r0 and r1 from the enclosing
+ * scope and executes GOTO_FAIL when the selected helper returns false.
+ *
+ * @param kind0 the dst JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param kind1 the src JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param type0 the dst data type, such as i8, u8, i16, u16, i32, i64, f32,
+ * f64
+ * @param type1 the src data type, such as i8, u8, i16, u16, i32, i64, f32,
+ * f64
+ * @param Type1 the C type of the local that holds a constant src value
+ */
+#define CONVERT_R_R(kind0, kind1, type0, type1, Type1) \
+ do { \
+ bool _ret = false; \
+ int32 reg_no_dst = 0, reg_no_src = 0; \
+ CHECK_KIND(r0, JIT_REG_KIND_##kind0); \
+ CHECK_KIND(r1, JIT_REG_KIND_##kind1); \
+ \
+ CHECK_NCONST(r0); \
+ reg_no_dst = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0)); \
+ \
+ if (jit_reg_is_const(r1)) { \
+ Type1 data = jit_cc_get_const_##kind1(cc, r1); \
+ _ret = convert_imm_##type1##_to_r_##type0(a, reg_no_dst, data); \
+ } \
+ else { \
+ reg_no_src = jit_reg_no(r1); \
+ CHECK_REG_NO(reg_no_src, jit_reg_kind(r1)); \
+ _ret = \
+ convert_r_##type1##_to_r_##type0(a, reg_no_dst, reg_no_src); \
+ } \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+/**
+ * Encode insn alu: ADD/SUB/MUL/DIV/REM r0, r1, r2
+ * Each source operand may be a register or a constant; the matching
+ * alu_{r,imm}_{r,imm}_to_r_<type> encoder is chosen accordingly.
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ * @param op the opcode of alu
+ */
+#define ALU_R_R_R(kind, Type, type, op) \
+    do { \
+        Type imm1, imm2; \
+        int32 dst_no = 0, src1_no = 0, src2_no = 0; \
+        bool _ok = false, r1_is_const, r2_is_const; \
+ \
+        CHECK_EQKIND(r0, r1); \
+        CHECK_EQKIND(r0, r2); \
+        memset(&imm1, 0, sizeof(Type)); \
+        memset(&imm2, 0, sizeof(Type)); \
+        r1_is_const = jit_reg_is_const(r1); \
+        r2_is_const = jit_reg_is_const(r2); \
+ \
+        dst_no = jit_reg_no(r0); \
+        CHECK_REG_NO(dst_no, jit_reg_kind(r0)); \
+        if (!r1_is_const) { \
+            src1_no = jit_reg_no(r1); \
+            CHECK_REG_NO(src1_no, jit_reg_kind(r1)); \
+        } \
+        else \
+            imm1 = jit_cc_get_const_##kind(cc, r1); \
+        if (!r2_is_const) { \
+            src2_no = jit_reg_no(r2); \
+            CHECK_REG_NO(src2_no, jit_reg_kind(r2)); \
+        } \
+        else \
+            imm2 = jit_cc_get_const_##kind(cc, r2); \
+ \
+        if (r1_is_const && r2_is_const) \
+            _ok = alu_imm_imm_to_r_##type(a, op, dst_no, imm1, imm2); \
+        else if (r1_is_const) \
+            _ok = alu_imm_r_to_r_##type(a, op, dst_no, imm1, src2_no); \
+        else if (r2_is_const) \
+            _ok = alu_r_imm_to_r_##type(a, op, dst_no, src1_no, imm2); \
+        else \
+            _ok = alu_r_r_to_r_##type(a, op, dst_no, src1_no, src2_no); \
+        if (!_ok) \
+            GOTO_FAIL; \
+    } while (0)
+
+/**
+ * Lower an alu insn, ADD/SUB/MUL/DIV/REM r0, r1, r2, selecting the
+ * encoder by the kind of r0
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of alu operations
+ * @param r0 dst jit register
+ * @param r1 first src jit register or constant, of the same kind as r0
+ * @param r2 second src jit register or constant, of the same kind as r0
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_alu(JitCompContext *cc, x86::Assembler &a, ALU_OP op, JitReg r0,
+          JitReg r1, JitReg r2)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            ALU_R_R_R(I32, int32, i32, op);
+            return true;
+        case JIT_REG_KIND_I64:
+            ALU_R_R_R(I64, int64, i64, op);
+            return true;
+        case JIT_REG_KIND_F32:
+            ALU_R_R_R(F32, float32, f32, op);
+            return true;
+        case JIT_REG_KIND_F64:
+            ALU_R_R_R(F64, float64, f64, op);
+            return true;
+        default:
+            LOG_VERBOSE("Invalid reg type of alu: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+fail:
+    return false;
+}
+
+/**
+ * Encode insn bit: AND/OR/XOR r0, r1, r2
+ * Each source operand may be a register or a constant; the matching
+ * bit_{r,imm}_{r,imm}_to_r_<type> encoder is chosen accordingly.
+ * @param kind the data kind, such as I32, I64
+ * @param Type the data type, such as int32, int64
+ * @param type the abbreviation of data type, such as i32, i64
+ * @param op the opcode of bit operation
+ */
+#define BIT_R_R_R(kind, Type, type, op) \
+    do { \
+        Type imm1, imm2; \
+        int32 dst_no = 0, src1_no = 0, src2_no = 0; \
+        bool _ok = false, r1_is_const, r2_is_const; \
+ \
+        CHECK_EQKIND(r0, r1); \
+        CHECK_EQKIND(r0, r2); \
+        memset(&imm1, 0, sizeof(Type)); \
+        memset(&imm2, 0, sizeof(Type)); \
+        r1_is_const = jit_reg_is_const(r1); \
+        r2_is_const = jit_reg_is_const(r2); \
+ \
+        dst_no = jit_reg_no(r0); \
+        CHECK_REG_NO(dst_no, jit_reg_kind(r0)); \
+        if (!r1_is_const) { \
+            src1_no = jit_reg_no(r1); \
+            CHECK_REG_NO(src1_no, jit_reg_kind(r1)); \
+        } \
+        else \
+            imm1 = jit_cc_get_const_##kind(cc, r1); \
+        if (!r2_is_const) { \
+            src2_no = jit_reg_no(r2); \
+            CHECK_REG_NO(src2_no, jit_reg_kind(r2)); \
+        } \
+        else \
+            imm2 = jit_cc_get_const_##kind(cc, r2); \
+ \
+        if (r1_is_const && r2_is_const) \
+            _ok = bit_imm_imm_to_r_##type(a, op, dst_no, imm1, imm2); \
+        else if (r1_is_const) \
+            _ok = bit_imm_r_to_r_##type(a, op, dst_no, imm1, src2_no); \
+        else if (r2_is_const) \
+            _ok = bit_r_imm_to_r_##type(a, op, dst_no, src1_no, imm2); \
+        else \
+            _ok = bit_r_r_to_r_##type(a, op, dst_no, src1_no, src2_no); \
+        if (!_ok) \
+            GOTO_FAIL; \
+    } while (0)
+
+/**
+ * Lower a bit insn, AND/OR/XOR r0, r1, r2, selecting the encoder by the
+ * kind of r0 (only I32 and I64 are accepted)
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of bit operations
+ * @param r0 dst jit register
+ * @param r1 first src jit register or constant, of the same kind as r0
+ * @param r2 second src jit register or constant, of the same kind as r0
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_bit(JitCompContext *cc, x86::Assembler &a, BIT_OP op, JitReg r0,
+          JitReg r1, JitReg r2)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            BIT_R_R_R(I32, int32, i32, op);
+            return true;
+        case JIT_REG_KIND_I64:
+            BIT_R_R_R(I64, int64, i64, op);
+            return true;
+        default:
+            LOG_VERBOSE("Invalid reg type of bit: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+fail:
+    return false;
+}
+
+/**
+ * Encode insn shift: SHL/SHRS/SHRU r0, r1, r2
+ * Each source operand may be a register or a constant; the matching
+ * shift_{r,imm}_{r,imm}_to_r_<type> encoder is chosen accordingly.
+ * @param kind the data kind, such as I32, I64
+ * @param Type the data type, such as int32, int64
+ * @param type the abbreviation of data type, such as i32, i64
+ * @param op the opcode of shift operation
+ */
+#define SHIFT_R_R_R(kind, Type, type, op) \
+    do { \
+        Type imm1, imm2; \
+        int32 dst_no = 0, src1_no = 0, src2_no = 0; \
+        bool _ok = false, r1_is_const, r2_is_const; \
+ \
+        CHECK_EQKIND(r0, r1); \
+        CHECK_KIND(r2, JIT_REG_KIND_##kind); \
+        memset(&imm1, 0, sizeof(Type)); \
+        memset(&imm2, 0, sizeof(Type)); \
+        r1_is_const = jit_reg_is_const(r1); \
+        r2_is_const = jit_reg_is_const(r2); \
+ \
+        dst_no = jit_reg_no(r0); \
+        CHECK_REG_NO(dst_no, jit_reg_kind(r0)); \
+        if (!r1_is_const) { \
+            src1_no = jit_reg_no(r1); \
+            CHECK_REG_NO(src1_no, jit_reg_kind(r1)); \
+        } \
+        else \
+            imm1 = jit_cc_get_const_##kind(cc, r1); \
+        if (!r2_is_const) { \
+            src2_no = jit_reg_no(r2); \
+            CHECK_REG_NO(src2_no, jit_reg_kind(r2)); \
+        } \
+        else \
+            imm2 = jit_cc_get_const_##kind(cc, r2); \
+ \
+        if (r1_is_const && r2_is_const) \
+            _ok = shift_imm_imm_to_r_##type(a, op, dst_no, imm1, imm2); \
+        else if (r1_is_const) \
+            _ok = shift_imm_r_to_r_##type(a, op, dst_no, imm1, src2_no); \
+        else if (r2_is_const) \
+            _ok = shift_r_imm_to_r_##type(a, op, dst_no, src1_no, imm2); \
+        else \
+            _ok = shift_r_r_to_r_##type(a, op, dst_no, src1_no, src2_no); \
+        if (!_ok) \
+            GOTO_FAIL; \
+    } while (0)
+
+/**
+ * Lower a shift insn, SHL/SHRS/SHRU r0, r1, r2, selecting the encoder by
+ * the kind of r0 (only I32 and I64 are accepted)
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of shift operations
+ * @param r0 dst jit register
+ * @param r1 first src jit register or constant, of the same kind as r0
+ * @param r2 second src jit register or constant
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_shift(JitCompContext *cc, x86::Assembler &a, SHIFT_OP op, JitReg r0,
+            JitReg r1, JitReg r2)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            SHIFT_R_R_R(I32, int32, i32, op);
+            return true;
+        case JIT_REG_KIND_I64:
+            SHIFT_R_R_R(I64, int64, i64, op);
+            return true;
+        default:
+            LOG_VERBOSE("Invalid reg type of shift: %d\n", jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+fail:
+    return false;
+}
+
+/**
+ * Encode an int32 bitcount operation (lzcnt/tzcnt/popcnt) on a register
+ * and store the result in a register
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BITCOUNT operation: CLZ, CTZ or POPCNT
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false if op is not a known bitcount opcode
+ */
+static bool
+bitcount_r_to_r_i32(x86::Assembler &a, BITCOUNT_OP op, int32 reg_no_dst,
+                    int32 reg_no_src)
+{
+    switch (op) {
+        case CLZ:
+            a.lzcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            return true;
+        case CTZ:
+            a.tzcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            return true;
+        case POPCNT:
+            a.popcnt(regs_i32[reg_no_dst], regs_i32[reg_no_src]);
+            return true;
+        default:
+            break;
+    }
+
+    /* Unknown opcode */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode an int64 bitcount operation (lzcnt/tzcnt/popcnt) on a register
+ * and store the result in a register
+ *
+ * @param a the assembler to emit the code
+ * @param op the opcode of BITCOUNT operation: CLZ, CTZ or POPCNT
+ * @param reg_no_dst the no of dst register
+ * @param reg_no_src the no of src register
+ *
+ * @return true if success, false if op is not a known bitcount opcode
+ */
+static bool
+bitcount_r_to_r_i64(x86::Assembler &a, BITCOUNT_OP op, int32 reg_no_dst,
+                    int32 reg_no_src)
+{
+    switch (op) {
+        case CLZ:
+            a.lzcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            return true;
+        case CTZ:
+            a.tzcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            return true;
+        case POPCNT:
+            a.popcnt(regs_i64[reg_no_dst], regs_i64[reg_no_src]);
+            return true;
+        default:
+            break;
+    }
+
+    /* Unknown opcode */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode insn bitcount: CLZ/CTZ/POPCNT r0, r1
+ * Both operands must be registers of the same kind; constants are
+ * rejected.
+ * @param kind the data kind, such as I32, I64
+ * @param Type the data type, such as int32, int64
+ * @param type the abbreviation of data type, such as i32, i64
+ * @param op the opcode of bitcount operation
+ */
+#define BITCOUNT_R_R(kind, Type, type, op) \
+    do { \
+        int32 dst_no = 0, src_no = 0; \
+        bool _ok = false; \
+ \
+        CHECK_EQKIND(r0, r1); \
+        CHECK_NCONST(r0); \
+        CHECK_NCONST(r1); \
+ \
+        dst_no = jit_reg_no(r0); \
+        CHECK_REG_NO(dst_no, jit_reg_kind(r0)); \
+        src_no = jit_reg_no(r1); \
+        CHECK_REG_NO(src_no, jit_reg_kind(r1)); \
+ \
+        _ok = bitcount_r_to_r_##type(a, op, dst_no, src_no); \
+        if (!_ok) \
+            GOTO_FAIL; \
+    } while (0)
+
+/**
+ * Lower a bitcount insn, CLZ/CTZ/POPCNT r0, r1, selecting the encoder by
+ * the kind of r0 (only I32 and I64 are accepted)
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the opcode of bitcount operations
+ * @param r0 dst jit register, must not be a constant
+ * @param r1 src jit register of the same kind as r0, must not be a constant
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_bitcount(JitCompContext *cc, x86::Assembler &a, BITCOUNT_OP op, JitReg r0,
+               JitReg r1)
+{
+    switch (jit_reg_kind(r0)) {
+        case JIT_REG_KIND_I32:
+            BITCOUNT_R_R(I32, int32, i32, op);
+            break;
+        case JIT_REG_KIND_I64:
+            BITCOUNT_R_R(I64, int64, i64, op);
+            break;
+        default:
+            /* Name the right lowering routine so that the log can be told
+               apart from the one emitted by lower_bit */
+            LOG_VERBOSE("Invalid reg type of bitcount: %d\n",
+                        jit_reg_kind(r0));
+            GOTO_FAIL;
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode insn cmp: CMP r0, r1, r2
+ * r0 must be of kind I32; r1 and r2 must both be of the given kind and
+ * each may be a register or a constant.
+ * @param kind the data kind, such as I32, I64, F32 and F64
+ * @param Type the data type, such as int32, int64, float32, and float64
+ * @param type the abbreviation of data type, such as i32, i64, f32, and f64
+ */
+#define CMP_R_R_R(kind, Type, type) \
+    do { \
+        Type imm1, imm2; \
+        int32 dst_no = 0, src1_no = 0, src2_no = 0; \
+        bool _ok = false, r1_is_const, r2_is_const; \
+ \
+        CHECK_KIND(r0, JIT_REG_KIND_I32); \
+        CHECK_KIND(r1, JIT_REG_KIND_##kind); \
+        CHECK_EQKIND(r1, r2); \
+        memset(&imm1, 0, sizeof(Type)); \
+        memset(&imm2, 0, sizeof(Type)); \
+        r1_is_const = jit_reg_is_const(r1); \
+        r2_is_const = jit_reg_is_const(r2); \
+ \
+        dst_no = jit_reg_no(r0); \
+        CHECK_REG_NO(dst_no, jit_reg_kind(r0)); \
+        if (!r1_is_const) { \
+            src1_no = jit_reg_no(r1); \
+            CHECK_REG_NO(src1_no, jit_reg_kind(r1)); \
+        } \
+        else \
+            imm1 = jit_cc_get_const_##kind(cc, r1); \
+        if (!r2_is_const) { \
+            src2_no = jit_reg_no(r2); \
+            CHECK_REG_NO(src2_no, jit_reg_kind(r2)); \
+        } \
+        else \
+            imm2 = jit_cc_get_const_##kind(cc, r2); \
+ \
+        if (r1_is_const && r2_is_const) \
+            _ok = cmp_imm_imm_to_r_##type(a, dst_no, imm1, imm2); \
+        else if (r1_is_const) \
+            _ok = cmp_imm_r_to_r_##type(a, dst_no, imm1, src2_no); \
+        else if (r2_is_const) \
+            _ok = cmp_r_imm_to_r_##type(a, dst_no, src1_no, imm2); \
+        else \
+            _ok = cmp_r_r_to_r_##type(a, dst_no, src1_no, src2_no); \
+        if (!_ok) \
+            GOTO_FAIL; \
+    } while (0)
+
+/**
+ * Lower the cmp insn, CMP r0, r1, r2, selecting the encoder by the kind
+ * of r1. On success cc->last_cmp_on_fp records whether the comparison
+ * was made on F32/F64 operands.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param r0 dst jit register, must be of kind I32
+ * @param r1 first src jit register or constant
+ * @param r2 second src jit register or constant, of the same kind as r1
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_cmp(JitCompContext *cc, x86::Assembler &a, JitReg r0, JitReg r1,
+          JitReg r2)
+{
+    switch (jit_reg_kind(r1)) {
+        case JIT_REG_KIND_I32:
+            CMP_R_R_R(I32, int32, i32);
+            cc->last_cmp_on_fp = false;
+            return true;
+        case JIT_REG_KIND_I64:
+            CMP_R_R_R(I64, int64, i64);
+            cc->last_cmp_on_fp = false;
+            return true;
+        case JIT_REG_KIND_F32:
+            CMP_R_R_R(F32, float32, f32);
+            cc->last_cmp_on_fp = true;
+            return true;
+        case JIT_REG_KIND_F64:
+            CMP_R_R_R(F64, float64, f64);
+            cc->last_cmp_on_fp = true;
+            return true;
+        default:
+            cc->last_cmp_on_fp = false;
+            LOG_VERBOSE("Invalid reg type of cmp: %d\n", jit_reg_kind(r1));
+            GOTO_FAIL;
+    }
+
+fail:
+    return false;
+}
+
+/**
+ * Emit a conditional jump that tests the flags left by the last cmp, and
+ * patch its 32-bit relative displacement with the given offset
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the condition opcode to jmp
+ * @param offset the relative offset to jmp when the condition is met
+ *
+ * @return always true; an emit error is only visible through the
+ *         assembler's error handler, in which case nothing is patched
+ */
+static bool
+cmp_r_and_jmp_relative(JitCompContext *cc, x86::Assembler &a, COND_OP op,
+                       int32 offset)
+{
+    Imm placeholder(INT32_MAX);
+    JitErrorHandler *err_handler;
+    char *jcc_start;
+    bool is_fp_cmp = cc->last_cmp_on_fp;
+
+    /* After a floating-point cmp only GTS and GES are expected */
+    bh_assert(!is_fp_cmp || op == GTS || op == GES);
+
+    switch (op) {
+        case EQ:
+            a.je(placeholder);
+            break;
+        case NE:
+            a.jne(placeholder);
+            break;
+        case GTS:
+            /* floating-point cmp uses the unsigned form of the jump */
+            if (is_fp_cmp)
+                a.ja(placeholder);
+            else
+                a.jg(placeholder);
+            break;
+        case LES:
+            a.jng(placeholder);
+            break;
+        case GES:
+            /* floating-point cmp uses the unsigned form of the jump */
+            if (is_fp_cmp)
+                a.jae(placeholder);
+            else
+                a.jnl(placeholder);
+            break;
+        case LTS:
+            a.jl(placeholder);
+            break;
+        case GTU:
+            a.ja(placeholder);
+            break;
+        case LEU:
+            a.jna(placeholder);
+            break;
+        case GEU:
+            a.jae(placeholder);
+            break;
+        case LTU:
+            a.jb(placeholder);
+            break;
+        default:
+            bh_assert(0);
+            break;
+    }
+
+    err_handler = (JitErrorHandler *)a.code()->errorHandler();
+    if (err_handler->err)
+        return true;
+
+    /* The offset written by asmjit is always 0, we patch it again:
+       the jump occupies the last 6 bytes of the buffer and its
+       displacement starts 2 bytes into it */
+    jcc_start = (char *)a.code()->sectionById(0)->buffer().data()
+                + a.code()->sectionById(0)->buffer().size() - 6;
+    *(int32 *)(jcc_start + 2) = offset;
+    return true;
+}
+
+/**
+ * Encode select insn, SELECT r0, r1, r2, r3
+ *
+ * The emitted sequence is: mov r0, r2; conditional jump over the next
+ * mov; mov r0, r3. Either mov is omitted when its source already equals
+ * r0.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param op the condition opcode, passed to cmp_r_and_jmp_relative to
+ *        emit the jump that skips the second mov
+ * @param r0 dst jit register, must not be a constant
+ * @param r1 jit register that must be a non-constant I32; it is only
+ *        validated here (presumably it holds the result of the
+ *        preceding cmp -- confirm against the caller)
+ * @param r2 src moved into r0 first
+ * @param r3 src moved into r0 when the conditional jump is not taken
+ *
+ * @return true if success, false if failed
+ */
+/* TODO: optimize with setcc */
+static bool
+lower_select(JitCompContext *cc, x86::Assembler &a, COND_OP op, JitReg r0,
+ JitReg r1, JitReg r2, JitReg r3)
+{
+ JitErrorHandler err_handler;
+ Environment env(Arch::kX64);
+ CodeHolder code1, code2;
+ char *stream_mov1, *stream_mov2;
+ uint32 size_mov1, size_mov2;
+
+ /* Both movs are first assembled into scratch code holders so that
+ their encoded sizes are known before anything is emitted to a */
+ code1.init(env);
+ code1.setErrorHandler(&err_handler);
+ x86::Assembler a1(&code1);
+
+ code2.init(env);
+ code2.setErrorHandler(&err_handler);
+ x86::Assembler a2(&code2);
+
+ CHECK_NCONST(r0);
+ CHECK_NCONST(r1);
+ CHECK_KIND(r1, JIT_REG_KIND_I32);
+
+ if (r0 == r3 && r0 != r2 && !cc->last_cmp_on_fp) {
+ JitReg r_tmp;
+
+ /* For i32/i64, exchange r2 and r3 to make r0 equal to r2,
+ so as to decrease possible execution instructions.
+ For f32/f64 comparison, should not change the order as
+ the result of comparison with NaN may be different. */
+ r_tmp = r2;
+ r2 = r3;
+ r3 = r_tmp;
+ op = not_cond(op);
+ }
+
+ if (!lower_mov(cc, a1, r0, r2))
+ GOTO_FAIL;
+
+ if (!lower_mov(cc, a2, r0, r3))
+ GOTO_FAIL;
+
+ stream_mov1 = (char *)a1.code()->sectionById(0)->buffer().data();
+ size_mov1 = a1.code()->sectionById(0)->buffer().size();
+ stream_mov2 = (char *)a2.code()->sectionById(0)->buffer().data();
+ size_mov2 = a2.code()->sectionById(0)->buffer().size();
+
+ /* mov r0, r2 -- not needed when r0 already is r2 */
+ if (r0 != r2) {
+ a.embedDataArray(TypeId::kInt8, stream_mov1, size_mov1);
+ }
+
+ /* Conditional jump whose offset is the size of the second mov, so
+ that a taken jump skips it, followed by mov r0, r3 */
+ if (r3 && r0 != r3) {
+ if (!cmp_r_and_jmp_relative(cc, a, op, (int32)size_mov2))
+ return false;
+ a.embedDataArray(TypeId::kInt8, stream_mov2, size_mov2);
+ }
+
+ return true;
+fail:
+ return false;
+}
+
+/* Jmp from label_src to label_dst.
+   When label_is_ahead() reports true, a jmp with a placeholder target is
+   emitted and its displacement is patched at once from label_offsets.
+   Otherwise the jmp is delegated to jmp_from_label_to_label(), which is
+   given jmp_info_list.
+   The expansion uses cc, a, label_offsets and jmp_info_list from the
+   enclosing scope, and GOTO_FAIL (presumably a jump to a local fail
+   label). */
+#define JMP_TO_LABEL(label_dst, label_src) \
+ do { \
+ if (label_is_ahead(cc, label_dst, label_src)) { \
+ JitErrorHandler *err_handler = \
+ (JitErrorHandler *)a.code()->errorHandler(); \
+ int32 _offset; \
+ char *stream; \
+ Imm imm(INT32_MAX); \
+ a.jmp(imm); \
+ if (!err_handler->err) { \
+ /* The offset written by asmjit is always 0, we patch it \
+ again, 6 is the size of jmp instruction */ \
+ stream = (char *)a.code()->sectionById(0)->buffer().data() \
+ + a.code()->sectionById(0)->buffer().size() - 6; \
+ _offset = label_offsets[label_dst] \
+ - a.code()->sectionById(0)->buffer().size(); \
+ *(int32 *)(stream + 2) = _offset; \
+ } \
+ } \
+ else { \
+ if (!jmp_from_label_to_label(a, jmp_info_list, label_dst, \
+ label_src)) \
+ GOTO_FAIL; \
+ } \
+ } while (0)
+
+/**
+ * Encode branch insn, BEQ/BNE/../BLTU r0, r1, r2
+ *
+ * If this is the last insn of the block, the label in r1 neighbors the
+ * current block and the last cmp was not on floating-point operands,
+ * r1 and r2 are exchanged and the condition is inverted before the jump
+ * is emitted.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_src the index of src label
+ * @param op the condition opcode of the branch
+ * @param r0 non-constant I32 jit register (only validated here)
+ * @param r1 jit register of kind L32, the first label operand
+ * @param r2 the second label operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_branch(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+             int32 label_src, COND_OP op, JitReg r0, JitReg r1, JitReg r2,
+             bool is_last_insn)
+{
+    int32 label_dst;
+    bool invert_branch;
+    JitReg r_swap;
+
+    CHECK_NCONST(r0);
+    CHECK_KIND(r0, JIT_REG_KIND_I32);
+    CHECK_KIND(r1, JIT_REG_KIND_L32);
+
+    CHECK_REG_NO(jit_reg_no(r0), jit_reg_kind(r0));
+
+    label_dst = jit_reg_no(r1);
+    invert_branch = label_dst < (int32)jit_cc_label_num(cc) - 1
+                    && is_last_insn
+                    && label_is_neighboring(cc, label_src, label_dst)
+                    && !cc->last_cmp_on_fp;
+
+    if (invert_branch) {
+        /* Exchange the two labels and negate the condition */
+        r_swap = r2;
+        r2 = r1;
+        r1 = r_swap;
+        op = not_cond(op);
+    }
+
+    if (!cmp_r_and_jmp_label(cc, a, jmp_info_list, label_src, op, r1, r2,
+                             is_last_insn))
+        GOTO_FAIL;
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode lookupswitch with key of immediate data
+ *
+ * As the key is known, the target is resolved right away: the first
+ * matching pair wins, otherwise the default target (if any) is used.
+ * No jmp is emitted when the target label neighbors the current block
+ * and this is the block's last insn.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_offsets the offsets of each label
+ * @param label_src the index of src label
+ * @param key the entry key
+ * @param opnd the lookup switch operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lookupswitch_imm(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+                 uint32 *label_offsets, int32 label_src, int32 key,
+                 const JitOpndLookupSwitch *opnd, bool is_last_insn)
+{
+    uint32 i;
+    int32 label_dst;
+
+    /* Stop at the first pair whose value equals the key */
+    for (i = 0; i < opnd->match_pairs_num; i++) {
+        if (key == opnd->match_pairs[i].value)
+            break;
+    }
+
+    if (i < opnd->match_pairs_num)
+        label_dst = jit_reg_no(opnd->match_pairs[i].target);
+    else if (opnd->default_target)
+        label_dst = jit_reg_no(opnd->default_target);
+    else
+        /* no match and no default target: nothing to emit */
+        return true;
+
+    if (!(is_last_insn && label_is_neighboring(cc, label_src, label_dst))) {
+        JMP_TO_LABEL(label_dst, label_src);
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode detecting lookupswitch entry register and jumping to matched label
+ *
+ * With fewer than 10 match pairs, a cmp/je pair is emitted per entry,
+ * followed by a jmp to the default target. Otherwise an indirect jump
+ * through a table of absolute label addresses embedded in the code is
+ * emitted, indexed by the entry register. All label addresses are
+ * placeholders that are recorded in jmp_info_list and patched later.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_offsets the offsets of each label
+ * @param label_src the index of src label
+ * @param reg_no the no of entry register
+ * @param opnd the lookup switch operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lookupswitch_r(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+               uint32 *label_offsets, int32 label_src, int32 reg_no,
+               const JitOpndLookupSwitch *opnd, bool is_last_insn)
+{
+    JitErrorHandler *jit_err = (JitErrorHandler *)a.code()->errorHandler();
+    JmpInfo *node;
+    Imm imm;
+    x86::Mem m;
+    uint32 i;
+    int32 label_dst = 0;
+    char *stream;
+
+    if (opnd->match_pairs_num < 10) {
+        /* For small count of branches, it is better to compare
+           the key with branch value and jump one by one */
+        for (i = 0; i < opnd->match_pairs_num; i++) {
+            imm.setValue(opnd->match_pairs[i].value);
+            a.cmp(regs_i32[reg_no], imm);
+
+            node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+            if (!node)
+                GOTO_FAIL;
+
+            /* The displacement starts 2 bytes into the je emitted next */
+            node->type = JMP_DST_LABEL_REL;
+            node->label_src = label_src;
+            node->dst_info.label_dst = jit_reg_no(opnd->match_pairs[i].target);
+            node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+            bh_list_insert(jmp_info_list, node);
+
+            imm.setValue(INT32_MAX);
+            a.je(imm);
+        }
+
+        if (opnd->default_target) {
+            label_dst = jit_reg_no(opnd->default_target);
+            if (!(is_last_insn
+                  && label_is_neighboring(cc, label_src, label_dst)))
+                JMP_TO_LABEL(label_dst, label_src);
+        }
+    }
+    else {
+        /* For bigger count of branches, use indirect jump */
+        /* unsigned extend to rsi */
+        a.mov(regs_i32[REG_I32_FREE_IDX], regs_i32[reg_no]);
+        imm.setValue(opnd->match_pairs_num);
+        a.cmp(regs_i64[REG_I64_FREE_IDX], imm);
+
+        /* Jump to default label if rsi >= br_count: the jb below skips
+           the 6-byte jmp to the default label when rsi < br_count */
+        imm.setValue(INT32_MAX);
+        a.jb(imm);
+        if (!jit_err->err) {
+            /* Locate the jb only after it has been emitted, as emitting
+               may grow and relocate the code buffer, which would leave a
+               pointer taken beforehand dangling. The jb occupies the
+               last 6 bytes and its displacement starts 2 bytes into it */
+            stream = (char *)a.code()->sectionById(0)->buffer().data()
+                     + a.code()->sectionById(0)->buffer().size() - 6;
+            *(uint32 *)(stream + 2) = 6;
+        }
+
+        node = (JmpInfo *)jit_calloc(sizeof(JmpInfo));
+        if (!node)
+            GOTO_FAIL;
+
+        node->type = JMP_DST_LABEL_REL;
+        node->label_src = label_src;
+        node->dst_info.label_dst = jit_reg_no(opnd->default_target);
+        node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+        bh_list_insert(jmp_info_list, node);
+
+        imm.setValue(INT32_MAX);
+        a.jmp(imm);
+
+        node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+        if (!node)
+            GOTO_FAIL;
+
+        node->type = JMP_LOOKUPSWITCH_BASE;
+        node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+        bh_list_insert(jmp_info_list, node);
+
+        /* LookupSwitch table base addr */
+        imm.setValue(INT64_MAX);
+        a.mov(regs_i64[reg_no], imm);
+
+        /* jmp *(base_addr + rsi * 8) */
+        m = x86::ptr(regs_i64[reg_no], regs_i64[REG_I64_FREE_IDX], 3);
+        a.jmp(m);
+
+        /* Store each dst label absolute address */
+        for (i = 0; i < opnd->match_pairs_num; i++) {
+            node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+            if (!node)
+                GOTO_FAIL;
+
+            node->type = JMP_DST_LABEL_ABS;
+            node->dst_info.label_dst = jit_reg_no(opnd->match_pairs[i].target);
+            node->offset = a.code()->sectionById(0)->buffer().size();
+            bh_list_insert(jmp_info_list, node);
+
+            a.embedUInt64(UINT64_MAX);
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode lookupswitch insn, LOOKUPSWITCH opnd
+ *
+ * Dispatches to lookupswitch_imm when the switch value is a constant
+ * and to lookupswitch_r when it is held in a register.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_offsets the offsets of each label
+ * @param label_src the index of src label
+ * @param opnd the lookup switch operand
+ * @param is_last_insn if current insn is the last insn of current block
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_lookupswitch(JitCompContext *cc, x86::Assembler &a,
+                   bh_list *jmp_info_list, uint32 *label_offsets,
+                   int32 label_src, const JitOpndLookupSwitch *opnd,
+                   bool is_last_insn)
+{
+    JitReg r0 = opnd->value;
+    int32 key, reg_no;
+    bool ok;
+
+    CHECK_KIND(r0, JIT_REG_KIND_I32);
+    CHECK_KIND(opnd->default_target, JIT_REG_KIND_L32);
+
+    if (!jit_reg_is_const(r0)) {
+        /* switch value lives in a register */
+        reg_no = jit_reg_no(r0);
+        CHECK_I32_REG_NO(reg_no);
+        ok = lookupswitch_r(cc, a, jmp_info_list, label_offsets, label_src,
+                            reg_no, opnd, is_last_insn);
+    }
+    else {
+        /* switch value is a constant */
+        key = jit_cc_get_const_I32(cc, r0);
+        ok = lookupswitch_imm(cc, a, jmp_info_list, label_offsets, label_src,
+                              key, opnd, is_last_insn);
+    }
+    if (!ok)
+        GOTO_FAIL;
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode callnative insn, CALLNATIVE r0, r1, ...
+ *
+ * Operand 0 is the return register (may be 0), operand 1 the constant
+ * I64 address of the native function, and the remaining operands are
+ * the arguments. Integer arguments are moved into rdi, rsi, rdx, rcx,
+ * r8, r9 in that order and floating-point arguments into the registers
+ * with index 0, 1, ... of the f32/f64 register tables. At most 6
+ * arguments of each class are supported; more make the lowering fail.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param insn current insn info
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_callnative(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
+{
+    void (*func_ptr)(void);
+    JitReg ret_reg, func_reg, arg_reg;
+    /* the index in regs_i64 of the registers used to pass integer
+       arguments, in passing order */
+    uint8 regs_arg_idx[] = { REG_RDI_IDX, REG_RSI_IDX, REG_RDX_IDX,
+                             REG_RCX_IDX, REG_R8_IDX,  REG_R9_IDX };
+    const int32 max_reg_args =
+        (int32)(sizeof(regs_arg_idx) / sizeof(regs_arg_idx[0]));
+    Imm imm;
+    uint32 i, opnd_num;
+    int32 integer_reg_index = 0, floatpoint_reg_index = 0;
+
+    ret_reg = *(jit_insn_opndv(insn, 0));
+    func_reg = *(jit_insn_opndv(insn, 1));
+    CHECK_KIND(func_reg, JIT_REG_KIND_I64);
+    CHECK_CONST(func_reg);
+
+    func_ptr = (void (*)(void))jit_cc_get_const_I64(cc, func_reg);
+
+    opnd_num = jit_insn_opndv_num(insn);
+    for (i = 0; i < opnd_num - 2; i++) {
+        arg_reg = *(jit_insn_opndv(insn, i + 2));
+        switch (jit_reg_kind(arg_reg)) {
+            case JIT_REG_KIND_I32:
+            {
+                int32 reg_no;
+
+                /*TODO: if arguments number is greater than 6 */
+                /* Checked at run time, as an assert alone would let a
+                   release build read past the end of regs_arg_idx */
+                if (integer_reg_index >= max_reg_args) {
+                    LOG_VERBOSE("Too many integer arguments of callnative\n");
+                    goto fail;
+                }
+                reg_no = regs_arg_idx[integer_reg_index++];
+                CHECK_I64_REG_NO(reg_no);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_i64(a, reg_no,
+                                     (int64)jit_cc_get_const_I32(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_I32_REG_NO(arg_reg_no);
+                    extend_r32_to_r64(a, reg_no, arg_reg_no, true);
+                }
+                break;
+            }
+            case JIT_REG_KIND_I64:
+            {
+                int32 reg_no;
+
+                /*TODO: if arguments number is greater than 6 */
+                if (integer_reg_index >= max_reg_args) {
+                    LOG_VERBOSE("Too many integer arguments of callnative\n");
+                    goto fail;
+                }
+                reg_no = regs_arg_idx[integer_reg_index++];
+                CHECK_I64_REG_NO(reg_no);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_i64(a, reg_no,
+                                     jit_cc_get_const_I64(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_I64_REG_NO(arg_reg_no);
+                    mov_r_to_r_i64(a, reg_no, arg_reg_no);
+                }
+                break;
+            }
+            case JIT_REG_KIND_F32:
+            {
+                /*TODO: if arguments number is greater than 6 */
+                if (floatpoint_reg_index >= max_reg_args) {
+                    LOG_VERBOSE("Too many float arguments of callnative\n");
+                    goto fail;
+                }
+                CHECK_F32_REG_NO((int32)floatpoint_reg_index);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_f32(a, floatpoint_reg_index,
+                                     jit_cc_get_const_F32(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_F32_REG_NO(arg_reg_no);
+                    mov_r_to_r_f32(a, floatpoint_reg_index, arg_reg_no);
+                }
+                floatpoint_reg_index++;
+                break;
+            }
+            case JIT_REG_KIND_F64:
+            {
+                /*TODO: if arguments number is greater than 6 */
+                if (floatpoint_reg_index >= max_reg_args) {
+                    LOG_VERBOSE("Too many float arguments of callnative\n");
+                    goto fail;
+                }
+                CHECK_F64_REG_NO((int32)floatpoint_reg_index);
+                if (jit_reg_is_const(arg_reg)) {
+                    mov_imm_to_r_f64(a, floatpoint_reg_index,
+                                     jit_cc_get_const_F64(cc, arg_reg));
+                }
+                else {
+                    int32 arg_reg_no = jit_reg_no(arg_reg);
+                    CHECK_F64_REG_NO(arg_reg_no);
+                    mov_r_to_r_f64(a, floatpoint_reg_index, arg_reg_no);
+                }
+                floatpoint_reg_index++;
+                break;
+            }
+            default:
+            {
+
+                bh_assert(0);
+                goto fail;
+            }
+        }
+    }
+
+    /* Call the native function through rax */
+    imm.setValue((uint64)func_ptr);
+    a.mov(regs_i64[REG_RAX_IDX], imm);
+    a.call(regs_i64[REG_RAX_IDX]);
+
+    if (ret_reg) {
+        uint32 ret_reg_no = jit_reg_no(ret_reg);
+        if (jit_reg_kind(ret_reg) == JIT_REG_KIND_I64) {
+            CHECK_I64_REG_NO(ret_reg_no);
+            /* mov res, rax */
+            mov_r_to_r_i64(a, ret_reg_no, REG_RAX_IDX);
+        }
+        else if (jit_reg_kind(ret_reg) == JIT_REG_KIND_F64) {
+            CHECK_F64_REG_NO(ret_reg_no);
+            /* mov res, xmm0_f64 */
+            mov_r_to_r_f64(a, ret_reg_no, 0);
+        }
+        else {
+            /* An I32/F32 result is expected to be assigned to the
+               register it is returned in, so that no mov is needed */
+            bh_assert((jit_reg_kind(ret_reg) == JIT_REG_KIND_I32
+                       && ret_reg_no == REG_EAX_IDX)
+                      || (jit_reg_kind(ret_reg) == JIT_REG_KIND_F32
+                          && ret_reg_no == 0));
+        }
+    }
+
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode callbc insn, CALLBC r0, r1, r2
+ *
+ * Loads the function index into rdx, stores the address of the code
+ * following the emitted jmp into the frame slot at
+ * cc->jitted_return_address_offset, and jumps to the callee. The code
+ * emitted after the jmp moves the result from edx/rdx/xmm0 into the
+ * return register, if there is one.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param jmp_info_list the jmp info list
+ * @param label_src the index of src label
+ * @param insn current insn info; operand 0 is the return register
+ *        (may be 0), operand 2 the I64 register holding the callee's
+ *        jitted code address, operand 3 the I32 function index
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_callbc(JitCompContext *cc, x86::Assembler &a, bh_list *jmp_info_list,
+             int32 label_src, JitInsn *insn)
+{
+    JmpInfo *node;
+    Imm imm;
+    JitReg edx_hreg = jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
+    JitReg rdx_hreg = jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
+    JitReg xmm0_f32_hreg = jit_reg_new(JIT_REG_KIND_F32, 0);
+    JitReg xmm0_f64_hreg = jit_reg_new(JIT_REG_KIND_F64, 0);
+    JitReg ret_reg = *(jit_insn_opnd(insn, 0));
+    JitReg func_reg = *(jit_insn_opnd(insn, 2));
+    JitReg func_idx = *(jit_insn_opnd(insn, 3));
+    JitReg src_reg;
+    int32 func_reg_no, func_idx_reg_no;
+
+    /* Load return_jitted_addr from stack */
+    x86::Mem m(x86::rbp, cc->jitted_return_address_offset);
+
+    CHECK_KIND(func_reg, JIT_REG_KIND_I64);
+    func_reg_no = jit_reg_no(func_reg);
+    CHECK_I64_REG_NO(func_reg_no);
+
+    CHECK_KIND(func_idx, JIT_REG_KIND_I32);
+    if (jit_reg_is_const(func_idx)) {
+        imm.setValue(jit_cc_get_const_I32(cc, func_idx));
+        a.mov(regs_i64[REG_RDX_IDX], imm);
+    }
+    else {
+        func_idx_reg_no = jit_reg_no(func_idx);
+        CHECK_I32_REG_NO(func_idx_reg_no);
+        /* Zero-extend the index to rdx. x86-64 has no movzx with a
+           32-bit source; a 32-bit mov already clears the upper half of
+           the 64-bit destination register */
+        a.mov(regs_i32[REG_EDX_IDX], regs_i32[func_idx_reg_no]);
+    }
+
+    node = (JmpInfo *)jit_malloc(sizeof(JmpInfo));
+    if (!node)
+        GOTO_FAIL;
+
+    /* The 64-bit immediate starts 2 bytes into the mov emitted next */
+    node->type = JMP_END_OF_CALLBC;
+    node->label_src = label_src;
+    node->offset = a.code()->sectionById(0)->buffer().size() + 2;
+    bh_list_insert(jmp_info_list, node);
+
+    /* Set next jited addr to glue_ret_jited_addr, the placeholder will be
+       replaced with actual address after actual code cache is allocated */
+    imm.setValue(INT64_MAX);
+    a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+    a.mov(m, regs_i64[REG_I64_FREE_IDX]);
+    a.jmp(regs_i64[func_reg_no]);
+
+    if (ret_reg) {
+        /* Pick the hard register that carries a result of this kind */
+        switch (jit_reg_kind(ret_reg)) {
+            case JIT_REG_KIND_I32:
+                src_reg = edx_hreg;
+                break;
+            case JIT_REG_KIND_I64:
+                src_reg = rdx_hreg;
+                break;
+            case JIT_REG_KIND_F32:
+                src_reg = xmm0_f32_hreg;
+                break;
+            case JIT_REG_KIND_F64:
+                src_reg = xmm0_f64_hreg;
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+
+        if (!lower_mov(cc, a, ret_reg, src_reg))
+            return false;
+    }
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode returnbc insn
+ *
+ * Moves the return value (if any) into edx/rdx/xmm0 according to its
+ * kind, loads the constant action into eax, and jumps to the address
+ * stored in the frame slot at cc->jitted_return_address_offset.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param insn current insn info; operand 0 is the constant I32 action
+ *        and operand 1 the return value register (may be 0)
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_returnbc(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
+{
+    JitReg act_reg = *(jit_insn_opnd(insn, 0));
+    JitReg ret_reg = *(jit_insn_opnd(insn, 1));
+    JitReg dst_reg;
+    int32 act;
+
+    CHECK_CONST(act_reg);
+    CHECK_KIND(act_reg, JIT_REG_KIND_I32);
+
+    act = jit_cc_get_const_I32(cc, act_reg);
+
+    if (ret_reg) {
+        /* Pick the hard register that carries a result of this kind */
+        switch (jit_reg_kind(ret_reg)) {
+            case JIT_REG_KIND_I32:
+                dst_reg = jit_reg_new(JIT_REG_KIND_I32, REG_EDX_IDX);
+                break;
+            case JIT_REG_KIND_I64:
+                dst_reg = jit_reg_new(JIT_REG_KIND_I64, REG_RDX_IDX);
+                break;
+            case JIT_REG_KIND_F32:
+                dst_reg = jit_reg_new(JIT_REG_KIND_F32, 0);
+                break;
+            case JIT_REG_KIND_F64:
+                dst_reg = jit_reg_new(JIT_REG_KIND_F64, 0);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+        if (!lower_mov(cc, a, dst_reg, ret_reg))
+            return false;
+    }
+
+    {
+        Imm act_imm(act);
+        x86::Mem ret_addr(x86::rbp, cc->jitted_return_address_offset);
+
+        /* eax = act */
+        a.mov(x86::eax, act_imm);
+        /* jump to the saved return address */
+        a.jmp(ret_addr);
+    }
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Encode return insn
+ *
+ * Loads the constant action into eax and jumps to
+ * code_block_return_to_interp_from_jitted.
+ *
+ * @param cc the compiler context
+ * @param a the assembler to emit the code
+ * @param insn current insn info; operand 0 is the constant I32 action
+ *
+ * @return true if success, false if failed
+ */
+static bool
+lower_return(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
+{
+    JitReg act_reg = *(jit_insn_opnd(insn, 0));
+    int32 action;
+
+    CHECK_CONST(act_reg);
+    CHECK_KIND(act_reg, JIT_REG_KIND_I32);
+
+    action = jit_cc_get_const_I32(cc, act_reg);
+    {
+        Imm imm(action);
+
+        /* eax = act */
+        a.mov(x86::eax, imm);
+
+        /* jump to the return block through the free i64 register */
+        imm.setValue((uintptr_t)code_block_return_to_interp_from_jitted);
+        a.mov(regs_i64[REG_I64_FREE_IDX], imm);
+        a.jmp(regs_i64[REG_I64_FREE_IDX]);
+    }
+    return true;
+fail:
+    return false;
+}
+
+/**
+ * Back-patch every jmp address placeholder that was recorded before the
+ * code cache was allocated with the actual address, now that the code
+ * cache has been allocated
+ *
+ * @param cc compiler context containing the allocated code cache info;
+ *        the recorded offsets are relative to cc->jitted_addr_begin
+ * @param jmp_info_list the jmp info list
+ */
+static void
+patch_jmp_info_list(JitCompContext *cc, bh_list *jmp_info_list)
+{
+    JmpInfo *info;
+
+    for (info = (JmpInfo *)bh_list_first_elem(jmp_info_list); info;
+         info = (JmpInfo *)bh_list_elem_next(info)) {
+        /* Position of the placeholder inside the emitted code */
+        char *patch_pos = (char *)cc->jitted_addr_begin + info->offset;
+
+        switch (info->type) {
+            case JMP_DST_LABEL_REL:
+            {
+                /* Jmp with relative address: distance from the
+                   placeholder to the label, minus the 4 bytes of the
+                   placeholder itself */
+                JitReg label =
+                    jit_reg_new(JIT_REG_KIND_L32, info->dst_info.label_dst);
+                uintptr_t target =
+                    (uintptr_t)*jit_annl_jitted_addr(cc, label);
+
+                *(int32 *)patch_pos =
+                    (int32)(target - (uintptr_t)patch_pos) - 4;
+                break;
+            }
+            case JMP_DST_LABEL_ABS:
+            {
+                /* Jmp with absolute address of the label */
+                JitReg label =
+                    jit_reg_new(JIT_REG_KIND_L32, info->dst_info.label_dst);
+
+                *(uintptr_t *)patch_pos =
+                    (uintptr_t)*jit_annl_jitted_addr(cc, label);
+                break;
+            }
+            case JMP_END_OF_CALLBC:
+                /* 7 is the size of mov and jmp instruction */
+                *(uintptr_t *)patch_pos =
+                    (uintptr_t)patch_pos + sizeof(uintptr_t) + 7;
+                break;
+            case JMP_LOOKUPSWITCH_BASE:
+                /* 11 is the size of 8-byte addr and 3-byte jmp
+                   instruction */
+                *(uintptr_t *)patch_pos = (uintptr_t)patch_pos + 11;
+                break;
+            default:
+                /* Other types need no patching */
+                break;
+        }
+    }
+}
+
+/**
+ * Unlink every node from the jmp info list and release it with jit_free
+ *
+ * @param jmp_info_list the jmp info list to empty
+ */
+static void
+free_jmp_info_list(bh_list *jmp_info_list)
+{
+    void *node, *following;
+
+    for (node = bh_list_first_elem(jmp_info_list); node; node = following) {
+        /* Fetch the successor before the node is unlinked and freed */
+        following = bh_list_elem_next(node);
+
+        bh_list_remove(jmp_info_list, node);
+        jit_free(node);
+    }
+}
+
+/**
+ * Encode moving an int32 immediate into a float register: the immediate
+ * is loaded into the free 32-bit integer register (REG_I32_FREE_IDX)
+ * and then transferred to the float register with movd
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst float register
+ * @param data the src immediate data
+ *
+ * @return always true
+ */
+static bool
+cast_imm_i32_to_r_f32(x86::Assembler &a, int32 reg_no, int32 data)
+{
+    const auto &scratch = regs_i32[REG_I32_FREE_IDX];
+
+    a.mov(scratch, Imm(data));
+    a.movd(regs_float[reg_no], scratch);
+    return true;
+}
+
+/**
+ * Encode moving an int32 register into a float register with movd
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst float register
+ * @param reg_no_src the no of src int32 register
+ *
+ * @return always true
+ */
+static bool
+cast_r_i32_to_r_f32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const auto &dst_float = regs_float[reg_no_dst];
+    const auto &src_i32 = regs_i32[reg_no_src];
+
+    a.movd(dst_float, src_i32);
+    return true;
+}
+
+/**
+ * Encode moving an int64 immediate into a double register: the
+ * immediate is loaded into the free 64-bit integer register
+ * (REG_I64_FREE_IDX) and then transferred to the float register with
+ * movq
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst double register
+ * @param data the src immediate data
+ *
+ * @return always true
+ */
+static bool
+cast_imm_i64_to_r_f64(x86::Assembler &a, int32 reg_no, int64 data)
+{
+    const auto &scratch = regs_i64[REG_I64_FREE_IDX];
+
+    a.mov(scratch, Imm(data));
+    a.movq(regs_float[reg_no], scratch);
+    return true;
+}
+
+/**
+ * Encode moving an int64 register into a double register with movq
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst double register
+ * @param reg_no_src the no of src int64 register
+ *
+ * @return always true
+ */
+static bool
+cast_r_i64_to_r_f64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const auto &dst_double = regs_float[reg_no_dst];
+    const auto &src_i64 = regs_i64[reg_no_src];
+
+    a.movq(dst_double, src_i64);
+    return true;
+}
+
+/**
+ * Encode moving a float immediate into an int32 register: the float is
+ * stored into a cast_float_to_integer value, whose integer member is
+ * then emitted as an int32 immediate move
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int32 register
+ * @param data the src immediate data
+ *
+ * @return the result of mov_imm_to_r_i32
+ */
+static bool
+cast_imm_f32_to_r_i32(x86::Assembler &a, int32 reg_no, float data)
+{
+    cast_float_to_integer bits;
+
+    bits.f = data;
+    return mov_imm_to_r_i32(a, reg_no, bits.i);
+}
+
+/**
+ * Encode moving a float register into an int32 register with movd
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int32 register
+ * @param reg_no_src the no of src float register
+ *
+ * @return always true
+ */
+static bool
+cast_r_f32_to_r_i32(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const auto &dst_i32 = regs_i32[reg_no_dst];
+    const auto &src_float = regs_float[reg_no_src];
+
+    a.movd(dst_i32, src_float);
+    return true;
+}
+
+/**
+ * Encode moving a double immediate into an int64 register: the double
+ * is stored into a cast_double_to_integer value, whose integer member
+ * is then emitted as an int64 immediate move
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no the no of dst int64 register
+ * @param data the src immediate data
+ *
+ * @return the result of mov_imm_to_r_i64
+ */
+static bool
+cast_imm_f64_to_r_i64(x86::Assembler &a, int32 reg_no, double data)
+{
+    cast_double_to_integer bits;
+
+    bits.d = data;
+    return mov_imm_to_r_i64(a, reg_no, bits.i);
+}
+
+/**
+ * Encode moving a double register into an int64 register with movq
+ *
+ * @param a the assembler to emit the code
+ * @param reg_no_dst the no of dst int64 register
+ * @param reg_no_src the no of src double register
+ *
+ * @return always true
+ */
+static bool
+cast_r_f64_to_r_i64(x86::Assembler &a, int32 reg_no_dst, int32 reg_no_src)
+{
+    const auto &dst_i64 = regs_i64[reg_no_dst];
+    const auto &src_double = regs_float[reg_no_src];
+
+    a.movq(dst_i64, src_double);
+    return true;
+}
+
+/**
+ * Encode a cast insn (e.g. F32CASTI32) with dst operand r0 and src
+ * operand r1. The macro expects `cc`, `a`, `r0` and `r1` to be defined
+ * in the enclosing scope, and runs GOTO_FAIL when the selected helper
+ * returns false. When r1 is a const, the helper
+ * cast_imm_<type1>_to_r_<type0> is called with the const value,
+ * otherwise cast_r_<type1>_to_r_<type0> is called with the src register
+ * no.
+ *
+ * @param kind0 the dst JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param kind1 the src JIT_REG_KIND, such as I32, I64, F32 and F64
+ * @param type0 the dst data type as spelled in the helper names,
+ * such as i32, i64, f32, f64
+ * @param type1 the src data type as spelled in the helper names,
+ * such as i32, i64, f32, f64
+ * @param Type1 the C type used to hold the src const value returned by
+ * jit_cc_get_const_<kind1>
+ */
+#define CAST_R_R(kind0, kind1, type0, type1, Type1) \
+ do { \
+ bool _ret = false; \
+ int32 reg_no_dst = 0, reg_no_src = 0; \
+ CHECK_KIND(r0, JIT_REG_KIND_##kind0); \
+ CHECK_KIND(r1, JIT_REG_KIND_##kind1); \
+ \
+ reg_no_dst = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_dst, JIT_REG_KIND_##kind0); \
+ if (jit_reg_is_const(r1)) { \
+ Type1 data = jit_cc_get_const_##kind1(cc, r1); \
+ _ret = cast_imm_##type1##_to_r_##type0(a, reg_no_dst, data); \
+ } \
+ else { \
+ reg_no_src = jit_reg_no(r1); \
+ CHECK_REG_NO(reg_no_src, JIT_REG_KIND_##kind1); \
+ _ret = cast_r_##type1##_to_r_##type0(a, reg_no_dst, reg_no_src); \
+ } \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+
+/**
+ * Encode extending the low bytes of the src register to an I32 or I64
+ * kind value in the dst register. Every extension helper is called
+ * with `false` as its last argument (presumably selecting zero
+ * extension rather than sign extension - confirm against the helpers).
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64, I64 kind only)
+ * @param kind_dst the kind of data to extend to, could be I32, I64
+ * @param reg_no_src the index of register hold src value
+ * @param reg_no_dst the index of register that receives the result
+ *
+ * @return true if success, false if bytes_dst or kind_dst is not
+ *         supported
+ */
+static bool
+extend_r_to_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+              int32 reg_no_src, int32 reg_no_dst)
+{
+    if (kind_dst == JIT_REG_KIND_I32) {
+        bh_assert(reg_no_src < 16 && reg_no_dst < 16);
+        switch (bytes_dst) {
+            case 1:
+                extend_r8_to_r32(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 2:
+                extend_r16_to_r32(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 4:
+                mov_r_to_r_i32(a, reg_no_dst, reg_no_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else if (kind_dst == JIT_REG_KIND_I64) {
+        bh_assert(reg_no_src < 16 && reg_no_dst < 16);
+        switch (bytes_dst) {
+            case 1:
+                extend_r8_to_r64(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 2:
+                extend_r16_to_r64(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 4:
+                extend_r32_to_r64(a, reg_no_dst, reg_no_src, false);
+                break;
+            case 8:
+                mov_r_to_r_i64(a, reg_no_dst, reg_no_src);
+                break;
+            default:
+                bh_assert(0);
+                return false;
+        }
+    }
+    else {
+        /* Unsupported kind: nothing was emitted, so report failure even
+           when assertions are compiled out, like the unsupported width
+           cases above */
+        bh_assert(0);
+        return false;
+    }
+    return true;
+}
+
+/**
+ * Encode atomic compare and exchange (lock cmpxchg). When calling this
+ * function, the value for comparison should be already moved into
+ * register al/ax/eax/rax
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data to move, could be I32, I64
+ * @param reg_no_xchg the index of register hold exchange value
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false if bytes_dst is not supported
+ */
+static bool
+at_cmpxchg(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst,
+           int32 reg_no_xchg, x86::Mem &m_dst)
+{
+    bh_assert(kind_dst == JIT_REG_KIND_I64
+              || (kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4));
+    bh_assert(reg_no_xchg < 16);
+
+    switch (bytes_dst) {
+        case 1:
+            a.lock().cmpxchg(m_dst, regs_i8[reg_no_xchg]);
+            return true;
+        case 2:
+            a.lock().cmpxchg(m_dst, regs_i16[reg_no_xchg]);
+            return true;
+        case 4:
+            a.lock().cmpxchg(m_dst, regs_i32[reg_no_xchg]);
+            return true;
+        case 8:
+            a.lock().cmpxchg(m_dst, regs_i64[reg_no_xchg]);
+            return true;
+        default:
+            break;
+    }
+
+    /* Unsupported operand width */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode atomic compare and exchange on memory addressed by reg base
+ * plus reg offset, with the replacement value held in a register.
+ * at_cmpxchg is emitted first, then the low bytes_dst bytes of rax are
+ * extended in place to the dst kind via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_xchg the no of register that stores the conditional
+ *        replacement value
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset
+ *        address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_r_ra_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_xchg,
+                                int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    if (!at_cmpxchg(a, bytes_dst, kind_dst, reg_no_xchg, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
+/**
+ * Encode atomic compare and exchange on memory addressed by reg base
+ * plus imm offset, with the replacement value held in a register.
+ * at_cmpxchg is emitted first, then the low bytes_dst bytes of rax are
+ * extended in place to the dst kind via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_xchg the no of register that stores the conditional
+ *        replacement value
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_r_ra_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                  uint32 kind_dst, int32 reg_no_xchg,
+                                  int32 reg_no_base, int32 offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!at_cmpxchg(a, bytes_dst, kind_dst, reg_no_xchg, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
+/**
+ * Encode atomic compare and exchange on memory addressed by reg base
+ * plus reg offset, with an immediate replacement value. The immediate
+ * is first loaded into the register returned by mov_imm_to_free_reg,
+ * at_cmpxchg is emitted with that register, then the low bytes_dst
+ * bytes of rax are extended in place to the dst kind via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param data_xchg pointer to the immediate data for exchange (the
+ *        conditional replacement value)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset
+ *        address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_imm_ra_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                                  uint32 kind_dst, void *data_xchg,
+                                  int32 reg_no_base, int32 reg_no_offset)
+{
+    Imm replacement;
+    imm_set_value(replacement, data_xchg, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, replacement, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    if (!at_cmpxchg(a, bytes_dst, kind_dst, tmp_reg_no, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
+/**
+ * Encode atomic compare and exchange on memory addressed by reg base
+ * plus imm offset, with an immediate replacement value. The immediate
+ * is first loaded into the register returned by mov_imm_to_free_reg,
+ * at_cmpxchg is emitted with that register, then the low bytes_dst
+ * bytes of rax are extended in place to the dst kind via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param data_xchg pointer to the immediate data for exchange (the
+ *        conditional replacement value)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_cmpxchg_imm_ra_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                    uint32 kind_dst, void *data_xchg,
+                                    int32 reg_no_base, int32 offset)
+{
+    Imm replacement;
+    imm_set_value(replacement, data_xchg, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, replacement, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!at_cmpxchg(a, bytes_dst, kind_dst, tmp_reg_no, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, REG_RAX_IDX);
+}
+
+/**
+ * Encode insn cmpxchg: CMPXCHG_type r0, r1, r2, r3, r4
+ *
+ * The macro expects `cc`, `a`, `r0`, `r1`, `r2` and `r3` to be defined
+ * in the enclosing scope and runs GOTO_FAIL when the selected
+ * at_cmpxchg_* helper returns false; r4 is not referenced here.
+ * r0: replacement value, const or register
+ * r1: expected value, must not be const and is asserted to be
+ * register no REG_EAX_IDX or REG_RAX_IDX
+ * r2: memory base address, must not be const
+ * r3: offset, either a const of kind I32 or a register of kind I64
+ *
+ * @param kind the data kind, can only be I32 or I64
+ * @param type the C type used to hold a const replacement value
+ * @param bytes_dst the byte number of dst data
+ */
+#define CMPXCHG_R_R_R_R_R(kind, type, bytes_dst) \
+ do { \
+ type data_xchg = 0; \
+ int32 reg_no_xchg = 0, reg_no_cmp = 0, reg_no_base = 0, \
+ reg_no_offset = 0; \
+ int32 offset = 0; \
+ bool _ret = false; \
+ if (jit_reg_is_const(r3)) { \
+ CHECK_KIND(r3, JIT_REG_KIND_I32); \
+ } \
+ else { \
+ CHECK_KIND(r3, JIT_REG_KIND_I64); \
+ } \
+ /* r1: expected value(it must in register a) \
+ * r2: memory base addr can't be const */ \
+ CHECK_NCONST(r1); \
+ reg_no_cmp = jit_reg_no(r1); \
+ bh_assert(reg_no_cmp == REG_EAX_IDX || reg_no_cmp == REG_RAX_IDX); \
+ CHECK_REG_NO(reg_no_cmp, jit_reg_kind(r1)); \
+ CHECK_NCONST(r2); \
+ reg_no_base = jit_reg_no(r2); \
+ CHECK_REG_NO(reg_no_base, jit_reg_kind(r2)); \
+ /* r0: replacement value r3: offset can be const */ \
+ if (jit_reg_is_const(r0)) \
+ data_xchg = jit_cc_get_const_##kind(cc, r0); \
+ else { \
+ reg_no_xchg = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_xchg, jit_reg_kind(r0)); \
+ } \
+ if (jit_reg_is_const(r3)) \
+ offset = jit_cc_get_const_I32(cc, r3); \
+ else { \
+ reg_no_offset = jit_reg_no(r3); \
+ CHECK_REG_NO(reg_no_offset, jit_reg_kind(r3)); \
+ } \
+ \
+ if (jit_reg_is_const(r0)) { \
+ if (jit_reg_is_const(r3)) \
+ _ret = at_cmpxchg_imm_ra_base_r_offset_imm( \
+ a, bytes_dst, JIT_REG_KIND_##kind, &data_xchg, \
+ reg_no_base, offset); \
+ else \
+ _ret = at_cmpxchg_imm_ra_base_r_offset_r( \
+ a, bytes_dst, JIT_REG_KIND_##kind, &data_xchg, \
+ reg_no_base, reg_no_offset); \
+ } \
+ else { \
+ if (jit_reg_is_const(r3)) \
+ _ret = at_cmpxchg_r_ra_base_r_offset_imm( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_xchg, \
+ reg_no_base, offset); \
+ else \
+ _ret = at_cmpxchg_r_ra_base_r_offset_r( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_xchg, \
+ reg_no_base, reg_no_offset); \
+ } \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+/**
+ * Encode negating, in place, the value held in a register
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data, could be I32, I64
+ * @param reg_no_src the index of the register that is negated
+ *
+ * @return true if success, false if bytes_dst is not supported
+ */
+static bool
+neg_r(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src)
+{
+    bh_assert(kind_dst == JIT_REG_KIND_I64
+              || (kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4));
+    bh_assert(reg_no_src < 16);
+
+    switch (bytes_dst) {
+        case 1:
+            a.neg(regs_i8[reg_no_src]);
+            return true;
+        case 2:
+            a.neg(regs_i16[reg_no_src]);
+            return true;
+        case 4:
+            a.neg(regs_i32[reg_no_src]);
+            return true;
+        case 8:
+            a.neg(regs_i64[reg_no_src]);
+            return true;
+        default:
+            break;
+    }
+
+    /* Unsupported operand width */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode atomic exchange and add (lock xadd) between a memory operand
+ * and a register
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of data, could be I32, I64
+ * @param reg_no_src the index of register hold operand value of add
+ *        operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false if bytes_dst is not supported
+ */
+static bool
+at_xadd(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_src,
+        x86::Mem &m_dst)
+{
+    bh_assert(kind_dst == JIT_REG_KIND_I64
+              || (kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4));
+    bh_assert(reg_no_src < 16);
+
+    switch (bytes_dst) {
+        case 1:
+            a.lock().xadd(m_dst, regs_i8[reg_no_src]);
+            return true;
+        case 2:
+            a.lock().xadd(m_dst, regs_i16[reg_no_src]);
+            return true;
+        case 4:
+            a.lock().xadd(m_dst, regs_i32[reg_no_src]);
+            return true;
+        case 8:
+            a.lock().xadd(m_dst, regs_i64[reg_no_src]);
+            return true;
+        default:
+            break;
+    }
+
+    /* Unsupported operand width */
+    bh_assert(0);
+    return false;
+}
+
+/**
+ * Encode atomic rmw add on memory addressed by reg base plus imm
+ * offset, with an immediate addend. The immediate is loaded into the
+ * register returned by mov_imm_to_free_reg, at_xadd is emitted with
+ * that register, and its low bytes_dst bytes are then extended into
+ * the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data (the addend)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    Imm addend;
+    imm_set_value(addend, data_src, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, addend, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!at_xadd(a, bytes_dst, kind_dst, tmp_reg_no, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, tmp_reg_no, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw add on memory addressed by reg base plus reg
+ * offset, with an immediate addend. The immediate is loaded into the
+ * register returned by mov_imm_to_free_reg, at_xadd is emitted with
+ * that register, and its low bytes_dst bytes are then extended into
+ * the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data (the addend)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of
+ *        the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    Imm addend;
+    imm_set_value(addend, data_src, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, addend, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    if (!at_xadd(a, bytes_dst, kind_dst, tmp_reg_no, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, tmp_reg_no, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw add on memory addressed by reg base plus imm
+ * offset, with the addend held in a register. at_xadd is emitted with
+ * the src register, and the low bytes_dst bytes of the src register
+ * are then extended into the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the addend
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!at_xadd(a, bytes_dst, kind_dst, reg_no_src, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw add on memory addressed by reg base plus reg
+ * offset, with the addend held in a register. at_xadd is emitted with
+ * the src register, and the low bytes_dst bytes of the src register
+ * are then extended into the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the addend
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of
+ *        the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_add_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    if (!at_xadd(a, bytes_dst, kind_dst, reg_no_src, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw sub on memory addressed by reg base plus imm
+ * offset, with an immediate subtrahend. The immediate is loaded into
+ * the register returned by mov_imm_to_free_reg and negated with neg_r,
+ * at_xadd is emitted with that register, and its low bytes_dst bytes
+ * are then extended into the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data (the subtrahend)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    Imm subtrahend;
+    imm_set_value(subtrahend, data_src, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, subtrahend, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    /* Subtraction is encoded as adding the negated operand */
+    if (!neg_r(a, bytes_dst, kind_dst, tmp_reg_no))
+        return false;
+    if (!at_xadd(a, bytes_dst, kind_dst, tmp_reg_no, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, tmp_reg_no, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw sub on memory addressed by reg base plus reg
+ * offset, with an immediate subtrahend. The immediate is loaded into
+ * the register returned by mov_imm_to_free_reg and negated with neg_r,
+ * at_xadd is emitted with that register, and its low bytes_dst bytes
+ * are then extended into the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data (the subtrahend)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of
+ *        the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    Imm subtrahend;
+    imm_set_value(subtrahend, data_src, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, subtrahend, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    /* Subtraction is encoded as adding the negated operand */
+    if (!neg_r(a, bytes_dst, kind_dst, tmp_reg_no))
+        return false;
+    if (!at_xadd(a, bytes_dst, kind_dst, tmp_reg_no, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, tmp_reg_no, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw sub on memory addressed by reg base plus imm
+ * offset, with the subtrahend held in a register. The src register is
+ * negated in place with neg_r, at_xadd is emitted with it, and its low
+ * bytes_dst bytes are then extended into the dst register via
+ * extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the subtrahend;
+ *        the register is overwritten by the emitted code
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    /* Subtraction is encoded as adding the negated operand */
+    if (!neg_r(a, bytes_dst, kind_dst, reg_no_src))
+        return false;
+    if (!at_xadd(a, bytes_dst, kind_dst, reg_no_src, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw sub on memory addressed by reg base plus reg
+ * offset, with the subtrahend held in a register. The src register is
+ * negated in place with neg_r, at_xadd is emitted with it, and its low
+ * bytes_dst bytes are then extended into the dst register via
+ * extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the subtrahend;
+ *        the register is overwritten by the emitted code
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of
+ *        the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_sub_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    /* Subtraction is encoded as adding the negated operand */
+    if (!neg_r(a, bytes_dst, kind_dst, reg_no_src))
+        return false;
+    if (!at_xadd(a, bytes_dst, kind_dst, reg_no_src, target))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xchg on memory addressed by reg base plus imm
+ * offset, with an immediate new value. The immediate is loaded into
+ * the register returned by mov_imm_to_free_reg, xchg_r_to_m is emitted
+ * with that register, and its low bytes_dst bytes are then extended
+ * into the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data (the new value)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                  uint32 kind_dst, int32 reg_no_dst,
+                                  void *data_src, int32 reg_no_base,
+                                  int32 offset)
+{
+    Imm new_value;
+    imm_set_value(new_value, data_src, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, new_value, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!xchg_r_to_m(a, bytes_dst, kind_dst, target, tmp_reg_no))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, tmp_reg_no, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xchg on memory addressed by reg base plus reg
+ * offset, with an immediate new value. The immediate is loaded into
+ * the register returned by mov_imm_to_free_reg, xchg_r_to_m is emitted
+ * with that register, and its low bytes_dst bytes are then extended
+ * into the dst register via extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data (the new value)
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of
+ *        the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_dst,
+                                void *data_src, int32 reg_no_base,
+                                int32 reg_no_offset)
+{
+    Imm new_value;
+    imm_set_value(new_value, data_src, bytes_dst);
+    uint32 tmp_reg_no = mov_imm_to_free_reg(a, new_value, bytes_dst);
+
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    if (!xchg_r_to_m(a, bytes_dst, kind_dst, target, tmp_reg_no))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, tmp_reg_no, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xchg on memory addressed by reg base plus imm
+ * offset, with the new value held in a register. xchg_r_to_m is
+ * emitted with the src register, and the low bytes_dst bytes of the
+ * src register are then extended into the dst register via
+ * extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the new value
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_dst,
+                                int32 reg_no_src, int32 reg_no_base,
+                                int32 offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!xchg_r_to_m(a, bytes_dst, kind_dst, target, reg_no_src))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xchg on memory addressed by reg base plus reg
+ * offset, with the new value held in a register. xchg_r_to_m is
+ * emitted with the src register, and the low bytes_dst bytes of the
+ * src register are then extended into the dst register via
+ * extend_r_to_r
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the result, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the new value
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of
+ *        the memory
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xchg_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, int32 reg_no_dst,
+                              int32 reg_no_src, int32 reg_no_base,
+                              int32 reg_no_offset)
+{
+    x86::Mem target(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                    bytes_dst);
+
+    if (!xchg_r_to_m(a, bytes_dst, kind_dst, target, reg_no_src))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, reg_no_src, reg_no_dst);
+}
+
+/**
+ * Encode insn rmw logical operation: generate a loop to make sure it's atomic
+ *
+ * The memory value is read into RAX, copied to RBX, combined with the source
+ * register by bin_op, and written back through at_cmpxchg(); the sequence is
+ * retried (jne to the loop label) until the compare-exchange succeeds.
+ *
+ * The macro is expanded inside a function body and uses that function's
+ * a, kind_dst, reg_no_dst, reg_no_src and m_dst directly. It returns from
+ * the enclosing function: true after the loop has been emitted, false when
+ * the label cannot be created/bound or bytes_dst is not 1/2/4/8.
+ * The emitted code overwrites RAX and RBX.
+ *
+ * @param bin_op the operation, can be and/or/xor
+ * @param kind the data kind, can only be I32 or I64 (not referenced by the
+ *        expansion, which reads kind_dst of the enclosing function)
+ * @param bytes_dst the byte number of dst data
+ */
+#define AT_RMW_LOGICAL_LOOP(bin_op, kind, bytes_dst) \
+ do { \
+ bh_assert((kind_dst == JIT_REG_KIND_I32 && bytes_dst <= 4) \
+ || kind_dst == JIT_REG_KIND_I64); \
+ bh_assert(reg_no_src < 16 && reg_no_dst < 16); \
+ /* read original value in memory(operand 1) to rax(expected) */ \
+ mov_m_to_r(a, bytes_dst, kind_dst, false, REG_RAX_IDX, m_dst); \
+ Label loop = a.newLabel(); \
+ /* check whether loop is valid, and bind the loop label \
+ * to the current position in the code. */ \
+ if (!loop.isValid() || a.bind(loop) != kErrorOk) \
+ return false; \
+ /* move operand 1 to temp reg rbx */ \
+ mov_r_to_r(a, kind_dst, REG_RBX_IDX, REG_RAX_IDX); \
+ /* actual logical operation with operand 2, result save to rbx */ \
+ switch (bytes_dst) { \
+ case 1: \
+ a.bin_op##_(regs_i8[REG_RBX_IDX], regs_i8[reg_no_src]); \
+ break; \
+ case 2: \
+ a.bin_op##_(regs_i16[REG_RBX_IDX], regs_i16[reg_no_src]); \
+ break; \
+ case 4: \
+ a.bin_op##_(regs_i32[REG_RBX_IDX], regs_i32[reg_no_src]); \
+ break; \
+ case 8: \
+ a.bin_op##_(regs_i64[REG_RBX_IDX], regs_i64[reg_no_src]); \
+ break; \
+ default: \
+ bh_assert(0); \
+ return false; \
+ } \
+ /* cmp with read value in RAX, try to change with result value in RBX \
+ * REG, if change successfully, mem data is changed and exit loop(ZF \
+ * is set) if not, loop again(ZF is clear) and tries to do logical ops \
+ * atomically */ \
+ at_cmpxchg(a, bytes_dst, kind_dst, REG_RBX_IDX, m_dst); \
+ a.jne(loop); \
+ return true; \
+ } while (0)
+
+/**
+ * Encode atomic logical binary operation: and
+ *
+ * The whole body is the expansion of AT_RMW_LOGICAL_LOOP, which also
+ * provides the return statements of this function. The value read from
+ * memory is loaded into RAX by the emitted code; reg_no_dst is only
+ * range-checked here and is not written.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ * could be 1(byte), 2(short), 4(int32), 8(int64),
+ * @param kind_dst the kind of data to move, could be I32, I64
+ * @param reg_no_dst the index of dest register
+ * @param reg_no_src the index of register hold operand value of and operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_and(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_dst,
+ int32 reg_no_src, x86::Mem &m_dst)
+{
+ AT_RMW_LOGICAL_LOOP(and, kind_dst, bytes_dst);
+}
+
+/**
+ * Encode atomic logical binary operation: or
+ *
+ * The whole body is the expansion of AT_RMW_LOGICAL_LOOP, which also
+ * provides the return statements of this function. The value read from
+ * memory is loaded into RAX by the emitted code; reg_no_dst is only
+ * range-checked here and is not written.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ * could be 1(byte), 2(short), 4(int32), 8(int64),
+ * @param kind_dst the kind of data to move, could be I32, I64
+ * @param reg_no_dst the index of dest register
+ * @param reg_no_src the index of register hold operand value of or operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_or(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_dst,
+ int32 reg_no_src, x86::Mem &m_dst)
+{
+ AT_RMW_LOGICAL_LOOP(or, kind_dst, bytes_dst);
+}
+/**
+ * Encode atomic logical binary operation: xor
+ *
+ * The whole body is the expansion of AT_RMW_LOGICAL_LOOP, which also
+ * provides the return statements of this function. The value read from
+ * memory is loaded into RAX by the emitted code; reg_no_dst is only
+ * range-checked here and is not written.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data,
+ * could be 1(byte), 2(short), 4(int32), 8(int64),
+ * @param kind_dst the kind of data to move, could be I32, I64
+ * @param reg_no_dst the index of dest register
+ * @param reg_no_src the index of register hold operand value of xor operation
+ * @param m_dst the dest memory operand
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_xor(x86::Assembler &a, uint32 bytes_dst, uint32 kind_dst, int32 reg_no_dst,
+ int32 reg_no_src, x86::Mem &m_dst)
+{
+ AT_RMW_LOGICAL_LOOP(xor, kind_dst, bytes_dst);
+}
+
+/**
+ * Encode atomic rmw and with an immediate operand on memory addressed by
+ * a base register plus an immediate offset
+ *
+ * The immediate is moved into a free register, the and is emitted by
+ * at_and(), and RAX (the register at_and() loads the memory value into)
+ * is extended into the register of the returned value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data, read by imm_set_value()
+ *        with bytes_dst
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm_src;
+    uint32 reg_no_tmp;
+
+    /* materialize the immediate operand in a free register */
+    imm_set_value(imm_src, data_src, bytes_dst);
+    reg_no_tmp = mov_imm_to_free_reg(a, imm_src, bytes_dst);
+
+    if (!at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_tmp, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw and with an immediate operand on memory addressed by
+ * a base register plus an offset register
+ *
+ * The immediate is moved into a free register, the and is emitted by
+ * at_and(), and RAX (the register at_and() loads the memory value into)
+ * is extended into the register of the returned value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data, read by imm_set_value()
+ *        with bytes_dst
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                 bytes_dst);
+    Imm imm_src;
+    uint32 reg_no_tmp;
+
+    /* materialize the immediate operand in a free register */
+    imm_set_value(imm_src, data_src, bytes_dst);
+    reg_no_tmp = mov_imm_to_free_reg(a, imm_src, bytes_dst);
+
+    if (!at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_tmp, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw and with a register operand on memory addressed by
+ * a base register plus an immediate offset
+ *
+ * The and is emitted by at_and(), then RAX (the register at_and() loads
+ * the memory value into) is extended into the register of the returned
+ * value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw and with a register operand on memory addressed by
+ * a base register plus an offset register
+ *
+ * The and is emitted by at_and(), then RAX (the register at_and() loads
+ * the memory value into) is extended into the register of the returned
+ * value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_and_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                 bytes_dst);
+
+    if (!at_and(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or with an immediate operand on memory addressed by
+ * a base register plus an immediate offset
+ *
+ * The immediate is moved into a free register, the or is emitted by
+ * at_or(), and RAX (the register at_or() loads the memory value into)
+ * is extended into the register of the returned value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data, read by imm_set_value()
+ *        with bytes_dst
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                uint32 kind_dst, int32 reg_no_dst,
+                                void *data_src, int32 reg_no_base, int32 offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm_src;
+    uint32 reg_no_tmp;
+
+    /* materialize the immediate operand in a free register */
+    imm_set_value(imm_src, data_src, bytes_dst);
+    reg_no_tmp = mov_imm_to_free_reg(a, imm_src, bytes_dst);
+
+    if (!at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_tmp, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or with an immediate operand on memory addressed by
+ * a base register plus an offset register
+ *
+ * The immediate is moved into a free register, the or is emitted by
+ * at_or(), and RAX (the register at_or() loads the memory value into)
+ * is extended into the register of the returned value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data, read by imm_set_value()
+ *        with bytes_dst
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, int32 reg_no_dst, void *data_src,
+                              int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                 bytes_dst);
+    Imm imm_src;
+    uint32 reg_no_tmp;
+
+    /* materialize the immediate operand in a free register */
+    imm_set_value(imm_src, data_src, bytes_dst);
+    reg_no_tmp = mov_imm_to_free_reg(a, imm_src, bytes_dst);
+
+    if (!at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_tmp, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or with a register operand on memory addressed by
+ * a base register plus an immediate offset
+ *
+ * The or is emitted by at_or(), then RAX (the register at_or() loads
+ * the memory value into) is extended into the register of the returned
+ * value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                              uint32 kind_dst, int32 reg_no_dst,
+                              int32 reg_no_src, int32 reg_no_base, int32 offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw or with a register operand on memory addressed by
+ * a base register plus an offset register
+ *
+ * The or is emitted by at_or(), then RAX (the register at_or() loads
+ * the memory value into) is extended into the register of the returned
+ * value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_or_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                            uint32 kind_dst, int32 reg_no_dst, int32 reg_no_src,
+                            int32 reg_no_base, int32 reg_no_offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                 bytes_dst);
+
+    if (!at_or(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xor with an immediate operand on memory addressed by
+ * a base register plus an immediate offset
+ *
+ * The immediate is moved into a free register, the xor is emitted by
+ * at_xor(), and RAX (the register at_xor() loads the memory value into)
+ * is extended into the register of the returned value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data, read by imm_set_value()
+ *        with bytes_dst
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_imm_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                                 uint32 kind_dst, int32 reg_no_dst,
+                                 void *data_src, int32 reg_no_base,
+                                 int32 offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], offset, bytes_dst);
+    Imm imm_src;
+    uint32 reg_no_tmp;
+
+    /* materialize the immediate operand in a free register */
+    imm_set_value(imm_src, data_src, bytes_dst);
+    reg_no_tmp = mov_imm_to_free_reg(a, imm_src, bytes_dst);
+
+    if (!at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_tmp, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xor with an immediate operand on memory addressed by
+ * a base register plus an offset register
+ *
+ * The immediate is moved into a free register, the xor is emitted by
+ * at_xor(), and RAX (the register at_xor() loads the memory value into)
+ * is extended into the register of the returned value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param data_src pointer to the immediate data, read by imm_set_value()
+ *        with bytes_dst
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_imm_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               void *data_src, int32 reg_no_base,
+                               int32 reg_no_offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                 bytes_dst);
+    Imm imm_src;
+    uint32 reg_no_tmp;
+
+    /* materialize the immediate operand in a free register */
+    imm_set_value(imm_src, data_src, bytes_dst);
+    reg_no_tmp = mov_imm_to_free_reg(a, imm_src, bytes_dst);
+
+    if (!at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_tmp, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xor with a register operand on memory addressed by
+ * a base register plus an immediate offset
+ *
+ * The xor is emitted by at_xor(), then RAX (the register at_xor() loads
+ * the memory value into) is extended into the register of the returned
+ * value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param offset the offset address of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_r_base_r_offset_imm(x86::Assembler &a, uint32 bytes_dst,
+                               uint32 kind_dst, int32 reg_no_dst,
+                               int32 reg_no_src, int32 reg_no_base,
+                               int32 offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], offset, bytes_dst);
+
+    if (!at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode atomic rmw xor with a register operand on memory addressed by
+ * a base register plus an offset register
+ *
+ * The xor is emitted by at_xor(), then RAX (the register at_xor() loads
+ * the memory value into) is extended into the register of the returned
+ * value.
+ *
+ * @param a the assembler to emit the code
+ * @param bytes_dst the bytes number of the data actually operated on,
+ *        could be 1(byte), 2(short), 4(int32), 8(int64)
+ * @param kind_dst the kind of the data, could be I32, I64
+ * @param reg_no_dst the no of register that stores the returned value
+ * @param reg_no_src the no of register that stores the src data
+ * @param reg_no_base the no of register that stores the base address
+ *        of the memory
+ * @param reg_no_offset the no of register that stores the offset of the memory
+ *
+ * @return true if success, false otherwise
+ */
+static bool
+at_rmw_xor_r_base_r_offset_r(x86::Assembler &a, uint32 bytes_dst,
+                             uint32 kind_dst, int32 reg_no_dst,
+                             int32 reg_no_src, int32 reg_no_base,
+                             int32 reg_no_offset)
+{
+    x86::Mem mem(regs_i64[reg_no_base], regs_i64[reg_no_offset], 0, 0,
+                 bytes_dst);
+
+    if (!at_xor(a, bytes_dst, kind_dst, reg_no_dst, reg_no_src, mem))
+        return false;
+
+    return extend_r_to_r(a, bytes_dst, kind_dst, REG_RAX_IDX, reg_no_dst);
+}
+
+/**
+ * Encode insn rmw RMW_type r0, r1, r2, r3
+ *
+ * Operand roles as used by the expansion: r0 is the register of the
+ * read/returned value, r1 the source operand (const or register), r2 the
+ * memory base address (must not be const), r3 the offset (an I32 const or
+ * an I64 register). Depending on which of r1/r3 are const, one of the four
+ * at_rmw_<bin_op>_{imm,r}_base_r_offset_{imm,r} encoders is called, and
+ * GOTO_FAIL is executed when the encoder returns false.
+ *
+ * The expansion uses cc, a and r0..r3 of the enclosing scope.
+ *
+ * @param bin_op the operation, can be add/sub/xchg/and/or/xor
+ * @param kind the data kind, can only be I32 or I64
+ * @param type the C type of the local that holds a const source operand
+ *        (the callers pass int32 with I32 and int64 with I64)
+ * @param bytes_dst the byte number of dst data
+ */
+#define AT_RMW_R_R_R_R(bin_op, kind, type, bytes_dst) \
+ do { \
+ type data_src = 0; \
+ int32 reg_no_dst = 0, reg_no_src = 0, reg_no_base = 0, \
+ reg_no_offset = 0; \
+ int32 offset = 0; \
+ bool _ret = false; \
+ if (jit_reg_is_const(r3)) { \
+ CHECK_KIND(r3, JIT_REG_KIND_I32); \
+ } \
+ else { \
+ CHECK_KIND(r3, JIT_REG_KIND_I64); \
+ } \
+ /* r0: read/return value r2: memory base addr can't be const */ \
+ /* already check it's not const in LOAD_4ARGS(); */ \
+ reg_no_dst = jit_reg_no(r0); \
+ CHECK_REG_NO(reg_no_dst, jit_reg_kind(r0)); \
+ /* mem_data base address has to be non-const */ \
+ CHECK_NCONST(r2); \
+ reg_no_base = jit_reg_no(r2); \
+ CHECK_REG_NO(reg_no_base, jit_reg_kind(r2)); \
+ /* r1: source operand value r3: offset can be const */ \
+ if (jit_reg_is_const(r1)) \
+ data_src = jit_cc_get_const_##kind(cc, r1); \
+ else { \
+ reg_no_src = jit_reg_no(r1); \
+ CHECK_REG_NO(reg_no_src, jit_reg_kind(r1)); \
+ } \
+ if (jit_reg_is_const(r3)) \
+ offset = jit_cc_get_const_I32(cc, r3); \
+ else { \
+ reg_no_offset = jit_reg_no(r3); \
+ CHECK_REG_NO(reg_no_offset, jit_reg_kind(r3)); \
+ } \
+ \
+ if (jit_reg_is_const(r1)) { \
+ if (jit_reg_is_const(r3)) \
+ _ret = at_rmw_##bin_op##_imm_base_r_offset_imm( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, &data_src, \
+ reg_no_base, offset); \
+ else \
+ _ret = at_rmw_##bin_op##_imm_base_r_offset_r( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, &data_src, \
+ reg_no_base, reg_no_offset); \
+ } \
+ else { \
+ if (jit_reg_is_const(r3)) \
+ _ret = at_rmw_##bin_op##_r_base_r_offset_imm( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, reg_no_src, \
+ reg_no_base, offset); \
+ else \
+ _ret = at_rmw_##bin_op##_r_base_r_offset_r( \
+ a, bytes_dst, JIT_REG_KIND_##kind, reg_no_dst, reg_no_src, \
+ reg_no_base, reg_no_offset); \
+ } \
+ if (!_ret) \
+ GOTO_FAIL; \
+ } while (0)
+
+/**
+ * Encode insn mfence
+ *
+ * @param a the assembler to emit the code
+ **/
+static void
+fence(x86::Assembler &a)
+{
+ a.mfence();
+}
+
+/**
+ * Encode insn fence: calls fence() with the assembler named a
+ * at the place where the macro is expanded
+ */
+#define FENCE() fence(a)
+
+#endif
+
+bool
+jit_codegen_gen_native(JitCompContext *cc)
+{
+ bool atomic;
+ JitBasicBlock *block;
+ JitInsn *insn;
+ JitReg r0, r1, r2, r3, r4;
+ JmpInfo jmp_info_head;
+ bh_list *jmp_info_list = (bh_list *)&jmp_info_head;
+ uint32 label_index, label_num, i;
+ uint32 *label_offsets = NULL, code_size;
+#if CODEGEN_DUMP != 0
+ uint32 code_offset = 0;
+#endif
+ bool return_value = false, is_last_insn;
+ void **jitted_addr;
+ char *code_buf, *stream;
+
+ JitErrorHandler err_handler;
+ Environment env(Arch::kX64);
+ CodeHolder code;
+ code.init(env);
+ code.setErrorHandler(&err_handler);
+ x86::Assembler a(&code);
+
+ if (BH_LIST_SUCCESS != bh_list_init(jmp_info_list)) {
+ jit_set_last_error(cc, "init jmp info list failed");
+ return false;
+ }
+
+ label_num = jit_cc_label_num(cc);
+
+ if (!(label_offsets =
+ (uint32 *)jit_calloc(((uint32)sizeof(uint32)) * label_num))) {
+ jit_set_last_error(cc, "allocate memory failed");
+ goto fail;
+ }
+
+ for (i = 0; i < label_num; i++) {
+ if (i == 0)
+ label_index = 0;
+ else if (i == label_num - 1)
+ label_index = 1;
+ else
+ label_index = i + 1;
+
+ label_offsets[label_index] = code.sectionById(0)->buffer().size();
+
+ block = *jit_annl_basic_block(
+ cc, jit_reg_new(JIT_REG_KIND_L32, label_index));
+
+#if CODEGEN_DUMP != 0
+ os_printf("\nL%d:\n\n", label_index);
+#endif
+
+ JIT_FOREACH_INSN(block, insn)
+ {
+ is_last_insn = (insn->next == block) ? true : false;
+
+#if CODEGEN_DUMP != 0
+ os_printf("\n");
+ jit_dump_insn(cc, insn);
+#endif
+ switch (insn->opcode) {
+ case JIT_OP_MOV:
+ LOAD_2ARGS();
+ if (!lower_mov(cc, a, r0, r1))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_I8TOI32:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I32, i32, i8, int8);
+ break;
+
+ case JIT_OP_I8TOI64:
+ LOAD_2ARGS();
+ CONVERT_R_R(I64, I32, i64, i8, int8);
+ break;
+
+ case JIT_OP_I16TOI32:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I32, i32, i16, int16);
+ break;
+
+ case JIT_OP_I16TOI64:
+ LOAD_2ARGS();
+ CONVERT_R_R(I64, I32, i64, i16, int16);
+ break;
+
+ case JIT_OP_I32TOI8:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I32, i8, i32, int32);
+ break;
+
+ case JIT_OP_I32TOU8:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I32, u8, i32, int32);
+ break;
+
+ case JIT_OP_I32TOI16:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I32, i16, i32, int32);
+ break;
+
+ case JIT_OP_I32TOU16:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I32, u16, i32, int32);
+ break;
+
+ case JIT_OP_I32TOI64:
+ LOAD_2ARGS();
+ CONVERT_R_R(I64, I32, i64, i32, int32);
+ break;
+
+ case JIT_OP_U32TOI64:
+ LOAD_2ARGS();
+ CONVERT_R_R(I64, I32, i64, u32, int32);
+ break;
+
+ case JIT_OP_I32TOF32:
+ LOAD_2ARGS();
+ CONVERT_R_R(F32, I32, f32, i32, int32);
+ break;
+
+ case JIT_OP_U32TOF32:
+ LOAD_2ARGS();
+ CONVERT_R_R(F32, I32, f32, u32, uint32);
+ break;
+
+ case JIT_OP_I32TOF64:
+ LOAD_2ARGS();
+ CONVERT_R_R(F64, I32, f64, i32, int32);
+ break;
+
+ case JIT_OP_U32TOF64:
+ LOAD_2ARGS();
+ CONVERT_R_R(F64, I32, f64, u32, uint32);
+ break;
+
+ case JIT_OP_I64TOI8:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I64, i8, i64, int64);
+ break;
+
+ case JIT_OP_I64TOI16:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I64, i16, i64, int64);
+ break;
+
+ case JIT_OP_I64TOI32:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, I64, i32, i64, int64);
+ break;
+
+ case JIT_OP_I64TOF32:
+ LOAD_2ARGS();
+ CONVERT_R_R(F32, I64, f32, i64, int64);
+ break;
+
+ case JIT_OP_I64TOF64:
+ LOAD_2ARGS();
+ CONVERT_R_R(F64, I64, f64, i64, int64);
+ break;
+
+ case JIT_OP_F32TOI32:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, F32, i32, f32, float32);
+ break;
+
+ case JIT_OP_F32TOI64:
+ LOAD_2ARGS();
+ CONVERT_R_R(I64, F32, i64, f32, float32);
+ break;
+
+ case JIT_OP_F32TOF64:
+ LOAD_2ARGS();
+ CONVERT_R_R(F64, F32, f64, f32, float32);
+ break;
+
+ case JIT_OP_F32TOU32:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, F32, u32, f32, float32);
+ break;
+
+ case JIT_OP_F64TOI32:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, F64, i32, f64, float64);
+ break;
+
+ case JIT_OP_F64TOI64:
+ LOAD_2ARGS();
+ CONVERT_R_R(I64, F64, i64, f64, float64);
+ break;
+
+ case JIT_OP_F64TOF32:
+ LOAD_2ARGS();
+ CONVERT_R_R(F32, F64, f32, f64, float64);
+ break;
+
+ case JIT_OP_F64TOU32:
+ LOAD_2ARGS();
+ CONVERT_R_R(I32, F64, u32, f64, float64);
+ break;
+
+ case JIT_OP_NEG:
+ LOAD_2ARGS();
+ if (!lower_neg(cc, a, r0, r1))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_ADD:
+ case JIT_OP_SUB:
+ case JIT_OP_MUL:
+ case JIT_OP_DIV_S:
+ case JIT_OP_REM_S:
+ case JIT_OP_DIV_U:
+ case JIT_OP_REM_U:
+ LOAD_3ARGS();
+ if (!lower_alu(cc, a,
+ (ALU_OP)(ADD + (insn->opcode - JIT_OP_ADD)),
+ r0, r1, r2))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_SHL:
+ case JIT_OP_SHRS:
+ case JIT_OP_SHRU:
+ case JIT_OP_ROTL:
+ case JIT_OP_ROTR:
+ LOAD_3ARGS();
+ if (!lower_shift(
+ cc, a,
+ (SHIFT_OP)(SHL + (insn->opcode - JIT_OP_SHL)), r0,
+ r1, r2))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_OR:
+ case JIT_OP_XOR:
+ case JIT_OP_AND:
+ LOAD_3ARGS();
+ if (!lower_bit(cc, a,
+ (BIT_OP)(OR + (insn->opcode - JIT_OP_OR)),
+ r0, r1, r2))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_CLZ:
+ case JIT_OP_CTZ:
+ case JIT_OP_POPCNT:
+ LOAD_2ARGS();
+ if (!lower_bitcount(
+ cc, a,
+ (BITCOUNT_OP)(CLZ + (insn->opcode - JIT_OP_CLZ)),
+ r0, r1))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_CMP:
+ LOAD_3ARGS();
+ if (!lower_cmp(cc, a, r0, r1, r2))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_SELECTEQ:
+ case JIT_OP_SELECTNE:
+ case JIT_OP_SELECTGTS:
+ case JIT_OP_SELECTGES:
+ case JIT_OP_SELECTLTS:
+ case JIT_OP_SELECTLES:
+ case JIT_OP_SELECTGTU:
+ case JIT_OP_SELECTGEU:
+ case JIT_OP_SELECTLTU:
+ case JIT_OP_SELECTLEU:
+ LOAD_4ARGS();
+ if (!lower_select(
+ cc, a,
+ (COND_OP)(EQ + (insn->opcode - JIT_OP_SELECTEQ)),
+ r0, r1, r2, r3))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_LDEXECENV:
+ LOAD_1ARG();
+ CHECK_KIND(r0, JIT_REG_KIND_I32);
+ /* TODO */
+ break;
+
+ case JIT_OP_LDJITINFO:
+ LOAD_1ARG();
+ CHECK_KIND(r0, JIT_REG_KIND_I32);
+ /* TODO */
+ break;
+
+ case JIT_OP_LDI8:
+ LOAD_3ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ LD_R_R_R(I32, 1, true);
+ else
+ LD_R_R_R(I64, 1, true);
+ break;
+
+ case JIT_OP_LDU8:
+ LOAD_3ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ LD_R_R_R(I32, 1, false);
+ else
+ LD_R_R_R(I64, 1, false);
+ break;
+
+ case JIT_OP_LDI16:
+ LOAD_3ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ LD_R_R_R(I32, 2, true);
+ else
+ LD_R_R_R(I64, 2, true);
+ break;
+
+ case JIT_OP_LDU16:
+ LOAD_3ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ LD_R_R_R(I32, 2, false);
+ else
+ LD_R_R_R(I64, 2, false);
+ break;
+
+ case JIT_OP_LDI32:
+ LOAD_3ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ LD_R_R_R(I32, 4, true);
+ else
+ LD_R_R_R(I64, 4, true);
+ break;
+
+ case JIT_OP_LDU32:
+ LOAD_3ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ LD_R_R_R(I32, 4, false);
+ else
+ LD_R_R_R(I64, 4, false);
+ break;
+
+ case JIT_OP_LDI64:
+ case JIT_OP_LDU64:
+ case JIT_OP_LDPTR:
+ LOAD_3ARGS();
+ LD_R_R_R(I64, 8, false);
+ break;
+
+ case JIT_OP_LDF32:
+ LOAD_3ARGS();
+ LD_R_R_R(F32, 4, false);
+ break;
+
+ case JIT_OP_LDF64:
+ LOAD_3ARGS();
+ LD_R_R_R(F64, 8, false);
+ break;
+
+ case JIT_OP_STI8:
+ LOAD_3ARGS_NO_ASSIGN();
+ atomic = insn->flags_u8 & 0x1;
+ ST_R_R_R(I32, int32, 1, atomic);
+ break;
+
+ case JIT_OP_STI16:
+ LOAD_3ARGS_NO_ASSIGN();
+ atomic = insn->flags_u8 & 0x1;
+ ST_R_R_R(I32, int32, 2, atomic);
+ break;
+
+ case JIT_OP_STI32:
+ LOAD_3ARGS_NO_ASSIGN();
+ atomic = insn->flags_u8 & 0x1;
+ ST_R_R_R(I32, int32, 4, atomic);
+ break;
+
+ case JIT_OP_STI64:
+ LOAD_3ARGS_NO_ASSIGN();
+ atomic = insn->flags_u8 & 0x1;
+ ST_R_R_R(I64, int64, 8, atomic);
+ break;
+
+ case JIT_OP_STPTR:
+ LOAD_3ARGS_NO_ASSIGN();
+ ST_R_R_R(I64, int64, 8, false);
+ break;
+
+ case JIT_OP_STF32:
+ LOAD_3ARGS_NO_ASSIGN();
+ ST_R_R_R(F32, float32, 4, false);
+ break;
+
+ case JIT_OP_STF64:
+ LOAD_3ARGS_NO_ASSIGN();
+ ST_R_R_R(F64, float64, 8, false);
+ break;
+
+ case JIT_OP_JMP:
+ LOAD_1ARG();
+ CHECK_KIND(r0, JIT_REG_KIND_L32);
+ if (!(is_last_insn
+ && label_is_neighboring(cc, label_index,
+ jit_reg_no(r0))))
+ JMP_TO_LABEL(jit_reg_no(r0), label_index);
+ break;
+
+ case JIT_OP_BEQ:
+ case JIT_OP_BNE:
+ case JIT_OP_BGTS:
+ case JIT_OP_BGES:
+ case JIT_OP_BLTS:
+ case JIT_OP_BLES:
+ case JIT_OP_BGTU:
+ case JIT_OP_BGEU:
+ case JIT_OP_BLTU:
+ case JIT_OP_BLEU:
+ LOAD_3ARGS();
+ if (!lower_branch(
+ cc, a, jmp_info_list, label_index,
+ (COND_OP)(EQ + (insn->opcode - JIT_OP_BEQ)), r0, r1,
+ r2, is_last_insn))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_LOOKUPSWITCH:
+ {
+ JitOpndLookupSwitch *opnd = jit_insn_opndls(insn);
+ if (!lower_lookupswitch(cc, a, jmp_info_list, label_offsets,
+ label_index, opnd, is_last_insn))
+ GOTO_FAIL;
+ break;
+ }
+
+ case JIT_OP_CALLNATIVE:
+ if (!lower_callnative(cc, a, insn))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_CALLBC:
+ if (!lower_callbc(cc, a, jmp_info_list, label_index, insn))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_RETURNBC:
+ if (!lower_returnbc(cc, a, insn))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_RETURN:
+ if (!lower_return(cc, a, insn))
+ GOTO_FAIL;
+ break;
+
+ case JIT_OP_I32CASTF32:
+ LOAD_2ARGS();
+ CAST_R_R(F32, I32, f32, i32, int32);
+ break;
+
+ case JIT_OP_I64CASTF64:
+ LOAD_2ARGS();
+ CAST_R_R(F64, I64, f64, i64, int64);
+ break;
+
+ case JIT_OP_F32CASTI32:
+ LOAD_2ARGS();
+ CAST_R_R(I32, F32, i32, f32, float);
+ break;
+
+ case JIT_OP_F64CASTI64:
+ LOAD_2ARGS();
+ CAST_R_R(I64, F64, i64, f64, double);
+ break;
+
+#if WASM_ENABLE_SHARED_MEMORY != 0
+ case JIT_OP_AT_CMPXCHGU8:
+ LOAD_4ARGS_NO_ASSIGN();
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ CMPXCHG_R_R_R_R_R(I32, int32, 1);
+ else
+ CMPXCHG_R_R_R_R_R(I64, int64, 1);
+ break;
+
+ case JIT_OP_AT_CMPXCHGU16:
+ LOAD_4ARGS_NO_ASSIGN();
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ CMPXCHG_R_R_R_R_R(I32, int32, 2);
+ else
+ CMPXCHG_R_R_R_R_R(I64, int64, 2);
+ break;
+
+ case JIT_OP_AT_CMPXCHGI32:
+ LOAD_4ARGS_NO_ASSIGN();
+ CMPXCHG_R_R_R_R_R(I32, int32, 4);
+ break;
+
+ case JIT_OP_AT_CMPXCHGU32:
+ LOAD_4ARGS_NO_ASSIGN();
+ CMPXCHG_R_R_R_R_R(I64, int32, 4);
+ break;
+
+ case JIT_OP_AT_CMPXCHGI64:
+ LOAD_4ARGS_NO_ASSIGN();
+ CMPXCHG_R_R_R_R_R(I64, int64, 8);
+ break;
+
+ case JIT_OP_AT_ADDU8:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(add, I32, int32, 1);
+ else
+ AT_RMW_R_R_R_R(add, I64, int64, 1);
+ break;
+
+ case JIT_OP_AT_ADDU16:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(add, I32, int32, 2);
+ else
+ AT_RMW_R_R_R_R(add, I64, int64, 2);
+ break;
+
+ case JIT_OP_AT_ADDI32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(add, I32, int32, 4);
+ break;
+
+ case JIT_OP_AT_ADDU32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(add, I64, int64, 4);
+ break;
+
+ case JIT_OP_AT_ADDI64:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(add, I64, int64, 8);
+ break;
+
+ case JIT_OP_AT_SUBU8:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(sub, I32, int32, 1);
+ else
+ AT_RMW_R_R_R_R(sub, I64, int64, 1);
+ break;
+
+ case JIT_OP_AT_SUBU16:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(sub, I32, int32, 2);
+ else
+ AT_RMW_R_R_R_R(sub, I64, int64, 2);
+ break;
+
+ case JIT_OP_AT_SUBI32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(sub, I32, int32, 4);
+ break;
+
+ case JIT_OP_AT_SUBU32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(sub, I64, int64, 4);
+ break;
+
+ case JIT_OP_AT_SUBI64:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(sub, I64, int64, 8);
+ break;
+
+ case JIT_OP_AT_XCHGU8:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(xchg, I32, int32, 1);
+ else
+ AT_RMW_R_R_R_R(xchg, I64, int64, 1);
+ break;
+
+ case JIT_OP_AT_XCHGU16:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(xchg, I32, int32, 2);
+ else
+ AT_RMW_R_R_R_R(xchg, I64, int64, 2);
+ break;
+
+ case JIT_OP_AT_XCHGI32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(xchg, I32, int32, 4);
+ break;
+
+ case JIT_OP_AT_XCHGU32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(xchg, I64, int64, 4);
+ break;
+
+ case JIT_OP_AT_XCHGI64:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(xchg, I64, int64, 8);
+ break;
+
+ case JIT_OP_AT_ANDU8:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(and, I32, int32, 1);
+ else
+ AT_RMW_R_R_R_R(and, I64, int64, 1);
+ break;
+
+ case JIT_OP_AT_ANDU16:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(and, I32, int32, 2);
+ else
+ AT_RMW_R_R_R_R(and, I64, int64, 2);
+ break;
+
+ case JIT_OP_AT_ANDI32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(and, I32, int32, 4);
+ break;
+
+ case JIT_OP_AT_ANDU32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(and, I64, int64, 4);
+ break;
+
+ case JIT_OP_AT_ANDI64:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(and, I64, int64, 8);
+ break;
+
+ case JIT_OP_AT_ORU8:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(or, I32, int32, 1);
+ else
+ AT_RMW_R_R_R_R(or, I64, int64, 1);
+ break;
+
+ case JIT_OP_AT_ORU16:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(or, I32, int32, 2);
+ else
+ AT_RMW_R_R_R_R(or, I64, int64, 2);
+ break;
+
+ case JIT_OP_AT_ORI32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(or, I32, int32, 4);
+ break;
+
+ case JIT_OP_AT_ORU32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(or, I64, int64, 4);
+ break;
+
+ case JIT_OP_AT_ORI64:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(or, I64, int64, 8);
+ break;
+
+ case JIT_OP_AT_XORU8:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(xor, I32, int32, 1);
+ else
+ AT_RMW_R_R_R_R(xor, I64, int64, 1);
+ break;
+
+ case JIT_OP_AT_XORU16:
+ LOAD_4ARGS();
+ bh_assert(jit_reg_kind(r0) == JIT_REG_KIND_I32
+ || jit_reg_kind(r0) == JIT_REG_KIND_I64);
+ if (jit_reg_kind(r0) == JIT_REG_KIND_I32)
+ AT_RMW_R_R_R_R(xor, I32, int32, 2);
+ else
+ AT_RMW_R_R_R_R(xor, I64, int64, 2);
+ break;
+
+ case JIT_OP_AT_XORI32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(xor, I32, int32, 4);
+ break;
+
+ case JIT_OP_AT_XORU32:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(xor, I64, int64, 4);
+ break;
+
+ case JIT_OP_AT_XORI64:
+ LOAD_4ARGS();
+ AT_RMW_R_R_R_R(xor, I64, int64, 8);
+ break;
+
+ case JIT_OP_FENCE:
+ FENCE();
+ break;
+
+#endif
+
+ default:
+ jit_set_last_error_v(cc, "unsupported JIT opcode 0x%2x",
+ insn->opcode);
+ GOTO_FAIL;
+ }
+
+ if (err_handler.err) {
+ jit_set_last_error_v(cc,
+ "failed to generate native code for JIT "
+ "opcode 0x%02x, ErrorCode is %u",
+ insn->opcode, err_handler.err);
+ GOTO_FAIL;
+ }
+
+#if CODEGEN_DUMP != 0
+ dump_native((char *)code.sectionById(0)->buffer().data()
+ + code_offset,
+ code.sectionById(0)->buffer().size() - code_offset);
+ code_offset = code.sectionById(0)->buffer().size();
+#endif
+ }
+ }
+
+ code_buf = (char *)code.sectionById(0)->buffer().data();
+ code_size = code.sectionById(0)->buffer().size();
+ if (!(stream = (char *)jit_code_cache_alloc(code_size))) {
+ jit_set_last_error(cc, "allocate memory failed");
+ goto fail;
+ }
+
+ bh_memcpy_s(stream, code_size, code_buf, code_size);
+ cc->jitted_addr_begin = stream;
+ cc->jitted_addr_end = stream + code_size;
+
+ for (i = 0; i < label_num; i++) {
+ if (i == 0)
+ label_index = 0;
+ else if (i == label_num - 1)
+ label_index = 1;
+ else
+ label_index = i + 1;
+
+ jitted_addr = jit_annl_jitted_addr(
+ cc, jit_reg_new(JIT_REG_KIND_L32, label_index));
+ *jitted_addr = stream + label_offsets[label_index];
+ }
+
+ patch_jmp_info_list(cc, jmp_info_list);
+ return_value = true;
+
+fail:
+
+ jit_free(label_offsets);
+ free_jmp_info_list(jmp_info_list);
+ return return_value;
+}
+
+#if WASM_ENABLE_LAZY_JIT != 0 && WASM_ENABLE_JIT != 0
+
+/* Number of integer argument registers used by the call glue below; it
+ matches the six entries of the reg_idx_of_int_args tables
+ (rdi, rsi, rdx, rcx, r8, r9) */
+#define MAX_REG_INTS 6
+/* Number of floating-point argument registers used by the call glue
+ below (regs_float[0] .. regs_float[MAX_REG_FLOATS - 1]) */
+#define MAX_REG_FLOATS 8
+
+/**
+ * Generate the glue code used by fast-jit code to call an LLVM JIT
+ * function of the given type.
+ *
+ * The generated code:
+ * - loads the callee from module_inst->func_ptrs, indexed by rdx
+ * - copies the arguments from the outs area (the lp area of the frame at
+ * exec_env->wasm_stack.s.top) into the integer/float argument registers
+ * and, when the registers are exhausted, into a 16-byte aligned stack
+ * area reserved with `sub rsp`
+ * - passes pointers into the current frame's operand stack for every
+ * result after the first one
+ * - calls the function, then checks module_inst->cur_exception[0]; if it
+ * is non-zero, jumps to code_block_return_to_interp_from_jitted with
+ * eax = JIT_INTERP_ACTION_THROWN
+ * - otherwise moves an integer first result from rax to rdx, advances
+ * cur_frame->sp by the total result size and jumps to the address stored
+ * at jit_frontend_get_jitted_return_addr_offset() inside the frame
+ *
+ * @param func_type the type of the function to call
+ *
+ * @return the code block allocated from the jit code cache, or NULL if
+ * the assembler reported an error or the allocation failed
+ */
+void *
+jit_codegen_compile_call_to_llvm_jit(const WASMType *func_type)
+{
+ const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+ x86::Gp reg_lp = x86::r10, reg_res = x86::r12;
+ x86::Gp reg_tmp_i64 = x86::r11, reg_tmp_i32 = x86::r11d;
+ /* the index of integer argument registers */
+ uint8 reg_idx_of_int_args[] = { REG_RDI_IDX, REG_RSI_IDX, REG_RDX_IDX,
+ REG_RCX_IDX, REG_R8_IDX, REG_R9_IDX };
+ uint32 n_ints = 0, n_fps = 0, n_stacks = 0, n_pushed;
+ uint32 int_reg_idx = 0, fp_reg_idx = 0, stack_arg_idx = 0;
+ uint32 off_to_lp = 0, off_to_res = 0, code_size, i;
+ uint32 param_count = func_type->param_count;
+ uint32 result_count = func_type->result_count;
+ uint32 ext_result_count;
+ char *code_buf, *stream;
+ Imm imm;
+
+ JitErrorHandler err_handler;
+ Environment env(Arch::kX64);
+ CodeHolder code;
+ code.init(env);
+ code.setErrorHandler(&err_handler);
+ x86::Assembler a(&code);
+
+ /* Load the llvm jit function pointer */
+ {
+ /* r11 = exec_env->module_inst */
+ x86::Mem m1(regs_i64[hreg_info->exec_env_hreg_index],
+ (uint32)offsetof(WASMExecEnv, module_inst));
+ a.mov(reg_tmp_i64, m1);
+ /* r11 = module_inst->func_ptrs */
+ x86::Mem m2(reg_tmp_i64,
+ (uint32)offsetof(WASMModuleInstance, func_ptrs));
+ a.mov(reg_tmp_i64, m2);
+ /* rax = func_ptrs[func_idx], func_idx is in rdx */
+ x86::Mem m3(reg_tmp_i64, x86::rdx, 3, 0);
+ a.mov(x86::rax, m3);
+ }
+
+ /* First pass: count how many arguments go to integer registers,
+ float registers and the stack */
+
+ n_ints++; /* exec_env */
+
+ for (i = 0; i < param_count; i++) {
+ switch (func_type->types[i]) {
+ case VALUE_TYPE_I32:
+ case VALUE_TYPE_I64:
+#if WASM_ENABLE_REF_TYPES != 0
+ case VALUE_TYPE_FUNCREF:
+ case VALUE_TYPE_EXTERNREF:
+#endif
+ if (n_ints < MAX_REG_INTS)
+ n_ints++;
+ else
+ n_stacks++;
+ break;
+ case VALUE_TYPE_F32:
+ case VALUE_TYPE_F64:
+ if (n_fps < MAX_REG_FLOATS)
+ n_fps++;
+ else
+ n_stacks++;
+ break;
+ }
+ }
+
+ /* Every result after the first one is returned through a pointer
+ which is passed as an extra integer argument */
+ ext_result_count = result_count > 1 ? result_count - 1 : 0;
+
+ if (ext_result_count > 0) {
+ if (n_ints + ext_result_count <= MAX_REG_INTS) {
+ /* extra result pointers can be stored into int registers */
+ n_ints += ext_result_count;
+ }
+ else {
+ /* part or all extra result pointers must be stored into stack */
+ n_stacks += n_ints + ext_result_count - MAX_REG_INTS;
+ n_ints = MAX_REG_INTS;
+ }
+ }
+
+ n_pushed = n_stacks;
+ if (n_stacks & 1) {
+ /* Align stack on 16 bytes */
+ n_pushed++;
+ }
+ if (n_pushed > 0) {
+ imm.setValue(n_pushed * 8);
+ a.sub(x86::rsp, imm);
+ }
+
+ /* r10 = outs_area->lp */
+ {
+ x86::Mem m(regs_i64[hreg_info->exec_env_hreg_index],
+ (uint32)offsetof(WASMExecEnv, wasm_stack.s.top));
+ a.mov(reg_lp, m);
+ a.add(reg_lp, (uint32)offsetof(WASMInterpFrame, lp));
+ }
+
+ /* rdi = exec_env */
+ a.mov(regs_i64[reg_idx_of_int_args[int_reg_idx++]],
+ regs_i64[hreg_info->exec_env_hreg_index]);
+
+ /* Second pass: emit the moves for each argument; 32-bit values take
+ 4 bytes and 64-bit values take 8 bytes in the lp area, while every
+ stack argument occupies an 8-byte slot */
+ for (i = 0; i < param_count; i++) {
+ x86::Mem m_src(reg_lp, off_to_lp);
+
+ switch (func_type->types[i]) {
+ case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+ case VALUE_TYPE_FUNCREF:
+ case VALUE_TYPE_EXTERNREF:
+#endif
+ {
+ if (int_reg_idx < MAX_REG_INTS) {
+ a.mov(regs_i32[reg_idx_of_int_args[int_reg_idx]], m_src);
+ int_reg_idx++;
+ }
+ else {
+ a.mov(reg_tmp_i32, m_src);
+ x86::Mem m_dst(x86::rsp, stack_arg_idx * 8);
+ a.mov(m_dst, reg_tmp_i32);
+ stack_arg_idx++;
+ }
+ off_to_lp += 4;
+ break;
+ }
+ case VALUE_TYPE_I64:
+ {
+ if (int_reg_idx < MAX_REG_INTS) {
+ a.mov(regs_i64[reg_idx_of_int_args[int_reg_idx]], m_src);
+ int_reg_idx++;
+ }
+ else {
+ a.mov(reg_tmp_i64, m_src);
+ x86::Mem m_dst(x86::rsp, stack_arg_idx * 8);
+ a.mov(m_dst, reg_tmp_i64);
+ stack_arg_idx++;
+ }
+ off_to_lp += 8;
+ break;
+ }
+ case VALUE_TYPE_F32:
+ {
+ if (fp_reg_idx < MAX_REG_FLOATS) {
+ a.movss(regs_float[fp_reg_idx], m_src);
+ fp_reg_idx++;
+ }
+ else {
+ a.mov(reg_tmp_i32, m_src);
+ x86::Mem m_dst(x86::rsp, stack_arg_idx * 8);
+ a.mov(m_dst, reg_tmp_i32);
+ stack_arg_idx++;
+ }
+ off_to_lp += 4;
+ break;
+ }
+ case VALUE_TYPE_F64:
+ {
+ if (fp_reg_idx < MAX_REG_FLOATS) {
+ a.movsd(regs_float[fp_reg_idx], m_src);
+ fp_reg_idx++;
+ }
+ else {
+ a.mov(reg_tmp_i64, m_src);
+ x86::Mem m_dst(x86::rsp, stack_arg_idx * 8);
+ a.mov(m_dst, reg_tmp_i64);
+ stack_arg_idx++;
+ }
+ off_to_lp += 8;
+ break;
+ }
+ }
+ }
+
+ if (result_count > 0) {
+ /* off_to_res starts with the size of the first result, so that
+ the extra results are located right after its slot */
+ switch (func_type->types[param_count]) {
+ case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+ case VALUE_TYPE_FUNCREF:
+ case VALUE_TYPE_EXTERNREF:
+#endif
+ case VALUE_TYPE_F32:
+ off_to_res = 4;
+ break;
+ case VALUE_TYPE_I64:
+ case VALUE_TYPE_F64:
+ off_to_res = 8;
+ break;
+ }
+
+ /* r12 = cur_frame->sp */
+ x86::Mem m(x86::rbp, (uint32)offsetof(WASMInterpFrame, sp));
+ a.mov(reg_res, m);
+
+ /* Pass the address of each extra result slot as an argument */
+ for (i = 0; i < ext_result_count; i++) {
+ x86::Mem m(reg_res, off_to_res);
+
+ if (int_reg_idx < MAX_REG_INTS) {
+ a.lea(regs_i64[reg_idx_of_int_args[int_reg_idx]], m);
+ int_reg_idx++;
+ }
+ else {
+ a.lea(reg_tmp_i64, m);
+ x86::Mem m_dst(x86::rsp, stack_arg_idx * 8);
+ a.mov(m_dst, reg_tmp_i64);
+ stack_arg_idx++;
+ }
+
+ switch (func_type->types[param_count + 1 + i]) {
+ case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+ case VALUE_TYPE_FUNCREF:
+ case VALUE_TYPE_EXTERNREF:
+#endif
+ case VALUE_TYPE_F32:
+ off_to_res += 4;
+ break;
+ case VALUE_TYPE_I64:
+ case VALUE_TYPE_F64:
+ off_to_res += 8;
+ break;
+ }
+ }
+ }
+
+ /* The two passes must agree on the argument placement */
+ bh_assert(int_reg_idx == n_ints);
+ bh_assert(fp_reg_idx == n_fps);
+ bh_assert(stack_arg_idx == n_stacks);
+
+ /* Call the llvm jit function */
+ a.call(x86::rax);
+
+ /* Release the stack argument area right after the call, so that rsp
+ is restored on both the normal path and the exception path below:
+ code_block_return_to_interp_from_jitted starts popping registers
+ from rsp and must not see the argument area. `add` only changes
+ rsp and the flags, and the flags are set again by the `cmp` below */
+ if (n_pushed > 0) {
+ imm.setValue(n_pushed * 8);
+ a.add(x86::rsp, imm);
+ }
+
+ /* Check if there was exception thrown */
+ {
+ /* r11 = exec_env->module_inst */
+ x86::Mem m1(regs_i64[hreg_info->exec_env_hreg_index],
+ (uint32)offsetof(WASMExecEnv, module_inst));
+ a.mov(reg_tmp_i64, m1);
+ /* module_inst->cur_exception */
+ x86::Mem m2(reg_tmp_i64,
+ (uint32)offsetof(WASMModuleInstance, cur_exception));
+ /* bl = module_inst->cur_exception[0] */
+ a.mov(x86::bl, m2);
+
+ /* cur_exception[0] == 0 ? */
+ Imm imm((uint8)0);
+ a.cmp(x86::bl, imm);
+ /* If yes, jump to `Get function result and return`, the
+ INT32_MAX offset is a placeholder which is patched below */
+ imm.setValue(INT32_MAX);
+ a.je(imm);
+
+ /* End of the je instruction, its rel32 field is the last 4 bytes */
+ char *stream = (char *)a.code()->sectionById(0)->buffer().data()
+ + a.code()->sectionById(0)->buffer().size();
+
+ /* If no, set eax to JIT_INTERP_ACTION_THROWN, and
+ jump to code_block_return_to_interp_from_jitted to
+ return to interpreter */
+ imm.setValue(JIT_INTERP_ACTION_THROWN);
+ a.mov(x86::eax, imm);
+ imm.setValue(code_block_return_to_interp_from_jitted);
+ a.mov(x86::rsi, imm);
+ a.jmp(x86::rsi);
+
+ char *stream_new = (char *)a.code()->sectionById(0)->buffer().data()
+ + a.code()->sectionById(0)->buffer().size();
+
+ /* Patch the offset of je instruction */
+ *(int32 *)(stream - 4) = (int32)(stream_new - stream);
+ }
+
+ /* Get function result and return */
+
+ /* An integer first result is expected in rdx by the caller */
+ if (result_count > 0 && func_type->types[param_count] != VALUE_TYPE_F32
+ && func_type->types[param_count] != VALUE_TYPE_F64) {
+ a.mov(x86::rdx, x86::rax);
+ }
+
+ if (off_to_res > 0) {
+ imm.setValue(off_to_res);
+ a.add(reg_res, imm);
+ /* cur_frame->sp = r12 */
+ x86::Mem m(x86::rbp, (uint32)offsetof(WASMInterpFrame, sp));
+ a.mov(m, reg_res);
+ }
+
+ /* Return to the caller */
+ {
+ /* eax = action = JIT_INTERP_ACTION_NORMAL */
+ Imm imm(0);
+ a.mov(x86::eax, imm);
+
+ uint32 jitted_return_addr_offset =
+ jit_frontend_get_jitted_return_addr_offset();
+ x86::Mem m(x86::rbp, jitted_return_addr_offset);
+ a.jmp(m);
+ }
+
+ if (err_handler.err)
+ return NULL;
+
+ /* Copy the generated code into the jit code cache */
+ code_buf = (char *)code.sectionById(0)->buffer().data();
+ code_size = code.sectionById(0)->buffer().size();
+ stream = (char *)jit_code_cache_alloc(code_size);
+ if (!stream)
+ return NULL;
+
+ bh_memcpy_s(stream, code_size, code_buf, code_size);
+
+#if 0
+ dump_native(stream, code_size);
+#endif
+
+ return stream;
+}
+
+/**
+ * Allocate the frame that receives the results of a fast jit function
+ * which is about to be called, and make it the current frame.
+ *
+ * @param exec_env the execution environment
+ * @param param_cell_num the cell num of the callee's parameters
+ * @param ret_cell_num the cell num of the callee's results
+ *
+ * @return the allocated frame, or NULL with the exception
+ * "wasm operand stack overflow" set if there is not enough space
+ */
+static WASMInterpFrame *
+fast_jit_alloc_frame(WASMExecEnv *exec_env, uint32 param_cell_num,
+ uint32 ret_cell_num)
+{
+ uint32 result_frame_size = wasm_interp_interp_frame_size(ret_cell_num);
+ uint32 callee_frame_size = wasm_interp_interp_frame_size(param_cell_num);
+ uint8 *stack_top = (uint8 *)exec_env->wasm_stack.s.top;
+ WASMInterpFrame *result_frame;
+
+ /* Two frames must fit: the implied frame which stores the results of
+ the jit function, and the frame of the jit function itself. Only
+ the first one is allocated here. */
+ if (stack_top + result_frame_size + callee_frame_size
+ > exec_env->wasm_stack.s.top_boundary) {
+ wasm_set_exception((WASMModuleInstance *)exec_env->module_inst,
+ "wasm operand stack overflow");
+ return NULL;
+ }
+
+ /* Carve the result frame from the top of the wasm stack */
+ result_frame = (WASMInterpFrame *)exec_env->wasm_stack.s.top;
+ exec_env->wasm_stack.s.top += result_frame_size;
+
+ /* The frame has no function and no bytecode, its operand stack
+ is empty and it returns to the interpreter */
+ result_frame->function = NULL;
+ result_frame->ip = NULL;
+ result_frame->sp = result_frame->lp;
+ result_frame->prev_frame = wasm_exec_env_get_cur_frame(exec_env);
+ result_frame->jitted_return_addr =
+ (uint8 *)code_block_return_to_interp_from_jitted;
+
+ wasm_exec_env_set_cur_frame(exec_env, result_frame);
+ return result_frame;
+}
+
+/**
+ * Generate the glue code used to call the fast jit function func_idx
+ * of the module with native-style arguments.
+ *
+ * The generated code:
+ * - reserves space for a JitInterpSwitchInfo and saves the six integer
+ * argument registers on the stack
+ * - calls fast_jit_alloc_frame to allocate the frame which receives the
+ * results, and returns immediately if the allocation failed
+ * - copies the arguments from the saved integer registers, the float
+ * registers and the caller's stack arguments into the lp area located
+ * wasm_interp_interp_frame_size(ret_cell_num) bytes after the
+ * allocated frame
+ * - calls code_block_switch_to_jitted_from_interp with the function
+ * pointer read from module_inst->fast_jit_func_ptrs
+ * - moves an integer first result from rdx to rax and copies the extra
+ * results through the result pointers passed after the parameters
+ * - frees the allocated frame, restores the registers and returns
+ *
+ * @param module the wasm module
+ * @param func_idx the index of the function, import functions included
+ *
+ * @return the code block allocated from the jit code cache, or NULL if
+ * the assembler reported an error or the allocation failed
+ */
+void *
+jit_codegen_compile_call_to_fast_jit(const WASMModule *module, uint32 func_idx)
+{
+ uint32 func_idx_non_import = func_idx - module->import_function_count;
+ WASMType *func_type = module->functions[func_idx_non_import]->func_type;
+ /* the index of integer argument registers */
+ uint8 reg_idx_of_int_args[] = { REG_RDI_IDX, REG_RSI_IDX, REG_RDX_IDX,
+ REG_RCX_IDX, REG_R8_IDX, REG_R9_IDX };
+ uint32 int_reg_idx, fp_reg_idx, stack_arg_idx;
+ uint32 switch_info_offset, exec_env_offset, stack_arg_offset;
+ uint32 int_reg_offset, frame_lp_offset;
+ uint32 switch_info_size, code_size, i;
+ uint32 param_count = func_type->param_count;
+ uint32 result_count = func_type->result_count;
+ uint32 ext_result_count = result_count > 1 ? result_count - 1 : 0;
+ uint32 param_cell_num = func_type->param_cell_num;
+ /* At least 2 cells are always reserved for the results */
+ uint32 ret_cell_num =
+ func_type->ret_cell_num > 2 ? func_type->ret_cell_num : 2;
+ char *code_buf, *stream;
+ Imm imm;
+
+ JitErrorHandler err_handler;
+ Environment env(Arch::kX64);
+ CodeHolder code;
+ code.init(env);
+ code.setErrorHandler(&err_handler);
+ x86::Assembler a(&code);
+
+ /**
+ * Push JitInterpSwitchInfo and make stack 16-byte aligned:
+ * the size pushed must be odd multiples of 8, as the stack pointer
+ * %rsp must be aligned to a 16-byte boundary before making a call,
+ * and when a function (including this llvm jit function) gets
+ * control, the %rsp is not 16-byte aligned (call instruction will
+ * push the ret address to stack).
+ */
+ switch_info_size = align_uint((uint32)sizeof(JitInterpSwitchInfo), 16) + 8;
+ imm.setValue((uint64)switch_info_size);
+ a.sub(x86::rsp, imm);
+
+ /* Push all integer argument registers since we will use them as
+ temporarily registers to load/store data; they are pushed in
+ reverse order so that rdi ends up at the lowest address */
+ for (i = 0; i < MAX_REG_INTS; i++) {
+ a.push(regs_i64[reg_idx_of_int_args[MAX_REG_INTS - 1 - i]]);
+ }
+
+ /* We don't push float/double register since we don't use them here */
+ /* NOTE(review): the float argument registers stay live across the
+ call to fast_jit_alloc_frame below, which presumably relies on
+ that function not touching them - confirm */
+
+ /**
+ * Layout of the stack now:
+ * stack arguments
+ * ret address of the caller
+ * switch info
+ * int registers: r9, r8, rcx, rdx, rsi
+ * exec_env: rdi
+ */
+
+ /* offset of the first stack argument to the stack pointer,
+ add 8 to skip the ret address of the caller */
+ stack_arg_offset = switch_info_size + 8 * MAX_REG_INTS + 8;
+ /* offset of jit interp switch info to the stack pointer */
+ switch_info_offset = 8 * MAX_REG_INTS;
+ /* offset of the first int register to the stack pointer */
+ int_reg_offset = 8;
+ /* offset of exec_env to the stack pointer */
+ exec_env_offset = 0;
+
+ /* Call fast_jit_alloc_frame to allocate the stack frame to
+ receive the results of the fast jit function to call */
+
+ /* rdi = exec_env, has been already set as exec_env is
+ the first argument of LLVM JIT function */
+ /* rsi = param_cell_num */
+ imm.setValue(param_cell_num);
+ a.mov(x86::rsi, imm);
+ /* rdx = ret_cell_num */
+ imm.setValue(ret_cell_num);
+ a.mov(x86::rdx, imm);
+ /* call fast_jit_alloc_frame */
+ imm.setValue((uint64)(uintptr_t)fast_jit_alloc_frame);
+ a.mov(x86::rax, imm);
+ a.call(x86::rax);
+
+ /* Check the return value, note now rax is the allocated frame */
+ {
+ /* Did fast_jit_alloc_frame return NULL? */
+ Imm imm((uint64)0);
+ a.cmp(x86::rax, imm);
+ /* If no, jump to `Copy arguments to frame lp area`, the
+ INT32_MAX offset is a placeholder which is patched below */
+ imm.setValue(INT32_MAX);
+ a.jne(imm);
+
+ /* End of the jne instruction, its rel32 field is the last 4 bytes */
+ char *stream = (char *)a.code()->sectionById(0)->buffer().data()
+ + a.code()->sectionById(0)->buffer().size();
+
+ /* If yes, set eax to 0, return to caller */
+
+ /* Pop all integer argument registers */
+ for (i = 0; i < MAX_REG_INTS; i++) {
+ a.pop(regs_i64[reg_idx_of_int_args[i]]);
+ }
+ /* Pop jit interp switch info */
+ imm.setValue((uint64)switch_info_size);
+ a.add(x86::rsp, imm);
+
+ /* Return to the caller, don't use leave as we didn't
+ `push rbp` and `mov rbp, rsp` */
+ a.ret();
+
+ /* Patch the offset of jne instruction */
+ char *stream_new = (char *)a.code()->sectionById(0)->buffer().data()
+ + a.code()->sectionById(0)->buffer().size();
+ *(int32 *)(stream - 4) = (int32)(stream_new - stream);
+ }
+
+ int_reg_idx = 1; /* skip exec_env */
+ fp_reg_idx = 0;
+ stack_arg_idx = 0;
+
+ /* Offset of the dest arguments to outs area */
+ frame_lp_offset = wasm_interp_interp_frame_size(ret_cell_num)
+ + (uint32)offsetof(WASMInterpFrame, lp);
+
+ /* Copy arguments to frame lp area, rsi/esi is used as the temporary
+ register; each source slot is 8 bytes while the destination
+ advances by 4 or 8 bytes according to the value type */
+ for (i = 0; i < func_type->param_count; i++) {
+ x86::Mem m_dst(x86::rax, frame_lp_offset);
+ switch (func_type->types[i]) {
+ case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+ case VALUE_TYPE_FUNCREF:
+ case VALUE_TYPE_EXTERNREF:
+#endif
+ if (int_reg_idx < MAX_REG_INTS) {
+ /* Copy i32 argument from int register */
+ x86::Mem m_src(x86::rsp, int_reg_offset);
+ a.mov(x86::esi, m_src);
+ a.mov(m_dst, x86::esi);
+ int_reg_offset += 8;
+ int_reg_idx++;
+ }
+ else {
+ /* Copy i32 argument from stack */
+ x86::Mem m_src(x86::rsp, stack_arg_offset);
+ a.mov(x86::esi, m_src);
+ a.mov(m_dst, x86::esi);
+ stack_arg_offset += 8;
+ stack_arg_idx++;
+ }
+ frame_lp_offset += 4;
+ break;
+ case VALUE_TYPE_I64:
+ if (int_reg_idx < MAX_REG_INTS) {
+ /* Copy i64 argument from int register */
+ x86::Mem m_src(x86::rsp, int_reg_offset);
+ a.mov(x86::rsi, m_src);
+ a.mov(m_dst, x86::rsi);
+ int_reg_offset += 8;
+ int_reg_idx++;
+ }
+ else {
+ /* Copy i64 argument from stack */
+ x86::Mem m_src(x86::rsp, stack_arg_offset);
+ a.mov(x86::rsi, m_src);
+ a.mov(m_dst, x86::rsi);
+ stack_arg_offset += 8;
+ stack_arg_idx++;
+ }
+ frame_lp_offset += 8;
+ break;
+ case VALUE_TYPE_F32:
+ if (fp_reg_idx < MAX_REG_FLOATS) {
+ /* Copy f32 argument from fp register */
+ a.movss(m_dst, regs_float[fp_reg_idx++]);
+ }
+ else {
+ /* Copy f32 argument from stack */
+ x86::Mem m_src(x86::rsp, stack_arg_offset);
+ a.mov(x86::esi, m_src);
+ a.mov(m_dst, x86::esi);
+ stack_arg_offset += 8;
+ stack_arg_idx++;
+ }
+ frame_lp_offset += 4;
+ break;
+ case VALUE_TYPE_F64:
+ if (fp_reg_idx < MAX_REG_FLOATS) {
+ /* Copy f64 argument from fp register */
+ a.movsd(m_dst, regs_float[fp_reg_idx++]);
+ }
+ else {
+ /* Copy f64 argument from stack */
+ x86::Mem m_src(x86::rsp, stack_arg_offset);
+ a.mov(x86::rsi, m_src);
+ a.mov(m_dst, x86::rsi);
+ stack_arg_offset += 8;
+ stack_arg_idx++;
+ }
+ frame_lp_offset += 8;
+ break;
+ default:
+ bh_assert(0);
+ }
+ }
+
+ /* Call the fast jit function */
+ {
+ /* info = rsp + switch_info_offset */
+ a.lea(x86::rsi, x86::ptr(x86::rsp, switch_info_offset));
+ /* info.frame = frame = rax, or return of fast_jit_alloc_frame */
+ x86::Mem m1(x86::rsi, (uint32)offsetof(JitInterpSwitchInfo, frame));
+ a.mov(m1, x86::rax);
+
+ /* Call code_block_switch_to_jitted_from_interp
+ with argument (exec_env, info, func_idx, pc) */
+ /* rdi = exec_env */
+ a.mov(x86::rdi, x86::ptr(x86::rsp, exec_env_offset));
+ /* rsi = info, has been set */
+ /* rdx = func_idx */
+ imm.setValue(func_idx);
+ a.mov(x86::rdx, imm);
+ /* module_inst = exec_env->module_inst */
+ a.mov(x86::rcx,
+ x86::ptr(x86::rdi, (uint32)offsetof(WASMExecEnv, module_inst)));
+ /* fast_jit_func_ptrs = module_inst->fast_jit_func_ptrs */
+ a.mov(x86::rcx,
+ x86::ptr(x86::rcx, (uint32)offsetof(WASMModuleInstance,
+ fast_jit_func_ptrs)));
+ imm.setValue(func_idx_non_import);
+ a.mov(x86::rax, imm);
+ x86::Mem m3(x86::rcx, x86::rax, 3, 0);
+ /* rcx = module_inst->fast_jit_func_ptrs[func_idx_non_import] */
+ a.mov(x86::rcx, m3);
+
+ imm.setValue(
+ (uint64)(uintptr_t)code_block_switch_to_jitted_from_interp);
+ a.mov(x86::rax, imm);
+ a.call(x86::rax);
+ }
+
+ /* No need to check exception thrown here as it will be checked
+ in the caller */
+
+ /* Copy function results */
+ if (result_count > 0) {
+ /* Skip the slot of the first result, the extra results are
+ read from the slots after it */
+ frame_lp_offset = offsetof(WASMInterpFrame, lp);
+
+ switch (func_type->types[param_count]) {
+ case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+ case VALUE_TYPE_FUNCREF:
+ case VALUE_TYPE_EXTERNREF:
+#endif
+ /* The first result is in edx, return it in eax */
+ a.mov(x86::eax, x86::edx);
+ frame_lp_offset += 4;
+ break;
+ case VALUE_TYPE_I64:
+ /* The first result is in rdx, return it in rax */
+ a.mov(x86::rax, x86::rdx);
+ frame_lp_offset += 8;
+ break;
+ case VALUE_TYPE_F32:
+ /* The first result has been put to xmm0 */
+ frame_lp_offset += 4;
+ break;
+ case VALUE_TYPE_F64:
+ /* The first result has been put to xmm0 */
+ frame_lp_offset += 8;
+ break;
+ default:
+ bh_assert(0);
+ }
+
+ /* Copy extra results from exec_env->cur_frame */
+ if (ext_result_count > 0) {
+ /* rdi = exec_env */
+ a.mov(x86::rdi, x86::ptr(x86::rsp, exec_env_offset));
+ /* rsi = exec_env->cur_frame */
+ a.mov(x86::rsi,
+ x86::ptr(x86::rdi, (uint32)offsetof(WASMExecEnv, cur_frame)));
+
+ /* The destination pointers were passed after the parameters,
+ so int_reg_idx and stack_arg_offset continue from where
+ the argument copy above stopped; rcx holds the value and
+ rdx holds the destination pointer */
+ for (i = 0; i < ext_result_count; i++) {
+ switch (func_type->types[param_count + 1 + i]) {
+ case VALUE_TYPE_I32:
+#if WASM_ENABLE_REF_TYPES != 0
+ case VALUE_TYPE_FUNCREF:
+ case VALUE_TYPE_EXTERNREF:
+#endif
+ case VALUE_TYPE_F32:
+ {
+ /* Copy 32-bit result */
+ a.mov(x86::ecx, x86::ptr(x86::rsi, frame_lp_offset));
+ if (int_reg_idx < MAX_REG_INTS) {
+ x86::Mem m1(x86::rsp,
+ exec_env_offset + int_reg_idx * 8);
+ a.mov(x86::rdx, m1);
+ x86::Mem m2(x86::rdx, 0);
+ a.mov(m2, x86::ecx);
+ int_reg_idx++;
+ }
+ else {
+ x86::Mem m1(x86::rsp, stack_arg_offset);
+ a.mov(x86::rdx, m1);
+ x86::Mem m2(x86::rdx, 0);
+ a.mov(m2, x86::ecx);
+ stack_arg_offset += 8;
+ stack_arg_idx++;
+ }
+ frame_lp_offset += 4;
+ break;
+ }
+ case VALUE_TYPE_I64:
+ case VALUE_TYPE_F64:
+ {
+ /* Copy 64-bit result */
+ a.mov(x86::rcx, x86::ptr(x86::rsi, frame_lp_offset));
+ if (int_reg_idx < MAX_REG_INTS) {
+ x86::Mem m1(x86::rsp,
+ exec_env_offset + int_reg_idx * 8);
+ a.mov(x86::rdx, m1);
+ x86::Mem m2(x86::rdx, 0);
+ a.mov(m2, x86::rcx);
+ int_reg_idx++;
+ }
+ else {
+ x86::Mem m1(x86::rsp, stack_arg_offset);
+ a.mov(x86::rdx, m1);
+ x86::Mem m2(x86::rdx, 0);
+ a.mov(m2, x86::rcx);
+ stack_arg_offset += 8;
+ stack_arg_idx++;
+ }
+ frame_lp_offset += 8;
+ break;
+ }
+ default:
+ bh_assert(0);
+ }
+ }
+ }
+ }
+
+ /* Free the frame allocated */
+
+ /* rdi = exec_env */
+ a.mov(x86::rdi, x86::ptr(x86::rsp, exec_env_offset));
+ /* rsi = exec_env->cur_frame */
+ a.mov(x86::rsi,
+ x86::ptr(x86::rdi, (uint32)offsetof(WASMExecEnv, cur_frame)));
+ /* rdx = exec_env->cur_frame->prev_frame */
+ a.mov(x86::rdx,
+ x86::ptr(x86::rsi, (uint32)offsetof(WASMInterpFrame, prev_frame)));
+ /* exec_env->wasm_stack.s.top = cur_frame */
+ {
+ x86::Mem m(x86::rdi, offsetof(WASMExecEnv, wasm_stack.s.top));
+ a.mov(m, x86::rsi);
+ }
+ /* exec_env->cur_frame = prev_frame */
+ {
+ x86::Mem m(x86::rdi, offsetof(WASMExecEnv, cur_frame));
+ a.mov(m, x86::rdx);
+ }
+
+ /* Pop all integer argument registers */
+ for (i = 0; i < MAX_REG_INTS; i++) {
+ a.pop(regs_i64[reg_idx_of_int_args[i]]);
+ }
+ /* Pop jit interp switch info */
+ imm.setValue((uint64)switch_info_size);
+ a.add(x86::rsp, imm);
+
+ /* Return to the caller, don't use leave as we didn't
+ `push rbp` and `mov rbp, rsp` */
+ a.ret();
+
+ if (err_handler.err) {
+ return NULL;
+ }
+
+ /* Copy the generated code into the jit code cache */
+ code_buf = (char *)code.sectionById(0)->buffer().data();
+ code_size = code.sectionById(0)->buffer().size();
+ stream = (char *)jit_code_cache_alloc(code_size);
+ if (!stream)
+ return NULL;
+
+ bh_memcpy_s(stream, code_size, code_buf, code_size);
+
+#if 0
+ printf("Code of call to fast jit of func %u:\n", func_idx);
+ dump_native(stream, code_size);
+ printf("\n");
+#endif
+
+ return stream;
+}
+
+#endif /* end of WASM_ENABLE_LAZY_JIT != 0 && WASM_ENABLE_JIT != 0 */
+
+/**
+ * Lowering hook of the code generator: this backend does nothing in it
+ * and always reports success.
+ *
+ * @param cc the compilation context (unused)
+ *
+ * @return always true
+ */
+bool
+jit_codegen_lower(JitCompContext *cc)
+{
+ const bool lowered = true;
+
+ /* cc is intentionally unused */
+ (void)cc;
+ return lowered;
+}
+
+/**
+ * Hook to free the native code of a compilation context: this backend
+ * does nothing in it.
+ *
+ * @param cc the compilation context (unused)
+ */
+void
+jit_codegen_free_native(JitCompContext *cc)
+{
+ /* cc is intentionally unused */
+ (void)cc;
+ return;
+}
+
+/**
+ * Dump the native code in [begin_addr, end_addr), surrounded by blank
+ * lines. It does nothing when WASM_ENABLE_FAST_JIT_DUMP is 0.
+ *
+ * @param begin_addr the begin address of the native code
+ * @param end_addr the end address of the native code
+ */
+void
+jit_codegen_dump_native(void *begin_addr, void *end_addr)
+{
+#if WASM_ENABLE_FAST_JIT_DUMP == 0
+ /* Dumping is disabled, only silence the unused parameter warnings */
+ (void)begin_addr;
+ (void)end_addr;
+#else
+ char *code_begin = (char *)begin_addr;
+ char *code_end = (char *)end_addr;
+
+ os_printf("\n");
+ dump_native(code_begin, code_end - code_begin);
+ os_printf("\n");
+#endif
+}
+
+/**
+ * Initialize the code generator by generating the shared code blocks
+ * and copying each of them into the jit code cache:
+ * - code_block_switch_to_jitted_from_interp: saves the callee-save
+ * registers and info, sets up the exec_env and frame registers, and
+ * jumps to the jitted code whose address is in rcx
+ * - code_block_return_to_interp_from_jitted: stores the frame and the
+ * return values into info, restores the callee-save registers and
+ * returns
+ * - code_block_compile_fast_jit_and_then_call (lazy jit only): compiles
+ * the function whose index is in rdx with jit_compiler_compile and
+ * jumps to the compiled code, or throws an exception on failure
+ *
+ * @return true if success, false otherwise; on failure the code blocks
+ * already allocated are freed and their pointers are reset to NULL
+ */
+bool
+jit_codegen_init()
+{
+ const JitHardRegInfo *hreg_info = jit_codegen_get_hreg_info();
+ JitGlobals *jit_globals = jit_compiler_get_jit_globals();
+ char *code_buf, *stream;
+ uint32 code_size;
+
+ JitErrorHandler err_handler;
+ Environment env(Arch::kX64);
+ CodeHolder code;
+ code.init(env);
+ code.setErrorHandler(&err_handler);
+ x86::Assembler a(&code);
+
+ /* Initialize code_block_switch_to_jitted_from_interp */
+
+ /* push callee-save registers */
+ a.push(x86::rbp);
+ a.push(x86::rbx);
+ a.push(x86::r12);
+ a.push(x86::r13);
+ a.push(x86::r14);
+ a.push(x86::r15);
+ /* push info */
+ a.push(x86::rsi);
+
+ /* Note: the number of register pushed must be odd, as the stack pointer
+ %rsp must be aligned to a 16-byte boundary before making a call, so
+ when a function (including this function) gets control, %rsp is not
+ aligned. We push odd number registers here to make %rsp happy before
+ calling native functions. */
+
+ /* exec_env_reg = exec_env */
+ a.mov(regs_i64[hreg_info->exec_env_hreg_index], x86::rdi);
+ /* fp_reg = info->frame */
+ a.mov(x86::rbp, x86::ptr(x86::rsi, offsetof(JitInterpSwitchInfo, frame)));
+ /* rdx = func_idx, is already set in the func_idx argument of
+ jit_codegen_interp_jitted_glue */
+ /* jmp target, rcx = pc */
+ a.jmp(x86::rcx);
+
+ if (err_handler.err)
+ return false;
+
+ code_buf = (char *)code.sectionById(0)->buffer().data();
+ code_size = code.sectionById(0)->buffer().size();
+ stream = (char *)jit_code_cache_alloc(code_size);
+ if (!stream)
+ return false;
+
+ bh_memcpy_s(stream, code_size, code_buf, code_size);
+ code_block_switch_to_jitted_from_interp = stream;
+
+#if 0
+ dump_native(stream, code_size);
+#endif
+
+ /* Initialize code_block_return_to_interp_from_jitted */
+
+ /* Reuse the assembler: emit the next block from offset 0 again */
+ a.setOffset(0);
+
+ /* pop info */
+ a.pop(x86::rsi);
+ /* info->frame = fp_reg */
+ {
+ x86::Mem m(x86::rsi, offsetof(JitInterpSwitchInfo, frame));
+ a.mov(m, x86::rbp);
+ }
+ /* info->out.ret.ival[0, 1] = rdx */
+ {
+ x86::Mem m(x86::rsi, offsetof(JitInterpSwitchInfo, out.ret.ival));
+ a.mov(m, x86::rdx);
+ }
+ /* info->out.ret.fval[0, 1] = xmm0 */
+ {
+ x86::Mem m(x86::rsi, offsetof(JitInterpSwitchInfo, out.ret.fval));
+ a.movsd(m, x86::xmm0);
+ }
+
+ /* pop callee-save registers */
+ a.pop(x86::r15);
+ a.pop(x86::r14);
+ a.pop(x86::r13);
+ a.pop(x86::r12);
+ a.pop(x86::rbx);
+ a.pop(x86::rbp);
+ a.ret();
+
+ if (err_handler.err)
+ goto fail1;
+
+ code_buf = (char *)code.sectionById(0)->buffer().data();
+ code_size = code.sectionById(0)->buffer().size();
+ stream = (char *)jit_code_cache_alloc(code_size);
+ if (!stream)
+ goto fail1;
+
+ bh_memcpy_s(stream, code_size, code_buf, code_size);
+ code_block_return_to_interp_from_jitted =
+ jit_globals->return_to_interp_from_jitted = stream;
+
+#if 0
+ dump_native(stream, code_size);
+#endif
+
+#if WASM_ENABLE_LAZY_JIT != 0
+ /* Initialize code_block_compile_fast_jit_and_then_call */
+
+ a.setOffset(0);
+
+ /* Use rbx, r12, r13 to save func_idx, module_inst and module,
+ as they are callee-save registers */
+
+ /* Backup func_idx: rbx = rdx = func_idx, note that rdx has
+ been prepared in the caller:
+ callbc or code_block_switch_to_jitted_from_interp */
+ a.mov(x86::rbx, x86::rdx);
+ /* r12 = module_inst = exec_env->module_inst */
+ {
+ x86::Mem m(regs_i64[hreg_info->exec_env_hreg_index],
+ (uint32)offsetof(WASMExecEnv, module_inst));
+ a.mov(x86::r12, m);
+ }
+ /* rdi = r13 = module_inst->module */
+ {
+ x86::Mem m(x86::r12, (uint32)offsetof(WASMModuleInstance, module));
+ a.mov(x86::rdi, m);
+ a.mov(x86::r13, x86::rdi);
+ }
+ /* rsi = rdx = func_idx */
+ a.mov(x86::rsi, x86::rdx);
+ /* Call jit_compiler_compile(module, func_idx) */
+ {
+ Imm imm((uint64)(uintptr_t)jit_compiler_compile);
+ a.mov(x86::rax, imm);
+ a.call(x86::rax);
+ }
+
+ /* Check if failed to compile the jit function */
+ {
+ /* Did jit_compiler_compile return false? */
+ Imm imm((uint8)0);
+ a.cmp(x86::al, imm);
+ /* If no, jump to `Load compiled func ptr and call it`, the
+ INT32_MAX offset is a placeholder which is patched below */
+ imm.setValue(INT32_MAX);
+ a.jne(imm);
+
+ /* End of the jne instruction, its rel32 field is the last 4 bytes */
+ char *stream = (char *)a.code()->sectionById(0)->buffer().data()
+ + a.code()->sectionById(0)->buffer().size();
+
+ /* If yes, call jit_set_exception_with_id to throw exception,
+ and then set eax to JIT_INTERP_ACTION_THROWN, and jump to
+ code_block_return_to_interp_from_jitted to return */
+
+ /* rdi = module_inst */
+ a.mov(x86::rdi, x86::r12);
+ /* rsi = EXCE_FAILED_TO_COMPILE_FAST_JIT_FUNC */
+ imm.setValue(EXCE_FAILED_TO_COMPILE_FAST_JIT_FUNC);
+ a.mov(x86::rsi, imm);
+ /* Call jit_set_exception_with_id */
+ imm.setValue((uint64)(uintptr_t)jit_set_exception_with_id);
+ a.mov(x86::rax, imm);
+ a.call(x86::rax);
+ /* Return to the caller */
+ imm.setValue(JIT_INTERP_ACTION_THROWN);
+ a.mov(x86::eax, imm);
+ imm.setValue(code_block_return_to_interp_from_jitted);
+ a.mov(x86::rsi, imm);
+ a.jmp(x86::rsi);
+
+ /* Patch the offset of jne instruction */
+ char *stream_new = (char *)a.code()->sectionById(0)->buffer().data()
+ + a.code()->sectionById(0)->buffer().size();
+ *(int32 *)(stream - 4) = (int32)(stream_new - stream);
+ }
+
+ /* Load compiled func ptr and call it */
+ {
+ /* esi = module->import_function_count: use a 32-bit mov, which
+ zero-extends into rsi, since movzx only accepts 8-bit and
+ 16-bit sources and can't load the whole 32-bit counter */
+ x86::Mem m1(x86::r13,
+ (uint32)offsetof(WASMModule, import_function_count));
+ a.mov(x86::esi, m1);
+ /* rbx = rbx - module->import_function_count */
+ a.sub(x86::rbx, x86::rsi);
+ /* rax = module->fast_jit_func_ptrs */
+ x86::Mem m2(x86::r13, (uint32)offsetof(WASMModule, fast_jit_func_ptrs));
+ a.mov(x86::rax, m2);
+ /* rax = fast_jit_func_ptrs[rbx] */
+ x86::Mem m3(x86::rax, x86::rbx, 3, 0);
+ a.mov(x86::rax, m3);
+ a.jmp(x86::rax);
+ }
+
+ if (err_handler.err)
+ goto fail2;
+
+ code_buf = (char *)code.sectionById(0)->buffer().data();
+ code_size = code.sectionById(0)->buffer().size();
+ stream = (char *)jit_code_cache_alloc(code_size);
+ if (!stream)
+ goto fail2;
+
+ bh_memcpy_s(stream, code_size, code_buf, code_size);
+ code_block_compile_fast_jit_and_then_call =
+ jit_globals->compile_fast_jit_and_then_call = stream;
+
+#if 0
+ dump_native(stream, code_size);
+#endif
+#endif /* end of WASM_ENABLE_LAZY_JIT != 0 */
+
+ return true;
+
+ /* Free the blocks in the reverse order of allocation, and reset the
+ pointers so that no dangling pointer to a freed block is left */
+#if WASM_ENABLE_LAZY_JIT != 0
+fail2:
+ jit_code_cache_free(code_block_return_to_interp_from_jitted);
+ code_block_return_to_interp_from_jitted = NULL;
+ jit_globals->return_to_interp_from_jitted = NULL;
+#endif
+fail1:
+ jit_code_cache_free(code_block_switch_to_jitted_from_interp);
+ code_block_switch_to_jitted_from_interp = NULL;
+ return false;
+}
+
+/**
+ * Release the code blocks owned by this file.
+ *
+ * Each block is handed to jit_code_cache_free() and the matching file-scope
+ * pointer is then reset to NULL, so calling this function again (or calling
+ * it after a failed re-initialisation) does not free the same block twice.
+ * The lazy-JIT trampoline is only present when WASM_ENABLE_LAZY_JIT != 0.
+ */
+void
+jit_codegen_destroy()
+{
+#if WASM_ENABLE_LAZY_JIT != 0
+    jit_code_cache_free(code_block_compile_fast_jit_and_then_call);
+    code_block_compile_fast_jit_and_then_call = NULL;
+#endif
+    jit_code_cache_free(code_block_return_to_interp_from_jitted);
+    code_block_return_to_interp_from_jitted = NULL;
+
+    jit_code_cache_free(code_block_switch_to_jitted_from_interp);
+    code_block_switch_to_jitted_from_interp = NULL;
+}
+
+/* clang-format off */
+static const uint8 hreg_info_I32[3][7] = {
+ /* Flag rows for the seven I32 hard registers. The I32 entry of hreg_info
+ * points at rows 0, 1 and 2 in that order; the trailing comment on each
+ * row names it. A 1 marks the register as belonging to that row's set.
+ * Columns follow the register order below: ebp is column 0 (the value
+ * hreg_info records as the frame pointer index) and esi is column 6 (the
+ * value hreg_info records as the cmp index).
+ *
+ * ebp, eax, ebx, ecx, edx, edi, esi */
+ { 1, 0, 0, 0, 0, 0, 1 }, /* fixed, esi is freely used
+ * NOTE(review): esi is flagged 1 in this row although the comment calls it
+ * freely used; presumably "fixed" keeps it away from the register
+ * allocator so the code generator can use it as scratch -- confirm. */
+ { 0, 1, 0, 1, 1, 1, 0 }, /* caller_saved_native */
+ { 0, 1, 1, 1, 1, 1, 0 } /* caller_saved_jitted: same as the native row
+ * plus ebx */
+};
+
+static const uint8 hreg_info_I64[3][16] = {
+ /* Flag rows for the sixteen I64 hard registers. The I64 entry of hreg_info
+ * points at rows 0, 1 and 2 in that order; the trailing comment on each
+ * row names it. A 1 marks the register as belonging to that row's set.
+ * Each row is split over two source lines, eight columns per line, in the
+ * register order below. r15 is column 15, the value hreg_info records as
+ * the exec_env index.
+ *
+ * rbp, rax, rbx, rcx, rdx, rdi, rsi, rsp,
+ r8, r9, r10, r11, r12, r13, r14, r15 */
+ { 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 1 }, /* fixed, rsi is freely used
+ * Only r8..r14 are left unflagged here, whereas the I32 table leaves
+ * eax..edi unflagged.
+ * NOTE(review): rsi is flagged 1 although the comment calls it freely
+ * used; presumably it is reserved as codegen scratch -- confirm. */
+ { 0, 1, 0, 1, 1, 1, 0, 0,
+ 1, 1, 1, 1, 0, 0, 0, 0 }, /* caller_saved_native */
+ { 0, 1, 1, 1, 1, 1, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted: the native row plus
+ * rbx, r12, r13 and r14 */
+};
+
+/* System V AMD64 ABI calling convention. [XYZ]MM0-7
+ *
+ * Flag rows for the sixteen xmm registers when used for F32 values. The F32
+ * entry of hreg_info points at rows 0, 1 and 2 in that order. Each row is
+ * split over two source lines, eight columns per line.
+ * NOTE(review): unlike the integer tables this array is not const; no write
+ * to it is visible in this part of the file -- confirm whether it is
+ * modified elsewhere. */
+static uint8 hreg_info_F32[3][16] = {
+ /* xmm0 ~ xmm15 */
+ { 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1 }, /* row 0, the "fixed" row of the integer tables:
+ * xmm0..xmm7 are unflagged, xmm8..xmm15 are flagged. The F64 table flags
+ * the opposite halves (xmm15 is flagged in both); presumably this keeps
+ * F32 and F64 values in disjoint registers -- confirm. */
+ { 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
+ { 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
+};
+
+/* System V AMD64 ABI calling convention. [XYZ]MM0-7
+ *
+ * Flag rows for the sixteen xmm registers when used for F64 values. The F64
+ * entry of hreg_info points at rows 0, 1 and 2 in that order. Each row is
+ * split over two source lines, eight columns per line.
+ * NOTE(review): unlike the integer tables this array is not const; no write
+ * to it is visible in this part of the file -- confirm whether it is
+ * modified elsewhere. */
+static uint8 hreg_info_F64[3][16] = {
+ /* xmm0 ~ xmm15 */
+ { 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 1 }, /* row 0, the "fixed" row of the integer tables:
+ * xmm0..xmm7 and xmm15 are flagged, xmm8..xmm14 are unflagged. The F32
+ * table leaves xmm0..xmm7 unflagged instead. */
+ { 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_native */
+ { 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 0 }, /* caller_saved_jitted */
+};
+
+static const JitHardRegInfo hreg_info = { /* Hard register description of
+ * this backend; jit_codegen_get_hreg_info() returns its address. The first
+ * member is a list with one entry per register kind (the kind is named
+ * beside each entry). Each entry holds the size of one flag row followed by
+ * pointers to rows 0, 1 and 2 of the matching hreg_info_* table (fixed,
+ * caller_saved_native, caller_saved_jitted). The three scalars after the
+ * list are register indexes. */
+ {
+ { 0, NULL, NULL, NULL }, /* VOID: no registers */
+
+ { sizeof(hreg_info_I32[0]), /* I32: row size in bytes; equals the register
+ * count provided uint8 is one byte wide */
+ hreg_info_I32[0],
+ hreg_info_I32[1],
+ hreg_info_I32[2] },
+
+ { sizeof(hreg_info_I64[0]), /* I64 */
+ hreg_info_I64[0],
+ hreg_info_I64[1],
+ hreg_info_I64[2] },
+
+ { sizeof(hreg_info_F32[0]), /* F32 */
+ hreg_info_F32[0],
+ hreg_info_F32[1],
+ hreg_info_F32[2] },
+
+ { sizeof(hreg_info_F64[0]), /* F64 */
+ hreg_info_F64[0],
+ hreg_info_F64[1],
+ hreg_info_F64[2] },
+
+ { 0, NULL, NULL, NULL }, /* V8: no registers */
+ { 0, NULL, NULL, NULL }, /* V16: no registers */
+ { 0, NULL, NULL, NULL } /* V32: no registers */
+ },
+ /* frame pointer hreg index: rbp (column 0 of the integer tables) */
+ 0,
+ /* exec_env hreg index: r15 (column 15 of hreg_info_I64) */
+ 15,
+ /* cmp hreg index: esi (column 6 of hreg_info_I32) */
+ 6
+};
+/* clang-format on */
+
+/**
+ * Give callers read-only access to this backend's hard register table.
+ *
+ * @return the address of the file-scope hreg_info object; the object is
+ *         const and statically allocated, so the caller must not free it
+ */
+const JitHardRegInfo *
+jit_codegen_get_hreg_info()
+{
+    const JitHardRegInfo *info = &hreg_info;
+
+    return info;
+}
+
+/* Names of the 32-bit integer hard registers. The position of a name is the
+ * register number that jit_codegen_get_hreg_by_name() passes to
+ * jit_reg_new(). Note that this list has eight names ("esp" is last) while
+ * hreg_info_I32 describes seven registers. */
+static const char *reg_names_i32[] = {
+    "ebp", "eax", "ebx", "ecx",
+    "edx", "edi", "esi", "esp"
+};
+
+/* Names of the 64-bit integer hard registers, in register-number order. */
+static const char *reg_names_i64[] = {
+    "rbp", "rax", "rbx", "rcx",
+    "rdx", "rdi", "rsi", "rsp",
+    "r8",  "r9",  "r10", "r11",
+    "r12", "r13", "r14", "r15"
+};
+
+/* Names of the xmm registers when they hold F32 values. */
+static const char *reg_names_f32[] = {
+    "xmm0",  "xmm1",  "xmm2",  "xmm3",
+    "xmm4",  "xmm5",  "xmm6",  "xmm7",
+    "xmm8",  "xmm9",  "xmm10", "xmm11",
+    "xmm12", "xmm13", "xmm14", "xmm15"
+};
+
+/* Names of the xmm registers when they hold F64 values; the "_f64" suffix
+ * is what jit_codegen_get_hreg_by_name() uses to tell them from F32 names. */
+static const char *reg_names_f64[] = {
+    "xmm0_f64",  "xmm1_f64",  "xmm2_f64",  "xmm3_f64",
+    "xmm4_f64",  "xmm5_f64",  "xmm6_f64",  "xmm7_f64",
+    "xmm8_f64",  "xmm9_f64",  "xmm10_f64", "xmm11_f64",
+    "xmm12_f64", "xmm13_f64", "xmm14_f64", "xmm15_f64"
+};
+
+/**
+ * Search a register name table for an exact match.
+ *
+ * @param names table of NUL-terminated register names
+ * @param count number of entries in names
+ * @param name the name to look for, must not be NULL
+ *
+ * @return the position of the first matching entry, or count if none matches
+ */
+static size_t
+find_hreg_name_index(const char **names, size_t count, const char *name)
+{
+    size_t i;
+
+    for (i = 0; i < count; i++) {
+        if (!strcmp(names[i], name))
+            return i;
+    }
+    return count;
+}
+
+/**
+ * Translate a hard register name into a JitReg.
+ *
+ * The table to search is chosen from the spelling of the name: a leading
+ * 'e' selects the I32 names, a leading 'r' the I64 names, and a leading
+ * "xmm" the F64 names when the string contains "_f64" and the F32 names
+ * otherwise. Only the selected table is searched.
+ *
+ * @param name the register name, e.g. "eax", "r10", "xmm3" or "xmm3_f64"
+ *
+ * @return the register built by jit_reg_new() from the matching kind and
+ *         table position, or 0 if name is NULL or not found
+ */
+JitReg
+jit_codegen_get_hreg_by_name(const char *name)
+{
+    size_t count, idx;
+
+    /* Guard against a missing name instead of dereferencing NULL */
+    if (!name)
+        return 0;
+
+    if (name[0] == 'e') {
+        count = sizeof(reg_names_i32) / sizeof(reg_names_i32[0]);
+        idx = find_hreg_name_index(reg_names_i32, count, name);
+        if (idx < count)
+            return jit_reg_new(JIT_REG_KIND_I32, idx);
+    }
+    else if (name[0] == 'r') {
+        count = sizeof(reg_names_i64) / sizeof(reg_names_i64[0]);
+        idx = find_hreg_name_index(reg_names_i64, count, name);
+        if (idx < count)
+            return jit_reg_new(JIT_REG_KIND_I64, idx);
+    }
+    else if (!strncmp(name, "xmm", 3)) {
+        if (strstr(name, "_f64")) {
+            count = sizeof(reg_names_f64) / sizeof(reg_names_f64[0]);
+            idx = find_hreg_name_index(reg_names_f64, count, name);
+            if (idx < count)
+                return jit_reg_new(JIT_REG_KIND_F64, idx);
+        }
+        else {
+            count = sizeof(reg_names_f32) / sizeof(reg_names_f32[0]);
+            idx = find_hreg_name_index(reg_names_f32, count, name);
+            if (idx < count)
+                return jit_reg_new(JIT_REG_KIND_F32, idx);
+        }
+    }
+
+    /* No table matched the name */
+    return 0;
+}