diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 03:01:46 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 03:01:46 +0000 |
commit | f8fe689a81f906d1b91bb3220acde2a4ecb14c5b (patch) | |
tree | 26484e9d7e2c67806c2d1760196ff01aaa858e8c /src/recompiler/tcg | |
parent | Initial commit. (diff) | |
download | virtualbox-f8fe689a81f906d1b91bb3220acde2a4ecb14c5b.tar.xz virtualbox-f8fe689a81f906d1b91bb3220acde2a4ecb14c5b.zip |
Adding upstream version 6.0.4-dfsg.upstream/6.0.4-dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/recompiler/tcg-runtime.c | 89 | ||||
-rw-r--r-- | src/recompiler/tcg/LICENSE | 3 | ||||
-rw-r--r-- | src/recompiler/tcg/Makefile.kup | 0 | ||||
-rw-r--r-- | src/recompiler/tcg/README | 497 | ||||
-rw-r--r-- | src/recompiler/tcg/TODO | 14 | ||||
-rw-r--r-- | src/recompiler/tcg/i386/Makefile.kup | 0 | ||||
-rw-r--r-- | src/recompiler/tcg/i386/tcg-target.c | 2231 | ||||
-rw-r--r-- | src/recompiler/tcg/i386/tcg-target.h | 134 | ||||
-rw-r--r-- | src/recompiler/tcg/tcg-dyngen.c | 437 | ||||
-rw-r--r-- | src/recompiler/tcg/tcg-op.h | 2469 | ||||
-rw-r--r-- | src/recompiler/tcg/tcg-opc.h | 304 | ||||
-rw-r--r-- | src/recompiler/tcg/tcg-runtime.h | 18 | ||||
-rw-r--r-- | src/recompiler/tcg/tcg.c | 2212 | ||||
-rw-r--r-- | src/recompiler/tcg/tcg.h | 512 |
14 files changed, 8920 insertions, 0 deletions
diff --git a/src/recompiler/tcg-runtime.c b/src/recompiler/tcg-runtime.c new file mode 100644 index 00000000..ab2df5f0 --- /dev/null +++ b/src/recompiler/tcg-runtime.c @@ -0,0 +1,89 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef VBOX +#include <stdint.h> +#else +# include <VBox/types.h> +#endif + +#include "tcg/tcg-runtime.h" + +/* 32-bit helpers */ + +int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2) +{ + return arg1 / arg2; +} + +int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2) +{ + return arg1 % arg2; +} + +uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2) +{ + return arg1 / arg2; +} + +uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2) +{ + return arg1 % arg2; +} + +/* 64-bit helpers */ + +int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2) +{ + return arg1 << arg2; +} + +int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2) +{ + return (uint64_t)arg1 >> arg2; +} + +int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2) +{ + return arg1 >> arg2; +} + +int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2) +{ + return arg1 / arg2; +} + +int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2) +{ + return arg1 % arg2; +} + +uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2) +{ + return arg1 / arg2; +} + +uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2) +{ + return arg1 % arg2; +} diff --git a/src/recompiler/tcg/LICENSE b/src/recompiler/tcg/LICENSE new file mode 100644 index 00000000..be817fa1 --- /dev/null +++ b/src/recompiler/tcg/LICENSE @@ -0,0 +1,3 @@ +All the files in this directory and subdirectories are released under +a BSD like license (see header in each file). No other license is +accepted. diff --git a/src/recompiler/tcg/Makefile.kup b/src/recompiler/tcg/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/recompiler/tcg/Makefile.kup diff --git a/src/recompiler/tcg/README b/src/recompiler/tcg/README new file mode 100644 index 00000000..8d6fe059 --- /dev/null +++ b/src/recompiler/tcg/README @@ -0,0 +1,497 @@ +Tiny Code Generator - Fabrice Bellard. + +1) Introduction + +TCG (Tiny Code Generator) began as a generic backend for a C +compiler. It was simplified to be used in QEMU. It also has its roots +in the QOP code generator written by Paul Brook. + +2) Definitions + +The TCG "target" is the architecture for which we generate the +code. It is of course not the same as the "target" of QEMU which is +the emulated architecture. As TCG started as a generic C backend used +for cross compiling, it is assumed that the TCG target is different +from the host, although it is never the case for QEMU. + +A TCG "function" corresponds to a QEMU Translated Block (TB). + +A TCG "temporary" is a variable only live in a basic +block. Temporaries are allocated explicitly in each function. + +A TCG "local temporary" is a variable only live in a function. Local +temporaries are allocated explicitly in each function. + +A TCG "global" is a variable which is live in all the functions +(equivalent of a C global variable). They are defined before the +functions defined. A TCG global can be a memory location (e.g. a QEMU +CPU register), a fixed host register (e.g. the QEMU CPU state pointer) +or a memory location which is stored in a register outside QEMU TBs +(not implemented yet). + +A TCG "basic block" corresponds to a list of instructions terminated +by a branch instruction. + +3) Intermediate representation + +3.1) Introduction + +TCG instructions operate on variables which are temporaries, local +temporaries or globals. TCG instructions and variables are strongly +typed. Two types are supported: 32 bit integers and 64 bit +integers. Pointers are defined as an alias to 32 bit or 64 bit +integers depending on the TCG target word size. + +Each instruction has a fixed number of output variable operands, input +variable operands and always constant operands. + +The notable exception is the call instruction which has a variable +number of outputs and inputs. + +In the textual form, output operands usually come first, followed by +input operands, followed by constant operands. The output type is +included in the instruction name. Constants are prefixed with a '$'. + +add_i32 t0, t1, t2 (t0 <- t1 + t2) + +3.2) Assumptions + +* Basic blocks + +- Basic blocks end after branches (e.g. brcond_i32 instruction), + goto_tb and exit_tb instructions. +- Basic blocks start after the end of a previous basic block, or at a + set_label instruction. + +After the end of a basic block, the content of temporaries is +destroyed, but local temporaries and globals are preserved. + +* Floating point types are not supported yet + +* Pointers: depending on the TCG target, pointer size is 32 bit or 64 + bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or + TCG_TYPE_I64. + +* Helpers: + +Using the tcg_gen_helper_x_y it is possible to call any function +taking i32, i64 or pointer types. By default, before calling an helper, +all globals are stored at their canonical location and it is assumed +that the function can modify them. This can be overriden by the +TCG_CALL_CONST function modifier. By default, the helper is allowed to +modify the CPU state or raise an exception. This can be overriden by +the TCG_CALL_PURE function modifier, in which case the call to the +function is removed if the return value is not used. + +On some TCG targets (e.g. x86), several calling conventions are +supported. + +* Branches: + +Use the instruction 'br' to jump to a label. Use 'jmp' to jump to an +explicit address. Conditional branches can only jump to labels. + +3.3) Code Optimizations + +When generating instructions, you can count on at least the following +optimizations: + +- Single instructions are simplified, e.g. + + and_i32 t0, t0, $0xffffffff + + is suppressed. + +- A liveness analysis is done at the basic block level. The + information is used to suppress moves from a dead variable to + another one. It is also used to remove instructions which compute + dead results. The later is especially useful for condition code + optimization in QEMU. + + In the following example: + + add_i32 t0, t1, t2 + add_i32 t0, t0, $1 + mov_i32 t0, $1 + + only the last instruction is kept. + +3.4) Instruction Reference + +********* Function call + +* call <ret> <params> ptr + +call function 'ptr' (pointer type) + +<ret> optional 32 bit or 64 bit return value +<params> optional 32 bit or 64 bit parameters + +********* Jumps/Labels + +* jmp t0 + +Absolute jump to address t0 (pointer type). + +* set_label $label + +Define label 'label' at the current program point. + +* br $label + +Jump to label. + +* brcond_i32/i64 cond, t0, t1, label + +Conditional jump if t0 cond t1 is true. cond can be: + TCG_COND_EQ + TCG_COND_NE + TCG_COND_LT /* signed */ + TCG_COND_GE /* signed */ + TCG_COND_LE /* signed */ + TCG_COND_GT /* signed */ + TCG_COND_LTU /* unsigned */ + TCG_COND_GEU /* unsigned */ + TCG_COND_LEU /* unsigned */ + TCG_COND_GTU /* unsigned */ + +********* Arithmetic + +* add_i32/i64 t0, t1, t2 + +t0=t1+t2 + +* sub_i32/i64 t0, t1, t2 + +t0=t1-t2 + +* neg_i32/i64 t0, t1 + +t0=-t1 (two's complement) + +* mul_i32/i64 t0, t1, t2 + +t0=t1*t2 + +* div_i32/i64 t0, t1, t2 + +t0=t1/t2 (signed). Undefined behavior if division by zero or overflow. + +* divu_i32/i64 t0, t1, t2 + +t0=t1/t2 (unsigned). Undefined behavior if division by zero. + +* rem_i32/i64 t0, t1, t2 + +t0=t1%t2 (signed). Undefined behavior if division by zero or overflow. + +* remu_i32/i64 t0, t1, t2 + +t0=t1%t2 (unsigned). Undefined behavior if division by zero. + +********* Logical + +* and_i32/i64 t0, t1, t2 + +t0=t1&t2 + +* or_i32/i64 t0, t1, t2 + +t0=t1|t2 + +* xor_i32/i64 t0, t1, t2 + +t0=t1^t2 + +* not_i32/i64 t0, t1 + +t0=~t1 + +* andc_i32/i64 t0, t1, t2 + +t0=t1&~t2 + +* eqv_i32/i64 t0, t1, t2 + +t0=~(t1^t2), or equivalently, t0=t1^~t2 + +* nand_i32/i64 t0, t1, t2 + +t0=~(t1&t2) + +* nor_i32/i64 t0, t1, t2 + +t0=~(t1|t2) + +* orc_i32/i64 t0, t1, t2 + +t0=t1|~t2 + +********* Shifts/Rotates + +* shl_i32/i64 t0, t1, t2 + +t0=t1 << t2. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* shr_i32/i64 t0, t1, t2 + +t0=t1 >> t2 (unsigned). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* sar_i32/i64 t0, t1, t2 + +t0=t1 >> t2 (signed). Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* rotl_i32/i64 t0, t1, t2 + +Rotation of t2 bits to the left. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +* rotr_i32/i64 t0, t1, t2 + +Rotation of t2 bits to the right. Undefined behavior if t2 < 0 or t2 >= 32 (resp 64) + +********* Misc + +* mov_i32/i64 t0, t1 + +t0 = t1 + +Move t1 to t0 (both operands must have the same type). + +* ext8s_i32/i64 t0, t1 +ext8u_i32/i64 t0, t1 +ext16s_i32/i64 t0, t1 +ext16u_i32/i64 t0, t1 +ext32s_i64 t0, t1 +ext32u_i64 t0, t1 + +8, 16 or 32 bit sign/zero extension (both operands must have the same type) + +* bswap16_i32/i64 t0, t1 + +16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order +bytes are set to zero. + +* bswap32_i32/i64 t0, t1 + +32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that +the four high order bytes are set to zero. + +* bswap64_i64 t0, t1 + +64 bit byte swap + +* discard_i32/i64 t0 + +Indicate that the value of t0 won't be used later. It is useful to +force dead code elimination. + +********* Conditional moves + +* setcond_i32/i64 cond, dest, t1, t2 + +dest = (t1 cond t2) + +Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0. + +********* Type conversions + +* ext_i32_i64 t0, t1 +Convert t1 (32 bit) to t0 (64 bit) and does sign extension + +* extu_i32_i64 t0, t1 +Convert t1 (32 bit) to t0 (64 bit) and does zero extension + +* trunc_i64_i32 t0, t1 +Truncate t1 (64 bit) to t0 (32 bit) + +* concat_i32_i64 t0, t1, t2 +Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half +from t2 (32 bit). + +* concat32_i64 t0, t1, t2 +Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half +from t2 (64 bit). + +********* Load/Store + +* ld_i32/i64 t0, t1, offset +ld8s_i32/i64 t0, t1, offset +ld8u_i32/i64 t0, t1, offset +ld16s_i32/i64 t0, t1, offset +ld16u_i32/i64 t0, t1, offset +ld32s_i64 t0, t1, offset +ld32u_i64 t0, t1, offset + +t0 = read(t1 + offset) +Load 8, 16, 32 or 64 bits with or without sign extension from host memory. +offset must be a constant. + +* st_i32/i64 t0, t1, offset +st8_i32/i64 t0, t1, offset +st16_i32/i64 t0, t1, offset +st32_i64 t0, t1, offset + +write(t0, t1 + offset) +Write 8, 16, 32 or 64 bits to host memory. + +********* 64-bit target on 32-bit host support + +The following opcodes are internal to TCG. Thus they are to be implemented by +32-bit host code generators, but are not to be emitted by guest translators. +They are emitted as needed by inline functions within "tcg-op.h". + +* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label + +Similar to brcond, except that the 64-bit values T0 and T1 +are formed from two 32-bit arguments. + +* add2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high +* sub2_i32 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high + +Similar to add/sub, except that the 64-bit inputs T1 and T2 are +formed from two 32-bit arguments, and the 64-bit output T0 +is returned in two 32-bit outputs. + +* mulu2_i32 t0_low, t0_high, t1, t2 + +Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding +the full 64-bit product T0. The later is returned in two 32-bit outputs. + +* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high + +Similar to setcond, except that the 64-bit values T1 and T2 are +formed from two 32-bit arguments. The result is a 32-bit value. + +********* QEMU specific operations + +* tb_exit t0 + +Exit the current TB and return the value t0 (word type). + +* goto_tb index + +Exit the current TB and jump to the TB index 'index' (constant) if the +current TB was linked to this TB. Otherwise execute the next +instructions. + +* qemu_ld8u t0, t1, flags +qemu_ld8s t0, t1, flags +qemu_ld16u t0, t1, flags +qemu_ld16s t0, t1, flags +qemu_ld32 t0, t1, flags +qemu_ld32u t0, t1, flags +qemu_ld32s t0, t1, flags +qemu_ld64 t0, t1, flags + +Load data at the QEMU CPU address t1 into t0. t1 has the QEMU CPU address +type. 'flags' contains the QEMU memory index (selects user or kernel access) +for example. + +Note that "qemu_ld32" implies a 32-bit result, while "qemu_ld32u" and +"qemu_ld32s" imply a 64-bit result appropriately extended from 32 bits. + +* qemu_st8 t0, t1, flags +qemu_st16 t0, t1, flags +qemu_st32 t0, t1, flags +qemu_st64 t0, t1, flags + +Store the data t0 at the QEMU CPU Address t1. t1 has the QEMU CPU +address type. 'flags' contains the QEMU memory index (selects user or +kernel access) for example. + +Note 1: Some shortcuts are defined when the last operand is known to be +a constant (e.g. addi for add, movi for mov). + +Note 2: When using TCG, the opcodes must never be generated directly +as some of them may not be available as "real" opcodes. Always use the +function tcg_gen_xxx(args). + +4) Backend + +tcg-target.h contains the target specific definitions. tcg-target.c +contains the target specific code. + +4.1) Assumptions + +The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or +64 bit. It is expected that the pointer has the same size as the word. + +On a 32 bit target, all 64 bit operations are converted to 32 bits. A +few specific operations must be implemented to allow it (see add2_i32, +sub2_i32, brcond2_i32). + +Floating point operations are not supported in this version. A +previous incarnation of the code generator had full support of them, +but it is better to concentrate on integer operations first. + +On a 64 bit target, no assumption is made in TCG about the storage of +the 32 bit values in 64 bit registers. + +4.2) Constraints + +GCC like constraints are used to define the constraints of every +instruction. Memory constraints are not supported in this +version. Aliases are specified in the input operands as for GCC. + +The same register may be used for both an input and an output, even when +they are not explicitly aliased. If an op expands to multiple target +instructions then care must be taken to avoid clobbering input values. +GCC style "early clobber" outputs are not currently supported. + +A target can define specific register or constant constraints. If an +operation uses a constant input constraint which does not allow all +constants, it must also accept registers in order to have a fallback. + +The movi_i32 and movi_i64 operations must accept any constants. + +The mov_i32 and mov_i64 operations must accept any registers of the +same type. + +The ld/st instructions must accept signed 32 bit constant offsets. It +can be implemented by reserving a specific register to compute the +address if the offset is too big. + +The ld/st instructions must accept any destination (ld) or source (st) +register. + +4.3) Function call assumptions + +- The only supported types for parameters and return value are: 32 and + 64 bit integers and pointer. +- The stack grows downwards. +- The first N parameters are passed in registers. +- The next parameters are passed on the stack by storing them as words. +- Some registers are clobbered during the call. +- The function can return 0 or 1 value in registers. On a 32 bit + target, functions must be able to return 2 values in registers for + 64 bit return type. + +5) Recommended coding rules for best performance + +- Use globals to represent the parts of the QEMU CPU state which are + often modified, e.g. the integer registers and the condition + codes. TCG will be able to use host registers to store them. + +- Avoid globals stored in fixed registers. They must be used only to + store the pointer to the CPU state and possibly to store a pointer + to a register window. + +- Use temporaries. Use local temporaries only when really needed, + e.g. when you need to use a value after a jump. Local temporaries + introduce a performance hit in the current TCG implementation: their + content is saved to memory at end of each basic block. + +- Free temporaries and local temporaries when they are no longer used + (tcg_temp_free). Since tcg_const_x() also creates a temporary, you + should free it after it is used. Freeing temporaries does not yield + a better generated code, but it reduces the memory usage of TCG and + the speed of the translation. + +- Don't hesitate to use helpers for complicated or seldom used target + intructions. There is little performance advantage in using TCG to + implement target instructions taking more than about twenty TCG + instructions. + +- Use the 'discard' instruction if you know that TCG won't be able to + prove that a given global is "dead" at a given program point. The + x86 target uses it to improve the condition codes optimisation. diff --git a/src/recompiler/tcg/TODO b/src/recompiler/tcg/TODO new file mode 100644 index 00000000..2bc2785d --- /dev/null +++ b/src/recompiler/tcg/TODO @@ -0,0 +1,14 @@ +- Add new instructions such as: clz, ctz, popcnt. + +- See if it is worth exporting mul2, mulu2, div2, divu2. + +- Support of globals saved in fixed registers between TBs. + +Ideas: + +- Move the slow part of the qemu_ld/st ops after the end of the TB. + +- Change exception syntax to get closer to QOP system (exception + parameters given with a specific instruction). + +- Add float and vector support. diff --git a/src/recompiler/tcg/i386/Makefile.kup b/src/recompiler/tcg/i386/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/recompiler/tcg/i386/Makefile.kup diff --git a/src/recompiler/tcg/i386/tcg-target.c b/src/recompiler/tcg/i386/tcg-target.c new file mode 100644 index 00000000..0943d54e --- /dev/null +++ b/src/recompiler/tcg/i386/tcg-target.c @@ -0,0 +1,2231 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { +#if TCG_TARGET_REG_BITS == 64 + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", +#else + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", +#endif +}; +#endif + +static const int tcg_target_reg_alloc_order[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_R10, + TCG_REG_R11, +# if !defined(VBOX) || !defined(__MINGW64__) + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_RCX, + TCG_REG_RDX, +# endif + TCG_REG_RSI, + TCG_REG_RDI, +# if defined(VBOX) && defined(__MINGW64__) + TCG_REG_R9, + TCG_REG_R8, + TCG_REG_RDX, + TCG_REG_RCX, +# endif + TCG_REG_RAX, +#else + TCG_REG_EBX, + TCG_REG_ESI, + TCG_REG_EDI, + TCG_REG_EBP, + TCG_REG_ECX, + TCG_REG_EDX, + TCG_REG_EAX, +#endif +}; + +static const int tcg_target_call_iarg_regs[] = { +#if TCG_TARGET_REG_BITS == 64 +# if defined(VBOX) && defined(__MINGW64__) + TCG_REG_RCX, + TCG_REG_RDX, +# else + TCG_REG_RDI, + TCG_REG_RSI, + TCG_REG_RDX, + TCG_REG_RCX, +# endif + TCG_REG_R8, + TCG_REG_R9, +#else + TCG_REG_EAX, + TCG_REG_EDX, + TCG_REG_ECX +#endif +}; + +static const int tcg_target_call_oarg_regs[2] = { + TCG_REG_EAX, + TCG_REG_EDX +}; + +static uint8_t *tb_ret_addr; + +static void patch_reloc(uint8_t *code_ptr, int type, + tcg_target_long value, tcg_target_long addend) +{ + value += addend; + switch(type) { + case R_386_PC32: + value -= (uintptr_t)code_ptr; + if (value != (int32_t)value) { + tcg_abort(); + } + *(uint32_t *)code_ptr = value; + break; + case R_386_PC8: + value -= (uintptr_t)code_ptr; + if (value != (int8_t)value) { + tcg_abort(); + } + *(uint8_t *)code_ptr = value; + break; + default: + tcg_abort(); + } +} + +#ifdef VBOX +/* emits stack alignment checks for strict builds. */ +DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s) +{ +# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? */ + tcg_out8(s, 0xf7); tcg_out8(s, 0xc4); /* test %esp, 1fh */ + tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1); + tcg_out8(s, 0x74); /* jz imm8 */ + tcg_out8(s, 1); /* $+3 (over int3) */ + tcg_out8(s, 0xcc); /* int3 */ +# else + NOREF(s); +# endif +} +#endif /* VBOX */ + +/* maximum number of register used for input function arguments */ +static inline int tcg_target_get_call_iarg_regs_count(int flags) +{ + if (TCG_TARGET_REG_BITS == 64) { + return 6; + } + + flags &= TCG_CALL_TYPE_MASK; + switch(flags) { + case TCG_CALL_TYPE_STD: + return 0; + case TCG_CALL_TYPE_REGPARM_1: + case TCG_CALL_TYPE_REGPARM_2: + case TCG_CALL_TYPE_REGPARM: + return flags - TCG_CALL_TYPE_REGPARM_1 + 1; + default: + tcg_abort(); + } +} + +/* parse target specific constraints */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) +{ + const char *ct_str; + + ct_str = *pct_str; + switch(ct_str[0]) { + case 'a': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX); + break; + case 'b': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX); + break; + case 'c': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX); + break; + case 'd': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX); + break; + case 'S': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI); + break; + case 'D': + ct->ct |= TCG_CT_REG; + tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI); + break; + case 'q': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xf); + } + break; + case 'r': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + } + break; + + /* qemu_ld/st address constraint */ + case 'L': + ct->ct |= TCG_CT_REG; + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(ct->u.regs, 0, 0xffff); +#if defined(VBOX) && defined(__MINGW64__) + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]); + tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]); +#else + /** @todo figure why RDX isn't mentioned here. */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI); +#endif + } else { + tcg_regset_set32(ct->u.regs, 0, 0xff); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX); + } + break; + + case 'e': + ct->ct |= TCG_CT_CONST_S32; + break; + case 'Z': + ct->ct |= TCG_CT_CONST_U32; + break; + + default: + return -1; + } + ct_str++; + *pct_str = ct_str; + return 0; +} + +/* test if a constant matches the constraint */ +static inline int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct) +{ + int ct = arg_ct->ct; + if (ct & TCG_CT_CONST) { + return 1; + } + if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) { + return 1; + } + if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) { + return 1; + } + return 0; +} + +#if TCG_TARGET_REG_BITS == 64 +# define LOWREGMASK(x) ((x) & 7) +#else +# define LOWREGMASK(x) (x) +#endif + +#define P_EXT 0x100 /* 0x0f opcode prefix */ +#define P_DATA16 0x200 /* 0x66 opcode prefix */ +#if TCG_TARGET_REG_BITS == 64 +# define P_ADDR32 0x400 /* 0x67 opcode prefix */ +# define P_REXW 0x800 /* Set REX.W = 1 */ +# define P_REXB_R 0x1000 /* REG field as byte register */ +# define P_REXB_RM 0x2000 /* R/M field as byte register */ +#else +# define P_ADDR32 0 +# define P_REXW 0 +# define P_REXB_R 0 +# define P_REXB_RM 0 +#endif + +#define OPC_ARITH_EvIz (0x81) +#define OPC_ARITH_EvIb (0x83) +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) +#define OPC_BSWAP (0xc8 | P_EXT) +#define OPC_CALL_Jz (0xe8) +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3)) +#define OPC_DEC_r32 (0x48) +#define OPC_IMUL_GvEv (0xaf | P_EXT) +#define OPC_IMUL_GvEvIb (0x6b) +#define OPC_IMUL_GvEvIz (0x69) +#define OPC_INC_r32 (0x40) +#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */ +#define OPC_JCC_short (0x70) /* ... plus condition code */ +#define OPC_JMP_long (0xe9) +#define OPC_JMP_short (0xeb) +#define OPC_LEA (0x8d) +#define OPC_MOVB_EvGv (0x88) /* stores, more or less */ +#define OPC_MOVL_EvGv (0x89) /* stores, more or less */ +#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_MOVL_EvIz (0xc7) +#define OPC_MOVL_Iv (0xb8) +#define OPC_MOVSBL (0xbe | P_EXT) +#define OPC_MOVSWL (0xbf | P_EXT) +#define OPC_MOVSLQ (0x63 | P_REXW) +#define OPC_MOVZBL (0xb6 | P_EXT) +#define OPC_MOVZWL (0xb7 | P_EXT) +#define OPC_POP_r32 (0x58) +#define OPC_PUSH_r32 (0x50) +#define OPC_PUSH_Iv (0x68) +#define OPC_PUSH_Ib (0x6a) +#define OPC_RET (0xc3) +#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */ +#define OPC_SHIFT_1 (0xd1) +#define OPC_SHIFT_Ib (0xc1) +#define OPC_SHIFT_cl (0xd3) +#define OPC_TESTL (0x85) +#define OPC_XCHG_ax_r32 (0x90) + +#define OPC_GRP3_Ev (0xf7) +#define OPC_GRP5 (0xff) + +/* Group 1 opcode extensions for 0x80-0x83. + These are also used as modifiers for OPC_ARITH. */ +#define ARITH_ADD 0 +#define ARITH_OR 1 +#define ARITH_ADC 2 +#define ARITH_SBB 3 +#define ARITH_AND 4 +#define ARITH_SUB 5 +#define ARITH_XOR 6 +#define ARITH_CMP 7 + +/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */ +#define SHIFT_ROL 0 +#define SHIFT_ROR 1 +#define SHIFT_SHL 4 +#define SHIFT_SHR 5 +#define SHIFT_SAR 7 + +/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */ +#define EXT3_NOT 2 +#define EXT3_NEG 3 +#define EXT3_MUL 4 +#define EXT3_IMUL 5 +#define EXT3_DIV 6 +#define EXT3_IDIV 7 + +/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */ +#define EXT5_INC_Ev 0 +#define EXT5_DEC_Ev 1 +#define EXT5_CALLN_Ev 2 +#define EXT5_JMPN_Ev 4 + +/* Condition codes to be added to OPC_JCC_{long,short}. */ +#define JCC_JMP (-1) +#define JCC_JO 0x0 +#define JCC_JNO 0x1 +#define JCC_JB 0x2 +#define JCC_JAE 0x3 +#define JCC_JE 0x4 +#define JCC_JNE 0x5 +#define JCC_JBE 0x6 +#define JCC_JA 0x7 +#define JCC_JS 0x8 +#define JCC_JNS 0x9 +#define JCC_JP 0xa +#define JCC_JNP 0xb +#define JCC_JL 0xc +#define JCC_JGE 0xd +#define JCC_JLE 0xe +#define JCC_JG 0xf + +static const uint8_t tcg_cond_to_jcc[10] = { + [TCG_COND_EQ] = JCC_JE, + [TCG_COND_NE] = JCC_JNE, + [TCG_COND_LT] = JCC_JL, + [TCG_COND_GE] = JCC_JGE, + [TCG_COND_LE] = JCC_JLE, + [TCG_COND_GT] = JCC_JG, + [TCG_COND_LTU] = JCC_JB, + [TCG_COND_GEU] = JCC_JAE, + [TCG_COND_LEU] = JCC_JBE, + [TCG_COND_GTU] = JCC_JA, +}; + +#if defined(VBOX) +/* Calc the size of the tcg_out_opc() result. */ +static inline unsigned char tcg_calc_opc_len(TCGContext *s, int opc, int r, int rm, int x) +{ + unsigned char len = 1; +# if TCG_TARGET_REG_BITS == 64 + unsigned rex; + rex = 0; + rex |= (opc & P_REXW) >> 8; /* REX.W */ + rex |= (r & 8) >> 1; /* REX.R */ + rex |= (x & 8) >> 2; /* REX.X */ + rex |= (rm & 8) >> 3; /* REX.B */ + rex |= opc & (r >= 4 ? P_REXB_R : 0); + rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + if (rex) len++; + if (opc & P_ADDR32) len++; +# endif + if (opc & P_DATA16) len++; + if (opc & P_EXT) len++; + + return len; +} +#endif + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) +{ + int rex; + + if (opc & P_DATA16) { + /* We should never be asking for both 16 and 64-bit operation. */ + assert((opc & P_REXW) == 0); + tcg_out8(s, 0x66); + } + if (opc & P_ADDR32) { + tcg_out8(s, 0x67); + } + + rex = 0; + rex |= (opc & P_REXW) >> 8; /* REX.W */ + rex |= (r & 8) >> 1; /* REX.R */ + rex |= (x & 8) >> 2; /* REX.X */ + rex |= (rm & 8) >> 3; /* REX.B */ + + /* P_REXB_{R,RM} indicates that the given register is the low byte. + For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do, + as otherwise the encoding indicates %[abcd]h. Note that the values + that are ORed in merely indicate that the REX byte must be present; + those bits get discarded in output. */ + rex |= opc & (r >= 4 ? P_REXB_R : 0); + rex |= opc & (rm >= 4 ? P_REXB_RM : 0); + + if (rex) { + tcg_out8(s, (uint8_t)(rex | 0x40)); + } + + if (opc & P_EXT) { + tcg_out8(s, 0x0f); + } + tcg_out8(s, opc); +} +#else +static void tcg_out_opc(TCGContext *s, int opc) +{ + if (opc & P_DATA16) { + tcg_out8(s, 0x66); + } + if (opc & P_EXT) { + tcg_out8(s, 0x0f); + } + tcg_out8(s, opc); +} +/* Discard the register arguments to tcg_out_opc early, so as not to penalize + the 32-bit compilation paths. This method works with all versions of gcc, + whereas relying on optimization may not be able to exclude them. */ +#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc) +#endif + +static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm) +{ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); +} + +/* Output an opcode with a full "rm + (index<<shift) + offset" address mode. + We handle either RM and INDEX missing with a negative value. In 64-bit + mode for absolute addresses, ~RM is the size of the immediate operand + that will follow the instruction. */ + +static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm, + int index, int shift, + tcg_target_long offset) +{ + int mod, len; + + if (index < 0 && rm < 0) { + if (TCG_TARGET_REG_BITS == 64) { + /* Try for a rip-relative addressing mode. This has replaced + the 32-bit-mode absolute addressing encoding. */ +#ifdef VBOX + tcg_target_long pc = (tcg_target_long)s->code_ptr + + tcg_calc_opc_len(s, opc, r, 0, 0) + 1 + 4; +#else + tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm; +#endif + tcg_target_long disp = offset - pc; + if (disp == (int32_t)disp) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 5); + tcg_out32(s, disp); +#ifdef VBOX + Assert(pc == (tcg_target_long)s->code_ptr); +#endif + return; + } + + /* Try for an absolute address encoding. This requires the + use of the MODRM+SIB encoding and is therefore larger than + rip-relative addressing. */ + if (offset == (int32_t)offset) { + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (4 << 3) | 5); + tcg_out32(s, offset); + return; + } + + /* ??? The memory isn't directly addressable. */ + tcg_abort(); + } else { + /* Absolute address. */ + tcg_out_opc(s, opc, r, 0, 0); + tcg_out8(s, (r << 3) | 5); + tcg_out32(s, offset); + return; + } + } + + /* Find the length of the immediate addend. Note that the encoding + that would be used for (%ebp) indicates absolute addressing. */ + if (rm < 0) { + mod = 0, len = 4, rm = 5; + } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) { + mod = 0, len = 0; + } else if (offset == (int8_t)offset) { + mod = 0x40, len = 1; + } else { + mod = 0x80, len = 4; + } + + /* Use a single byte MODRM format if possible. Note that the encoding + that would be used for %esp is the escape to the two byte form. */ + if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) { + /* Single byte MODRM format. */ + tcg_out_opc(s, opc, r, rm, 0); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); + } else { + /* Two byte MODRM+SIB format. */ + + /* Note that the encoding that would place %esp into the index + field indicates no index register. In 64-bit mode, the REX.X + bit counts, so %r12 can be used as the index. */ + if (index < 0) { + index = 4; + } else { + assert(index != TCG_REG_ESP); + } + + tcg_out_opc(s, opc, r, rm, index); + tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4); + tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm)); + } + + if (len == 1) { + tcg_out8(s, offset); + } else if (len == 4) { + tcg_out32(s, offset); + } +} + +/* A simplification of the above with no index or shift. */ +static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, + int rm, tcg_target_long offset) +{ + tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset); +} + +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */ +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src) +{ + /* Propagate an opcode prefix, such as P_REXW. */ + int ext = subop & ~0x7; + subop &= 0x7; + + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src); +} + +static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg) +{ + if (arg != ret) { + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm(s, opc, ret, arg); + } +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + int ret, tcg_target_long arg) +{ + if (arg == 0) { + tgen_arithr(s, ARITH_XOR, ret, ret); + return; + } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) { + tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0); + tcg_out32(s, arg); + } else if (arg == (int32_t)arg) { + tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret); + tcg_out32(s, arg); + } else { + tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0); + tcg_out32(s, arg); + tcg_out32(s, arg >> 31 >> 1); + } +} + +static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) +{ + if (val == (int8_t)val) { + tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0); + tcg_out8(s, val); + } else if (val == (int32_t)val) { + tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0); + tcg_out32(s, val); + } else { + tcg_abort(); + } +} + +static inline void tcg_out_push(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_pop(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret, + int arg1, tcg_target_long arg2) +{ + int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, ret, arg1, arg2); +} + +static inline void tcg_out_st(TCGContext *s, TCGType type, int arg, + int arg1, tcg_target_long arg2) +{ + int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0); + tcg_out_modrm_offset(s, opc, arg, arg1, arg2); +} + +static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count) +{ + /* Propagate an opcode prefix, such as P_DATA16. */ + int ext = subopc & ~0x7; + subopc &= 0x7; + + if (count == 1) { + tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg); + } else { + tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg); + tcg_out8(s, count); + } +} + +static inline void tcg_out_bswap32(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0); +} + +static inline void tcg_out_rolw_8(TCGContext *s, int reg) +{ + tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8); +} + +static inline void tcg_out_ext8u(TCGContext *s, int dest, int src) +{ + /* movzbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src); +} + +static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsbl */ + assert(src < 4 || TCG_TARGET_REG_BITS == 64); + tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src); +} + +static inline void tcg_out_ext16u(TCGContext *s, int dest, int src) +{ + /* movzwl */ + tcg_out_modrm(s, OPC_MOVZWL, dest, src); +} + +static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw) +{ + /* movsw[lq] */ + tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src); +} + +static inline void tcg_out_ext32u(TCGContext *s, int dest, int src) +{ + /* 32-bit mov zero extends. */ + tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src); +} + +static inline void tcg_out_ext32s(TCGContext *s, int dest, int src) +{ + tcg_out_modrm(s, OPC_MOVSLQ, dest, src); +} + +static inline void tcg_out_bswap64(TCGContext *s, int reg) +{ + tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0); +} + +static void tgen_arithi(TCGContext *s, int c, int r0, + tcg_target_long val, int cf) +{ + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64) { + rexw = c & -8; + c &= 7; + } + + /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce + partial flags update stalls on Pentium4 and are not recommended + by current Intel optimization manuals. */ + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { + int is_inc = (c == ARITH_ADD) != (val < 0); /* VBox: cppcheck: "xor" for bools is "not-equals" */ + if (TCG_TARGET_REG_BITS == 64) { + /* The single-byte increment encodings are re-tasked as the + REX prefixes. Use the MODRM encoding. */ + tcg_out_modrm(s, OPC_GRP5 + rexw, + (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); + } else { + tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); + } + return; + } + + if (c == ARITH_AND) { + if (TCG_TARGET_REG_BITS == 64) { + if (val == 0xffffffffu) { + tcg_out_ext32u(s, r0, r0); + return; + } + if (val == (uint32_t)val) { + /* AND with no high bits set can use a 32-bit operation. */ + rexw = 0; + } + } + if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { + tcg_out_ext8u(s, r0, r0); + return; + } + if (val == 0xffffu) { + tcg_out_ext16u(s, r0, r0); + return; + } + } + + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0); + tcg_out8(s, val); + return; + } + if (rexw == 0 || val == (int32_t)val) { + tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0); + tcg_out32(s, val); + return; + } + + tcg_abort(); +} + +static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) +{ + if (val != 0) { + tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0); + } +} + +#if defined(VBOX) && defined(RT_OS_DARWIN) && ARCH_BITS == 32 +# define VBOX_16_BYTE_STACK_ALIGN +#endif +#ifdef VBOX_16_BYTE_STACK_ALIGN +static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val) +{ + if (val != 0) { + tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0); + } +} +#endif + +/* Use SMALL != 0 to force a short forward branch. */ +static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small) +{ + int32_t val, val1; + TCGLabel *l = &s->labels[label_index]; + + if (l->has_value) { + val = l->u.value - (tcg_target_long)s->code_ptr; + val1 = val - 2; + if ((int8_t)val1 == val1) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out8(s, val1); + } else { + if (small) { + tcg_abort(); + } + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + tcg_out32(s, val - 5); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + tcg_out32(s, val - 6); + } + } + } else if (small) { + if (opc == -1) { + tcg_out8(s, OPC_JMP_short); + } else { + tcg_out8(s, OPC_JCC_short + opc); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1); + s->code_ptr += 1; + } else { + if (opc == -1) { + tcg_out8(s, OPC_JMP_long); + } else { + tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0); + } + tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4); + s->code_ptr += 4; + } +} + +static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2, + int const_arg2, int rexw) +{ + if (const_arg2) { + if (arg2 == 0) { + /* test r, r */ + tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1); + } else { + tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0); + } + } else { + tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2); + } +} + +static void tcg_out_brcond32(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_brcond64(TCGContext *s, TCGCond cond, + TCGArg arg1, TCGArg arg2, int const_arg2, + int label_index, int small) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small); +} +#else +/* XXX: we implement it at the target level to avoid having to + handle cross basic blocks temporaries */ +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, + const int *const_args, int small) +{ + int label_next; + label_next = gen_new_label(); + switch(args[4]) { + case TCG_COND_EQ: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + label_next, 1); + tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3], + args[5], small); + break; + case TCG_COND_NE: + tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2], + args[5], small); + tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3], + args[5], small); + break; + case TCG_COND_LT: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_LE: + tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GT: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GE: + tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_LTU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_LEU: + tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GTU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2], + args[5], small); + break; + case TCG_COND_GEU: + tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3], + args[5], small); + tcg_out_jxx(s, JCC_JNE, label_next, 1); + tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2], + args[5], small); + break; + default: + tcg_abort(); + } + tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr); +} +#endif + +static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, 0); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest, + TCGArg arg1, TCGArg arg2, int const_arg2) +{ + tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW); + tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest); + tcg_out_ext8u(s, dest, dest); +} +#else +static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, + const int *const_args) +{ + TCGArg new_args[6]; + int label_true, label_over; + + memcpy(new_args, args+1, 5*sizeof(TCGArg)); + + if (args[0] == args[1] || args[0] == args[2] + || (!const_args[3] && args[0] == args[3]) + || (!const_args[4] && args[0] == args[4])) { + /* When the destination overlaps with one of the argument + registers, don't do anything tricky. */ + label_true = gen_new_label(); + label_over = gen_new_label(); + + new_args[5] = label_true; + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_jxx(s, JCC_JMP, label_over, 1); + tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr); + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 1); + tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr); + } else { + /* When the destination does not overlap one of the arguments, + clear the destination first, jump if cond false, and emit an + increment in the true case. This results in smaller code. */ + + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + + label_over = gen_new_label(); + new_args[4] = tcg_invert_cond(new_args[4]); + new_args[5] = label_over; + tcg_out_brcond2(s, new_args, const_args+1, 1); + + tgen_arithi(s, ARITH_ADD, args[0], 1, 0); + tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr); + } +} +#endif + +static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest) +{ +#ifdef VBOX + tcg_target_long disp = dest - (tcg_target_long)s->code_ptr + - tcg_calc_opc_len(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0) + - 4; +#else + tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5; +#endif + + if (disp == (int32_t)disp) { + tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0); + tcg_out32(s, disp); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest); + tcg_out_modrm(s, OPC_GRP5, + call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10); + } +} + +static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest) +{ +#ifdef VBOX + tcg_gen_stack_alignment_check(s); +#endif + tcg_out_branch(s, 1, dest); +} + +static void tcg_out_jmp(TCGContext *s, tcg_target_long dest) +{ + tcg_out_branch(s, 0, dest); +} + +#if defined(CONFIG_SOFTMMU) + +#include "../../softmmu_defs.h" + +static void *qemu_ld_helpers[4] = { + __ldb_mmu, + __ldw_mmu, + __ldl_mmu, + __ldq_mmu, +}; + +static void *qemu_st_helpers[4] = { + __stb_mmu, + __stw_mmu, + __stl_mmu, + __stq_mmu, +}; + +/* Perform the TLB load and compare. + + Inputs: + ADDRLO_IDX contains the index into ARGS of the low part of the + address; the high part of the address is at ADDR_LOW_IDX+1. + + MEM_INDEX and S_BITS are the memory context and log2 size of the load. + + WHICH is the offset into the CPUTLBEntry structure of the slot to read. + This should be offsetof addr_read or addr_write. + + Outputs: + LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) + positions of the displacements of forward jumps to the TLB miss case. + + First argument register is loaded with the low part of the address. + In the TLB hit case, it has been adjusted as indicated by the TLB + and so is a host address. In the TLB miss case, it continues to + hold a guest address. + + Second argument register is clobbered. */ + +static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, + int mem_index, int s_bits, + const TCGArg *args, + uint8_t **label_ptr, int which) +{ + const int addrlo = args[addrlo_idx]; + const int r0 = tcg_target_call_iarg_regs[0]; + const int r1 = tcg_target_call_iarg_regs[1]; + TCGType type = TCG_TYPE_I32; + int rexw = 0; + + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) { + type = TCG_TYPE_I64; + rexw = P_REXW; + } + + tcg_out_mov(s, type, r1, addrlo); + tcg_out_mov(s, type, r0, addrlo); + + tcg_out_shifti(s, SHIFT_SHR + rexw, r1, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + tgen_arithi(s, ARITH_AND + rexw, r0, + TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); + tgen_arithi(s, ARITH_AND + rexw, r1, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); + + tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0, + offsetof(CPUState, tlb_table[mem_index][0]) + + which); + + /* cmp 0(r1), r0 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0); + + tcg_out_mov(s, type, r0, addrlo); + + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[0] = s->code_ptr; + s->code_ptr++; + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* cmp 4(r1), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4); + + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[1] = s->code_ptr; + s->code_ptr++; + } + + /* TLB Hit. */ + + /* add addend(r1), r0 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1, + offsetof(CPUTLBEntry, addend) - which); +} +#endif + +static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, + int base, tcg_target_long ofs, int sizeop) +{ +#ifdef TARGET_WORDS_BIGENDIAN + const int bswap = 1; +#else + const int bswap = 0; +#endif + switch (sizeop) { + case 0: + tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs); + break; + case 0 | 4: + tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs); + break; + case 1: + tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); + if (bswap) { + tcg_out_rolw_8(s, datalo); + } + break; + case 1 | 4: + if (bswap) { + tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); + tcg_out_rolw_8(s, datalo); + tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); + } else { + tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs); + } + break; + case 2: + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + if (bswap) { + tcg_out_bswap32(s, datalo); + } + break; +#if TCG_TARGET_REG_BITS == 64 + case 2 | 4: + if (bswap) { + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_bswap32(s, datalo); + tcg_out_ext32s(s, datalo, datalo); + } else { + tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs); + } + break; +#endif + case 3: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs); + if (bswap) { + tcg_out_bswap64(s, datalo); + } + } else { + if (bswap) { + int t = datalo; + datalo = datahi; + datahi = t; + } + if (base != datalo) { + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); + } else { + tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); + tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + } + if (bswap) { + tcg_out_bswap32(s, datalo); + tcg_out_bswap32(s, datahi); + } + } + break; + default: + tcg_abort(); + } +} + +#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) + +static void * const vbox_ld_helpers[] = { + __ldub_vbox_phys, + __lduw_vbox_phys, + __ldul_vbox_phys, + __ldq_vbox_phys, + __ldb_vbox_phys, + __ldw_vbox_phys, + __ldl_vbox_phys, + __ldq_vbox_phys, +}; + +static void * const vbox_st_helpers[] = { + __stb_vbox_phys, + __stw_vbox_phys, + __stl_vbox_phys, + __stq_vbox_phys +}; + +DECLINLINE(void) tcg_out_long_call(TCGContext *s, void* dst) +{ + intptr_t disp; +# ifdef VBOX + tcg_gen_stack_alignment_check(s); +# endif + disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5; + tcg_out8(s, 0xe8); /* call disp32 */ + tcg_out32(s, disp); /* disp32 */ +} + +static void tcg_out_vbox_phys_read(TCGContext *s, int index, + int addr_reg, + int data_reg, int data_reg2) +{ + int useReg2 = ((index & 3) == 3); + + /** @todo should we make phys address accessors fastcalls - probably not a big deal */ + /* out parameter (address), note that phys address is always 64-bit */ + AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n")); + +# if 0 + tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */ + tcg_out_push(s, addr_reg); +# else + /* mov addr_reg, %eax */ + tcg_out_mov(s, TCG_REG_EAX, addr_reg); +# endif + + tcg_out_long_call(s, vbox_ld_helpers[index]); + + /* mov %eax, data_reg */ + tcg_out_mov(s, data_reg, TCG_REG_EAX); + + /* returned 64-bit value */ + if (useReg2) + tcg_out_mov(s, data_reg2, TCG_REG_EDX); +} + +static void tcg_out_vbox_phys_write(TCGContext *s, int index, + int addr_reg, + int val_reg, int val_reg2) { + int useReg2 = ((index & 3) == 3); + +# if 0 + /* out parameter (value2) */ + if (useReg2) + tcg_out_push(s, val_reg2); + /* out parameter (value) */ + tcg_out_push(s, val_reg); + /* out parameter (address), note that phys address is always 64-bit */ + AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n")); + tcg_out8(s, 0x6a); tcg_out8(s, 0x00); /* push $0 */ + tcg_out_push(s, addr_reg); +# else + Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX))); + /* mov addr_reg, %eax */ + tcg_out_mov(s, TCG_REG_EAX, addr_reg); + Assert(!useReg2 || (val_reg2 != TCG_REG_EDX)); + /* mov val_reg, %edx */ + tcg_out_mov(s, TCG_REG_EDX, val_reg); + if (useReg2) + tcg_out_mov(s, TCG_REG_ECX, val_reg2); + +# endif + /* call it */ + tcg_out_long_call(s, vbox_st_helpers[index]); + + /* clean stack after us */ +# if 0 + tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4)); +# endif +} + +#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */ + +/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and + EAX. It will be useful once fixed registers globals are less + common. */ +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, + int opc) +{ + int data_reg, data_reg2 = 0; + int addrlo_idx; +#if defined(CONFIG_SOFTMMU) + int mem_index, s_bits, arg_idx; + uint8_t *label_ptr[3]; +#endif + + data_reg = args[0]; + addrlo_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + data_reg2 = args[1]; + addrlo_idx = 2; + } + +#if defined(CONFIG_SOFTMMU) + mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; + s_bits = opc & 3; + + tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + label_ptr, offsetof(CPUTLBEntry, addr_read)); + + /* TLB Hit. */ + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, + tcg_target_call_iarg_regs[0], 0, opc); + + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; + s->code_ptr++; + + /* TLB Miss. */ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } + + /* XXX: move that code at the end of the TB */ + /* The first argument is already loaded with addrlo. */ + arg_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++], + args[addrlo_idx + 1]); + } + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx], + mem_index); + tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); + + switch(opc) { + case 0 | 4: + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case 1 | 4: + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); + break; + case 0: + tcg_out_ext8u(s, data_reg, TCG_REG_EAX); + break; + case 1: + tcg_out_ext16u(s, data_reg, TCG_REG_EAX); + break; + case 2: + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + break; +#if TCG_TARGET_REG_BITS == 64 + case 2 | 4: + tcg_out_ext32s(s, data_reg, TCG_REG_EAX); + break; +#endif + case 3: + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX); + } else if (data_reg == TCG_REG_EDX) { + /* xchg %edx, %eax */ + tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); + tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX); + } else { + tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); + tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX); + } + break; + default: + tcg_abort(); + } + + /* label2: */ + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; +#else +# if defined(VBOX) && defined(__MINGW64__) +# error port me +# endif + { + int32_t offset = GUEST_BASE; + int base = args[addrlo_idx]; + + if (TCG_TARGET_REG_BITS == 64) { + /* ??? We assume all operations have left us with register + contents that are zero extended. So far this appears to + be true. If we want to enforce this, we can either do + an explicit zero-extension here, or (if GUEST_BASE == 0) + use the ADDR32 prefix. For now, do nothing. */ + + if (offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base); + base = TCG_REG_RDI, offset = 0; + } + } + + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc); + } +#endif +} + +static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, + int base, tcg_target_long ofs, int sizeop) +{ +#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) +#ifdef TARGET_WORDS_BIGENDIAN + const int bswap = 1; +#else + const int bswap = 0; +#endif + /* ??? Ideally we wouldn't need a scratch register. For user-only, + we could perform the bswap twice to restore the original value + instead of moving to the scratch. But as it is, the L constraint + means that the second argument reg is definitely free here. */ + int scratch = tcg_target_call_iarg_regs[1]; + + switch (sizeop) { + case 0: + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs); + break; + case 1: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_rolw_8(s, scratch); + datalo = scratch; + } + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs); + break; + case 2: + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + datalo = scratch; + } + tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); + break; + case 3: + if (TCG_TARGET_REG_BITS == 64) { + if (bswap) { + tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo); + tcg_out_bswap64(s, scratch); + datalo = scratch; + } + tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs); + } else if (bswap) { + tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); + tcg_out_bswap32(s, scratch); + tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs); + tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); + tcg_out_bswap32(s, scratch); + tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4); + } else { + tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4); + } + break; + default: + tcg_abort(); + } +#else /* VBOX */ +# error "broken" + tcg_out_vbox_phys_read(s, opc, r0, data_reg, data_reg2); +#endif +} + +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, + int opc) +{ + int data_reg, data_reg2 = 0; + int addrlo_idx; +#if defined(CONFIG_SOFTMMU) + int mem_index, s_bits; + int stack_adjust; + uint8_t *label_ptr[3]; +#endif + + data_reg = args[0]; + addrlo_idx = 1; + if (TCG_TARGET_REG_BITS == 32 && opc == 3) { + data_reg2 = args[1]; + addrlo_idx = 2; + } + +#if defined(CONFIG_SOFTMMU) + mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; + s_bits = opc; + + tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args, + label_ptr, offsetof(CPUTLBEntry, addr_write)); + + /* TLB Hit. */ + tcg_out_qemu_st_direct(s, data_reg, data_reg2, + tcg_target_call_iarg_regs[0], 0, opc); + + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; + s->code_ptr++; + + /* TLB Miss. */ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } + +# if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB) + + /* XXX: move that code at the end of the TB */ + if (TCG_TARGET_REG_BITS == 64) { +# if defined(VBOX) && defined(__MINGW64__) + tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), + tcg_target_call_iarg_regs[1], data_reg); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index); +# else + tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), + TCG_REG_RSI, data_reg); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index); +# endif + stack_adjust = 0; + } else if (TARGET_LONG_BITS == 32) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg); + if (opc == 3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2); +# ifdef VBOX_16_BYTE_STACK_ALIGN + tcg_out_subi(s, TCG_REG_ESP, 12); +# endif + tcg_out_pushi(s, mem_index); + stack_adjust = 4; + } else { + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index); + stack_adjust = 0; + } + } else { + if (opc == 3) { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]); +# ifdef VBOX_16_BYTE_STACK_ALIGN + tcg_out_pushi(s, 0); +# endif + tcg_out_pushi(s, mem_index); + tcg_out_push(s, data_reg2); + tcg_out_push(s, data_reg); + stack_adjust = 12; + } else { + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]); + switch(opc) { + case 0: + tcg_out_ext8u(s, TCG_REG_ECX, data_reg); + break; + case 1: + tcg_out_ext16u(s, TCG_REG_ECX, data_reg); + break; + case 2: + tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg); + break; + } +# ifdef VBOX_16_BYTE_STACK_ALIGN + tcg_out_subi(s, TCG_REG_ESP, 12); +# endif + tcg_out_pushi(s, mem_index); + stack_adjust = 4; + } + } + + tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); + +# ifdef VBOX_16_BYTE_STACK_ALIGN + if (stack_adjust != 0) { + tcg_out_addi(s, TCG_REG_ESP, RT_ALIGN(stack_adjust, 16)); + } +# else + if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) { + /* Pop and discard. This is 2 bytes smaller than the add. */ + tcg_out_pop(s, TCG_REG_ECX); + } else if (stack_adjust != 0) { + tcg_out_addi(s, TCG_REG_ESP, stack_adjust); + } +# endif + +# else /* VBOX && REM_PHYS_ADDR_IN_TLB */ +# error Borked + tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2); +# endif /* VBOX && REM_PHYS_ADDR_IN_TLB */ + + /* label2: */ + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; +#else +# if defined(VBOX) && defined(__MINGW64__) +# error port me +# endif + { + int32_t offset = GUEST_BASE; + int base = args[addrlo_idx]; + + if (TCG_TARGET_REG_BITS == 64) { + /* ??? We assume all operations have left us with register + contents that are zero extended. So far this appears to + be true. If we want to enforce this, we can either do + an explicit zero-extension here, or (if GUEST_BASE == 0) + use the ADDR32 prefix. For now, do nothing. */ + + if (offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base); + base = TCG_REG_RDI, offset = 0; + } + } + + tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc); + } +#endif +} + +static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + const TCGArg *args, const int *const_args) +{ + int c, rexw = 0; + +#if TCG_TARGET_REG_BITS == 64 +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i64): \ + rexw = P_REXW; /* FALLTHRU */ \ + case glue(glue(INDEX_op_, x), _i32) +#else +# define OP_32_64(x) \ + case glue(glue(INDEX_op_, x), _i32) +#endif + + switch(opc) { + case INDEX_op_exit_tb: + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]); + tcg_out_jmp(s, (tcg_target_long) tb_ret_addr); + break; + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + /* direct jump method */ + tcg_out8(s, OPC_JMP_long); /* jmp im */ + s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf; + tcg_out32(s, 0); + } else { + /* indirect jump method */ + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, + (tcg_target_long)(s->tb_next + args[0])); + } + s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + break; + case INDEX_op_call: + if (const_args[0]) { + tcg_out_calli(s, args[0]); + } else { + /* call *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]); + } + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_jmp(s, args[0]); + } else { + /* jmp *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]); + } + break; + case INDEX_op_br: + tcg_out_jxx(s, JCC_JMP, args[0], 0); + break; + case INDEX_op_movi_i32: + tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); + break; + OP_32_64(ld8u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]); + break; + OP_32_64(ld8s): + tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]); + break; + OP_32_64(ld16u): + /* Note that we can ignore REXW for the zero-extend to 64-bit. */ + tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]); + break; + OP_32_64(ld16s): + tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_ld32u_i64: +#endif + case INDEX_op_ld_i32: + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(st8): + tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, + args[0], args[1], args[2]); + break; + OP_32_64(st16): + tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, + args[0], args[1], args[2]); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_st32_i64: +#endif + case INDEX_op_st_i32: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + OP_32_64(add): + /* For 3-operand addition, use LEA. */ + if (args[0] != args[1]) { + TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0; + + if (const_args[2]) { + c3 = a2, a2 = -1; + } else if (a0 == a2) { + /* Watch out for dest = src + dest, since we've removed + the matching constraint on the add. */ + tgen_arithr(s, ARITH_ADD + rexw, a0, a1); + break; + } + + tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3); + break; + } + c = ARITH_ADD; + goto gen_arith; + OP_32_64(sub): + c = ARITH_SUB; + goto gen_arith; + OP_32_64(and): + c = ARITH_AND; + goto gen_arith; + OP_32_64(or): + c = ARITH_OR; + goto gen_arith; + OP_32_64(xor): + c = ARITH_XOR; + goto gen_arith; + gen_arith: + if (const_args[2]) { + tgen_arithi(s, c + rexw, args[0], args[2], 0); + } else { + tgen_arithr(s, c + rexw, args[0], args[2]); + } + break; + + OP_32_64(mul): + if (const_args[2]) { + int32_t val; + val = args[2]; + if (val == (int8_t)val) { + tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]); + tcg_out8(s, val); + } else { + tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]); + tcg_out32(s, val); + } + } else { + tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]); + } + break; + + OP_32_64(div2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]); + break; + OP_32_64(divu2): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]); + break; + + OP_32_64(shl): + c = SHIFT_SHL; + goto gen_shift; + OP_32_64(shr): + c = SHIFT_SHR; + goto gen_shift; + OP_32_64(sar): + c = SHIFT_SAR; + goto gen_shift; + OP_32_64(rotl): + c = SHIFT_ROL; + goto gen_shift; + OP_32_64(rotr): + c = SHIFT_ROR; + goto gen_shift; + gen_shift: + if (const_args[2]) { + tcg_out_shifti(s, c + rexw, args[0], args[2]); + } else { + tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]); + } + break; + + case INDEX_op_brcond_i32: + tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1], + args[3], 0); + break; + case INDEX_op_setcond_i32: + tcg_out_setcond32(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + + OP_32_64(bswap16): + tcg_out_rolw_8(s, args[0]); + break; + OP_32_64(bswap32): + tcg_out_bswap32(s, args[0]); + break; + + OP_32_64(neg): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]); + break; + OP_32_64(not): + tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]); + break; + + OP_32_64(ext8s): + tcg_out_ext8s(s, args[0], args[1], rexw); + break; + OP_32_64(ext16s): + tcg_out_ext16s(s, args[0], args[1], rexw); + break; + OP_32_64(ext8u): + tcg_out_ext8u(s, args[0], args[1]); + break; + OP_32_64(ext16u): + tcg_out_ext16u(s, args[0], args[1]); + break; + + case INDEX_op_qemu_ld8u: + tcg_out_qemu_ld(s, args, 0); + break; + case INDEX_op_qemu_ld8s: + tcg_out_qemu_ld(s, args, 0 | 4); + break; + case INDEX_op_qemu_ld16u: + tcg_out_qemu_ld(s, args, 1); + break; + case INDEX_op_qemu_ld16s: + tcg_out_qemu_ld(s, args, 1 | 4); + break; +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_qemu_ld32u: +#endif + case INDEX_op_qemu_ld32: + tcg_out_qemu_ld(s, args, 2); + break; + case INDEX_op_qemu_ld64: + tcg_out_qemu_ld(s, args, 3); + break; + + case INDEX_op_qemu_st8: + tcg_out_qemu_st(s, args, 0); + break; + case INDEX_op_qemu_st16: + tcg_out_qemu_st(s, args, 1); + break; + case INDEX_op_qemu_st32: + tcg_out_qemu_st(s, args, 2); + break; + case INDEX_op_qemu_st64: + tcg_out_qemu_st(s, args, 3); + break; + +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: + tcg_out_brcond2(s, args, const_args, 0); + break; + case INDEX_op_setcond2_i32: + tcg_out_setcond2(s, args, const_args); + break; + case INDEX_op_mulu2_i32: + tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]); + break; + case INDEX_op_add2_i32: + if (const_args[4]) { + tgen_arithi(s, ARITH_ADD, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_ADD, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_ADC, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_ADC, args[1], args[5]); + } + break; + case INDEX_op_sub2_i32: + if (const_args[4]) { + tgen_arithi(s, ARITH_SUB, args[0], args[4], 1); + } else { + tgen_arithr(s, ARITH_SUB, args[0], args[4]); + } + if (const_args[5]) { + tgen_arithi(s, ARITH_SBB, args[1], args[5], 1); + } else { + tgen_arithr(s, ARITH_SBB, args[1], args[5]); + } + break; +#else /* TCG_TARGET_REG_BITS == 64 */ + case INDEX_op_movi_i64: + tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ld32s_i64: + tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + case INDEX_op_qemu_ld32s: + tcg_out_qemu_ld(s, args, 2 | 4); + break; + + case INDEX_op_brcond_i64: + tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1], + args[3], 0); + break; + case INDEX_op_setcond_i64: + tcg_out_setcond64(s, args[3], args[0], args[1], + args[2], const_args[2]); + break; + + case INDEX_op_bswap64_i64: + tcg_out_bswap64(s, args[0]); + break; + case INDEX_op_ext32u_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; + case INDEX_op_ext32s_i64: + tcg_out_ext32s(s, args[0], args[1]); + break; +#endif + + default: + tcg_abort(); + } + +#undef OP_32_64 +} + +static const TCGTargetOpDef x86_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_call, { "ri" } }, + { INDEX_op_jmp, { "ri" } }, + { INDEX_op_br, { } }, + { INDEX_op_mov_i32, { "r", "r" } }, + { INDEX_op_movi_i32, { "r" } }, + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "q", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "r", "ri" } }, + { INDEX_op_sub_i32, { "r", "0", "ri" } }, + { INDEX_op_mul_i32, { "r", "0", "ri" } }, + { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } }, + { INDEX_op_and_i32, { "r", "0", "ri" } }, + { INDEX_op_or_i32, { "r", "0", "ri" } }, + { INDEX_op_xor_i32, { "r", "0", "ri" } }, + + { INDEX_op_shl_i32, { "r", "0", "ci" } }, + { INDEX_op_shr_i32, { "r", "0", "ci" } }, + { INDEX_op_sar_i32, { "r", "0", "ci" } }, + { INDEX_op_rotl_i32, { "r", "0", "ci" } }, + { INDEX_op_rotr_i32, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i32, { "r", "ri" } }, + + { INDEX_op_bswap16_i32, { "r", "0" } }, + { INDEX_op_bswap32_i32, { "r", "0" } }, + + { INDEX_op_neg_i32, { "r", "0" } }, + + { INDEX_op_not_i32, { "r", "0" } }, + + { INDEX_op_ext8s_i32, { "r", "q" } }, + { INDEX_op_ext16s_i32, { "r", "r" } }, + { INDEX_op_ext8u_i32, { "r", "q" } }, + { INDEX_op_ext16u_i32, { "r", "r" } }, + + { INDEX_op_setcond_i32, { "q", "r", "ri" } }, + +#if TCG_TARGET_REG_BITS == 32 + { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, + { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, + { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, +#else + { INDEX_op_mov_i64, { "r", "r" } }, + { INDEX_op_movi_i64, { "r" } }, + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "0", "re" } }, + { INDEX_op_mul_i64, { "r", "0", "re" } }, + { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } }, + { INDEX_op_sub_i64, { "r", "0", "re" } }, + { INDEX_op_and_i64, { "r", "0", "reZ" } }, + { INDEX_op_or_i64, { "r", "0", "re" } }, + { INDEX_op_xor_i64, { "r", "0", "re" } }, + + { INDEX_op_shl_i64, { "r", "0", "ci" } }, + { INDEX_op_shr_i64, { "r", "0", "ci" } }, + { INDEX_op_sar_i64, { "r", "0", "ci" } }, + { INDEX_op_rotl_i64, { "r", "0", "ci" } }, + { INDEX_op_rotr_i64, { "r", "0", "ci" } }, + + { INDEX_op_brcond_i64, { "r", "re" } }, + { INDEX_op_setcond_i64, { "r", "r", "re" } }, + + { INDEX_op_bswap16_i64, { "r", "0" } }, + { INDEX_op_bswap32_i64, { "r", "0" } }, + { INDEX_op_bswap64_i64, { "r", "0" } }, + { INDEX_op_neg_i64, { "r", "0" } }, + { INDEX_op_not_i64, { "r", "0" } }, + + { INDEX_op_ext8s_i64, { "r", "r" } }, + { INDEX_op_ext16s_i64, { "r", "r" } }, + { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext8u_i64, { "r", "r" } }, + { INDEX_op_ext16u_i64, { "r", "r" } }, + { INDEX_op_ext32u_i64, { "r", "r" } }, +#endif + +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32s, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L" } }, +#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "r", "L" } }, + + { INDEX_op_qemu_st8, { "cb", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L", "L" } }, +#else + { INDEX_op_qemu_ld8u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L", "L" } }, + { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } }, + + { INDEX_op_qemu_st8, { "cb", "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L", "L", "L" } }, +#endif + { -1 }, +}; + +static int tcg_target_callee_save_regs[] = { +#if TCG_TARGET_REG_BITS == 64 + TCG_REG_RBP, + TCG_REG_RBX, +# if defined(VBOX) && defined(__MINGW64__) + TCG_REG_RSI, + TCG_REG_RDI, +# endif + TCG_REG_R12, + TCG_REG_R13, + /* TCG_REG_R14, */ /* Currently used for the global env. */ + TCG_REG_R15, +#else +# ifndef VBOX + /* TCG_REG_EBP, */ /* Currently used for the global env. */ + TCG_REG_EBX, + TCG_REG_ESI, + TCG_REG_EDI, +# else + TCG_REG_EBP, + TCG_REG_EBX, + /* TCG_REG_ESI, */ /* Currently used for the global env. */ + TCG_REG_EDI, +# endif +#endif +}; + +/* Generate global QEMU prologue and epilogue code */ +static void tcg_target_qemu_prologue(TCGContext *s) +{ + int i, frame_size, push_size, stack_addend; + + /* TB prologue */ + + /* Save all callee saved registers. */ + for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) { + tcg_out_push(s, tcg_target_callee_save_regs[i]); + } +# if defined(VBOX_STRICT) && defined(RT_ARCH_X86) + tcg_out8(s, 0x31); /* xor ebp, ebp */ + tcg_out8(s, 0xed); +# endif + + /* Reserve some stack space. */ + push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs); + push_size *= TCG_TARGET_REG_BITS / 8; + + frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE; +#if defined(VBOX) && defined(__MINGW64__) + frame_size += TCG_TARGET_CALL_STACK_OFFSET; +#endif + frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + stack_addend = frame_size - push_size; + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + + /* jmp *tb. */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]); +# ifdef VBOX + tcg_gen_stack_alignment_check(s); +# endif + + tcg_out_modrm(s, 0xff, 4, TCG_REG_EAX); /* jmp *%eax */ + + /* TB epilogue */ + tb_ret_addr = s->code_ptr; + + tcg_out_addi(s, TCG_REG_ESP, stack_addend); + + for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { + tcg_out_pop(s, tcg_target_callee_save_regs[i]); + } + tcg_out_opc(s, OPC_RET, 0, 0, 0); +} + +static void tcg_target_init(TCGContext *s) +{ +#if !defined(CONFIG_USER_ONLY) + /* fail safe */ + if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) + tcg_abort(); +#endif + + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + } else { + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff); + } + + tcg_regset_clear(tcg_target_call_clobber_regs); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX); + if (TCG_TARGET_REG_BITS == 64) { +# if !defined(VBOX) || !defined(__MINGW64__) + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI); +# endif + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10); + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); + } + + tcg_regset_clear(s->reserved_regs); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP); + + tcg_add_target_add_op_defs(x86_op_defs); +} diff --git a/src/recompiler/tcg/i386/tcg-target.h b/src/recompiler/tcg/i386/tcg-target.h new file mode 100644 index 00000000..e812bc58 --- /dev/null +++ b/src/recompiler/tcg/i386/tcg-target.h @@ -0,0 +1,134 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#define TCG_TARGET_I386 1 + +#if defined(__x86_64__) +# define TCG_TARGET_REG_BITS 64 +#else +# define TCG_TARGET_REG_BITS 32 +#endif +//#define TCG_TARGET_WORDS_BIGENDIAN + +#if TCG_TARGET_REG_BITS == 64 +# define TCG_TARGET_NB_REGS 16 +#else +# define TCG_TARGET_NB_REGS 8 +#endif + +enum { + TCG_REG_EAX = 0, + TCG_REG_ECX, + TCG_REG_EDX, + TCG_REG_EBX, + TCG_REG_ESP, + TCG_REG_EBP, + TCG_REG_ESI, + TCG_REG_EDI, + + /* 64-bit registers; always define the symbols to avoid + too much if-deffing. */ + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + TCG_REG_RAX = TCG_REG_EAX, + TCG_REG_RCX = TCG_REG_ECX, + TCG_REG_RDX = TCG_REG_EDX, + TCG_REG_RBX = TCG_REG_EBX, + TCG_REG_RSP = TCG_REG_ESP, + TCG_REG_RBP = TCG_REG_EBP, + TCG_REG_RSI = TCG_REG_ESI, + TCG_REG_RDI = TCG_REG_EDI, +}; + +#define TCG_CT_CONST_S32 0x100 +#define TCG_CT_CONST_U32 0x200 + +/* used for function call generation */ +#define TCG_REG_CALL_STACK TCG_REG_ESP +#define TCG_TARGET_STACK_ALIGN 16 +#if defined(VBOX) && defined(__MINGW64__) +# define TCG_TARGET_CALL_STACK_OFFSET 32 /* 4 qword argument/register spill zone */ +#else +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#endif + +/* optional instructions */ +#define TCG_TARGET_HAS_div2_i32 +#define TCG_TARGET_HAS_rot_i32 +#define TCG_TARGET_HAS_ext8s_i32 +#define TCG_TARGET_HAS_ext16s_i32 +#define TCG_TARGET_HAS_ext8u_i32 +#define TCG_TARGET_HAS_ext16u_i32 +#define TCG_TARGET_HAS_bswap16_i32 +#define TCG_TARGET_HAS_bswap32_i32 +#define TCG_TARGET_HAS_neg_i32 +#define TCG_TARGET_HAS_not_i32 +// #define TCG_TARGET_HAS_andc_i32 +// #define TCG_TARGET_HAS_orc_i32 +// #define TCG_TARGET_HAS_eqv_i32 +// #define TCG_TARGET_HAS_nand_i32 +// #define TCG_TARGET_HAS_nor_i32 + +#if TCG_TARGET_REG_BITS == 64 +#define TCG_TARGET_HAS_div2_i64 +#define TCG_TARGET_HAS_rot_i64 +#define TCG_TARGET_HAS_ext8s_i64 +#define TCG_TARGET_HAS_ext16s_i64 +#define TCG_TARGET_HAS_ext32s_i64 +#define TCG_TARGET_HAS_ext8u_i64 +#define TCG_TARGET_HAS_ext16u_i64 +#define TCG_TARGET_HAS_ext32u_i64 +#define TCG_TARGET_HAS_bswap16_i64 +#define TCG_TARGET_HAS_bswap32_i64 +#define TCG_TARGET_HAS_bswap64_i64 +#define TCG_TARGET_HAS_neg_i64 +#define TCG_TARGET_HAS_not_i64 +// #define TCG_TARGET_HAS_andc_i64 +// #define TCG_TARGET_HAS_orc_i64 +// #define TCG_TARGET_HAS_eqv_i64 +// #define TCG_TARGET_HAS_nand_i64 +// #define TCG_TARGET_HAS_nor_i64 +#endif + +#define TCG_TARGET_HAS_GUEST_BASE + +/* Note: must be synced with dyngen-exec.h */ +#if TCG_TARGET_REG_BITS == 64 +# define TCG_AREG0 TCG_REG_R14 +#else +# ifndef VBOX /* we're using ESI instead of EBP, probably due to frame pointer opt issues */ +# define TCG_AREG0 TCG_REG_EBP +# else /* VBOX */ +# define TCG_AREG0 TCG_REG_ESI +# endif /* VBOX */ +#endif + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} diff --git a/src/recompiler/tcg/tcg-dyngen.c b/src/recompiler/tcg/tcg-dyngen.c new file mode 100644 index 00000000..b068a0a7 --- /dev/null +++ b/src/recompiler/tcg/tcg-dyngen.c @@ -0,0 +1,437 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef VBOX +#include <assert.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#else +# include <stdio.h> +# include "osdep.h" +#endif + +#include "config.h" +#include "osdep.h" + +#include "tcg.h" + +int __op_param1, __op_param2, __op_param3; +#if defined(__sparc__) || defined(__arm__) + void __op_gen_label1(){} + void __op_gen_label2(){} + void __op_gen_label3(){} +#else + int __op_gen_label1, __op_gen_label2, __op_gen_label3; +#endif +int __op_jmp0, __op_jmp1, __op_jmp2, __op_jmp3; + +#if 0 +#if defined(__s390__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ +} +#elif defined(__ia64__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + while (start < stop) { + asm volatile ("fc %0" :: "r"(start)); + start += 32; + } + asm volatile (";;sync.i;;srlz.i;;"); +} +#elif defined(__powerpc__) + +#define MIN_CACHE_LINE_SIZE 8 /* conservative value */ + +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + uintptr_t p; + + start &= ~(MIN_CACHE_LINE_SIZE - 1); + stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1); + + for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { + asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) { + asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); + } + asm volatile ("sync" : : : "memory"); + asm volatile ("isync" : : : "memory"); +} +#elif defined(__alpha__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + asm ("imb"); +} +#elif defined(__sparc__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + uintptr_t p; + + p = start & ~(8UL - 1UL); + stop = (stop + (8UL - 1UL)) & ~(8UL - 1UL); + + for (; p < stop; p += 8) + __asm__ __volatile__("flush\t%0" : : "r" (p)); +} +#elif defined(__arm__) +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + register uintptr_t _beg __asm ("a1") = start; + register uintptr_t _end __asm ("a2") = stop; + register uintptr_t _flg __asm ("a3") = 0; + __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); +} +#elif defined(__mc68000) + +# include <asm/cachectl.h> +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + cacheflush(start,FLUSH_SCOPE_LINE,FLUSH_CACHE_BOTH,stop-start+16); +} +#elif defined(__mips__) + +#include <sys/cachectl.h> +static inline void flush_icache_range(uintptr_t start, uintptr_t stop) +{ + _flush_cache ((void *)start, stop - start, BCACHE); +} +#else +#error unsupported CPU +#endif + +#ifdef __alpha__ + +register int gp asm("$29"); + +static inline void immediate_ldah(void *p, int val) { + uint32_t *dest = p; + long high = ((val >> 16) + ((val >> 15) & 1)) & 0xffff; + + *dest &= ~0xffff; + *dest |= high; + *dest |= 31 << 16; +} +static inline void immediate_lda(void *dest, int val) { + *(uint16_t *) dest = val; +} +void fix_bsr(void *p, int offset) { + uint32_t *dest = p; + *dest &= ~((1 << 21) - 1); + *dest |= (offset >> 2) & ((1 << 21) - 1); +} + +#endif /* __alpha__ */ + +#ifdef __ia64 + +/* Patch instruction with "val" where "mask" has 1 bits. */ +static inline void ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val) +{ + uint64_t m0, m1, v0, v1, b0, b1, *b = (uint64_t *) (insn_addr & -16); +# define insn_mask ((1UL << 41) - 1) + uintptr_t shift; + + b0 = b[0]; b1 = b[1]; + shift = 5 + 41 * (insn_addr % 16); /* 5 template, 3 x 41-bit insns */ + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +static inline void ia64_patch_imm60 (uint64_t insn_addr, uint64_t val) +{ + ia64_patch(insn_addr, + 0x011ffffe000UL, + ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ + | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18); +} + +static inline void ia64_imm64 (void *insn, uint64_t val) +{ + /* Ignore the slot number of the relocation; GCC and Intel + toolchains differed for some time on whether IMM64 relocs are + against slot 1 (Intel) or slot 2 (GCC). */ + uint64_t insn_addr = (uint64_t) insn & ~3UL; + + ia64_patch(insn_addr + 2, + 0x01fffefe000UL, + ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ + | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ + | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */) + ); + ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); +} + +static inline void ia64_imm60b (void *insn, uint64_t val) +{ + /* Ignore the slot number of the relocation; GCC and Intel + toolchains differed for some time on whether IMM64 relocs are + against slot 1 (Intel) or slot 2 (GCC). */ + uint64_t insn_addr = (uint64_t) insn & ~3UL; + + if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) + fprintf(stderr, "%s: value %ld out of IMM60 range\n", + __FUNCTION__, (int64_t) val); + ia64_patch_imm60(insn_addr + 2, val); +} + +static inline void ia64_imm22 (void *insn, uint64_t val) +{ + if (val + (1 << 21) >= (1 << 22)) + fprintf(stderr, "%s: value %li out of IMM22 range\n", + __FUNCTION__, (int64_t)val); + ia64_patch((uint64_t) insn, 0x01fffcfe000UL, + ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ + | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); +} + +/* Like ia64_imm22(), but also clear bits 20-21. For addl, this has + the effect of turning "addl rX=imm22,rY" into "addl + rX=imm22,r0". */ +static inline void ia64_imm22_r0 (void *insn, uint64_t val) +{ + if (val + (1 << 21) >= (1 << 22)) + fprintf(stderr, "%s: value %li out of IMM22 range\n", + __FUNCTION__, (int64_t)val); + ia64_patch((uint64_t) insn, 0x01fffcfe000UL | (0x3UL << 20), + ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ + | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); +} + +static inline void ia64_imm21b (void *insn, uint64_t val) +{ + if (val + (1 << 20) >= (1 << 21)) + fprintf(stderr, "%s: value %li out of IMM21b range\n", + __FUNCTION__, (int64_t)val); + ia64_patch((uint64_t) insn, 0x11ffffe000UL, + ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */ + | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); +} + +static inline void ia64_nop_b (void *insn) +{ + ia64_patch((uint64_t) insn, (1UL << 41) - 1, 2UL << 37); +} + +static inline void ia64_ldxmov(void *insn, uint64_t val) +{ + if (val + (1 << 21) < (1 << 22)) + ia64_patch((uint64_t) insn, 0x1fff80fe000UL, 8UL << 37); +} + +static inline int ia64_patch_ltoff(void *insn, uint64_t val, + int relaxable) +{ + if (relaxable && (val + (1 << 21) < (1 << 22))) { + ia64_imm22_r0(insn, val); + return 0; + } + return 1; +} + +struct ia64_fixup { + struct ia64_fixup *next; + void *addr; /* address that needs to be patched */ + long value; +}; + +#define IA64_PLT(insn, plt_index) \ +do { \ + struct ia64_fixup *fixup = alloca(sizeof(*fixup)); \ + fixup->next = plt_fixes; \ + plt_fixes = fixup; \ + fixup->addr = (insn); \ + fixup->value = (plt_index); \ + plt_offset[(plt_index)] = 1; \ +} while (0) + +#define IA64_LTOFF(insn, val, relaxable) \ +do { \ + if (ia64_patch_ltoff(insn, val, relaxable)) { \ + struct ia64_fixup *fixup = alloca(sizeof(*fixup)); \ + fixup->next = ltoff_fixes; \ + ltoff_fixes = fixup; \ + fixup->addr = (insn); \ + fixup->value = (val); \ + } \ +} while (0) + +static inline void ia64_apply_fixes (uint8_t **gen_code_pp, + struct ia64_fixup *ltoff_fixes, + uint64_t gp, + struct ia64_fixup *plt_fixes, + int num_plts, + uintptr_t *plt_target, + unsigned int *plt_offset) +{ + static const uint8_t plt_bundle[] = { + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; movl r1=GP */ + 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60, + + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; brl IP */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0 + }; + uint8_t *gen_code_ptr = *gen_code_pp, *plt_start, *got_start; + uint64_t *vp; + struct ia64_fixup *fixup; + unsigned int offset = 0; + struct fdesc { + long ip; + long gp; + } *fdesc; + int i; + + if (plt_fixes) { + plt_start = gen_code_ptr; + + for (i = 0; i < num_plts; ++i) { + if (plt_offset[i]) { + plt_offset[i] = offset; + offset += sizeof(plt_bundle); + + fdesc = (struct fdesc *) plt_target[i]; + memcpy(gen_code_ptr, plt_bundle, sizeof(plt_bundle)); + ia64_imm64 (gen_code_ptr + 0x02, fdesc->gp); + ia64_imm60b(gen_code_ptr + 0x12, + (fdesc->ip - (long) (gen_code_ptr + 0x10)) >> 4); + gen_code_ptr += sizeof(plt_bundle); + } + } + + for (fixup = plt_fixes; fixup; fixup = fixup->next) + ia64_imm21b(fixup->addr, + ((long) plt_start + plt_offset[fixup->value] + - ((long) fixup->addr & ~0xf)) >> 4); + } + + got_start = gen_code_ptr; + + /* First, create the GOT: */ + for (fixup = ltoff_fixes; fixup; fixup = fixup->next) { + /* first check if we already have this value in the GOT: */ + for (vp = (uint64_t *) got_start; vp < (uint64_t *) gen_code_ptr; ++vp) + if (*vp == fixup->value) + break; + if (vp == (uint64_t *) gen_code_ptr) { + /* Nope, we need to put the value in the GOT: */ + *vp = fixup->value; + gen_code_ptr += 8; + } + ia64_imm22(fixup->addr, (long) vp - gp); + } + /* Keep code ptr aligned. */ + if ((long) gen_code_ptr & 15) + gen_code_ptr += 8; + *gen_code_pp = gen_code_ptr; +} +#endif +#endif + +#ifdef CONFIG_DYNGEN_OP + +#if defined __hppa__ +struct hppa_branch_stub { + uint32_t *location; + long target; + struct hppa_branch_stub *next; +}; + +#define HPPA_RECORD_BRANCH(LIST, LOC, TARGET) \ +do { \ + struct hppa_branch_stub *stub = alloca(sizeof(struct hppa_branch_stub)); \ + stub->location = LOC; \ + stub->target = TARGET; \ + stub->next = LIST; \ + LIST = stub; \ +} while (0) + +static inline void hppa_process_stubs(struct hppa_branch_stub *stub, + uint8_t **gen_code_pp) +{ + uint32_t *s = (uint32_t *)*gen_code_pp; + uint32_t *p = s + 1; + + if (!stub) return; + + for (; stub != NULL; stub = stub->next) { + uintptr_t l = (uintptr_t)p; + /* stub: + * ldil L'target, %r1 + * be,n R'target(%sr4,%r1) + */ + *p++ = 0x20200000 | reassemble_21(lrsel(stub->target, 0)); + *p++ = 0xe0202002 | (reassemble_17(rrsel(stub->target, 0) >> 2)); + hppa_patch17f(stub->location, l, 0); + } + /* b,l,n stub,%r0 */ + *s = 0xe8000002 | reassemble_17((p - s) - 2); + *gen_code_pp = (uint8_t *)p; +} +#endif /* __hppa__ */ + +const TCGArg *dyngen_op(TCGContext *s, int opc, const TCGArg *opparam_ptr) +{ + uint8_t *gen_code_ptr; + +#ifdef __hppa__ + struct hppa_branch_stub *hppa_stubs = NULL; +#endif + + gen_code_ptr = s->code_ptr; + switch(opc) { + +/* op.h is dynamically generated by dyngen.c from op.c */ +#include "op.h" + + default: + tcg_abort(); + } + +#ifdef __hppa__ + hppa_process_stubs(hppa_stubs, &gen_code_ptr); +#endif + + s->code_ptr = gen_code_ptr; + return opparam_ptr; +} +#endif diff --git a/src/recompiler/tcg/tcg-op.h b/src/recompiler/tcg/tcg-op.h new file mode 100644 index 00000000..b3890804 --- /dev/null +++ b/src/recompiler/tcg/tcg-op.h @@ -0,0 +1,2469 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "tcg.h" + +int gen_new_label(void); + +static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); +} + +static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 arg1) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); +} + +static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg arg1) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = arg1; +} + +static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); +} + +static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); +} + +static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGArg arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = arg2; +} + +static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGArg arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = arg2; +} + +static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg arg1, TCGArg arg2) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = arg1; + *gen_opparam_ptr++ = arg2; +} + +static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); +} + +static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); +} + +static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGArg arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = arg3; +} + +static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGArg arg3) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = arg3; +} + +static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val, + TCGv_ptr base, TCGArg offset) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(val); + *gen_opparam_ptr++ = GET_TCGV_PTR(base); + *gen_opparam_ptr++ = offset; +} + +static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_ptr base, TCGArg offset) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(val); + *gen_opparam_ptr++ = GET_TCGV_PTR(base); + *gen_opparam_ptr++ = offset; +} + +static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val, + TCGv_i32 addr, TCGArg mem_index) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(val); + *gen_opparam_ptr++ = GET_TCGV_I32(addr); + *gen_opparam_ptr++ = mem_index; +} + +static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val, + TCGv_i64 addr, TCGArg mem_index) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(val); + *gen_opparam_ptr++ = GET_TCGV_I64(addr); + *gen_opparam_ptr++ = mem_index; +} + +static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); +} + +static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); +} + +static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGArg arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = arg3; + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGArg arg3, TCGArg arg4) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = arg3; + *gen_opparam_ptr++ = arg4; +} + +static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = GET_TCGV_I32(arg5); +} + +static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = GET_TCGV_I64(arg5); +} + +static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = arg5; +} + +static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = arg5; +} + +static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5, + TCGv_i32 arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = GET_TCGV_I32(arg5); + *gen_opparam_ptr++ = GET_TCGV_I32(arg6); +} + +static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5, + TCGv_i64 arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = GET_TCGV_I64(arg5); + *gen_opparam_ptr++ = GET_TCGV_I64(arg6); +} + +static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2, + TCGv_i32 arg3, TCGv_i32 arg4, + TCGv_i32 arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = GET_TCGV_I32(arg5); + *gen_opparam_ptr++ = arg6; +} + +static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2, + TCGv_i64 arg3, TCGv_i64 arg4, + TCGv_i64 arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = GET_TCGV_I64(arg5); + *gen_opparam_ptr++ = arg6; +} + +static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 arg1, + TCGv_i32 arg2, TCGv_i32 arg3, + TCGv_i32 arg4, TCGArg arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I32(arg1); + *gen_opparam_ptr++ = GET_TCGV_I32(arg2); + *gen_opparam_ptr++ = GET_TCGV_I32(arg3); + *gen_opparam_ptr++ = GET_TCGV_I32(arg4); + *gen_opparam_ptr++ = arg5; + *gen_opparam_ptr++ = arg6; +} + +static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGv_i64 arg3, + TCGv_i64 arg4, TCGArg arg5, TCGArg arg6) +{ + *gen_opc_ptr++ = opc; + *gen_opparam_ptr++ = GET_TCGV_I64(arg1); + *gen_opparam_ptr++ = GET_TCGV_I64(arg2); + *gen_opparam_ptr++ = GET_TCGV_I64(arg3); + *gen_opparam_ptr++ = GET_TCGV_I64(arg4); + *gen_opparam_ptr++ = arg5; + *gen_opparam_ptr++ = arg6; +} + +static inline void gen_set_label(int n) +{ + tcg_gen_op1i(INDEX_op_set_label, n); +} + +static inline void tcg_gen_br(int label) +{ + tcg_gen_op1i(INDEX_op_br, label); +} + +static inline void tcg_gen_mov_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (!TCGV_EQUAL_I32(ret, arg)) + tcg_gen_op2_i32(INDEX_op_mov_i32, ret, arg); +} + +static inline void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg) +{ + tcg_gen_op2i_i32(INDEX_op_movi_i32, ret, arg); +} + +/* A version of dh_sizemask from def-helper.h that doesn't rely on + preprocessor magic. */ +static inline int tcg_gen_sizemask(int n, int is_64bit, int is_signed) +{ + return (is_64bit << n*2) | (is_signed << (n*2 + 1)); +} + +/* helper calls */ +static inline void tcg_gen_helperN(void *func, int flags, int sizemask, + TCGArg ret, int nargs, TCGArg *args) +{ + TCGv_ptr fn; + fn = tcg_const_ptr((tcg_target_long)func); + tcg_gen_callN(&tcg_ctx, fn, flags, sizemask, ret, + nargs, args); + tcg_temp_free_ptr(fn); +} + +/* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently + reserved for helpers in tcg-runtime.c. These helpers are all const + and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST | + TCG_CALL_PURE. This may need to be adjusted if these functions + start to be used with other helpers. */ +static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret, + TCGv_i32 a, TCGv_i32 b) +{ + TCGv_ptr fn; + TCGArg args[2]; + fn = tcg_const_ptr((tcg_target_long)func); + args[0] = GET_TCGV_I32(a); + args[1] = GET_TCGV_I32(b); + tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, + GET_TCGV_I32(ret), 2, args); + tcg_temp_free_ptr(fn); +} + +static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret, + TCGv_i64 a, TCGv_i64 b) +{ + TCGv_ptr fn; + TCGArg args[2]; + fn = tcg_const_ptr((tcg_target_long)func); + args[0] = GET_TCGV_I64(a); + args[1] = GET_TCGV_I64(b); + tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, + GET_TCGV_I64(ret), 2, args); + tcg_temp_free_ptr(fn); +} + +/* 32 bit ops */ + +static inline void tcg_gen_ld8u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld8s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16u_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld16s_i32, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i32(TCGv_i32 ret, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st8_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st16_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i32(TCGv_i32 arg1, TCGv_ptr arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i32(INDEX_op_st_i32, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_add_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_add_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_sub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sub_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0 = tcg_const_i32(arg1); + tcg_gen_sub_i32(ret, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_sub_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCGV_EQUAL_I32(arg1, arg2)) { + tcg_gen_mov_i32(ret, arg1); + } else { + tcg_gen_op3_i32(INDEX_op_and_i32, ret, arg1, arg2); + } +} + +static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_movi_i32(ret, 0); + } else if (arg2 == 0xffffffff) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_and_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCGV_EQUAL_I32(arg1, arg2)) { + tcg_gen_mov_i32(ret, arg1); + } else { + tcg_gen_op3_i32(INDEX_op_or_i32, ret, arg1, arg2); + } +} + +static inline void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0xffffffff) { + tcg_gen_movi_i32(ret, 0xffffffff); + } else if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_or_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCGV_EQUAL_I32(arg1, arg2)) { + tcg_gen_movi_i32(ret, 0); + } else { + tcg_gen_op3_i32(INDEX_op_xor_i32, ret, arg1, arg2); + } +} + +static inline void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_xor_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_shl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shl_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_shl_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_shr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_shr_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_shr_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_sar_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_sar_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_sar_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); + } +} + +static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, + TCGv_i32 arg2, int label_index) +{ + tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index); +} + +static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, + int32_t arg2, int label_index) +{ + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_brcond_i32(cond, arg1, t0, label_index); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond); +} + +static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret, + TCGv_i32 arg1, int32_t arg2) +{ + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_setcond_i32(cond, ret, arg1, t0); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_mul_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_mul_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +} + +#ifdef TCG_TARGET_HAS_div_i32 +static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2); +} +#elif defined(TCG_TARGET_HAS_div2_i32) +static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t0, arg1, 31); + tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t0, arg1, 31); + tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_movi_i32(t0, 0); + tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} + +static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_movi_i32(t0, 0); + tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2); + tcg_temp_free_i32(t0); +} +#else +static inline void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 0, 1); + sizemask |= tcg_gen_sizemask(1, 0, 1); + sizemask |= tcg_gen_sizemask(2, 0, 1); + + tcg_gen_helper32(tcg_helper_div_i32, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 0, 1); + sizemask |= tcg_gen_sizemask(1, 0, 1); + sizemask |= tcg_gen_sizemask(2, 0, 1); + + tcg_gen_helper32(tcg_helper_rem_i32, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 0, 0); + sizemask |= tcg_gen_sizemask(1, 0, 0); + sizemask |= tcg_gen_sizemask(2, 0, 0); + + tcg_gen_helper32(tcg_helper_divu_i32, ret, arg1, arg2, 0); +} + +static inline void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 32-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 0, 0); + sizemask |= tcg_gen_sizemask(1, 0, 0); + sizemask |= tcg_gen_sizemask(2, 0, 0); + + tcg_gen_helper32(tcg_helper_remu_i32, ret, arg1, arg2, 0); +} +#endif + +#if TCG_TARGET_REG_BITS == 32 + +static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (!TCGV_EQUAL_I64(ret, arg)) { + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + } +} + +static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) +{ + tcg_gen_movi_i32(TCGV_LOW(ret), arg); + tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32); +} + +static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), 31); +} + +static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, + tcg_target_long offset) +{ + /* since arg2 and ret have different types, they cannot be the + same temporary */ +#ifdef TCG_TARGET_WORDS_BIGENDIAN + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); +#else + tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset); + tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4); +#endif +} + +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); +} + +static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, + tcg_target_long offset) +{ +#ifdef TCG_TARGET_WORDS_BIGENDIAN + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); +#else + tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset); + tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4); +#endif +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); + tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +} + +static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); + tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +} + +static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +} + +static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2); + tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32); +} + +/* XXX: use generic code when basic block handling is OK or CPU + specific code (x86) */ +static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_shl_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0); +} + +static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_shr_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0); +} + +static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_sar_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1); +} + +static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, + TCGv_i64 arg2, int label_index) +{ + tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, + TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), + TCGV_HIGH(arg2), cond, label_index); +} + +static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret), + TCGV_LOW(arg1), TCGV_HIGH(arg1), + TCGV_LOW(arg2), TCGV_HIGH(arg2), cond); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + TCGv_i32 t1; + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i32(); + + tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0), + TCGV_LOW(arg1), TCGV_LOW(arg2)); + + tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2)); + tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); + tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2)); + tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1); + + tcg_gen_mov_i64(ret, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i32(t1); +} + +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); +} + +#else + +static inline void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (!TCGV_EQUAL_I64(ret, arg)) + tcg_gen_op2_i64(INDEX_op_mov_i64, ret, arg); +} + +static inline void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg) +{ + tcg_gen_op2i_i64(INDEX_op_movi_i64, ret, arg); +} + +static inline void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld8u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld8s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld16u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld16s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld32u_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld32s_i64, ret, arg2, offset); +} + +static inline void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_i64 arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_ld_i64, ret, arg2, offset); +} + +static inline void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st8_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st16_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_i64 arg2, + tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st32_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_i64 arg2, tcg_target_long offset) +{ + tcg_gen_ldst_op_i64(INDEX_op_st_i64, arg1, arg2, offset); +} + +static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_add_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_sub_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCGV_EQUAL_I64(arg1, arg2)) { + tcg_gen_mov_i64(ret, arg1); + } else { + tcg_gen_op3_i64(INDEX_op_and_i64, ret, arg1, arg2); + } +} + +static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_and_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCGV_EQUAL_I64(arg1, arg2)) { + tcg_gen_mov_i64(ret, arg1); + } else { + tcg_gen_op3_i64(INDEX_op_or_i64, ret, arg1, arg2); + } +} + +static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_or_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCGV_EQUAL_I64(arg1, arg2)) { + tcg_gen_movi_i64(ret, 0); + } else { + tcg_gen_op3_i64(INDEX_op_xor_i64, ret, arg1, arg2); + } +} + +static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_xor_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_shl_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_shl_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_shr_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_shr_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_sar_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_sar_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, + TCGv_i64 arg2, int label_index) +{ + tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index); +} + +static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond); +} + +static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_mul_i64, ret, arg1, arg2); +} + +#ifdef TCG_TARGET_HAS_div_i64 +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2); +} +#elif defined(TCG_TARGET_HAS_div2_i64) +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_sari_i64(t0, arg1, 63); + tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_sari_i64(t0, arg1, 63); + tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t0, 0); + tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t0, 0); + tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2); + tcg_temp_free_i64(t0); +} +#else +static inline void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_div_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and signed. */ + sizemask |= tcg_gen_sizemask(0, 1, 1); + sizemask |= tcg_gen_sizemask(1, 1, 1); + sizemask |= tcg_gen_sizemask(2, 1, 1); + + tcg_gen_helper64(tcg_helper_rem_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_divu_i64, sizemask, ret, arg1, arg2); +} + +static inline void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + int sizemask = 0; + /* Return value and both arguments are 64-bit and unsigned. */ + sizemask |= tcg_gen_sizemask(0, 1, 0); + sizemask |= tcg_gen_sizemask(1, 1, 0); + sizemask |= tcg_gen_sizemask(2, 1, 0); + + tcg_gen_helper64(tcg_helper_remu_i64, sizemask, ret, arg1, arg2); +} +#endif + +#endif + +static inline void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_add_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} + +static inline void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg1); + tcg_gen_sub_i64(ret, t0, arg2); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_sub_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); + } +} +static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, + int64_t arg2, int label_index) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_brcond_i64(cond, arg1, t0, label_index); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret, + TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_setcond_i64(cond, ret, arg1, t0); + tcg_temp_free_i64(t0); +} + +static inline void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_mul_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +} + + +/***************************************/ +/* optional operations */ + +static inline void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext8s_i32 + tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg); +#else + tcg_gen_shli_i32(ret, arg, 24); + tcg_gen_sari_i32(ret, ret, 24); +#endif +} + +static inline void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext16s_i32 + tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg); +#else + tcg_gen_shli_i32(ret, arg, 16); + tcg_gen_sari_i32(ret, ret, 16); +#endif +} + +static inline void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext8u_i32 + tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg); +#else + tcg_gen_andi_i32(ret, arg, 0xffu); +#endif +} + +static inline void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_ext16u_i32 + tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg); +#else + tcg_gen_andi_i32(ret, arg, 0xffffu); +#endif +} + +/* Note: we assume the two high bytes are set to zero */ +static inline void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_bswap16_i32 + tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg); +#else + TCGv_i32 t0 = tcg_temp_new_i32(); + + tcg_gen_ext8u_i32(t0, arg); + tcg_gen_shli_i32(t0, t0, 8); + tcg_gen_shri_i32(ret, arg, 8); + tcg_gen_or_i32(ret, ret, t0); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_bswap32_i32 + tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); +#else + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + + tcg_gen_shli_i32(t0, arg, 24); + + tcg_gen_andi_i32(t1, arg, 0x0000ff00); + tcg_gen_shli_i32(t1, t1, 8); + tcg_gen_or_i32(t0, t0, t1); + + tcg_gen_shri_i32(t1, arg, 8); + tcg_gen_andi_i32(t1, t1, 0x0000ff00); + tcg_gen_or_i32(t0, t0, t1); + + tcg_gen_shri_i32(t1, arg, 24); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif +} + +#if TCG_TARGET_REG_BITS == 32 +static inline void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); +} + +static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), arg); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +} + +static inline void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_mov_i32(TCGV_LOW(ret), arg); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +} + +/* Note: we assume the six high bytes are set to zero */ +static inline void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +} + +/* Note: we assume the four high bytes are set to zero */ +static inline void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); + tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg)); +} + +static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + + tcg_gen_bswap32_i32(t0, TCGV_LOW(arg)); + tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg)); + tcg_gen_mov_i32(TCGV_LOW(ret), t1); + tcg_gen_mov_i32(TCGV_HIGH(ret), t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +} +#else + +static inline void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext8s_i64 + tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg); +#else + tcg_gen_shli_i64(ret, arg, 56); + tcg_gen_sari_i64(ret, ret, 56); +#endif +} + +static inline void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext16s_i64 + tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg); +#else + tcg_gen_shli_i64(ret, arg, 48); + tcg_gen_sari_i64(ret, ret, 48); +#endif +} + +static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext32s_i64 + tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg); +#else + tcg_gen_shli_i64(ret, arg, 32); + tcg_gen_sari_i64(ret, ret, 32); +#endif +} + +static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext8u_i64 + tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg); +#else + tcg_gen_andi_i64(ret, arg, 0xffu); +#endif +} + +static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext16u_i64 + tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg); +#else + tcg_gen_andi_i64(ret, arg, 0xffffu); +#endif +} + +static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_ext32u_i64 + tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg); +#else + tcg_gen_andi_i64(ret, arg, 0xffffffffu); +#endif +} + +/* Note: we assume the target supports move between 32 and 64 bit + registers. This will probably break MIPS64 targets. */ +static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); +} + +/* Note: we assume the target supports move between 32 and 64 bit + registers */ +static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); +} + +/* Note: we assume the target supports move between 32 and 64 bit + registers */ +static inline void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) +{ + tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); +} + +/* Note: we assume the six high bytes are set to zero */ +static inline void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_bswap16_i64 + tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg); +#else + TCGv_i64 t0 = tcg_temp_new_i64(); + + tcg_gen_ext8u_i64(t0, arg); + tcg_gen_shli_i64(t0, t0, 8); + tcg_gen_shri_i64(ret, arg, 8); + tcg_gen_or_i64(ret, ret, t0); + tcg_temp_free_i64(t0); +#endif +} + +/* Note: we assume the four high bytes are set to zero */ +static inline void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_bswap32_i64 + tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); +#else + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t0, arg, 24); + tcg_gen_ext32u_i64(t0, t0); + + tcg_gen_andi_i64(t1, arg, 0x0000ff00); + tcg_gen_shli_i64(t1, t1, 8); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 8); + tcg_gen_andi_i64(t1, t1, 0x0000ff00); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 24); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +static inline void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_bswap64_i64 + tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg); +#else + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t0, arg, 56); + + tcg_gen_andi_i64(t1, arg, 0x0000ff00); + tcg_gen_shli_i64(t1, t1, 40); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_andi_i64(t1, arg, 0x00ff0000); + tcg_gen_shli_i64(t1, t1, 24); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_andi_i64(t1, arg, 0xff000000); + tcg_gen_shli_i64(t1, t1, 8); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 8); + tcg_gen_andi_i64(t1, t1, 0xff000000); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 24); + tcg_gen_andi_i64(t1, t1, 0x00ff0000); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 40); + tcg_gen_andi_i64(t1, t1, 0x0000ff00); + tcg_gen_or_i64(t0, t0, t1); + + tcg_gen_shri_i64(t1, arg, 56); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +#endif + +static inline void tcg_gen_neg_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_neg_i32 + tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg); +#else + TCGv_i32 t0 = tcg_const_i32(0); + tcg_gen_sub_i32(ret, t0, arg); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_neg_i64 + tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg); +#else + TCGv_i64 t0 = tcg_const_i64(0); + tcg_gen_sub_i64(ret, t0, arg); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg) +{ +#ifdef TCG_TARGET_HAS_not_i32 + tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg); +#else + tcg_gen_xori_i32(ret, arg, -1); +#endif +} + +static inline void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg) +{ +#ifdef TCG_TARGET_HAS_not_i64 + tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg); +#elif defined(TCG_TARGET_HAS_not_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg)); +#else + tcg_gen_xori_i64(ret, arg, -1); +#endif +} + +static inline void tcg_gen_discard_i32(TCGv_i32 arg) +{ + tcg_gen_op1_i32(INDEX_op_discard, arg); +} + +#if TCG_TARGET_REG_BITS == 32 +static inline void tcg_gen_discard_i64(TCGv_i64 arg) +{ + tcg_gen_discard_i32(TCGV_LOW(arg)); + tcg_gen_discard_i32(TCGV_HIGH(arg)); +} +#else +static inline void tcg_gen_discard_i64(TCGv_i64 arg) +{ + tcg_gen_op1_i64(INDEX_op_discard, arg); +} +#endif + +static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_mov_i32(TCGV_LOW(dest), low); + tcg_gen_mov_i32(TCGV_HIGH(dest), high); +#else + TCGv_i64 tmp = tcg_temp_new_i64(); + /* This extension is only needed for type correctness. + We may be able to do better given target specific information. */ + tcg_gen_extu_i32_i64(tmp, high); + tcg_gen_shli_i64(tmp, tmp, 32); + tcg_gen_extu_i32_i64(dest, low); + tcg_gen_or_i64(dest, dest, tmp); + tcg_temp_free_i64(tmp); +#endif +} + +static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, TCGv_i64 high) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_concat_i32_i64(dest, TCGV_LOW(low), TCGV_LOW(high)); +#else + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(dest, low); + tcg_gen_shli_i64(tmp, high, 32); + tcg_gen_or_i64(dest, dest, tmp); + tcg_temp_free_i64(tmp); +#endif +} + +static inline void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_andc_i32 + tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2); +#else + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_not_i32(t0, arg2); + tcg_gen_and_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_andc_i64 + tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_andc_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_not_i64(t0, arg2); + tcg_gen_and_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_eqv_i32 + tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2); +#else + tcg_gen_xor_i32(ret, arg1, arg2); + tcg_gen_not_i32(ret, ret); +#endif +} + +static inline void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_eqv_i64 + tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_eqv_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + tcg_gen_xor_i64(ret, arg1, arg2); + tcg_gen_not_i64(ret, ret); +#endif +} + +static inline void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_nand_i32 + tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2); +#else + tcg_gen_and_i32(ret, arg1, arg2); + tcg_gen_not_i32(ret, ret); +#endif +} + +static inline void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_nand_i64 + tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_nand_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + tcg_gen_and_i64(ret, arg1, arg2); + tcg_gen_not_i64(ret, ret); +#endif +} + +static inline void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_nor_i32 + tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2); +#else + tcg_gen_or_i32(ret, arg1, arg2); + tcg_gen_not_i32(ret, ret); +#endif +} + +static inline void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_nor_i64 + tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_nor_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + tcg_gen_or_i64(ret, arg1, arg2); + tcg_gen_not_i64(ret, ret); +#endif +} + +static inline void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_orc_i32 + tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2); +#else + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_not_i32(t0, arg2); + tcg_gen_or_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +#endif +} + +static inline void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_orc_i64 + tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2); +#elif defined(TCG_TARGET_HAS_orc_i32) && TCG_TARGET_REG_BITS == 32 + tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2)); + tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2)); +#else + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_not_i64(t0, arg2); + tcg_gen_or_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +#endif +} + +static inline void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i32 + tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2); +#else + TCGv_i32 t0, t1; + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_shl_i32(t0, arg1, arg2); + tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_shr_i32(t1, arg1, t1); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif +} + +static inline void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i64 + tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2); +#else + TCGv_i64 t0, t1; + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_shl_i64(t0, arg1, arg2); + tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_shr_i64(t1, arg1, t1); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +static inline void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { +#ifdef TCG_TARGET_HAS_rot_i32 + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_rotl_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +#else + TCGv_i32 t0, t1; + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_shli_i32(t0, arg1, arg2); + tcg_gen_shri_i32(t1, arg1, 32 - arg2); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif + } +} + +static inline void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { +#ifdef TCG_TARGET_HAS_rot_i64 + TCGv_i64 t0 = tcg_const_i64(arg2); + tcg_gen_rotl_i64(ret, arg1, t0); + tcg_temp_free_i64(t0); +#else + TCGv_i64 t0, t1; + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_shli_i64(t0, arg1, arg2); + tcg_gen_shri_i64(t1, arg1, 64 - arg2); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif + } +} + +static inline void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i32 + tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2); +#else + TCGv_i32 t0, t1; + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_shr_i32(t0, arg1, arg2); + tcg_gen_subfi_i32(t1, 32, arg2); + tcg_gen_shl_i32(t1, arg1, t1); + tcg_gen_or_i32(ret, t0, t1); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +#endif +} + +static inline void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ +#ifdef TCG_TARGET_HAS_rot_i64 + tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2); +#else + TCGv_i64 t0, t1; + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_shr_i64(t0, arg1, arg2); + tcg_gen_subfi_i64(t1, 64, arg2); + tcg_gen_shl_i64(t1, arg1, t1); + tcg_gen_or_i64(ret, t0, t1); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +#endif +} + +static inline void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i32(ret, arg1); + } else { + tcg_gen_rotli_i32(ret, arg1, 32 - arg2); + } +} + +static inline void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) +{ + /* some cases can be optimized here */ + if (arg2 == 0) { + tcg_gen_mov_i64(ret, arg1); + } else { + tcg_gen_rotli_i64(ret, arg1, 64 - arg2); + } +} + +/***************************************/ +/* QEMU specific operations. Their type depend on the QEMU CPU + type. */ +#ifndef TARGET_LONG_BITS +#error must include QEMU headers +#endif + +#if TARGET_LONG_BITS == 32 +#define TCGv TCGv_i32 +#define tcg_temp_new() tcg_temp_new_i32() +#define tcg_global_reg_new tcg_global_reg_new_i32 +#define tcg_global_mem_new tcg_global_mem_new_i32 +#define tcg_temp_local_new() tcg_temp_local_new_i32() +#define tcg_temp_free tcg_temp_free_i32 +#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i32 +#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i32 +#define TCGV_UNUSED(x) TCGV_UNUSED_I32(x) +#define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b) +#else +#define TCGv TCGv_i64 +#define tcg_temp_new() tcg_temp_new_i64() +#define tcg_global_reg_new tcg_global_reg_new_i64 +#define tcg_global_mem_new tcg_global_mem_new_i64 +#define tcg_temp_local_new() tcg_temp_local_new_i64() +#define tcg_temp_free tcg_temp_free_i64 +#define tcg_gen_qemu_ldst_op tcg_gen_op3i_i64 +#define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i64 +#define TCGV_UNUSED(x) TCGV_UNUSED_I64(x) +#define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b) +#endif + +/* debug info: write the PC of the corresponding QEMU CPU instruction */ +static inline void tcg_gen_debug_insn_start(uint64_t pc) +{ + /* XXX: must really use a 32 bit size for TCGArg in all cases */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + tcg_gen_op2ii(INDEX_op_debug_insn_start, + (uint32_t)(pc), (uint32_t)(pc >> 32)); +#else + tcg_gen_op1i(INDEX_op_debug_insn_start, pc); +#endif +} + +static inline void tcg_gen_exit_tb(tcg_target_long val) +{ + tcg_gen_op1i(INDEX_op_exit_tb, val); +} + +static inline void tcg_gen_goto_tb(int idx) +{ + tcg_gen_op1i(INDEX_op_goto_tb, idx); +} + +#if TCG_TARGET_REG_BITS == 32 +static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld8u, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld8u, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +#endif +} + +static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld8s, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld8s, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +#endif +} + +static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld16u, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld16u, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +#endif +} + +static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld16s, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld16s, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +#endif +} + +static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); +#endif +} + +static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_ld32, TCGV_LOW(ret), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); +#endif +} + +static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op4i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret), addr, mem_index); +#else + tcg_gen_op5i_i32(INDEX_op_qemu_ld64, TCGV_LOW(ret), TCGV_HIGH(ret), + TCGV_LOW(addr), TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_st8, arg, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_st8, TCGV_LOW(arg), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_st16, arg, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_st16, TCGV_LOW(arg), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(INDEX_op_qemu_st32, arg, addr, mem_index); +#else + tcg_gen_op4i_i32(INDEX_op_qemu_st32, TCGV_LOW(arg), TCGV_LOW(addr), + TCGV_HIGH(addr), mem_index); +#endif +} + +static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_op4i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg), addr, + mem_index); +#else + tcg_gen_op5i_i32(INDEX_op_qemu_st64, TCGV_LOW(arg), TCGV_HIGH(arg), + TCGV_LOW(addr), TCGV_HIGH(addr), mem_index); +#endif +} + +#define tcg_gen_ld_ptr tcg_gen_ld_i32 +#define tcg_gen_discard_ptr tcg_gen_discard_i32 + +#else /* TCG_TARGET_REG_BITS == 32 */ + +static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8u, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld8s(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld8s, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld16u(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16u, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld16s(TCGv ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld16s, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_ld32u(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32u, ret, addr, mem_index); +#endif +} + +static inline void tcg_gen_qemu_ld32s(TCGv ret, TCGv addr, int mem_index) +{ +#if TARGET_LONG_BITS == 32 + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32, ret, addr, mem_index); +#else + tcg_gen_qemu_ldst_op(INDEX_op_qemu_ld32s, ret, addr, mem_index); +#endif +} + +static inline void tcg_gen_qemu_ld64(TCGv_i64 ret, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_ld64, ret, addr, mem_index); +} + +static inline void tcg_gen_qemu_st8(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_st8, arg, addr, mem_index); +} + +static inline void tcg_gen_qemu_st16(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_st16, arg, addr, mem_index); +} + +static inline void tcg_gen_qemu_st32(TCGv arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op(INDEX_op_qemu_st32, arg, addr, mem_index); +} + +static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) +{ + tcg_gen_qemu_ldst_op_i64(INDEX_op_qemu_st64, arg, addr, mem_index); +} + +#define tcg_gen_ld_ptr tcg_gen_ld_i64 +#define tcg_gen_discard_ptr tcg_gen_discard_i64 + +#endif /* TCG_TARGET_REG_BITS != 32 */ + +#if TARGET_LONG_BITS == 64 +#define tcg_gen_movi_tl tcg_gen_movi_i64 +#define tcg_gen_mov_tl tcg_gen_mov_i64 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i64 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i64 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i64 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i64 +#define tcg_gen_ld32u_tl tcg_gen_ld32u_i64 +#define tcg_gen_ld32s_tl tcg_gen_ld32s_i64 +#define tcg_gen_ld_tl tcg_gen_ld_i64 +#define tcg_gen_st8_tl tcg_gen_st8_i64 +#define tcg_gen_st16_tl tcg_gen_st16_i64 +#define tcg_gen_st32_tl tcg_gen_st32_i64 +#define tcg_gen_st_tl tcg_gen_st_i64 +#define tcg_gen_add_tl tcg_gen_add_i64 +#define tcg_gen_addi_tl tcg_gen_addi_i64 +#define tcg_gen_sub_tl tcg_gen_sub_i64 +#define tcg_gen_neg_tl tcg_gen_neg_i64 +#define tcg_gen_subfi_tl tcg_gen_subfi_i64 +#define tcg_gen_subi_tl tcg_gen_subi_i64 +#define tcg_gen_and_tl tcg_gen_and_i64 +#define tcg_gen_andi_tl tcg_gen_andi_i64 +#define tcg_gen_or_tl tcg_gen_or_i64 +#define tcg_gen_ori_tl tcg_gen_ori_i64 +#define tcg_gen_xor_tl tcg_gen_xor_i64 +#define tcg_gen_xori_tl tcg_gen_xori_i64 +#define tcg_gen_not_tl tcg_gen_not_i64 +#define tcg_gen_shl_tl tcg_gen_shl_i64 +#define tcg_gen_shli_tl tcg_gen_shli_i64 +#define tcg_gen_shr_tl tcg_gen_shr_i64 +#define tcg_gen_shri_tl tcg_gen_shri_i64 +#define tcg_gen_sar_tl tcg_gen_sar_i64 +#define tcg_gen_sari_tl tcg_gen_sari_i64 +#define tcg_gen_brcond_tl tcg_gen_brcond_i64 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64 +#define tcg_gen_setcond_tl tcg_gen_setcond_i64 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64 +#define tcg_gen_mul_tl tcg_gen_mul_i64 +#define tcg_gen_muli_tl tcg_gen_muli_i64 +#define tcg_gen_div_tl tcg_gen_div_i64 +#define tcg_gen_rem_tl tcg_gen_rem_i64 +#define tcg_gen_divu_tl tcg_gen_divu_i64 +#define tcg_gen_remu_tl tcg_gen_remu_i64 +#define tcg_gen_discard_tl tcg_gen_discard_i64 +#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_mov_i64 +#define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64 +#define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64 +#define tcg_gen_extu_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_mov_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i64 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i64 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i64 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64 +#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64 +#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 +#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 +#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 +#define tcg_gen_andc_tl tcg_gen_andc_i64 +#define tcg_gen_eqv_tl tcg_gen_eqv_i64 +#define tcg_gen_nand_tl tcg_gen_nand_i64 +#define tcg_gen_nor_tl tcg_gen_nor_i64 +#define tcg_gen_orc_tl tcg_gen_orc_i64 +#define tcg_gen_rotl_tl tcg_gen_rotl_i64 +#define tcg_gen_rotli_tl tcg_gen_rotli_i64 +#define tcg_gen_rotr_tl tcg_gen_rotr_i64 +#define tcg_gen_rotri_tl tcg_gen_rotri_i64 +#define tcg_const_tl tcg_const_i64 +#define tcg_const_local_tl tcg_const_local_i64 +#else +#define tcg_gen_movi_tl tcg_gen_movi_i32 +#define tcg_gen_mov_tl tcg_gen_mov_i32 +#define tcg_gen_ld8u_tl tcg_gen_ld8u_i32 +#define tcg_gen_ld8s_tl tcg_gen_ld8s_i32 +#define tcg_gen_ld16u_tl tcg_gen_ld16u_i32 +#define tcg_gen_ld16s_tl tcg_gen_ld16s_i32 +#define tcg_gen_ld32u_tl tcg_gen_ld_i32 +#define tcg_gen_ld32s_tl tcg_gen_ld_i32 +#define tcg_gen_ld_tl tcg_gen_ld_i32 +#define tcg_gen_st8_tl tcg_gen_st8_i32 +#define tcg_gen_st16_tl tcg_gen_st16_i32 +#define tcg_gen_st32_tl tcg_gen_st_i32 +#define tcg_gen_st_tl tcg_gen_st_i32 +#define tcg_gen_add_tl tcg_gen_add_i32 +#define tcg_gen_addi_tl tcg_gen_addi_i32 +#define tcg_gen_sub_tl tcg_gen_sub_i32 +#define tcg_gen_neg_tl tcg_gen_neg_i32 +#define tcg_gen_subfi_tl tcg_gen_subfi_i32 +#define tcg_gen_subi_tl tcg_gen_subi_i32 +#define tcg_gen_and_tl tcg_gen_and_i32 +#define tcg_gen_andi_tl tcg_gen_andi_i32 +#define tcg_gen_or_tl tcg_gen_or_i32 +#define tcg_gen_ori_tl tcg_gen_ori_i32 +#define tcg_gen_xor_tl tcg_gen_xor_i32 +#define tcg_gen_xori_tl tcg_gen_xori_i32 +#define tcg_gen_not_tl tcg_gen_not_i32 +#define tcg_gen_shl_tl tcg_gen_shl_i32 +#define tcg_gen_shli_tl tcg_gen_shli_i32 +#define tcg_gen_shr_tl tcg_gen_shr_i32 +#define tcg_gen_shri_tl tcg_gen_shri_i32 +#define tcg_gen_sar_tl tcg_gen_sar_i32 +#define tcg_gen_sari_tl tcg_gen_sari_i32 +#define tcg_gen_brcond_tl tcg_gen_brcond_i32 +#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32 +#define tcg_gen_setcond_tl tcg_gen_setcond_i32 +#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32 +#define tcg_gen_mul_tl tcg_gen_mul_i32 +#define tcg_gen_muli_tl tcg_gen_muli_i32 +#define tcg_gen_div_tl tcg_gen_div_i32 +#define tcg_gen_rem_tl tcg_gen_rem_i32 +#define tcg_gen_divu_tl tcg_gen_divu_i32 +#define tcg_gen_remu_tl tcg_gen_remu_i32 +#define tcg_gen_discard_tl tcg_gen_discard_i32 +#define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32 +#define tcg_gen_extu_i32_tl tcg_gen_mov_i32 +#define tcg_gen_ext_i32_tl tcg_gen_mov_i32 +#define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64 +#define tcg_gen_ext_tl_i64 tcg_gen_ext_i32_i64 +#define tcg_gen_ext8u_tl tcg_gen_ext8u_i32 +#define tcg_gen_ext8s_tl tcg_gen_ext8s_i32 +#define tcg_gen_ext16u_tl tcg_gen_ext16u_i32 +#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32 +#define tcg_gen_ext32u_tl tcg_gen_mov_i32 +#define tcg_gen_ext32s_tl tcg_gen_mov_i32 +#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 +#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 +#define tcg_gen_andc_tl tcg_gen_andc_i32 +#define tcg_gen_eqv_tl tcg_gen_eqv_i32 +#define tcg_gen_nand_tl tcg_gen_nand_i32 +#define tcg_gen_nor_tl tcg_gen_nor_i32 +#define tcg_gen_orc_tl tcg_gen_orc_i32 +#define tcg_gen_rotl_tl tcg_gen_rotl_i32 +#define tcg_gen_rotli_tl tcg_gen_rotli_i32 +#define tcg_gen_rotr_tl tcg_gen_rotr_i32 +#define tcg_gen_rotri_tl tcg_gen_rotri_i32 +#define tcg_const_tl tcg_const_i32 +#define tcg_const_local_tl tcg_const_local_i32 +#endif + +#if TCG_TARGET_REG_BITS == 32 +#define tcg_gen_add_ptr tcg_gen_add_i32 +#define tcg_gen_addi_ptr tcg_gen_addi_i32 +#define tcg_gen_ext_i32_ptr tcg_gen_mov_i32 +#else /* TCG_TARGET_REG_BITS == 32 */ +#define tcg_gen_add_ptr tcg_gen_add_i64 +#define tcg_gen_addi_ptr tcg_gen_addi_i64 +#define tcg_gen_ext_i32_ptr tcg_gen_ext_i32_i64 +#endif /* TCG_TARGET_REG_BITS != 32 */ diff --git a/src/recompiler/tcg/tcg-opc.h b/src/recompiler/tcg/tcg-opc.h new file mode 100644 index 00000000..2a98fed9 --- /dev/null +++ b/src/recompiler/tcg/tcg-opc.h @@ -0,0 +1,304 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * DEF(name, oargs, iargs, cargs, flags) + */ + +/* predefined ops */ +DEF(end, 0, 0, 0, 0) /* must be kept first */ +DEF(nop, 0, 0, 0, 0) +DEF(nop1, 0, 0, 1, 0) +DEF(nop2, 0, 0, 2, 0) +DEF(nop3, 0, 0, 3, 0) +DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */ + +DEF(discard, 1, 0, 0, 0) + +DEF(set_label, 0, 0, 1, 0) +DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */ +DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) + +DEF(mov_i32, 1, 1, 0, 0) +DEF(movi_i32, 1, 0, 1, 0) +DEF(setcond_i32, 1, 2, 1, 0) +/* load/store */ +DEF(ld8u_i32, 1, 1, 1, 0) +DEF(ld8s_i32, 1, 1, 1, 0) +DEF(ld16u_i32, 1, 1, 1, 0) +DEF(ld16s_i32, 1, 1, 1, 0) +DEF(ld_i32, 1, 1, 1, 0) +DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +/* arith */ +DEF(add_i32, 1, 2, 0, 0) +DEF(sub_i32, 1, 2, 0, 0) +DEF(mul_i32, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_div_i32 +DEF(div_i32, 1, 2, 0, 0) +DEF(divu_i32, 1, 2, 0, 0) +DEF(rem_i32, 1, 2, 0, 0) +DEF(remu_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_div2_i32 +DEF(div2_i32, 2, 3, 0, 0) +DEF(divu2_i32, 2, 3, 0, 0) +#endif +DEF(and_i32, 1, 2, 0, 0) +DEF(or_i32, 1, 2, 0, 0) +DEF(xor_i32, 1, 2, 0, 0) +/* shifts/rotates */ +DEF(shl_i32, 1, 2, 0, 0) +DEF(shr_i32, 1, 2, 0, 0) +DEF(sar_i32, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_rot_i32 +DEF(rotl_i32, 1, 2, 0, 0) +DEF(rotr_i32, 1, 2, 0, 0) +#endif + +DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +#if TCG_TARGET_REG_BITS == 32 +DEF(add2_i32, 2, 4, 0, 0) +DEF(sub2_i32, 2, 4, 0, 0) +DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(mulu2_i32, 2, 2, 0, 0) +DEF(setcond2_i32, 1, 4, 1, 0) +#endif +#ifdef TCG_TARGET_HAS_ext8s_i32 +DEF(ext8s_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext16s_i32 +DEF(ext16s_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext8u_i32 +DEF(ext8u_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext16u_i32 +DEF(ext16u_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap16_i32 +DEF(bswap16_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap32_i32 +DEF(bswap32_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_not_i32 +DEF(not_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_neg_i32 +DEF(neg_i32, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_andc_i32 +DEF(andc_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_orc_i32 +DEF(orc_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_eqv_i32 +DEF(eqv_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nand_i32 +DEF(nand_i32, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nor_i32 +DEF(nor_i32, 1, 2, 0, 0) +#endif + +#if TCG_TARGET_REG_BITS == 64 +DEF(mov_i64, 1, 1, 0, 0) +DEF(movi_i64, 1, 0, 1, 0) +DEF(setcond_i64, 1, 2, 1, 0) +/* load/store */ +DEF(ld8u_i64, 1, 1, 1, 0) +DEF(ld8s_i64, 1, 1, 1, 0) +DEF(ld16u_i64, 1, 1, 1, 0) +DEF(ld16s_i64, 1, 1, 1, 0) +DEF(ld32u_i64, 1, 1, 1, 0) +DEF(ld32s_i64, 1, 1, 1, 0) +DEF(ld_i64, 1, 1, 1, 0) +DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +/* arith */ +DEF(add_i64, 1, 2, 0, 0) +DEF(sub_i64, 1, 2, 0, 0) +DEF(mul_i64, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_div_i64 +DEF(div_i64, 1, 2, 0, 0) +DEF(divu_i64, 1, 2, 0, 0) +DEF(rem_i64, 1, 2, 0, 0) +DEF(remu_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_div2_i64 +DEF(div2_i64, 2, 3, 0, 0) +DEF(divu2_i64, 2, 3, 0, 0) +#endif +DEF(and_i64, 1, 2, 0, 0) +DEF(or_i64, 1, 2, 0, 0) +DEF(xor_i64, 1, 2, 0, 0) +/* shifts/rotates */ +DEF(shl_i64, 1, 2, 0, 0) +DEF(shr_i64, 1, 2, 0, 0) +DEF(sar_i64, 1, 2, 0, 0) +#ifdef TCG_TARGET_HAS_rot_i64 +DEF(rotl_i64, 1, 2, 0, 0) +DEF(rotr_i64, 1, 2, 0, 0) +#endif + +DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +#ifdef TCG_TARGET_HAS_ext8s_i64 +DEF(ext8s_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext16s_i64 +DEF(ext16s_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext32s_i64 +DEF(ext32s_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext8u_i64 +DEF(ext8u_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext16u_i64 +DEF(ext16u_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_ext32u_i64 +DEF(ext32u_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap16_i64 +DEF(bswap16_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap32_i64 +DEF(bswap32_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_bswap64_i64 +DEF(bswap64_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_not_i64 +DEF(not_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_neg_i64 +DEF(neg_i64, 1, 1, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_andc_i64 +DEF(andc_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_orc_i64 +DEF(orc_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_eqv_i64 +DEF(eqv_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nand_i64 +DEF(nand_i64, 1, 2, 0, 0) +#endif +#ifdef TCG_TARGET_HAS_nor_i64 +DEF(nor_i64, 1, 2, 0, 0) +#endif +#endif + +/* QEMU specific */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +DEF(debug_insn_start, 0, 0, 2, 0) +#else +DEF(debug_insn_start, 0, 0, 1, 0) +#endif +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +/* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op + constants must be defined */ +#if TCG_TARGET_REG_BITS == 32 +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld8u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld8s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld16u, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld16s, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld32, 1, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_ld64, 2, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_ld64, 2, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif + +#if TARGET_LONG_BITS == 32 +DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st8, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st16, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st32, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif +#if TARGET_LONG_BITS == 32 +DEF(qemu_st64, 0, 3, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#else +DEF(qemu_st64, 0, 4, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +#endif + +#else /* TCG_TARGET_REG_BITS == 32 */ + +DEF(qemu_ld8u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld8s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld16s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32u, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld32s, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_ld64, 1, 1, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) + +DEF(qemu_st8, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st16, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st32, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) +DEF(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS) + +#endif /* TCG_TARGET_REG_BITS != 32 */ + +#undef DEF diff --git a/src/recompiler/tcg/tcg-runtime.h b/src/recompiler/tcg/tcg-runtime.h new file mode 100644 index 00000000..5615b133 --- /dev/null +++ b/src/recompiler/tcg/tcg-runtime.h @@ -0,0 +1,18 @@ +#ifndef TCG_RUNTIME_H +#define TCG_RUNTIME_H + +/* tcg-runtime.c */ +int32_t tcg_helper_div_i32(int32_t arg1, int32_t arg2); +int32_t tcg_helper_rem_i32(int32_t arg1, int32_t arg2); +uint32_t tcg_helper_divu_i32(uint32_t arg1, uint32_t arg2); +uint32_t tcg_helper_remu_i32(uint32_t arg1, uint32_t arg2); + +int64_t tcg_helper_shl_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_shr_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_sar_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_div_i64(int64_t arg1, int64_t arg2); +int64_t tcg_helper_rem_i64(int64_t arg1, int64_t arg2); +uint64_t tcg_helper_divu_i64(uint64_t arg1, uint64_t arg2); +uint64_t tcg_helper_remu_i64(uint64_t arg1, uint64_t arg2); + +#endif diff --git a/src/recompiler/tcg/tcg.c b/src/recompiler/tcg/tcg.c new file mode 100644 index 00000000..4e636099 --- /dev/null +++ b/src/recompiler/tcg/tcg.c @@ -0,0 +1,2212 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* define it to use liveness analysis (better code) */ +#define USE_LIVENESS_ANALYSIS + +#include "config.h" + +#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG) +/* define it to suppress various consistency checks (faster) */ +#define NDEBUG +#endif + +#ifndef VBOX +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#ifdef _WIN32 +#include <malloc.h> +#endif +#ifdef _AIX +#include <alloca.h> +#endif +#else /* VBOX */ +# include <stdio.h> +# include "osdep.h" +#endif /* VBOX */ + +#include "qemu-common.h" +#include "cache-utils.h" +#include "host-utils.h" +#include "qemu-timer.h" + +/* Note: the long term plan is to reduce the dependancies on the QEMU + CPU definitions. Currently they are used for qemu_ld/st + instructions */ +#define NO_CPU_IO_DEFS +#include "cpu.h" +#include "exec-all.h" + +#include "tcg-op.h" +#include "elf.h" + +#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE) +#error GUEST_BASE not supported on this host. +#endif + +#ifdef VBOX +/* + * Liveness analysis doesn't work well with 32-bit hosts and 64-bit targets, + * second element of the register pair to store 64-bit value is considered + * dead, it seems. */ + /** @todo re-test this */ +# if defined(TARGET_X86_64) && (TCG_TARGET_REG_BITS == 32) +# undef USE_LIVENESS_ANALYSIS +# endif +#endif /* VBOX */ + +static void tcg_target_init(TCGContext *s); +static void tcg_target_qemu_prologue(TCGContext *s); +static void patch_reloc(uint8_t *code_ptr, int type, + tcg_target_long value, tcg_target_long addend); + +static TCGOpDef tcg_op_defs[] = { +#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, +#include "tcg-opc.h" +#undef DEF +}; + +static TCGRegSet tcg_target_available_regs[2]; +static TCGRegSet tcg_target_call_clobber_regs; + +/* XXX: move that inside the context */ +uint16_t *gen_opc_ptr; +TCGArg *gen_opparam_ptr; + +static inline void tcg_out8(TCGContext *s, uint8_t v) +{ + *s->code_ptr++ = v; +} + +static inline void tcg_out16(TCGContext *s, uint16_t v) +{ + *(uint16_t *)s->code_ptr = v; + s->code_ptr += 2; +} + +static inline void tcg_out32(TCGContext *s, uint32_t v) +{ + *(uint32_t *)s->code_ptr = v; + s->code_ptr += 4; +} + +/* label relocation processing */ + +static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type, + int label_index, tcg_target_long addend) +{ + TCGLabel *l; + TCGRelocation *r; + + l = &s->labels[label_index]; + if (l->has_value) { + /* FIXME: This may break relocations on RISC targets that + modify instruction fields in place. The caller may not have + written the initial value. */ + patch_reloc(code_ptr, type, l->u.value, addend); + } else { + /* add a new relocation entry */ + r = tcg_malloc(sizeof(TCGRelocation)); + r->type = type; + r->ptr = code_ptr; + r->addend = addend; + r->next = l->u.first_reloc; + l->u.first_reloc = r; + } +} + +static void tcg_out_label(TCGContext *s, int label_index, + tcg_target_long value) +{ + TCGLabel *l; + TCGRelocation *r; + + l = &s->labels[label_index]; + if (l->has_value) + tcg_abort(); + r = l->u.first_reloc; + while (r != NULL) { + patch_reloc(r->ptr, r->type, value, r->addend); + r = r->next; + } + l->has_value = 1; + l->u.value = value; +} + +int gen_new_label(void) +{ + TCGContext *s = &tcg_ctx; + int idx; + TCGLabel *l; + + if (s->nb_labels >= TCG_MAX_LABELS) + tcg_abort(); + idx = s->nb_labels++; + l = &s->labels[idx]; + l->has_value = 0; + l->u.first_reloc = NULL; + return idx; +} + +#include "tcg-target.c" + +/* pool based memory allocation */ +void *tcg_malloc_internal(TCGContext *s, int size) +{ + TCGPool *p; + int pool_size; + + if (size > TCG_POOL_CHUNK_SIZE) { + /* big malloc: insert a new pool (XXX: could optimize) */ + p = qemu_malloc(sizeof(TCGPool) + size); + p->size = size; + if (s->pool_current) + s->pool_current->next = p; + else + s->pool_first = p; + p->next = s->pool_current; + } else { + p = s->pool_current; + if (!p) { + p = s->pool_first; + if (!p) + goto new_pool; + } else { + if (!p->next) { + new_pool: + pool_size = TCG_POOL_CHUNK_SIZE; + p = qemu_malloc(sizeof(TCGPool) + pool_size); + p->size = pool_size; + p->next = NULL; + if (s->pool_current) + s->pool_current->next = p; + else + s->pool_first = p; + } else { + p = p->next; + } + } + } + s->pool_current = p; + s->pool_cur = p->data + size; + s->pool_end = p->data + p->size; + return p->data; +} + +void tcg_pool_reset(TCGContext *s) +{ + s->pool_cur = s->pool_end = NULL; + s->pool_current = NULL; +} + +void tcg_context_init(TCGContext *s) +{ + int op, total_args, n; + TCGOpDef *def; + TCGArgConstraint *args_ct; + int *sorted_args; + + memset(s, 0, sizeof(*s)); + s->temps = s->static_temps; + s->nb_globals = 0; + + /* Count total number of arguments and allocate the corresponding + space */ + total_args = 0; + for(op = 0; op < NB_OPS; op++) { + def = &tcg_op_defs[op]; + n = def->nb_iargs + def->nb_oargs; + total_args += n; + } + + args_ct = qemu_malloc(sizeof(TCGArgConstraint) * total_args); + sorted_args = qemu_malloc(sizeof(int) * total_args); + + for(op = 0; op < NB_OPS; op++) { + def = &tcg_op_defs[op]; + def->args_ct = args_ct; + def->sorted_args = sorted_args; + n = def->nb_iargs + def->nb_oargs; + sorted_args += n; + args_ct += n; + } + + tcg_target_init(s); +} + +void tcg_prologue_init(TCGContext *s) +{ + /* init global prologue and epilogue */ + s->code_buf = code_gen_prologue; + s->code_ptr = s->code_buf; + tcg_target_qemu_prologue(s); + flush_icache_range((uintptr_t)s->code_buf, + (uintptr_t)s->code_ptr); +} + +void tcg_set_frame(TCGContext *s, int reg, + tcg_target_long start, tcg_target_long size) +{ + s->frame_start = start; + s->frame_end = start + size; + s->frame_reg = reg; +} + +void tcg_func_start(TCGContext *s) +{ + int i; + tcg_pool_reset(s); + s->nb_temps = s->nb_globals; + for(i = 0; i < (TCG_TYPE_COUNT * 2); i++) + s->first_free_temp[i] = -1; + s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); + s->nb_labels = 0; + s->current_frame_offset = s->frame_start; + + gen_opc_ptr = gen_opc_buf; + gen_opparam_ptr = gen_opparam_buf; +} + +static inline void tcg_temp_alloc(TCGContext *s, int n) +{ + if (n > TCG_MAX_TEMPS) + tcg_abort(); +} + +static inline int tcg_global_reg_new_internal(TCGType type, int reg, + const char *name) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int idx; + +#if TCG_TARGET_REG_BITS == 32 + if (type != TCG_TYPE_I32) + tcg_abort(); +#endif + if (tcg_regset_test_reg(s->reserved_regs, reg)) + tcg_abort(); + idx = s->nb_globals; + tcg_temp_alloc(s, s->nb_globals + 1); + ts = &s->temps[s->nb_globals]; + ts->base_type = type; + ts->type = type; + ts->fixed_reg = 1; + ts->reg = reg; + ts->name = name; + s->nb_globals++; + tcg_regset_set_reg(s->reserved_regs, reg); + return idx; +} + +TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name) +{ + int idx; + + idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name); + return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name) +{ + int idx; + + idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name); + return MAKE_TCGV_I64(idx); +} + +static inline int tcg_global_mem_new_internal(TCGType type, int reg, + tcg_target_long offset, + const char *name) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int idx; + + idx = s->nb_globals; +#if TCG_TARGET_REG_BITS == 32 + if (type == TCG_TYPE_I64) { + char buf[64]; + tcg_temp_alloc(s, s->nb_globals + 2); + ts = &s->temps[s->nb_globals]; + ts->base_type = type; + ts->type = TCG_TYPE_I32; + ts->fixed_reg = 0; + ts->mem_allocated = 1; + ts->mem_reg = reg; +#ifdef TCG_TARGET_WORDS_BIGENDIAN + ts->mem_offset = offset + 4; +#else + ts->mem_offset = offset; +#endif + pstrcpy(buf, sizeof(buf), name); + pstrcat(buf, sizeof(buf), "_0"); + ts->name = strdup(buf); + ts++; + + ts->base_type = type; + ts->type = TCG_TYPE_I32; + ts->fixed_reg = 0; + ts->mem_allocated = 1; + ts->mem_reg = reg; +#ifdef TCG_TARGET_WORDS_BIGENDIAN + ts->mem_offset = offset; +#else + ts->mem_offset = offset + 4; +#endif + pstrcpy(buf, sizeof(buf), name); + pstrcat(buf, sizeof(buf), "_1"); + ts->name = strdup(buf); + + s->nb_globals += 2; + } else +#endif + { + tcg_temp_alloc(s, s->nb_globals + 1); + ts = &s->temps[s->nb_globals]; + ts->base_type = type; + ts->type = type; + ts->fixed_reg = 0; + ts->mem_allocated = 1; + ts->mem_reg = reg; + ts->mem_offset = offset; + ts->name = name; + s->nb_globals++; + } + return idx; +} + +TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset, + const char *name) +{ + int idx; + + idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name); + return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset, + const char *name) +{ + int idx; + + idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name); + return MAKE_TCGV_I64(idx); +} + +static inline int tcg_temp_new_internal(TCGType type, int temp_local) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int idx, k; + + k = type; + if (temp_local) + k += TCG_TYPE_COUNT; + idx = s->first_free_temp[k]; + if (idx != -1) { + /* There is already an available temp with the + right type */ + ts = &s->temps[idx]; + s->first_free_temp[k] = ts->next_free_temp; + ts->temp_allocated = 1; + assert(ts->temp_local == temp_local); + } else { + idx = s->nb_temps; +#if TCG_TARGET_REG_BITS == 32 + if (type == TCG_TYPE_I64) { + tcg_temp_alloc(s, s->nb_temps + 2); + ts = &s->temps[s->nb_temps]; + ts->base_type = type; + ts->type = TCG_TYPE_I32; + ts->temp_allocated = 1; + ts->temp_local = temp_local; + ts->name = NULL; + ts++; + ts->base_type = TCG_TYPE_I32; + ts->type = TCG_TYPE_I32; + ts->temp_allocated = 1; + ts->temp_local = temp_local; + ts->name = NULL; + s->nb_temps += 2; + } else +#endif + { + tcg_temp_alloc(s, s->nb_temps + 1); + ts = &s->temps[s->nb_temps]; + ts->base_type = type; + ts->type = type; + ts->temp_allocated = 1; + ts->temp_local = temp_local; + ts->name = NULL; + s->nb_temps++; + } + } + return idx; +} + +TCGv_i32 tcg_temp_new_internal_i32(int temp_local) +{ + int idx; + + idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local); + return MAKE_TCGV_I32(idx); +} + +TCGv_i64 tcg_temp_new_internal_i64(int temp_local) +{ + int idx; + + idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local); + return MAKE_TCGV_I64(idx); +} + +static inline void tcg_temp_free_internal(int idx) +{ + TCGContext *s = &tcg_ctx; + TCGTemp *ts; + int k; + + assert(idx >= s->nb_globals && idx < s->nb_temps); + ts = &s->temps[idx]; + assert(ts->temp_allocated != 0); + ts->temp_allocated = 0; + k = ts->base_type; + if (ts->temp_local) + k += TCG_TYPE_COUNT; + ts->next_free_temp = s->first_free_temp[k]; + s->first_free_temp[k] = idx; +} + +void tcg_temp_free_i32(TCGv_i32 arg) +{ + tcg_temp_free_internal(GET_TCGV_I32(arg)); +} + +void tcg_temp_free_i64(TCGv_i64 arg) +{ + tcg_temp_free_internal(GET_TCGV_I64(arg)); +} + +TCGv_i32 tcg_const_i32(int32_t val) +{ + TCGv_i32 t0; + t0 = tcg_temp_new_i32(); + tcg_gen_movi_i32(t0, val); + return t0; +} + +TCGv_i64 tcg_const_i64(int64_t val) +{ + TCGv_i64 t0; + t0 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t0, val); + return t0; +} + +TCGv_i32 tcg_const_local_i32(int32_t val) +{ + TCGv_i32 t0; + t0 = tcg_temp_local_new_i32(); + tcg_gen_movi_i32(t0, val); + return t0; +} + +TCGv_i64 tcg_const_local_i64(int64_t val) +{ + TCGv_i64 t0; + t0 = tcg_temp_local_new_i64(); + tcg_gen_movi_i64(t0, val); + return t0; +} + +void tcg_register_helper(void *func, const char *name) +{ + TCGContext *s = &tcg_ctx; + int n; + if ((s->nb_helpers + 1) > s->allocated_helpers) { + n = s->allocated_helpers; + if (n == 0) { + n = 4; + } else { + n *= 2; + } +#ifdef VBOX + s->helpers = qemu_realloc(s->helpers, n * sizeof(TCGHelperInfo)); +#else + s->helpers = realloc(s->helpers, n * sizeof(TCGHelperInfo)); +#endif + s->allocated_helpers = n; + } + s->helpers[s->nb_helpers].func = (tcg_target_ulong)func; + s->helpers[s->nb_helpers].name = name; + s->nb_helpers++; +} + +/* Note: we convert the 64 bit args to 32 bit and do some alignment + and endian swap. Maybe it would be better to do the alignment + and endian swap in tcg_reg_alloc_call(). */ +void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, + int sizemask, TCGArg ret, int nargs, TCGArg *args) +{ +#ifdef TCG_TARGET_I386 + int call_type; +#endif + int i; + int real_args; + int nb_rets; + TCGArg *nparam; + +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 + for (i = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + int is_signed = sizemask & (2 << (i+1)*2); + if (!is_64bit) { + TCGv_i64 temp = tcg_temp_new_i64(); + TCGv_i64 orig = MAKE_TCGV_I64(args[i]); + if (is_signed) { + tcg_gen_ext32s_i64(temp, orig); + } else { + tcg_gen_ext32u_i64(temp, orig); + } + args[i] = GET_TCGV_I64(temp); + } + } +#endif /* TCG_TARGET_EXTEND_ARGS */ + + *gen_opc_ptr++ = INDEX_op_call; + nparam = gen_opparam_ptr++; +#ifdef TCG_TARGET_I386 + call_type = (flags & TCG_CALL_TYPE_MASK); +#endif + if (ret != TCG_CALL_DUMMY_ARG) { +#if TCG_TARGET_REG_BITS < 64 + if (sizemask & 1) { +#ifdef TCG_TARGET_WORDS_BIGENDIAN + *gen_opparam_ptr++ = ret + 1; + *gen_opparam_ptr++ = ret; +#else + *gen_opparam_ptr++ = ret; + *gen_opparam_ptr++ = ret + 1; +#endif + nb_rets = 2; + } else +#endif + { + *gen_opparam_ptr++ = ret; + nb_rets = 1; + } + } else { + nb_rets = 0; + } + real_args = 0; + for (i = 0; i < nargs; i++) { +#if TCG_TARGET_REG_BITS < 64 + int is_64bit = sizemask & (1 << (i+1)*2); + if (is_64bit) { +#ifdef TCG_TARGET_I386 + /* REGPARM case: if the third parameter is 64 bit, it is + allocated on the stack */ + if (i == 2 && call_type == TCG_CALL_TYPE_REGPARM) { + call_type = TCG_CALL_TYPE_REGPARM_2; + flags = (flags & ~TCG_CALL_TYPE_MASK) | call_type; + } +#endif +#ifdef TCG_TARGET_CALL_ALIGN_ARGS + /* some targets want aligned 64 bit args */ + if (real_args & 1) { + *gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG; + real_args++; + } +#endif + /* If stack grows up, then we will be placing successive + arguments at lower addresses, which means we need to + reverse the order compared to how we would normally + treat either big or little-endian. For those arguments + that will wind up in registers, this still works for + HPPA (the only current STACK_GROWSUP target) since the + argument registers are *also* allocated in decreasing + order. If another such target is added, this logic may + have to get more complicated to differentiate between + stack arguments and register arguments. */ +#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) + *gen_opparam_ptr++ = args[i] + 1; + *gen_opparam_ptr++ = args[i]; +#else + *gen_opparam_ptr++ = args[i]; + *gen_opparam_ptr++ = args[i] + 1; +#endif + real_args += 2; + continue; + } +#endif /* TCG_TARGET_REG_BITS < 64 */ + + *gen_opparam_ptr++ = args[i]; + real_args++; + } + *gen_opparam_ptr++ = GET_TCGV_PTR(func); + + *gen_opparam_ptr++ = flags; + + *nparam = (nb_rets << 16) | (real_args + 1); + + /* total parameters, needed to go backward in the instruction stream */ + *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3; + +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 + for (i = 0; i < nargs; ++i) { + int is_64bit = sizemask & (1 << (i+1)*2); + if (!is_64bit) { + TCGv_i64 temp = MAKE_TCGV_I64(args[i]); + tcg_temp_free_i64(temp); + } + } +#endif /* TCG_TARGET_EXTEND_ARGS */ +} + +#if TCG_TARGET_REG_BITS == 32 +void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, + int c, int right, int arith) +{ + if (c == 0) { + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); + tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); + } else if (c >= 32) { + c -= 32; + if (right) { + if (arith) { + tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31); + } else { + tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } + } else { + tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c); + tcg_gen_movi_i32(TCGV_LOW(ret), 0); + } + } else { + TCGv_i32 t0, t1; + + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + if (right) { + tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c); + if (arith) + tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c); + else + tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c); + tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c); + tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0); + tcg_gen_mov_i32(TCGV_HIGH(ret), t1); + } else { + tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c); + /* Note: ret can be the same as arg1, so we use t1 */ + tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c); + tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c); + tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0); + tcg_gen_mov_i32(TCGV_LOW(ret), t1); + } + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + } +} +#endif + + +static void tcg_reg_alloc_start(TCGContext *s) +{ + int i; + TCGTemp *ts; + for(i = 0; i < s->nb_globals; i++) { + ts = &s->temps[i]; + if (ts->fixed_reg) { + ts->val_type = TEMP_VAL_REG; + } else { + ts->val_type = TEMP_VAL_MEM; + } + } + for(i = s->nb_globals; i < s->nb_temps; i++) { + ts = &s->temps[i]; + ts->val_type = TEMP_VAL_DEAD; + ts->mem_allocated = 0; + ts->fixed_reg = 0; + } + for(i = 0; i < TCG_TARGET_NB_REGS; i++) { + s->reg_to_temp[i] = -1; + } +} + +static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size, + int idx) +{ + TCGTemp *ts; + + ts = &s->temps[idx]; + if (idx < s->nb_globals) { + pstrcpy(buf, buf_size, ts->name); + } else { + if (ts->temp_local) + snprintf(buf, buf_size, "loc%d", idx - s->nb_globals); + else + snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals); + } + return buf; +} + +char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg) +{ + return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg)); +} + +char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg) +{ + return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg)); +} + +static int helper_cmp(const void *p1, const void *p2) +{ + const TCGHelperInfo *th1 = p1; + const TCGHelperInfo *th2 = p2; + if (th1->func < th2->func) + return -1; + else if (th1->func == th2->func) + return 0; + else + return 1; +} + +/* find helper definition (Note: A hash table would be better) */ +static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val) +{ + int m, m_min, m_max; + TCGHelperInfo *th; + tcg_target_ulong v; + + if (unlikely(!s->helpers_sorted)) { +#ifdef VBOX + qemu_qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo), + helper_cmp); +#else + qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo), + helper_cmp); +#endif + s->helpers_sorted = 1; + } + + /* binary search */ + m_min = 0; + m_max = s->nb_helpers - 1; + while (m_min <= m_max) { + m = (m_min + m_max) >> 1; + th = &s->helpers[m]; + v = th->func; + if (v == val) + return th; + else if (val < v) { + m_max = m - 1; + } else { + m_min = m + 1; + } + } + return NULL; +} + +static const char * const cond_name[] = +{ + [TCG_COND_EQ] = "eq", + [TCG_COND_NE] = "ne", + [TCG_COND_LT] = "lt", + [TCG_COND_GE] = "ge", + [TCG_COND_LE] = "le", + [TCG_COND_GT] = "gt", + [TCG_COND_LTU] = "ltu", + [TCG_COND_GEU] = "geu", + [TCG_COND_LEU] = "leu", + [TCG_COND_GTU] = "gtu" +}; + +void tcg_dump_ops(TCGContext *s, FILE *outfile) +{ + const uint16_t *opc_ptr; + const TCGArg *args; + TCGArg arg; + TCGOpcode c; + int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn; + const TCGOpDef *def; + char buf[128]; + + first_insn = 1; + opc_ptr = gen_opc_buf; + args = gen_opparam_buf; + while (opc_ptr < gen_opc_ptr) { + c = *opc_ptr++; + def = &tcg_op_defs[c]; + if (c == INDEX_op_debug_insn_start) { + uint64_t pc; +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + pc = ((uint64_t)args[1] << 32) | args[0]; +#else + pc = args[0]; +#endif + if (!first_insn) + fprintf(outfile, "\n"); + fprintf(outfile, " ---- 0x%" PRIx64, pc); + first_insn = 0; + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + } else if (c == INDEX_op_call) { + TCGArg arg; + + /* variable number of arguments */ + arg = *args++; + nb_oargs = arg >> 16; + nb_iargs = arg & 0xffff; + nb_cargs = def->nb_cargs; + + fprintf(outfile, " %s ", def->name); + + /* function name */ + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + nb_iargs - 1])); + /* flags */ + fprintf(outfile, ",$0x%" TCG_PRIlx, + args[nb_oargs + nb_iargs]); + /* nb out args */ + fprintf(outfile, ",$%d", nb_oargs); + for(i = 0; i < nb_oargs; i++) { + fprintf(outfile, ","); + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[i])); + } + for(i = 0; i < (nb_iargs - 1); i++) { + fprintf(outfile, ","); + if (args[nb_oargs + i] == TCG_CALL_DUMMY_ARG) { + fprintf(outfile, "<dummy>"); + } else { + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + i])); + } + } + } else if (c == INDEX_op_movi_i32 +#if TCG_TARGET_REG_BITS == 64 + || c == INDEX_op_movi_i64 +#endif + ) { + tcg_target_ulong val; + TCGHelperInfo *th; + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + fprintf(outfile, " %s %s,$", def->name, + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0])); + val = args[1]; + th = tcg_find_helper(s, val); + if (th) { + fprintf(outfile, "%s", th->name); + } else { + if (c == INDEX_op_movi_i32) + fprintf(outfile, "0x%x", (uint32_t)val); + else + fprintf(outfile, "0x%" PRIx64 , (uint64_t)val); + } + } else { + fprintf(outfile, " %s ", def->name); + if (c == INDEX_op_nopn) { + /* variable number of arguments */ + nb_cargs = *args; + nb_oargs = 0; + nb_iargs = 0; + } else { + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + } + + k = 0; + for(i = 0; i < nb_oargs; i++) { + if (k != 0) + fprintf(outfile, ","); + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++])); + } + for(i = 0; i < nb_iargs; i++) { + if (k != 0) + fprintf(outfile, ","); + fprintf(outfile, "%s", + tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++])); + } + switch (c) { + case INDEX_op_brcond_i32: +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_brcond2_i32: +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_brcond_i64: +#endif + case INDEX_op_setcond_i32: +#if TCG_TARGET_REG_BITS == 32 + case INDEX_op_setcond2_i32: +#elif TCG_TARGET_REG_BITS == 64 + case INDEX_op_setcond_i64: +#endif + if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) + fprintf(outfile, ",%s", cond_name[args[k++]]); + else + fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]); + i = 1; + break; + default: + i = 0; + break; + } + for(; i < nb_cargs; i++) { + if (k != 0) + fprintf(outfile, ","); + arg = args[k++]; + fprintf(outfile, "$0x%" TCG_PRIlx, arg); + } + } + fprintf(outfile, "\n"); + args += nb_iargs + nb_oargs + nb_cargs; + } +} + +/* we give more priority to constraints with less registers */ +static int get_constraint_priority(const TCGOpDef *def, int k) +{ + const TCGArgConstraint *arg_ct; + + int i, n; + arg_ct = &def->args_ct[k]; + if (arg_ct->ct & TCG_CT_ALIAS) { + /* an alias is equivalent to a single register */ + n = 1; + } else { + if (!(arg_ct->ct & TCG_CT_REG)) + return 0; + n = 0; + for(i = 0; i < TCG_TARGET_NB_REGS; i++) { + if (tcg_regset_test_reg(arg_ct->u.regs, i)) + n++; + } + } + return TCG_TARGET_NB_REGS - n + 1; +} + +/* sort from highest priority to lowest */ +static void sort_constraints(TCGOpDef *def, int start, int n) +{ + int i, j, p1, p2, tmp; + + for(i = 0; i < n; i++) + def->sorted_args[start + i] = start + i; + if (n <= 1) + return; + for(i = 0; i < n - 1; i++) { + for(j = i + 1; j < n; j++) { + p1 = get_constraint_priority(def, def->sorted_args[start + i]); + p2 = get_constraint_priority(def, def->sorted_args[start + j]); + if (p1 < p2) { + tmp = def->sorted_args[start + i]; + def->sorted_args[start + i] = def->sorted_args[start + j]; + def->sorted_args[start + j] = tmp; + } + } + } +} + +void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) +{ + TCGOpcode op; + TCGOpDef *def; + const char *ct_str; + int i, nb_args; + + for(;;) { + if (tdefs->op == (TCGOpcode)-1) + break; + op = tdefs->op; + assert((unsigned)op < (unsigned)NB_OPS); + def = &tcg_op_defs[op]; +#if defined(CONFIG_DEBUG_TCG) + /* Duplicate entry in op definitions? */ + assert(!def->used); + def->used = 1; +#endif + nb_args = def->nb_iargs + def->nb_oargs; + for(i = 0; i < nb_args; i++) { + ct_str = tdefs->args_ct_str[i]; + /* Incomplete TCGTargetOpDef entry? */ + assert(ct_str != NULL); + tcg_regset_clear(def->args_ct[i].u.regs); + def->args_ct[i].ct = 0; + if (ct_str[0] >= '0' && ct_str[0] <= '9') { + int oarg; + oarg = ct_str[0] - '0'; + assert(oarg < def->nb_oargs); + assert(def->args_ct[oarg].ct & TCG_CT_REG); + /* TCG_CT_ALIAS is for the output arguments. The input + argument is tagged with TCG_CT_IALIAS. */ + def->args_ct[i] = def->args_ct[oarg]; + def->args_ct[oarg].ct = TCG_CT_ALIAS; + def->args_ct[oarg].alias_index = i; + def->args_ct[i].ct |= TCG_CT_IALIAS; + def->args_ct[i].alias_index = oarg; + } else { + for(;;) { + if (*ct_str == '\0') + break; + switch(*ct_str) { + case 'i': + def->args_ct[i].ct |= TCG_CT_CONST; + ct_str++; + break; + default: + if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) { + fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n", + ct_str, i, def->name); +#ifndef VBOX + exit(1); +#else + tcg_exit(1); +#endif + } + } + } + } + } + + /* TCGTargetOpDef entry with too much information? */ + assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL); + + /* sort the constraints (XXX: this is just an heuristic) */ + sort_constraints(def, 0, def->nb_oargs); + sort_constraints(def, def->nb_oargs, def->nb_iargs); + +#if 0 + { + int i; + + printf("%s: sorted=", def->name); + for(i = 0; i < def->nb_oargs + def->nb_iargs; i++) + printf(" %d", def->sorted_args[i]); + printf("\n"); + } +#endif + tdefs++; + } + +#if defined(CONFIG_DEBUG_TCG) + i = 0; + for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) { + if (op < INDEX_op_call || op == INDEX_op_debug_insn_start) { + /* Wrong entry in op definitions? */ + if (tcg_op_defs[op].used) { + fprintf(stderr, "Invalid op definition for %s\n", + tcg_op_defs[op].name); + i = 1; + } + } else { + /* Missing entry in op definitions? */ + if (!tcg_op_defs[op].used) { + fprintf(stderr, "Missing op definition for %s\n", + tcg_op_defs[op].name); + i = 1; + } + } + } + if (i == 1) { + tcg_abort(); + } +#endif +} + +#ifdef USE_LIVENESS_ANALYSIS + +/* set a nop for an operation using 'nb_args' */ +static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr, + TCGArg *args, int nb_args) +{ + if (nb_args == 0) { + *opc_ptr = INDEX_op_nop; + } else { + *opc_ptr = INDEX_op_nopn; + args[0] = nb_args; + args[nb_args - 1] = nb_args; + } +} + +/* liveness analysis: end of function: globals are live, temps are + dead. */ +/* XXX: at this stage, not used as there would be little gains because + most TBs end with a conditional jump. */ +static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps) +{ + memset(dead_temps, 0, s->nb_globals); + memset(dead_temps + s->nb_globals, 1, s->nb_temps - s->nb_globals); +} + +/* liveness analysis: end of basic block: globals are live, temps are + dead, local temps are live. */ +static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps) +{ + int i; + TCGTemp *ts; + + memset(dead_temps, 0, s->nb_globals); + ts = &s->temps[s->nb_globals]; + for(i = s->nb_globals; i < s->nb_temps; i++) { + if (ts->temp_local) + dead_temps[i] = 0; + else + dead_temps[i] = 1; + ts++; + } +} + +/* Liveness analysis : update the opc_dead_iargs array to tell if a + given input arguments is dead. Instructions updating dead + temporaries are removed. */ +static void tcg_liveness_analysis(TCGContext *s) +{ + int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops; + TCGOpcode op; + TCGArg *args; + const TCGOpDef *def; + uint8_t *dead_temps; + unsigned int dead_iargs; + + gen_opc_ptr++; /* skip end */ + + nb_ops = gen_opc_ptr - gen_opc_buf; + + s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t)); + + dead_temps = tcg_malloc(s->nb_temps); + memset(dead_temps, 1, s->nb_temps); + + args = gen_opparam_ptr; + op_index = nb_ops - 1; + while (op_index >= 0) { + op = gen_opc_buf[op_index]; + def = &tcg_op_defs[op]; + switch(op) { + case INDEX_op_call: + { + int call_flags; + + nb_args = args[-1]; + args -= nb_args; + nb_iargs = args[0] & 0xffff; + nb_oargs = args[0] >> 16; + args++; + call_flags = args[nb_oargs + nb_iargs]; + + /* pure functions can be removed if their result is not + used */ + if (call_flags & TCG_CALL_PURE) { + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + if (!dead_temps[arg]) + goto do_not_remove_call; + } + tcg_set_nop(s, gen_opc_buf + op_index, + args - 1, nb_args); + } else { + do_not_remove_call: + + /* output args are dead */ + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + dead_temps[arg] = 1; + } + + if (!(call_flags & TCG_CALL_CONST)) { + /* globals are live (they may be used by the call) */ + memset(dead_temps, 0, s->nb_globals); + } + + /* input args are live */ + dead_iargs = 0; + for(i = 0; i < nb_iargs; i++) { + arg = args[i + nb_oargs]; + if (arg != TCG_CALL_DUMMY_ARG) { + if (dead_temps[arg]) { + dead_iargs |= (1 << i); + } + dead_temps[arg] = 0; + } + } + s->op_dead_iargs[op_index] = dead_iargs; + } + args--; + } + break; + case INDEX_op_set_label: + args--; + /* mark end of basic block */ + tcg_la_bb_end(s, dead_temps); + break; + case INDEX_op_debug_insn_start: + args -= def->nb_args; + break; + case INDEX_op_nopn: + nb_args = args[-1]; + args -= nb_args; + break; + case INDEX_op_discard: + args--; + /* mark the temporary as dead */ + dead_temps[args[0]] = 1; + break; + case INDEX_op_end: + break; + /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ + default: + args -= def->nb_args; + nb_iargs = def->nb_iargs; + nb_oargs = def->nb_oargs; + + /* Test if the operation can be removed because all + its outputs are dead. We assume that nb_oargs == 0 + implies side effects */ + if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + if (!dead_temps[arg]) + goto do_not_remove; + } + tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args); +#ifdef CONFIG_PROFILER + s->del_op_count++; +#endif + } else { + do_not_remove: + + /* output args are dead */ + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + dead_temps[arg] = 1; + } + + /* if end of basic block, update */ + if (def->flags & TCG_OPF_BB_END) { + tcg_la_bb_end(s, dead_temps); + } else if (def->flags & TCG_OPF_CALL_CLOBBER) { + /* globals are live */ + memset(dead_temps, 0, s->nb_globals); + } + + /* input args are live */ + dead_iargs = 0; + for(i = 0; i < nb_iargs; i++) { + arg = args[i + nb_oargs]; + if (dead_temps[arg]) { + dead_iargs |= (1 << i); + } + dead_temps[arg] = 0; + } + s->op_dead_iargs[op_index] = dead_iargs; + } + break; + } + op_index--; + } + + if (args != gen_opparam_buf) + tcg_abort(); +} +#else +/* dummy liveness analysis */ +static void tcg_liveness_analysis(TCGContext *s) +{ + int nb_ops; + nb_ops = gen_opc_ptr - gen_opc_buf; + + s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t)); + memset(s->op_dead_iargs, 0, nb_ops * sizeof(uint16_t)); +} +#endif + +#ifndef NDEBUG +static void dump_regs(TCGContext *s) +{ + TCGTemp *ts; + int i; + char buf[64]; + + for(i = 0; i < s->nb_temps; i++) { + ts = &s->temps[i]; + printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i)); + switch(ts->val_type) { + case TEMP_VAL_REG: + printf("%s", tcg_target_reg_names[ts->reg]); + break; + case TEMP_VAL_MEM: + printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]); + break; + case TEMP_VAL_CONST: + printf("$0x%" TCG_PRIlx, ts->val); + break; + case TEMP_VAL_DEAD: + printf("D"); + break; + default: + printf("???"); + break; + } + printf("\n"); + } + + for(i = 0; i < TCG_TARGET_NB_REGS; i++) { + if (s->reg_to_temp[i] >= 0) { + printf("%s: %s\n", + tcg_target_reg_names[i], + tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i])); + } + } +} + +static void check_regs(TCGContext *s) +{ + int reg, k; + TCGTemp *ts; + char buf[64]; + + for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { + k = s->reg_to_temp[reg]; + if (k >= 0) { + ts = &s->temps[k]; + if (ts->val_type != TEMP_VAL_REG || + ts->reg != reg) { + printf("Inconsistency for register %s:\n", + tcg_target_reg_names[reg]); + goto fail; + } + } + } + for(k = 0; k < s->nb_temps; k++) { + ts = &s->temps[k]; + if (ts->val_type == TEMP_VAL_REG && + !ts->fixed_reg && + s->reg_to_temp[ts->reg] != k) { + printf("Inconsistency for temp %s:\n", + tcg_get_arg_str_idx(s, buf, sizeof(buf), k)); + fail: + printf("reg state:\n"); + dump_regs(s); + tcg_abort(); + } + } +} +#endif + +static void temp_allocate_frame(TCGContext *s, int temp) +{ + TCGTemp *ts; + ts = &s->temps[temp]; + s->current_frame_offset = (s->current_frame_offset + sizeof(tcg_target_long) - 1) & ~(sizeof(tcg_target_long) - 1); +#ifndef VBOX + if (s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end) +#else + if ((tcg_target_long)s->current_frame_offset + sizeof(tcg_target_long) > s->frame_end) +#endif + tcg_abort(); + ts->mem_offset = s->current_frame_offset; + ts->mem_reg = s->frame_reg; + ts->mem_allocated = 1; + s->current_frame_offset += sizeof(tcg_target_long); +} + +/* free register 'reg' by spilling the corresponding temporary if necessary */ +static void tcg_reg_free(TCGContext *s, int reg) +{ + TCGTemp *ts; + int temp; + + temp = s->reg_to_temp[reg]; + if (temp != -1) { + ts = &s->temps[temp]; + assert(ts->val_type == TEMP_VAL_REG); + if (!ts->mem_coherent) { + if (!ts->mem_allocated) + temp_allocate_frame(s, temp); + tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + } + ts->val_type = TEMP_VAL_MEM; + s->reg_to_temp[reg] = -1; + } +} + +/* Allocate a register belonging to reg1 & ~reg2 */ +static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2) +{ + int i, reg; + TCGRegSet reg_ct; + + tcg_regset_andnot(reg_ct, reg1, reg2); + + /* first try free registers */ + for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) { + reg = tcg_target_reg_alloc_order[i]; + if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1) + return reg; + } + + /* XXX: do better spill choice */ + for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) { + reg = tcg_target_reg_alloc_order[i]; + if (tcg_regset_test_reg(reg_ct, reg)) { + tcg_reg_free(s, reg); + return reg; + } + } + + tcg_abort(); +} + +/* save a temporary to memory. 'allocated_regs' is used in case a + temporary registers needs to be allocated to store a constant. */ +static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs) +{ + TCGTemp *ts; + int reg; + + ts = &s->temps[temp]; + if (!ts->fixed_reg) { + switch(ts->val_type) { + case TEMP_VAL_REG: + tcg_reg_free(s, ts->reg); + break; + case TEMP_VAL_DEAD: + ts->val_type = TEMP_VAL_MEM; + break; + case TEMP_VAL_CONST: + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + allocated_regs); + if (!ts->mem_allocated) + temp_allocate_frame(s, temp); + tcg_out_movi(s, ts->type, reg, ts->val); + tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + ts->val_type = TEMP_VAL_MEM; + break; + case TEMP_VAL_MEM: + break; + default: + tcg_abort(); + } + } +} + +/* save globals to their cannonical location and assume they can be + modified be the following code. 'allocated_regs' is used in case a + temporary registers needs to be allocated to store a constant. */ +static void save_globals(TCGContext *s, TCGRegSet allocated_regs) +{ + int i; + + for(i = 0; i < s->nb_globals; i++) { + temp_save(s, i, allocated_regs); + } +} + +/* at the end of a basic block, we assume all temporaries are dead and + all globals are stored at their canonical location. */ +static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) +{ + TCGTemp *ts; + int i; + + for(i = s->nb_globals; i < s->nb_temps; i++) { + ts = &s->temps[i]; + if (ts->temp_local) { + temp_save(s, i, allocated_regs); + } else { + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = -1; + } + ts->val_type = TEMP_VAL_DEAD; + } + } + + save_globals(s, allocated_regs); +} + +#define IS_DEAD_IARG(n) ((dead_iargs >> (n)) & 1) + +static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args) +{ + TCGTemp *ots; + tcg_target_ulong val; + + ots = &s->temps[args[0]]; + val = args[1]; + + if (ots->fixed_reg) { + /* for fixed registers, we do not do any constant + propagation */ + tcg_out_movi(s, ots->type, ots->reg, val); + } else { + /* The movi is not explicitly generated here */ + if (ots->val_type == TEMP_VAL_REG) + s->reg_to_temp[ots->reg] = -1; + ots->val_type = TEMP_VAL_CONST; + ots->val = val; + } +} + +static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, + const TCGArg *args, + unsigned int dead_iargs) +{ + TCGTemp *ts, *ots; + int reg; + const TCGArgConstraint *arg_ct; + + ots = &s->temps[args[0]]; + ts = &s->temps[args[1]]; + arg_ct = &def->args_ct[0]; + + /* XXX: always mark arg dead if IS_DEAD_IARG(0) */ + if (ts->val_type == TEMP_VAL_REG) { + if (IS_DEAD_IARG(0) && !ts->fixed_reg && !ots->fixed_reg) { + /* the mov can be suppressed */ + if (ots->val_type == TEMP_VAL_REG) + s->reg_to_temp[ots->reg] = -1; + reg = ts->reg; + s->reg_to_temp[reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } else { + if (ots->val_type == TEMP_VAL_REG) { + reg = ots->reg; + } else { + reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs); + } + if (ts->reg != reg) { + tcg_out_mov(s, ots->type, reg, ts->reg); + } + } + } else if (ts->val_type == TEMP_VAL_MEM) { + if (ots->val_type == TEMP_VAL_REG) { + reg = ots->reg; + } else { + reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs); + } + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + } else if (ts->val_type == TEMP_VAL_CONST) { + if (ots->fixed_reg) { + reg = ots->reg; + tcg_out_movi(s, ots->type, reg, ts->val); + } else { + /* propagate constant */ + if (ots->val_type == TEMP_VAL_REG) + s->reg_to_temp[ots->reg] = -1; + ots->val_type = TEMP_VAL_CONST; + ots->val = ts->val; + return; + } + } else { + tcg_abort(); + } + s->reg_to_temp[reg] = args[0]; + ots->reg = reg; + ots->val_type = TEMP_VAL_REG; + ots->mem_coherent = 0; +} + +static void tcg_reg_alloc_op(TCGContext *s, + const TCGOpDef *def, TCGOpcode opc, + const TCGArg *args, + unsigned int dead_iargs) +{ + TCGRegSet allocated_regs; + int i, k, nb_iargs, nb_oargs, reg; + TCGArg arg; + const TCGArgConstraint *arg_ct; + TCGTemp *ts; + TCGArg new_args[TCG_MAX_OP_ARGS]; + int const_args[TCG_MAX_OP_ARGS]; + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + + /* copy constants */ + memcpy(new_args + nb_oargs + nb_iargs, + args + nb_oargs + nb_iargs, + sizeof(TCGArg) * def->nb_cargs); + + /* satisfy input constraints */ + tcg_regset_set(allocated_regs, s->reserved_regs); + for(k = 0; k < nb_iargs; k++) { + i = def->sorted_args[nb_oargs + k]; + arg = args[i]; + arg_ct = &def->args_ct[i]; + ts = &s->temps[arg]; + if (ts->val_type == TEMP_VAL_MEM) { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 1; + s->reg_to_temp[reg] = arg; + } else if (ts->val_type == TEMP_VAL_CONST) { + if (tcg_target_const_match(ts->val, arg_ct)) { + /* constant is OK for instruction */ + const_args[i] = 1; + new_args[i] = ts->val; + goto iarg_end; + } else { + /* need to move to a register */ + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_movi(s, ts->type, reg, ts->val); + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } + } + assert(ts->val_type == TEMP_VAL_REG); + if (arg_ct->ct & TCG_CT_IALIAS) { + if (ts->fixed_reg) { + /* if fixed register, we must allocate a new register + if the alias is not the same register */ + if (arg != args[arg_ct->alias_index]) + goto allocate_in_reg; + } else { + /* if the input is aliased to an output and if it is + not dead after the instruction, we must allocate + a new register and move it */ + if (!IS_DEAD_IARG(i - nb_oargs)) + goto allocate_in_reg; + } + } + reg = ts->reg; + if (tcg_regset_test_reg(arg_ct->u.regs, reg)) { + /* nothing to do : the constraint is satisfied */ + } else { + allocate_in_reg: + /* allocate a new register matching the constraint + and move the temporary register into it */ + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_mov(s, ts->type, reg, ts->reg); + } + new_args[i] = reg; + const_args[i] = 0; + tcg_regset_set_reg(allocated_regs, reg); + iarg_end: ; + } + + if (def->flags & TCG_OPF_BB_END) { + tcg_reg_alloc_bb_end(s, allocated_regs); + } else { + /* mark dead temporaries and free the associated registers */ + for(i = 0; i < nb_iargs; i++) { + arg = args[nb_oargs + i]; + if (IS_DEAD_IARG(i)) { + ts = &s->temps[arg]; + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } + } + } + + if (def->flags & TCG_OPF_CALL_CLOBBER) { + /* XXX: permit generic clobber register list ? */ + for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { + if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) { + tcg_reg_free(s, reg); + } + } + /* XXX: for load/store we could do that only for the slow path + (i.e. when a memory callback is called) */ + + /* store globals and free associated registers (we assume the insn + can modify any global. */ + save_globals(s, allocated_regs); + } + + /* satisfy the output constraints */ + tcg_regset_set(allocated_regs, s->reserved_regs); + for(k = 0; k < nb_oargs; k++) { + i = def->sorted_args[k]; + arg = args[i]; + arg_ct = &def->args_ct[i]; + ts = &s->temps[arg]; + if (arg_ct->ct & TCG_CT_ALIAS) { + reg = new_args[arg_ct->alias_index]; + } else { + /* if fixed register, we try to use it */ + reg = ts->reg; + if (ts->fixed_reg && + tcg_regset_test_reg(arg_ct->u.regs, reg)) { + goto oarg_end; + } + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + } + tcg_regset_set_reg(allocated_regs, reg); + /* if a fixed register is used, then a move will be done afterwards */ + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + /* temp value is modified, so the value kept in memory is + potentially not the same */ + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } + oarg_end: + new_args[i] = reg; + } + } + + /* emit instruction */ + tcg_out_op(s, opc, new_args, const_args); + + /* move the outputs in the correct register if needed */ + for(i = 0; i < nb_oargs; i++) { + ts = &s->temps[args[i]]; + reg = new_args[i]; + if (ts->fixed_reg && ts->reg != reg) { + tcg_out_mov(s, ts->type, ts->reg, reg); + } + } +} + +#ifdef TCG_TARGET_STACK_GROWSUP +#define STACK_DIR(x) (-(x)) +#else +#define STACK_DIR(x) (x) +#endif + +static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, + TCGOpcode opc, const TCGArg *args, + unsigned int dead_iargs) +{ + int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; + TCGArg arg, func_arg; + TCGTemp *ts; + tcg_target_long stack_offset, call_stack_size, func_addr; + int const_func_arg, allocate_args; + TCGRegSet allocated_regs; + const TCGArgConstraint *arg_ct; + + arg = *args++; + + nb_oargs = arg >> 16; + nb_iargs = arg & 0xffff; + nb_params = nb_iargs - 1; + + flags = args[nb_oargs + nb_iargs]; + + nb_regs = tcg_target_get_call_iarg_regs_count(flags); + if (nb_regs > nb_params) + nb_regs = nb_params; + + /* assign stack slots first */ + /* XXX: preallocate call stack */ + call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long); + call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & + ~(TCG_TARGET_STACK_ALIGN - 1); + allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); + if (allocate_args) { + tcg_out_addi(s, TCG_REG_CALL_STACK, -STACK_DIR(call_stack_size)); + } + + stack_offset = TCG_TARGET_CALL_STACK_OFFSET; + for(i = nb_regs; i < nb_params; i++) { + arg = args[nb_oargs + i]; +#ifdef TCG_TARGET_STACK_GROWSUP + stack_offset -= sizeof(tcg_target_long); +#endif + if (arg != TCG_CALL_DUMMY_ARG) { + ts = &s->temps[arg]; + if (ts->val_type == TEMP_VAL_REG) { + tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset); + } else if (ts->val_type == TEMP_VAL_MEM) { + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + s->reserved_regs); + /* XXX: not correct if reading values from the stack */ + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset); + } else if (ts->val_type == TEMP_VAL_CONST) { + reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + s->reserved_regs); + /* XXX: sign extend may be needed on some targets */ + tcg_out_movi(s, ts->type, reg, ts->val); + tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset); + } else { + tcg_abort(); + } + } +#ifndef TCG_TARGET_STACK_GROWSUP + stack_offset += sizeof(tcg_target_long); +#endif + } + + /* assign input registers */ + tcg_regset_set(allocated_regs, s->reserved_regs); + for(i = 0; i < nb_regs; i++) { + arg = args[nb_oargs + i]; + if (arg != TCG_CALL_DUMMY_ARG) { + ts = &s->temps[arg]; + reg = tcg_target_call_iarg_regs[i]; + tcg_reg_free(s, reg); + if (ts->val_type == TEMP_VAL_REG) { + if (ts->reg != reg) { + tcg_out_mov(s, ts->type, reg, ts->reg); + } + } else if (ts->val_type == TEMP_VAL_MEM) { + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + } else if (ts->val_type == TEMP_VAL_CONST) { + /* XXX: sign extend ? */ + tcg_out_movi(s, ts->type, reg, ts->val); + } else { + tcg_abort(); + } + tcg_regset_set_reg(allocated_regs, reg); + } + } + + /* assign function address */ + func_arg = args[nb_oargs + nb_iargs - 1]; + arg_ct = &def->args_ct[0]; + ts = &s->temps[func_arg]; + func_addr = ts->val; + const_func_arg = 0; + if (ts->val_type == TEMP_VAL_MEM) { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + func_arg = reg; + tcg_regset_set_reg(allocated_regs, reg); + } else if (ts->val_type == TEMP_VAL_REG) { + reg = ts->reg; + if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_mov(s, ts->type, reg, ts->reg); + } + func_arg = reg; + tcg_regset_set_reg(allocated_regs, reg); + } else if (ts->val_type == TEMP_VAL_CONST) { + if (tcg_target_const_match(func_addr, arg_ct)) { + const_func_arg = 1; + func_arg = func_addr; + } else { + reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + tcg_out_movi(s, ts->type, reg, func_addr); + func_arg = reg; + tcg_regset_set_reg(allocated_regs, reg); + } + } else { + tcg_abort(); + } + + + /* mark dead temporaries and free the associated registers */ + for(i = 0; i < nb_iargs; i++) { + arg = args[nb_oargs + i]; + if (IS_DEAD_IARG(i)) { + ts = &s->temps[arg]; + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } + } + } + + /* clobber call registers */ + for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { + if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) { + tcg_reg_free(s, reg); + } + } + + /* store globals and free associated registers (we assume the call + can modify any global. */ + if (!(flags & TCG_CALL_CONST)) { + save_globals(s, allocated_regs); + } + + tcg_out_op(s, opc, &func_arg, &const_func_arg); + + if (allocate_args) { + tcg_out_addi(s, TCG_REG_CALL_STACK, STACK_DIR(call_stack_size)); + } + + /* assign output registers and emit moves if needed */ + for(i = 0; i < nb_oargs; i++) { + arg = args[i]; + ts = &s->temps[arg]; + reg = tcg_target_call_oarg_regs[i]; + assert(s->reg_to_temp[reg] == -1); + if (ts->fixed_reg) { + if (ts->reg != reg) { + tcg_out_mov(s, ts->type, ts->reg, reg); + } + } else { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + } + } + + return nb_iargs + nb_oargs + def->nb_cargs + 1; +} + +#ifdef CONFIG_PROFILER + +static int64_t tcg_table_op_count[NB_OPS]; + +static void dump_op_count(void) +{ + int i; + FILE *f; + f = fopen("/tmp/op.log", "w"); + for(i = INDEX_op_end; i < NB_OPS; i++) { + fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, tcg_table_op_count[i]); + } + fclose(f); +} +#endif + + +static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, + intptr_t search_pc) +{ + TCGOpcode opc; + int op_index; + const TCGOpDef *def; + unsigned int dead_iargs; + const TCGArg *args; + +#ifdef DEBUG_DISAS + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) { + qemu_log("OP:\n"); + tcg_dump_ops(s, logfile); + qemu_log("\n"); + } +#endif + +#ifdef CONFIG_PROFILER + s->la_time -= profile_getclock(); +#endif + tcg_liveness_analysis(s); +#ifdef CONFIG_PROFILER + s->la_time += profile_getclock(); +#endif + +#ifdef DEBUG_DISAS +# ifdef USE_LIVENESS_ANALYSIS /* vbox */ + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) { + qemu_log("OP after liveness analysis:\n"); + tcg_dump_ops(s, logfile); + qemu_log("\n"); + } +# endif /* USE_LIVENESS_ANALYSIS - vbox */ +#endif + + tcg_reg_alloc_start(s); + + s->code_buf = gen_code_buf; + s->code_ptr = gen_code_buf; + + args = gen_opparam_buf; + op_index = 0; + + for(;;) { + opc = gen_opc_buf[op_index]; +#ifdef CONFIG_PROFILER + tcg_table_op_count[opc]++; +#endif + def = &tcg_op_defs[opc]; +#if 0 + printf("%s: %d %d %d\n", def->name, + def->nb_oargs, def->nb_iargs, def->nb_cargs); + // dump_regs(s); +#endif + switch(opc) { + case INDEX_op_mov_i32: +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_mov_i64: +#endif + dead_iargs = s->op_dead_iargs[op_index]; + tcg_reg_alloc_mov(s, def, args, dead_iargs); + break; + case INDEX_op_movi_i32: +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_movi_i64: +#endif + tcg_reg_alloc_movi(s, args); + break; + case INDEX_op_debug_insn_start: + /* debug instruction */ + break; + case INDEX_op_nop: + case INDEX_op_nop1: + case INDEX_op_nop2: + case INDEX_op_nop3: + break; + case INDEX_op_nopn: + args += args[0]; + goto next; + case INDEX_op_discard: + { + TCGTemp *ts; + ts = &s->temps[args[0]]; + /* mark the temporary as dead */ + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) + s->reg_to_temp[ts->reg] = -1; + ts->val_type = TEMP_VAL_DEAD; + } + } + break; + case INDEX_op_set_label: + tcg_reg_alloc_bb_end(s, s->reserved_regs); + tcg_out_label(s, args[0], (intptr_t)s->code_ptr); + break; + case INDEX_op_call: + dead_iargs = s->op_dead_iargs[op_index]; + args += tcg_reg_alloc_call(s, def, opc, args, dead_iargs); + goto next; + case INDEX_op_end: + goto the_end; + default: + /* Note: in order to speed up the code, it would be much + faster to have specialized register allocator functions for + some common argument patterns */ + dead_iargs = s->op_dead_iargs[op_index]; + tcg_reg_alloc_op(s, def, opc, args, dead_iargs); + break; + } + args += def->nb_args; + next: + if (search_pc >= 0 && search_pc < s->code_ptr - gen_code_buf) { + return op_index; + } + op_index++; +#ifndef NDEBUG + check_regs(s); +#endif + } + the_end: + return -1; +} + +int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf) +{ +#ifdef CONFIG_PROFILER + { + int n; + n = (gen_opc_ptr - gen_opc_buf); + s->op_count += n; + if (n > s->op_count_max) + s->op_count_max = n; + + s->temp_count += s->nb_temps; + if (s->nb_temps > s->temp_count_max) + s->temp_count_max = s->nb_temps; + } +#endif + + tcg_gen_code_common(s, gen_code_buf, -1); + + /* flush instruction cache */ + flush_icache_range((uintptr_t)gen_code_buf, + (uintptr_t)s->code_ptr); + return s->code_ptr - gen_code_buf; +} + +/* Return the index of the micro operation such as the pc after is < + offset bytes from the start of the TB. The contents of gen_code_buf must + not be changed, though writing the same values is ok. + Return -1 if not found. */ +int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, intptr_t offset) +{ + return tcg_gen_code_common(s, gen_code_buf, offset); +} + +#ifdef CONFIG_PROFILER +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +{ + TCGContext *s = &tcg_ctx; + int64_t tot; + + tot = s->interm_time + s->code_time; + cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", + tot, tot / 2.4e9); + cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", + s->tb_count, + s->tb_count1 - s->tb_count, + s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0); + cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", + s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max); + cpu_fprintf(f, "deleted ops/TB %0.2f\n", + s->tb_count ? + (double)s->del_op_count / s->tb_count : 0); + cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", + s->tb_count ? + (double)s->temp_count / s->tb_count : 0, + s->temp_count_max); + + cpu_fprintf(f, "cycles/op %0.1f\n", + s->op_count ? (double)tot / s->op_count : 0); + cpu_fprintf(f, "cycles/in byte %0.1f\n", + s->code_in_len ? (double)tot / s->code_in_len : 0); + cpu_fprintf(f, "cycles/out byte %0.1f\n", + s->code_out_len ? (double)tot / s->code_out_len : 0); + if (tot == 0) + tot = 1; + cpu_fprintf(f, " gen_interm time %0.1f%%\n", + (double)s->interm_time / tot * 100.0); + cpu_fprintf(f, " gen_code time %0.1f%%\n", + (double)s->code_time / tot * 100.0); + cpu_fprintf(f, "liveness/code time %0.1f%%\n", + (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); + cpu_fprintf(f, "cpu_restore count %" PRId64 "\n", + s->restore_count); + cpu_fprintf(f, " avg cycles %0.1f\n", + s->restore_count ? (double)s->restore_time / s->restore_count : 0); + + dump_op_count(); +} +#else +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)) +{ + cpu_fprintf(f, "[TCG profiler not compiled]\n"); +} +#endif diff --git a/src/recompiler/tcg/tcg.h b/src/recompiler/tcg/tcg.h new file mode 100644 index 00000000..819fa6c7 --- /dev/null +++ b/src/recompiler/tcg/tcg.h @@ -0,0 +1,512 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "tcg-target.h" +#include "tcg-runtime.h" + +#if TCG_TARGET_REG_BITS == 32 +typedef int32_t tcg_target_long; +typedef uint32_t tcg_target_ulong; +#define TCG_PRIlx PRIx32 +#define TCG_PRIld PRId32 +#elif TCG_TARGET_REG_BITS == 64 +typedef int64_t tcg_target_long; +typedef uint64_t tcg_target_ulong; +#define TCG_PRIlx PRIx64 +#define TCG_PRIld PRId64 +#else +#error unsupported +#endif + +#if TCG_TARGET_NB_REGS <= 32 +typedef uint32_t TCGRegSet; +#elif TCG_TARGET_NB_REGS <= 64 +typedef uint64_t TCGRegSet; +#else +#error unsupported +#endif + +typedef enum TCGOpcode { +#define DEF(name, oargs, iargs, cargs, flags) INDEX_op_ ## name, +#include "tcg-opc.h" +#undef DEF + NB_OPS, +} TCGOpcode; + +#define tcg_regset_clear(d) (d) = 0 +#define tcg_regset_set(d, s) (d) = (s) +#define tcg_regset_set32(d, reg, val32) (d) |= (val32) << (reg) +#define tcg_regset_set_reg(d, r) (d) |= 1L << (r) +#define tcg_regset_reset_reg(d, r) (d) &= ~(1L << (r)) +#define tcg_regset_test_reg(d, r) (((d) >> (r)) & 1) +#define tcg_regset_or(d, a, b) (d) = (a) | (b) +#define tcg_regset_and(d, a, b) (d) = (a) & (b) +#define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b) +#define tcg_regset_not(d, a) (d) = ~(a) + +typedef struct TCGRelocation { + struct TCGRelocation *next; + int type; + uint8_t *ptr; + tcg_target_long addend; +} TCGRelocation; + +typedef struct TCGLabel { + int has_value; + union { + tcg_target_ulong value; + TCGRelocation *first_reloc; + } u; +} TCGLabel; + +typedef struct TCGPool { + struct TCGPool *next; + int size; + uint8_t data[0] __attribute__ ((aligned)); +} TCGPool; + +#define TCG_POOL_CHUNK_SIZE 32768 + +#define TCG_MAX_LABELS 512 + +#define TCG_MAX_TEMPS 512 + +/* when the size of the arguments of a called function is smaller than + this value, they are statically allocated in the TB stack frame */ +#define TCG_STATIC_CALL_ARGS_SIZE 128 + +typedef enum TCGType { + TCG_TYPE_I32, + TCG_TYPE_I64, + TCG_TYPE_COUNT, /* number of different types */ + + /* An alias for the size of the host register. */ +#if TCG_TARGET_REG_BITS == 32 + TCG_TYPE_REG = TCG_TYPE_I32, +#else + TCG_TYPE_REG = TCG_TYPE_I64, +#endif + + /* An alias for the size of the native pointer. We don't currently + support any hosts with 64-bit registers and 32-bit pointers. */ + TCG_TYPE_PTR = TCG_TYPE_REG, + + /* An alias for the size of the target "long", aka register. */ +#if TARGET_LONG_BITS == 64 + TCG_TYPE_TL = TCG_TYPE_I64, +#else + TCG_TYPE_TL = TCG_TYPE_I32, +#endif +} TCGType; + +typedef tcg_target_ulong TCGArg; + +/* Define a type and accessor macros for varables. Using a struct is + nice because it gives some level of type safely. Ideally the compiler + be able to see through all this. However in practice this is not true, + expecially on targets with braindamaged ABIs (e.g. i386). + We use plain int by default to avoid this runtime overhead. + Users of tcg_gen_* don't need to know about any of this, and should + treat TCGv as an opaque type. + In additon we do typechecking for different types of variables. TCGv_i32 + and TCGv_i64 are 32/64-bit variables respectively. TCGv and TCGv_ptr + are aliases for target_ulong and host pointer sized values respectively. + */ + +#ifdef CONFIG_DEBUG_TCG +#define DEBUG_TCGV 1 +#endif + +#ifdef DEBUG_TCGV + +typedef struct +{ + int i32; +} TCGv_i32; + +typedef struct +{ + int i64; +} TCGv_i64; + +#define MAKE_TCGV_I32(i) __extension__ \ + ({ TCGv_i32 make_tcgv_tmp = {i}; make_tcgv_tmp;}) +#define MAKE_TCGV_I64(i) __extension__ \ + ({ TCGv_i64 make_tcgv_tmp = {i}; make_tcgv_tmp;}) +#define GET_TCGV_I32(t) ((t).i32) +#define GET_TCGV_I64(t) ((t).i64) +#if TCG_TARGET_REG_BITS == 32 +#define TCGV_LOW(t) MAKE_TCGV_I32(GET_TCGV_I64(t)) +#define TCGV_HIGH(t) MAKE_TCGV_I32(GET_TCGV_I64(t) + 1) +#endif + +#else /* !DEBUG_TCGV */ + +typedef int TCGv_i32; +typedef int TCGv_i64; +#define MAKE_TCGV_I32(x) (x) +#define MAKE_TCGV_I64(x) (x) +#define GET_TCGV_I32(t) (t) +#define GET_TCGV_I64(t) (t) + +#if TCG_TARGET_REG_BITS == 32 +#define TCGV_LOW(t) (t) +#define TCGV_HIGH(t) ((t) + 1) +#endif + +#endif /* DEBUG_TCGV */ + +#define TCGV_EQUAL_I32(a, b) (GET_TCGV_I32(a) == GET_TCGV_I32(b)) +#define TCGV_EQUAL_I64(a, b) (GET_TCGV_I64(a) == GET_TCGV_I64(b)) + +/* Dummy definition to avoid compiler warnings. */ +#define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1) +#define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1) + +/* call flags */ +#define TCG_CALL_TYPE_MASK 0x000f +#define TCG_CALL_TYPE_STD 0x0000 /* standard C call */ +#define TCG_CALL_TYPE_REGPARM_1 0x0001 /* i386 style regparm call (1 reg) */ +#define TCG_CALL_TYPE_REGPARM_2 0x0002 /* i386 style regparm call (2 regs) */ +#define TCG_CALL_TYPE_REGPARM 0x0003 /* i386 style regparm call (3 regs) */ +/* A pure function only reads its arguments and TCG global variables + and cannot raise exceptions. Hence a call to a pure function can be + safely suppressed if the return value is not used. */ +#define TCG_CALL_PURE 0x0010 +/* A const function only reads its arguments and does not use TCG + global variables. Hence a call to such a function does not + save TCG global variables back to their canonical location. */ +#define TCG_CALL_CONST 0x0020 + +/* used to align parameters */ +#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1) +#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1)) + +typedef enum { + TCG_COND_EQ, + TCG_COND_NE, + TCG_COND_LT, + TCG_COND_GE, + TCG_COND_LE, + TCG_COND_GT, + /* unsigned */ + TCG_COND_LTU, + TCG_COND_GEU, + TCG_COND_LEU, + TCG_COND_GTU, +} TCGCond; + +/* Invert the sense of the comparison. */ +static inline TCGCond tcg_invert_cond(TCGCond c) +{ + return (TCGCond)(c ^ 1); +} + +/* Swap the operands in a comparison. */ +static inline TCGCond tcg_swap_cond(TCGCond c) +{ + int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15); + return (TCGCond)(c ^ mask); +} + +static inline TCGCond tcg_unsigned_cond(TCGCond c) +{ + return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c); +} + +#define TEMP_VAL_DEAD 0 +#define TEMP_VAL_REG 1 +#define TEMP_VAL_MEM 2 +#define TEMP_VAL_CONST 3 + +/* XXX: optimize memory layout */ +typedef struct TCGTemp { + TCGType base_type; + TCGType type; + int val_type; + int reg; + tcg_target_long val; + int mem_reg; + tcg_target_long mem_offset; + unsigned int fixed_reg:1; + unsigned int mem_coherent:1; + unsigned int mem_allocated:1; + unsigned int temp_local:1; /* If true, the temp is saved accross + basic blocks. Otherwise, it is not + preserved accross basic blocks. */ + unsigned int temp_allocated:1; /* never used for code gen */ + /* index of next free temp of same base type, -1 if end */ + int next_free_temp; + const char *name; +} TCGTemp; + +typedef struct TCGHelperInfo { + tcg_target_ulong func; + const char *name; +} TCGHelperInfo; + +typedef struct TCGContext TCGContext; + +struct TCGContext { + uint8_t *pool_cur, *pool_end; + TCGPool *pool_first, *pool_current; + TCGLabel *labels; + int nb_labels; + TCGTemp *temps; /* globals first, temps after */ + int nb_globals; + int nb_temps; + /* index of free temps, -1 if none */ + int first_free_temp[TCG_TYPE_COUNT * 2]; + + /* goto_tb support */ + uint8_t *code_buf; + uintptr_t *tb_next; + uint16_t *tb_next_offset; + uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */ + + /* liveness analysis */ + uint16_t *op_dead_iargs; /* for each operation, each bit tells if the + corresponding input argument is dead */ + + /* tells in which temporary a given register is. It does not take + into account fixed registers */ + int reg_to_temp[TCG_TARGET_NB_REGS]; + TCGRegSet reserved_regs; + tcg_target_long current_frame_offset; + tcg_target_long frame_start; + tcg_target_long frame_end; + int frame_reg; + + uint8_t *code_ptr; + TCGTemp static_temps[TCG_MAX_TEMPS]; + + TCGHelperInfo *helpers; + int nb_helpers; + int allocated_helpers; + int helpers_sorted; + +#ifdef CONFIG_PROFILER + /* profiling info */ + int64_t tb_count1; + int64_t tb_count; + int64_t op_count; /* total insn count */ + int op_count_max; /* max insn per TB */ + int64_t temp_count; + int temp_count_max; + int64_t del_op_count; + int64_t code_in_len; + int64_t code_out_len; + int64_t interm_time; + int64_t code_time; + int64_t la_time; + int64_t restore_count; + int64_t restore_time; +#endif +}; + +extern TCGContext tcg_ctx; +extern uint16_t *gen_opc_ptr; +extern TCGArg *gen_opparam_ptr; +extern uint16_t gen_opc_buf[]; +extern TCGArg gen_opparam_buf[]; + +/* pool based memory allocation */ + +void *tcg_malloc_internal(TCGContext *s, int size); +void tcg_pool_reset(TCGContext *s); +void tcg_pool_delete(TCGContext *s); + +static inline void *tcg_malloc(int size) +{ + TCGContext *s = &tcg_ctx; + uint8_t *ptr, *ptr_end; + size = (size + sizeof(void *) - 1) & ~(sizeof(void *) - 1); + ptr = s->pool_cur; + ptr_end = ptr + size; + if (unlikely(ptr_end > s->pool_end)) { + return tcg_malloc_internal(&tcg_ctx, size); + } else { + s->pool_cur = ptr_end; + return ptr; + } +} + +void tcg_context_init(TCGContext *s); +void tcg_prologue_init(TCGContext *s); +void tcg_func_start(TCGContext *s); + +int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf); +int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, intptr_t offset); + +void tcg_set_frame(TCGContext *s, int reg, + tcg_target_long start, tcg_target_long size); + +TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name); +TCGv_i32 tcg_global_mem_new_i32(int reg, tcg_target_long offset, + const char *name); +TCGv_i32 tcg_temp_new_internal_i32(int temp_local); +static inline TCGv_i32 tcg_temp_new_i32(void) +{ + return tcg_temp_new_internal_i32(0); +} +static inline TCGv_i32 tcg_temp_local_new_i32(void) +{ + return tcg_temp_new_internal_i32(1); +} +void tcg_temp_free_i32(TCGv_i32 arg); +char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg); + +TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name); +TCGv_i64 tcg_global_mem_new_i64(int reg, tcg_target_long offset, + const char *name); +TCGv_i64 tcg_temp_new_internal_i64(int temp_local); +static inline TCGv_i64 tcg_temp_new_i64(void) +{ + return tcg_temp_new_internal_i64(0); +} +static inline TCGv_i64 tcg_temp_local_new_i64(void) +{ + return tcg_temp_new_internal_i64(1); +} +void tcg_temp_free_i64(TCGv_i64 arg); +char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg); + +void tcg_dump_info(FILE *f, + int (*cpu_fprintf)(FILE *f, const char *fmt, ...)); + +#define TCG_CT_ALIAS 0x80 +#define TCG_CT_IALIAS 0x40 +#define TCG_CT_REG 0x01 +#define TCG_CT_CONST 0x02 /* any constant of register size */ + +typedef struct TCGArgConstraint { + uint16_t ct; + uint8_t alias_index; + union { + TCGRegSet regs; + } u; +} TCGArgConstraint; + +#define TCG_MAX_OP_ARGS 16 + +#define TCG_OPF_BB_END 0x01 /* instruction defines the end of a basic + block */ +#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers + and potentially update globals. */ +#define TCG_OPF_SIDE_EFFECTS 0x04 /* instruction has side effects : it + cannot be removed if its output + are not used */ + +typedef struct TCGOpDef { + const char *name; + uint8_t nb_oargs, nb_iargs, nb_cargs, nb_args; + uint8_t flags; + TCGArgConstraint *args_ct; + int *sorted_args; +#if defined(CONFIG_DEBUG_TCG) + int used; +#endif +} TCGOpDef; + +typedef struct TCGTargetOpDef { + TCGOpcode op; + const char *args_ct_str[TCG_MAX_OP_ARGS]; +} TCGTargetOpDef; + +#ifndef VBOX +#define tcg_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error\n", __FILE__, __LINE__);\ + abort();\ +} while (0) +#else /* VBOX */ +# define tcg_abort() \ + do {\ + remAbort(-1, "TCG fatal error: "__FILE__":" RT_XSTR(__LINE__)); \ + } while (0) +extern void qemu_qsort(void* base, size_t nmemb, size_t size, + int(*compar)(const void*, const void*)); +#define tcg_exit(status) \ + do {\ + remAbort(-1, "TCG exit: "__FILE__":" RT_XSTR(__LINE__));\ + } while (0) +#endif /* VBOX */ + +void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); + +#if TCG_TARGET_REG_BITS == 32 +#define tcg_const_ptr tcg_const_i32 +#define tcg_add_ptr tcg_add_i32 +#define tcg_sub_ptr tcg_sub_i32 +#define TCGv_ptr TCGv_i32 +#define GET_TCGV_PTR GET_TCGV_I32 +#define tcg_global_reg_new_ptr tcg_global_reg_new_i32 +#define tcg_global_mem_new_ptr tcg_global_mem_new_i32 +#define tcg_temp_new_ptr tcg_temp_new_i32 +#define tcg_temp_free_ptr tcg_temp_free_i32 +#else +#define tcg_const_ptr tcg_const_i64 +#define tcg_add_ptr tcg_add_i64 +#define tcg_sub_ptr tcg_sub_i64 +#define TCGv_ptr TCGv_i64 +#define GET_TCGV_PTR GET_TCGV_I64 +#define tcg_global_reg_new_ptr tcg_global_reg_new_i64 +#define tcg_global_mem_new_ptr tcg_global_mem_new_i64 +#define tcg_temp_new_ptr tcg_temp_new_i64 +#define tcg_temp_free_ptr tcg_temp_free_i64 +#endif + +void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, + int sizemask, TCGArg ret, int nargs, TCGArg *args); + +void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1, + int c, int right, int arith); + +/* only used for debugging purposes */ +void tcg_register_helper(void *func, const char *name); +const char *tcg_helper_get_name(TCGContext *s, void *func); +void tcg_dump_ops(TCGContext *s, FILE *outfile); + +void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf); +TCGv_i32 tcg_const_i32(int32_t val); +TCGv_i64 tcg_const_i64(int64_t val); +TCGv_i32 tcg_const_local_i32(int32_t val); +TCGv_i64 tcg_const_local_i64(int64_t val); + +#ifndef VBOX +extern uint8_t code_gen_prologue[]; +#else +extern uint8_t *code_gen_prologue; +#endif +#if defined(_ARCH_PPC) && !defined(_ARCH_PPC64) +#define tcg_qemu_tb_exec(tb_ptr) \ + ((intptr_t REGPARM __attribute__ ((longcall)) (*)(void *))code_gen_prologue)(tb_ptr) +#else +# if defined(VBOX) && defined(GCC_WITH_BUGGY_REGPARM) && !defined(__MINGW64__) +# define tcg_qemu_tb_exec(tb_ptr, ret) \ + __asm__ __volatile__("call *%%ecx" : "=a"(ret) : "a"(tb_ptr), "c" (&code_gen_prologue[0]) : "memory", "%edx", "cc") +# else +#define tcg_qemu_tb_exec(tb_ptr) ((intptr_t REGPARM (*)(void *))code_gen_prologue)(tb_ptr) +# endif +#endif |